ref: ba44e1753b26420a59c9065f10d7da07349552e7
dir: /tables/text_compression.py/
# Character table for message text.  The position of an entry in this list is
# the byte code that stands for it in an encoded string; decode_strings looks
# up every code below 0x67 here.  Multi-character entries such as "[Ankh]" are
# emitted verbatim into the decoded text.
kTextAlphabet = [
    # 0x00-0x19: upper-case letters
    "A", "B", "C", "D", "E", "F", "G", "H",
    "I", "J", "K", "L", "M", "N", "O", "P",
    "Q", "R", "S", "T", "U", "V", "W", "X",
    "Y", "Z",
    # 0x1A-0x33: lower-case letters
    "a", "b", "c", "d", "e", "f", "g", "h",
    "i", "j", "k", "l", "m", "n", "o", "p",
    "q", "r", "s", "t", "u", "v", "w", "x",
    "y", "z",
    # 0x34-0x3D: digits
    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
    # 0x3E-0x42
    "!", "?", "-", ".", ",",
    # 0x43-0x46
    "[...]", ">", "(", ")",
    # 0x47-0x4B
    "[Ankh]", "[Waves]", "[Snake]", "[LinkL]", "[LinkR]",
    # 0x4C-0x51
    "\"", "[Up]", "[Down]", "[Left]", "[Right]", "'",
    # 0x52-0x58
    "[1HeartL]", "[1HeartR]", "[2HeartL]", "[3HeartL]",
    "[3HeartR]", "[4HeartL]", "[4HeartR]",
    # 0x59-0x5E
    " ", "<", "[A]", "[B]", "[X]", "[Y]",
]
# Encoded size in bytes of each command code 0x67..0x7F, indexed by
# (code - 0x67).  A length of 2 means the command byte is followed by one
# operand byte.  The trailing comments give the kText_CommandNames entries at
# the same indexes.
kText_CommandLengths = [
    1, 1, 1, 1,  # NextPic, Choose, Item, Name
    2, 2, 2, 2,  # Window, Number, Position, ScrollSpd
    1, 1, 1, 1,  # Selchg, Crash, Choose3, Choose2
    1, 1, 1, 1,  # Scroll, 1, 2, 3
    2, 2, 2, 2,  # Color, Wait, Sound, Speed
    1, 1, 1, 1,  # Mark, Mark2, Clear, Waitkey
    1,           # EndMessage
]
# Names of the command codes 0x67..0x7F, indexed by (code - 0x67).
# decode_strings renders a command as "[Name]", or as "[Name NN]" when the
# command carries an operand byte.
kText_CommandNames = [
    "NextPic", "Choose", "Item", "Name",          # 0x67-0x6A
    "Window", "Number", "Position", "ScrollSpd",  # 0x6B-0x6E
    "Selchg", "Crash", "Choose3", "Choose2",      # 0x6F-0x72
    "Scroll", "1", "2", "3",                      # 0x73-0x76
    "Color", "Wait", "Sound", "Speed",            # 0x77-0x7A
    "Mark", "Mark2", "Clear", "Waitkey",          # 0x7B-0x7E
    "EndMessage",                                 # 0x7F
]
# Flat storage for the dictionary of common substrings.  Every value is an
# index into kTextAlphabet; kTextDictionary_Idx gives the offset at which each
# entry starts.  Entry n is referenced from encoded text by the byte code
# 0x88 + n.  Each line below holds one entry; the trailing comment shows that
# code and the text it expands to.
kTextDictionary = [ 0x59, 0x59, 0x59, 0x59,  # 0x88 "    "
0x59, 0x59, 0x59,  # 0x89 "   "
0x59, 0x59,  # 0x8A "  "
0x51, 0x2c, 0x59,  # 0x8B "'s "
0x1a, 0x27, 0x1d, 0x59,  # 0x8C "and "
0x1a, 0x2b, 0x1e, 0x59,  # 0x8D "are "
0x1a, 0x25, 0x25, 0x59,  # 0x8E "all "
0x1a, 0x22, 0x27,  # 0x8F "ain"
0x1a, 0x27, 0x1d,  # 0x90 "and"
0x1a, 0x2d, 0x59,  # 0x91 "at "
0x1a, 0x2c, 0x2d,  # 0x92 "ast"
0x1a, 0x27,  # 0x93 "an"
0x1a, 0x2d,  # 0x94 "at"
0x1b, 0x25, 0x1e,  # 0x95 "ble"
0x1b, 0x1a,  # 0x96 "ba"
0x1b, 0x1e,  # 0x97 "be"
0x1b, 0x28,  # 0x98 "bo"
0x1c, 0x1a, 0x27, 0x59,  # 0x99 "can "
0x1c, 0x21, 0x1e,  # 0x9A "che"
0x1c, 0x28, 0x26,  # 0x9B "com"
0x1c, 0x24,  # 0x9C "ck"
0x1d, 0x1e, 0x2c,  # 0x9D "des"
0x1d, 0x22,  # 0x9E "di"
0x1d, 0x28,  # 0x9F "do"
0x1e, 0x27, 0x59,  # 0xA0 "en "
0x1e, 0x2b, 0x59,  # 0xA1 "er "
0x1e, 0x1a, 0x2b,  # 0xA2 "ear"
0x1e, 0x27, 0x2d,  # 0xA3 "ent"
0x1e, 0x1d, 0x59,  # 0xA4 "ed "
0x1e, 0x27,  # 0xA5 "en"
0x1e, 0x2b,  # 0xA6 "er"
0x1e, 0x2f,  # 0xA7 "ev"
0x1f, 0x28, 0x2b,  # 0xA8 "for"
0x1f, 0x2b, 0x28,  # 0xA9 "fro"
0x20, 0x22, 0x2f, 0x1e, 0x59,  # 0xAA "give "
0x20, 0x1e, 0x2d,  # 0xAB "get"
0x20, 0x28,  # 0xAC "go"
0x21, 0x1a, 0x2f, 0x1e,  # 0xAD "have"
0x21, 0x1a, 0x2c,  # 0xAE "has"
0x21, 0x1e, 0x2b,  # 0xAF "her"
0x21, 0x22,  # 0xB0 "hi"
0x21, 0x1a,  # 0xB1 "ha"
0x22, 0x20, 0x21, 0x2d, 0x59,  # 0xB2 "ight "
0x22, 0x27, 0x20, 0x59,  # 0xB3 "ing "
0x22, 0x27,  # 0xB4 "in"
0x22, 0x2c,  # 0xB5 "is"
0x22, 0x2d,  # 0xB6 "it"
0x23, 0x2e, 0x2c, 0x2d,  # 0xB7 "just"
0x24, 0x27, 0x28, 0x30,  # 0xB8 "know"
0x25, 0x32, 0x59,  # 0xB9 "ly "
0x25, 0x1a,  # 0xBA "la"
0x25, 0x28,  # 0xBB "lo"
0x26, 0x1a, 0x27,  # 0xBC "man"
0x26, 0x1a,  # 0xBD "ma"
0x26, 0x1e,  # 0xBE "me"
0x26, 0x2e,  # 0xBF "mu"
0x27, 0x51, 0x2d, 0x59,  # 0xC0 "n't "
0x27, 0x28, 0x27,  # 0xC1 "non"
0x27, 0x28, 0x2d,  # 0xC2 "not"
0x28, 0x29, 0x1e, 0x27,  # 0xC3 "open"
0x28, 0x2e, 0x27, 0x1d,  # 0xC4 "ound"
0x28, 0x2e, 0x2d, 0x59,  # 0xC5 "out "
0x28, 0x1f,  # 0xC6 "of"
0x28, 0x27,  # 0xC7 "on"
0x28, 0x2b,  # 0xC8 "or"
0x29, 0x1e, 0x2b,  # 0xC9 "per"
0x29, 0x25, 0x1e,  # 0xCA "ple"
0x29, 0x28, 0x30,  # 0xCB "pow"
0x29, 0x2b, 0x28,  # 0xCC "pro"
0x2b, 0x1e, 0x59,  # 0xCD "re "
0x2b, 0x1e,  # 0xCE "re"
0x2c, 0x28, 0x26, 0x1e,  # 0xCF "some"
0x2c, 0x1e,  # 0xD0 "se"
0x2c, 0x21,  # 0xD1 "sh"
0x2c, 0x28,  # 0xD2 "so"
0x2c, 0x2d,  # 0xD3 "st"
0x2d, 0x1e, 0x2b, 0x59,  # 0xD4 "ter "
0x2d, 0x21, 0x22, 0x27,  # 0xD5 "thin"
0x2d, 0x1e, 0x2b,  # 0xD6 "ter"
0x2d, 0x21, 0x1a,  # 0xD7 "tha"
0x2d, 0x21, 0x1e,  # 0xD8 "the"
0x2d, 0x21, 0x22,  # 0xD9 "thi"
0x2d, 0x28,  # 0xDA "to"
0x2d, 0x2b,  # 0xDB "tr"
0x2e, 0x29,  # 0xDC "up"
0x2f, 0x1e, 0x2b,  # 0xDD "ver"
0x30, 0x22, 0x2d, 0x21,  # 0xDE "with"
0x30, 0x1a,  # 0xDF "wa"
0x30, 0x1e,  # 0xE0 "we"
0x30, 0x21,  # 0xE1 "wh"
0x30, 0x22,  # 0xE2 "wi"
0x32, 0x28, 0x2e,  # 0xE3 "you"
0x7, 0x1e, 0x2b,  # 0xE4 "Her"
0x13, 0x21, 0x1a,  # 0xE5 "Tha"
0x13, 0x21, 0x1e,  # 0xE6 "The"
0x13, 0x21, 0x22,  # 0xE7 "Thi"
0x18, 0x28, 0x2e,  # 0xE8 "You"
]
# Start offset of every dictionary entry inside kTextDictionary.  Entry n spans
# [kTextDictionary_Idx[n], kTextDictionary_Idx[n + 1]); the final value is the
# end offset of the last entry, so there is one more offset than entries.
kTextDictionary_Idx = [
    0, 4, 7, 9, 12, 16, 20, 24, 27, 30,                 # entries 0-9
    33, 36, 38, 40, 43, 45, 47, 49, 53, 56,             # entries 10-19
    59, 61, 64, 66, 68, 71, 74, 77, 80, 83,             # entries 20-29
    85, 87, 89, 92, 95, 100, 103, 105, 109, 112,        # entries 30-39
    115, 117, 119, 124, 128, 130, 132, 134, 138, 142,   # entries 40-49
    145, 147, 149, 152, 154, 156, 158, 162, 165, 168,   # entries 50-59
    172, 176, 180, 182, 184, 186, 189, 192, 195, 198,   # entries 60-69
    201, 203, 207, 209, 211, 213, 215, 219, 223, 226,   # entries 70-79
    229, 232, 235, 237, 239, 241, 244, 248, 250, 252,   # entries 80-89
    254, 256, 259, 262, 265, 268, 271,                  # entries 90-96
    274,                                                # end of the last entry
]
def make_dict():
    """Expand the packed dictionary into two lookup tables.

    Entry ``n`` occupies the positions from ``kTextDictionary_Idx[n]`` up to
    (not including) ``kTextDictionary_Idx[n + 1]`` of ``kTextDictionary``;
    every value there is an index into ``kTextAlphabet``.

    Returns:
        A pair ``(by_index, by_text)`` where ``by_index`` maps the entry
        number to its expanded text and ``by_text`` maps the text back to the
        entry number.
    """
    by_index = {}
    by_text = {}
    # Consecutive offsets delimit one entry each.
    spans = zip(kTextDictionary_Idx, kTextDictionary_Idx[1:])
    for entry, (start, stop) in enumerate(spans):
        text = "".join(
            kTextAlphabet[kTextDictionary[pos]] for pos in range(start, stop)
        )
        by_index[entry] = text
        by_text[text] = entry
    return by_index, by_text
# Built once when the module is loaded: kTextDictionary_Ascii maps a dictionary
# entry number to its text, kTextDictionary_AsciiBack maps the text back to the
# entry number.
kTextDictionary_Ascii, kTextDictionary_AsciiBack = make_dict()
def decode_strings(get_byte):
    """Decode every encoded message string into readable text.

    Reading starts at address 0x9c8000.  Each byte is interpreted as follows:

    * below 0x67: an index into ``kTextAlphabet``;
    * 0x67-0x7F: a command, rendered as ``[Name]`` or, when
      ``kText_CommandLengths`` says it is two bytes long, as ``[Name NN]``
      with the operand byte in decimal; 0x7F ends the current string and
      adds nothing to its text;
    * 0x80: reading continues at address 0x8edf40 and whatever was gathered
      for the unfinished string is dropped;
    * 0x81-0x87: not expected in the data;
    * 0xFF: end of all text;
    * anything else: dictionary entry ``code - 0x88``.

    Args:
        get_byte: callable taking an integer address and returning the byte
            value stored there.

    Returns:
        A list of ``(text, raw_bytes)`` tuples in the order the strings were
        read.  ``raw_bytes`` is the list of byte values consumed for that
        string, including command operands and the terminating 0x7F.

    Raises:
        AssertionError: if a code in the range 0x81-0x87 is encountered.
        IndexError, KeyError: if a code has no entry in ``kTextAlphabet`` or
            in the dictionary table.
    """
    p = 0x9c8000
    result = []
    while True:
        parts = []    # decoded fragments of the string being read
        srcdata = []  # raw bytes consumed for the string being read
        while True:
            c = get_byte(p)
            srcdata.append(c)
            length = kText_CommandLengths[c - 0x67] if 0x67 <= c < 0x80 else 1
            p += length
            if c == 0x7f:
                break
            if c < 0x67:
                parts.append(kTextAlphabet[c])
            elif c < 0x80:
                name = kText_CommandNames[c - 0x67]
                if length == 2:
                    # p already points past the command, so the operand is
                    # the byte just before it.
                    operand = get_byte(p - 1)
                    srcdata.append(operand)
                    parts.append('[%s %.2d]' % (name, operand))
                else:
                    parts.append('[%s]' % name)
            elif c == 0x80:
                p = 0x8edf40
                parts = None  # marks the unfinished string as discarded
                break
            elif c < 0x88:
                # Raised explicitly so the check survives ``python -O``.
                raise AssertionError(
                    'unexpected text code 0x%.2x at 0x%x' % (c, p - 1))
            elif c == 0xff:
                return result
            else:
                parts.append(kTextDictionary_Ascii[c - 0x88])
        if parts is not None:
            result.append((''.join(parts), srcdata))
def print_strings(f, get_byte):
    """Write every decoded string to *f*, one per line, numbered from 1.

    Args:
        f: file-like object handed to ``print`` as its ``file`` argument.
        get_byte: byte reader passed through to ``decode_strings``.
    """
    decoded = decode_strings(get_byte)
    for number, (text, _raw_bytes) in enumerate(decoded, start=1):
        print('%s: %s' % (number, text), file=f)
def find_string_char_at(s, i):
    """Encode the single token of *s* that begins at offset *i*.

    Candidates are tried in this order, and the first match wins:

    1. dictionary entries, in the iteration order of
       ``kTextDictionary_AsciiBack`` (encoded as ``entry + 0x88``);
    2. ``kTextAlphabet`` entries, in list order (encoded as their index);
    3. a bracketed command ``[Name]`` or ``[Name NN]`` (encoded as
       ``index + 0x67``, followed by the operand for two-byte commands).

    Args:
        s: the text being compressed.
        i: offset in *s* at which the token starts.

    Returns:
        A pair ``(codes, consumed)``: the list of byte values for the token
        and the number of characters of *s* it covers.

    Raises:
        AssertionError: if nothing matches at offset *i*, including a ``[``
            that has no closing ``]``.  The message is also printed.
        ValueError: if the operand of a two-byte command is missing or is not
            an integer.
    """
    # startswith(prefix, i) tests at the offset without copying the tail of
    # the string for every token.
    for text, entry in kTextDictionary_AsciiBack.items():
        if s.startswith(text, i):
            return [entry + 0x88], len(text)

    for code, text in enumerate(kTextAlphabet):
        if s.startswith(text, i):
            return [code], len(text)

    if s.startswith('[', i):
        rest = s[i:]
        close = rest.find(']')
        if close != -1:
            cmd = rest[1:close]
            if cmd in kText_CommandNames:
                code = kText_CommandNames.index(cmd)
                # A bare name is only a complete encoding for one-byte
                # commands; a two-byte command still needs its operand and is
                # handled (or rejected) below.
                if kText_CommandLengths[code] == 1:
                    return [code + 0x67], len(cmd) + 2
            for code, name in enumerate(kText_CommandNames):
                if kText_CommandLengths[code] == 2 and cmd.startswith(name):
                    operand = cmd[len(name):].strip()
                    return [code + 0x67, int(operand)], len(cmd) + 2

    message = 'substr %s not found' % s[i:]
    print(message)
    # Raised explicitly so the failure is not stripped by ``python -O``.
    raise AssertionError(message)
def compress_string(s):
    """Encode the text *s* into a list of byte values.

    The string is consumed left to right, taking at each position whatever
    token ``find_string_char_at`` reports there.  The result always ends with
    the 0x7F terminator.
    """
    encoded = []
    pos = 0
    while pos < len(s):
        codes, consumed = find_string_char_at(s, pos)
        encoded += codes
        pos += consumed
    encoded.append(0x7f)
    return encoded
def verify(get_byte):
    """Check that re-compressing each decoded string reproduces its bytes.

    Every string from ``decode_strings`` is passed through
    ``compress_string`` and compared with the raw bytes it was decoded from.
    The first mismatch is printed and the check stops there.

    Args:
        get_byte: byte reader passed through to ``decode_strings``.
    """
    for decoded, original in decode_strings(get_byte):
        recompressed = compress_string(decoded)
        if recompressed == original:
            continue
        print('String %s not match: %s, %s' % (decoded, recompressed, original))
        break