Skip to content
Prev Previous commit
Next Next commit
bpo-40328: partial refactoring
  • Loading branch information
corona10 committed Apr 20, 2020
commit 2e8a7e3ce07de1062cad94ea47174a09bf95da90
20 changes: 8 additions & 12 deletions Modules/cjkcodecs/tools/genmap_japanese.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,8 @@ def loadmap_jisx0213(fo):
print_decmap(omap, filler, "jisx0212", jisx0212decmap)

print("Generating JIS X 0208 && JIS X 0212 encode map...")
filler = BufferedFiller()
genmap_encode(filler, "jisxcommon", jisx0208_0212encmap)
print_encmap(omap, filler, "jisxcommon", jisx0208_0212encmap)
writer = EncodeMapWriter(omap, "jisxcommon", jisx0208_0212encmap)
writer.generate()

print("Generating CP932 Extension decode map...")
filler = BufferedFiller()
Expand All @@ -183,9 +182,8 @@ def loadmap_jisx0213(fo):
print_decmap(omap, filler, "cp932ext", cp932decmap)

print("Generating CP932 Extension encode map...")
filler = BufferedFiller()
genmap_encode(filler, "cp932ext", cp932encmap)
print_encmap(omap, filler, "cp932ext", cp932encmap)
writer = EncodeMapWriter(omap, "cp932ext", cp932encmap)
writer.generate()

print("Generating JIS X 0213 Plane 1 BMP decode map...")
filler = BufferedFiller()
Expand All @@ -198,9 +196,8 @@ def loadmap_jisx0213(fo):
print_decmap(omap, filler, "jisx0213_2_bmp", jis4decmap)

print("Generating JIS X 0213 BMP encode map...")
filler = BufferedFiller()
genmap_encode(filler, "jisx0213_bmp", jisx0213bmpencmap)
print_encmap(omap, filler, "jisx0213_bmp", jisx0213bmpencmap)
writer = EncodeMapWriter(omap, "jisx0213_bmp", jisx0213bmpencmap)
writer.generate()

print("Generating JIS X 0213 Plane 1 EMP decode map...")
filler = BufferedFiller()
Expand All @@ -215,9 +212,8 @@ def loadmap_jisx0213(fo):
print_decmap(omap, filler, "jisx0213_2_emp", jis4_2_decmap)

print("Generating JIS X 0213 EMP encode map...")
filler = BufferedFiller()
genmap_encode(filler, "jisx0213_emp", jisx0213empencmap)
print_encmap(omap, filler, "jisx0213_emp", jisx0213empencmap)
writer = EncodeMapWriter(omap, "jisx0213_emp", jisx0213empencmap)
writer.generate()

with open('mappings_jisx0213_pair.h', 'w') as omap:
print_autogen(omap, os.path.basename(__file__))
Expand Down
6 changes: 3 additions & 3 deletions Modules/cjkcodecs/tools/genmap_korean.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,8 @@
print_decmap(omap, filler, "cp949ext", uhcdecmap)

print("Generating CP949 (includes KS X 1001) encode map...")
filler = BufferedFiller()
genmap_encode(filler, "cp949", cp949encmap)
print_encmap(omap, filler, "cp949", cp949encmap)
writer = EncodeMapWriter(omap, "cp949", cp949encmap)
writer.generate()


print("Done!")
10 changes: 4 additions & 6 deletions Modules/cjkcodecs/tools/genmap_schinese.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,8 @@ def parse_gb18030map(fo):
print_decmap(omap, filler, "gbkext", gbkdecmap)

print("Generating GB2312 && GBK encode map...")
filler = BufferedFiller()
genmap_encode(filler, "gbcommon", gb2312_gbkencmap)
print_encmap(omap, filler, "gbcommon", gb2312_gbkencmap)
writer = EncodeMapWriter(omap, "gbcommon", gb2312_gbkencmap)
writer.generate()

print("Generating GB18030 extension decode map...")
filler = BufferedFiller()
Expand All @@ -110,9 +109,8 @@ def parse_gb18030map(fo):
print_decmap(omap, filler, "gb18030ext", gb18030decmap)

print("Generating GB18030 extension encode map...")
filler = BufferedFiller()
genmap_encode(filler, "gb18030ext", gb18030encmap)
print_encmap(omap, filler, "gb18030ext", gb18030encmap)
writer = EncodeMapWriter(omap, "gb18030ext", gb18030encmap)
writer.generate()

print("Generating GB18030 Unicode BMP Mapping Ranges...")
ranges = [[-1, -1, -1]]
Expand Down
59 changes: 8 additions & 51 deletions Modules/cjkcodecs/tools/genmap_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def itertokens(self):
def parsedata(self, data):
    """Decode the first whitespace-delimited token of *data*.

    The token is wrapped in double quotes and parsed as a Python string
    literal, so backslash escapes inside it (for example ``\\x41``) are
    interpreted.

    Args:
        data: Text whose first token is the escaped string to decode.

    Returns:
        The decoded ``str``.

    Raises:
        IndexError: If *data* contains no token.
        SyntaxError: If the quoted token is not valid Python syntax.
        ValueError: If the quoted token is anything other than a literal
            (e.g. it closes the quote and smuggles in an expression).
    """
    # Function-scope import keeps this block self-contained.
    import ast

    # NOTE(review): this used to be a bare eval(), which would execute any
    # expression embedded in the mapping file.  literal_eval() decodes the
    # same string literals but refuses calls, names and operators.
    return ast.literal_eval('"' + data.split()[0] + '"')


class EncodeMapWriter:
filler_class = BufferedFiller
elemtype = 'DBCHAR'
Expand All @@ -69,9 +70,12 @@ class EncodeMapWriter:
def __init__(self, fp, prefix, m):
self.file = fp
self.prefix = prefix
self.m = m
self.filler = self.filler_class()
self.buildmap(m)
self.printmap(m)

def generate(self):
    """Build the encode map and then write it out.

    Calls ``self.buildmap(self.m)`` followed by ``self.printmap(self.m)``,
    in that order, passing the mapping stored on the instance as ``m``.
    """
    self.buildmap(self.m)
    self.printmap(self.m)

def buildmap(self, emap):
for c1 in range(0, 256):
Expand All @@ -97,6 +101,7 @@ def buildmap(self, emap):
self.write_multic(c2map[v])
else:
raise ValueError

def write_nochar(self):
    """Write the ``N,`` cell to ``self.filler``.

    ``buildmap`` is expected to call this for a slot that has no mapping;
    NOTE(review): confirm against the full ``buildmap`` body, which is only
    partly visible here.
    """
    self.filler.write('N,')

Expand All @@ -109,7 +114,7 @@ def write_char(self, point):
def printmap(self, fmap):
self.file.write(f"static const {self.elemtype} __{self.prefix}_encmap[{len(self.filler)}] = {{\n")
self.filler.printout(self.file)
self.file.write("};\n")
self.file.write("};\n\n")
self.file.write(f"static const {self.indextype} {self.prefix}_encmap[256] = {{\n")

for i in range(256):
Expand All @@ -124,32 +129,6 @@ def printmap(self, fmap):
self.filler.printout(self.file)
self.file.write("};\n\n")

# XXX: convert all usages of this function to EncodeMapWriter
def genmap_encode(filler, prefix, emap):
    """Write the flattened encode table for *emap* into *filler*.

    For each lead value ``c1`` in 0..255 that has a non-empty sub-map, the
    sub-map is annotated **in place** with bookkeeping entries:

    * ``prefix`` -> ``True``
    * ``'min'`` / ``'max'`` -> smallest / largest integer key
    * ``'midx'`` -> ``len(filler)`` before this row is written

    One cell is then written for every value in ``min..max``: ``N,`` when
    the key is absent, the decimal number plus a comma for an ``int``, and
    ``M,`` for a ``tuple``.

    Args:
        filler: Object providing ``write(str)`` and ``__len__``.
        prefix: Key under which each populated sub-map is marked.
        emap: Mapping of ``c1`` -> sub-map (``c2`` -> int or tuple).

    Raises:
        ValueError: If a mapped value is neither an ``int`` nor a ``tuple``.
    """
    for c1 in range(256):
        if c1 not in emap:
            continue
        c2map = emap[c1]
        # Sort integer keys only.  The bookkeeping entries written below are
        # string keys in the same dict, so sorting every key would raise
        # TypeError (int vs. str) when a map is processed a second time.
        rc2values = sorted(k for k in c2map if isinstance(k, int))
        if not rc2values:
            continue

        lowest, highest = rc2values[0], rc2values[-1]
        c2map[prefix] = True
        c2map['min'] = lowest
        c2map['max'] = highest
        c2map['midx'] = len(filler)

        for v in range(lowest, highest + 1):
            if v not in c2map:
                filler.write('N,')
                continue
            value = c2map[v]
            if isinstance(value, int):
                filler.write(str(value) + ',')
            elif isinstance(value, tuple):
                filler.write('M,')
            else:
                raise ValueError(
                    f"unsupported encode map value {value!r} "
                    f"at ({c1:#04x}, {v:#04x})")

def open_mapping_file(path, source):
try:
f = open(path)
Expand All @@ -160,28 +139,6 @@ def open_mapping_file(path, source):
def print_autogen(fo, source):
    """Write the "auto-generated, do not edit" banner line to *fo*.

    Args:
        fo: Writable text stream receiving the banner.
        source: Name of the generating script, embedded in the banner.
    """
    fo.write(f'// AUTO-GENERATED FILE FROM {source}: DO NOT EDIT\n')

def print_encmap(fo, filler, fmapprefix, fmap, f2map=None, f2mapprefix=''):
    """Write the C encode-map data array and its 256-entry index table.

    First emits ``static const DBCHAR __<fmapprefix>_encmap[<len(filler)>]``
    with whatever *filler* currently holds, then
    ``static const struct unim_index <fmapprefix>_encmap[256]``.  Index
    entry ``i`` comes from ``fmap[i]`` when that sub-map is marked with
    *fmapprefix*, otherwise from ``f2map[i]`` when marked with
    *f2mapprefix*, otherwise it is the empty entry ``{ 0, 0, 0 }``.

    Args:
        fo: Writable text stream for the generated C source.
        filler: Object providing ``write(*str)``, ``printout(fo)`` and
            ``__len__``.  It is reused for the index table after the first
            ``printout`` call (presumably ``printout`` drains it -- verify
            against ``BufferedFiller``).
        fmapprefix: Name prefix and marker key of the primary map.
        fmap: Primary map; marked sub-maps must carry ``'midx'``, ``'min'``
            and ``'max'``.
        f2map: Optional fallback map with the same layout as *fmap*.
        f2mapprefix: Name prefix and marker key of the fallback map.
    """
    # None sentinel instead of a shared mutable ``{}`` default.
    if f2map is None:
        f2map = {}

    fo.write(
        f"static const DBCHAR __{fmapprefix}_encmap[{len(filler)}] = {{\n")
    filler.printout(fo)
    fo.write("};\n\n")
    fo.write(
        f"static const struct unim_index {fmapprefix}_encmap[256] = {{\n")

    for i in range(256):
        if i in fmap and fmapprefix in fmap[i]:
            prefix, row = fmapprefix, fmap[i]
        elif i in f2map and f2mapprefix in f2map[i]:
            prefix, row = f2mapprefix, f2map[i]
        else:
            filler.write("{", "0,", "0,", "0", "},")
            continue

        filler.write("{", f"__{prefix}_encmap", "+", f"{row['midx']:d}",
                     ",", f"{row['min']:d},", f"{row['max']:d}", "},")
    filler.printout(fo)
    fo.write("};\n\n")

def genmap_decode(filler, prefix, c1range, c2range, dmap, onlymask=(),
wide=0):
c2width = c2range[1] - c2range[0] + 1
Expand Down