@@ -103,7 +103,7 @@ def translate(translation_file, i18ns):
def frequent_ngrams(corpus, sz, n):
    """Return the ``n`` most common substrings of length ``sz`` in ``corpus``.

    Every contiguous window of ``sz`` characters in ``corpus`` is counted,
    including overlapping ones and the final window that ends on the last
    character.

    Args:
        corpus: The string to scan.
        sz: Width of each n-gram, in characters.
        n: Maximum number of entries to return.

    Returns:
        A list of ``(ngram, count)`` tuples ordered from most to least
        frequent, as produced by ``collections.Counter.most_common``. The
        list is empty when ``corpus`` is shorter than ``sz``.
    """
    # A string of length L contains L - sz + 1 windows of width sz. Without
    # the "+ 1" the last window would be skipped and its n-gram undercounted.
    windows = (corpus[i:i + sz] for i in range(len(corpus) - sz + 1))
    return collections.Counter(windows).most_common(n)
105105
106- def ngrams_to_pua (translation , ngrams ):
106+ def encode_ngrams (translation , ngrams ):
107107 if len (ngrams ) > 32 :
108108 start = 0xe000
109109 else :
@@ -112,7 +112,7 @@ def ngrams_to_pua(translation, ngrams):
112112 translation = translation .replace (g , chr (start + i ))
113113 return translation
114114
115- def pua_to_ngrams (compressed , ngrams ):
115+ def decode_ngrams (compressed , ngrams ):
116116 if len (ngrams ) > 32 :
117117 start , end = 0xe000 , 0xf8ff
118118 else :
@@ -123,7 +123,7 @@ def compute_huffman_coding(translations, qstrs, compression_filename):
123123 all_strings = [x [1 ] for x in translations ]
124124 all_strings_concat = "" .join (all_strings )
125125 ngrams = [i [0 ] for i in frequent_ngrams (all_strings_concat , 2 , 32 )]
126- all_strings_concat = ngrams_to_pua (all_strings_concat , ngrams )
126+ all_strings_concat = encode_ngrams (all_strings_concat , ngrams )
127127 counts = collections .Counter (all_strings_concat )
128128 cb = huffman .codebook (counts .items ())
129129 values = []
@@ -211,7 +211,7 @@ def decompress(encoding_table, encoded, encoded_length_bits):
211211 searched_length += lengths [bit_length ]
212212
213213 v = values [searched_length + bits - max_code ]
214- v = pua_to_ngrams (v , ngrams )
214+ v = decode_ngrams (v , ngrams )
215215 i += len (v .encode ('utf-8' ))
216216 dec .append (v )
217217 return '' .join (dec )
@@ -220,7 +220,7 @@ def compress(encoding_table, decompressed, encoded_length_bits, len_translation_
220220 if not isinstance (decompressed , str ):
221221 raise TypeError ()
222222 values , lengths , ngrams = encoding_table
223- decompressed = ngrams_to_pua (decompressed , ngrams )
223+ decompressed = encode_ngrams (decompressed , ngrams )
224224 enc = bytearray (len (decompressed ) * 3 )
225225 #print(decompressed)
226226 #print(lengths)
0 commit comments