forked from japsu/phpbb-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprocess_confusables.py
More file actions
75 lines (59 loc) · 2.1 KB
/
process_confusables.py
File metadata and controls
75 lines (59 loc) · 2.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env python
# encoding: utf-8
# vim: shiftwidth=4 expandtab
#
# phpbb-python © Copyright 2010 Santtu Pajukanta
# http://pajukanta.fi
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 2 or later of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://opensource.org/licenses/gpl-license.php>.
#
import sys
from pprint import pprint
# Literal text the PHP input must begin with; parse_confusables() checks for
# it and strips it before parsing the array entries.
STARTS_WITH = '<?php return array('
# Literal text the PHP input must end with; checked and stripped likewise.
ENDS_WITH = ');'
# Preamble written at the top of the generated Python file by
# print_confusables().  It deliberately ends with "confusables = " so that the
# pretty-printed dict written immediately after it completes the assignment.
HEADER = """
#
# WARNING! AUTOGENERATED FROM phpBB3/includes/utf/data/confusables.php
# DO NOT EDIT MANUALLY!
#
# Use phpbb-python/bin/process_confusables.py instead.
#
confusables = """
def parse_character(char):
    r"""Return the text of one PHP single-quoted string literal.

    ``char`` is the literal *including* its surrounding single quotes, given
    either as a UTF-8 encoded byte string or as already-decoded text.  The
    quotes are removed, byte strings are decoded as UTF-8, and the only two
    escape sequences PHP recognises inside single quotes (``\'`` and ``\\``)
    are resolved; any other backslash is kept literally.

    Args:
        char: the quoted literal, e.g. ``b"'a'"`` or ``u"'a'"``.

    Returns:
        The decoded text between the quotes.

    Raises:
        ValueError: if ``char`` is not enclosed in a pair of single quotes.
            Undecodable bytes raise ``UnicodeDecodeError``, which is a
            ``ValueError`` subclass.
    """
    import re  # local import: only needed for the escape handling below

    # Compare against a quote of the same string type as the input so that
    # both byte strings and text are accepted.
    quote = b"'" if isinstance(char, bytes) else u"'"
    # The length check rejects a lone "'", which would otherwise satisfy both
    # startswith() and endswith() with the same single character.
    if len(char) < 2 or not (char.startswith(quote) and char.endswith(quote)):
        raise ValueError(char)

    inner = char[1:-1]
    if isinstance(inner, bytes):
        inner = inner.decode("UTF-8")
    # Resolve \' and \\ in a single left-to-right pass.
    return re.sub(r"\\(['\\])", r"\1", inner)
def parse_confusables(input_file):
    """Yield ``(before, after)`` pairs parsed from a PHP confusables array.

    The whole of ``input_file`` is read.  After surrounding whitespace is
    removed it must start with ``STARTS_WITH`` and end with ``ENDS_WITH``;
    what lies in between is scanned for ``'key'=>'value'`` entries made of
    single-quoted string literals.  Each literal is handed to
    ``parse_character`` and the resulting pair is yielded.  Text that does
    not form such an entry is skipped.

    Args:
        input_file: a file-like object whose ``read()`` returns the PHP
            source, either as bytes or as text.

    Yields:
        Tuples ``(before, after)`` as returned by ``parse_character``.

    Raises:
        ValueError: if the input is not wrapped in the expected PHP
            prefix/suffix, or if ``parse_character`` rejects a literal.
            As this is a generator, errors surface on iteration.
    """
    import re  # local import: only this function needs it

    # Tolerate a trailing newline (or other surrounding whitespace).
    data = input_file.read().strip()
    binary = isinstance(data, bytes)

    def literal(text):
        # Delimiters must have the same string type as the data; where bytes
        # and text do not mix, binary input needs byte delimiters.
        if binary and not isinstance(text, bytes):
            return text.encode("ascii")
        return text

    prefix = literal(STARTS_WITH)
    suffix = literal(ENDS_WITH)
    if not (data.startswith(prefix) and data.endswith(suffix)):
        raise ValueError(
            "expected input of the form %s...%s" % (STARTS_WITH, ENDS_WITH))
    body = data[len(prefix):-len(suffix)]

    # A single-quoted literal: anything but quote/backslash, or a backslash
    # followed by any character.  Matching whole literals (rather than
    # splitting on ',' and '=>') keeps entries such as ',' or '\'' intact.
    quoted = r"'(?:[^'\\]|\\.)*'"
    entry = re.compile(
        literal(r"(%s)\s*=>\s*(%s)" % (quoted, quoted)), re.DOTALL)

    for match in entry.finditer(body):
        before, after = match.groups()
        yield (parse_character(before), parse_character(after))
def print_confusables(confusables, output_file):
    """Write the generated-module header and the pretty-printed mapping.

    Args:
        confusables: a dict, or anything ``dict()`` accepts (for example an
            iterable of ``(before, after)`` pairs).
        output_file: writable file-like object that receives the output.
    """
    # Anything that is not exactly a dict -- including dict subclasses -- is
    # copied into a plain dict.  This happens before the first write, so an
    # error while building the mapping leaves output_file untouched.
    mapping = confusables if type(confusables) is dict else dict(confusables)

    output_file.write(HEADER)
    pprint(mapping, stream=output_file)
def main(input_filename, output_filename):
    """Convert a PHP confusables file into a Python module.

    The input is read and parsed completely before the output file is
    opened.  Opening a file for writing truncates it, so doing that first
    would destroy an existing output file whenever the input turned out to be
    missing or malformed.

    Args:
        input_filename: path of the PHP confusables file to read.
        output_filename: path of the Python file to (over)write.
    """
    with open(input_filename, 'rb') as input_file:
        # parse_confusables() is lazy; dict() drains it while the input file
        # is still open and surfaces any parse error now.
        confusables = dict(parse_confusables(input_file))

    with open(output_filename, 'wb') as output_file:
        print_confusables(confusables, output_file)
if __name__ == "__main__":
    # main() takes exactly two positional arguments.  Check the count here so
    # that a wrong invocation prints a usage line instead of a TypeError
    # traceback from the argument unpacking.
    if len(sys.argv) != 3:
        sys.exit("usage: %s INPUT_FILE OUTPUT_FILE" % sys.argv[0])
    main(*sys.argv[1:])