File tree Expand file tree Collapse file tree 3 files changed +441
-0
lines changed
Expand file tree Collapse file tree 3 files changed +441
-0
# Immutable set of lower-case character-set names and aliases; entries are
# kept in their original order, several per line.
# NOTE(review): the layout matches what the generator script in this change
# prints (names/aliases accepted by codecs.lookup) -- presumably this literal
# was generated by it; confirm before editing by hand.
encodings = frozenset([
    "ansi_x3.4-1968", "iso-ir-6", "ansi_x3.4-1986", "iso_646.irv:1991",
    "ascii", "iso646-us", "us-ascii", "us", "ibm367", "cp367", "csascii",
    "ks_c_5601-1987", "korean", "iso-2022-kr", "csiso2022kr", "euc-kr",
    "iso-2022-jp", "csiso2022jp", "iso-2022-jp-2",
    "iso-ir-58", "chinese", "csiso58gb231280",
    "iso_8859-1:1987", "iso-ir-100", "iso_8859-1", "iso-8859-1",
    "latin1", "l1", "ibm819", "cp819", "csisolatin1",
    "iso_8859-2:1987", "iso-ir-101", "iso_8859-2", "iso-8859-2",
    "latin2", "l2", "csisolatin2",
    "iso_8859-3:1988", "iso-ir-109", "iso_8859-3", "iso-8859-3",
    "latin3", "l3", "csisolatin3",
    "iso_8859-4:1988", "iso-ir-110", "iso_8859-4", "iso-8859-4",
    "latin4", "l4", "csisolatin4",
    "iso_8859-6:1987", "iso-ir-127", "iso_8859-6", "iso-8859-6",
    "ecma-114", "asmo-708", "arabic", "csisolatinarabic",
    "iso_8859-7:1987", "iso-ir-126", "iso_8859-7", "iso-8859-7",
    "elot_928", "ecma-118", "greek", "greek8", "csisolatingreek",
    "iso_8859-8:1988", "iso-ir-138", "iso_8859-8", "iso-8859-8",
    "hebrew", "csisolatinhebrew",
    "iso_8859-5:1988", "iso-ir-144", "iso_8859-5", "iso-8859-5",
    "cyrillic", "csisolatincyrillic",
    "iso_8859-9:1989", "iso-ir-148", "iso_8859-9", "iso-8859-9",
    "latin5", "l5", "csisolatin5",
    "iso-8859-10", "iso-ir-157", "l6", "iso_8859-10:1992",
    "csisolatin6", "latin6",
    "hp-roman8", "roman8", "r8",
    "ibm037", "cp037", "ebcdic-cp-us", "ebcdic-cp-ca", "ebcdic-cp-wt",
    "ebcdic-cp-nl", "csibm037",
    "ibm424", "cp424", "ebcdic-cp-he", "csibm424",
    "ibm437", "cp437", "437", "cspc8codepage437",
    "ibm500", "cp500", "ebcdic-cp-be", "ebcdic-cp-ch", "csibm500",
    "ibm775", "cp775", "cspc775baltic",
    "ibm850", "cp850", "850", "cspc850multilingual",
    "ibm852", "cp852", "852", "cspcp852",
    "ibm855", "cp855", "855", "csibm855",
    "ibm857", "cp857", "857", "csibm857",
    "ibm860", "cp860", "860", "csibm860",
    "ibm861", "cp861", "861", "cp-is", "csibm861",
    "ibm862", "cp862", "862", "cspc862latinhebrew",
    "ibm863", "cp863", "863", "csibm863",
    "ibm864", "cp864", "csibm864",
    "ibm865", "cp865", "865", "csibm865",
    "ibm866", "cp866", "866", "csibm866",
    "ibm869", "cp869", "869", "cp-gr", "csibm869",
    "ibm1026", "cp1026", "csibm1026",
    "koi8-r", "cskoi8r", "koi8-u",
    "big5-hkscs",
    "ptcp154", "csptcp154", "pt154", "cp154",
    "utf-7", "utf-16be", "utf-16le", "utf-16", "utf-8",
    "iso-8859-13",
    "iso-8859-14", "iso-ir-199", "iso_8859-14:1998", "iso_8859-14",
    "latin8", "iso-celtic", "l8",
    "iso-8859-15", "iso_8859-15",
    "iso-8859-16", "iso-ir-226", "iso_8859-16:2001", "iso_8859-16",
    "latin10", "l10",
    "gbk", "cp936", "ms936", "gb18030",
    "shift_jis", "ms_kanji", "csshiftjis", "euc-jp",
    "gb2312", "big5", "csbig5",
    "windows-1250", "windows-1251", "windows-1252", "windows-1253",
    "windows-1254", "windows-1255", "windows-1256", "windows-1257",
    "windows-1258",
    "tis-620", "hz-gb-2312",
])
Original file line number Diff line number Diff line change 1+ #!/usr/bin/env python
2+ import sys
3+ import urllib2
4+ import codecs
5+
def main():
    """Print a ``frozenset`` literal of the codec names found in a listing.

    Reads the resource at the URL given as the first command-line argument
    (``sys.argv[1]``).  For every line starting with ``Name: `` or
    ``Alias: `` the first word after the prefix is taken as a candidate
    name; candidates that ``codecs.lookup`` does not recognise are skipped.
    The accepted names are written to standard output, lower-cased,
    de-duplicated and in first-seen order, as an
    ``encodings = frozenset((...))`` assignment.

    Raises:
        SystemExit: if no URL argument was supplied.
    """
    # Imported locally so the function runs on both Python 2 (urllib2) and
    # Python 3 (urllib.request) without touching the module-level imports.
    from contextlib import closing
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib2 import urlopen

    if len(sys.argv) < 2:
        sys.exit("usage: %s URL" % (sys.argv[0] if sys.argv else "script"))

    encodings = []
    seen = set()  # lower-cased names already emitted, for O(1) dedup
    # closing() guarantees the handle is released even if parsing fails;
    # the Python 2 response object is not itself a context manager.
    with closing(urlopen(sys.argv[1])) as f:
        for line in f:
            if not isinstance(line, str):
                # Python 3 responses yield bytes; Python 2 lines are
                # already ``str`` and are left untouched.
                line = line.decode("ascii", "replace")
            if not line.startswith(("Name: ", "Alias: ")):
                continue
            fields = line.split()
            if len(fields) < 2:
                # Prefix with nothing after it: no name to record.
                continue
            enc = fields[1]
            try:
                codecs.lookup(enc)
            except LookupError:
                # Name is not known to this interpreter's codec registry.
                continue
            enc = enc.lower()
            # The original compared the bound method ``enc.lower`` (not its
            # result) against the list, so duplicates were never detected.
            if enc not in seen:
                seen.add(enc)
                encodings.append(enc)

    sys.stdout.write("encodings = frozenset((\n ")
    for enc in encodings:
        sys.stdout.write(' "%s",\n ' % enc)
    sys.stdout.write(' ))')


if __name__ == "__main__":
    main()
You can’t perform that action at this time.
0 commit comments