|
105 | 105 |
|
106 | 106 | from .decoder import JSONDecoder, JSONDecodeError |
107 | 107 | from .encoder import JSONEncoder |
| 108 | +import codecs |
108 | 109 |
|
109 | 110 | _default_encoder = JSONEncoder( |
110 | 111 | skipkeys=False, |
@@ -240,6 +241,35 @@ def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True, |
240 | 241 | _default_decoder = JSONDecoder(object_hook=None, object_pairs_hook=None) |
241 | 242 |
|
242 | 243 |
|
def detect_encoding(b):
    """Guess which Unicode encoding a bytes JSON document is stored in.

    ``b`` must support ``startswith()`` with bytes prefixes, ``len()`` and
    integer indexing (``bytes`` and ``bytearray`` both do).

    A leading byte order mark decides the answer when one is present.
    Without a BOM, the position of NUL bytes among the first four (or two)
    bytes is used; this presumes the document starts with a non-NUL
    character whose code point fits in a single byte.

    Returns a codec name suitable for ``bytes.decode()``: ``'utf-32'``,
    ``'utf-16'``, ``'utf-8-sig'``, ``'utf-32-be'``, ``'utf-32-le'``,
    ``'utf-16-be'``, ``'utf-16-le'``, or ``'utf-8'`` when nothing else
    matches (including empty input).
    """
    # UTF-32 has to be tested before UTF-16: BOM_UTF32_LE begins with the
    # same two bytes as BOM_UTF16_LE.
    if b.startswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
        return 'utf-32'
    if b.startswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
        return 'utf-16'
    if b.startswith(codecs.BOM_UTF8):
        return 'utf-8-sig'

    size = len(b)
    if size >= 4:
        if not b[0]:
            # 00 00 -- -- - utf-32-be
            # 00 XX -- -- - utf-16-be
            return 'utf-16-be' if b[1] else 'utf-32-be'
        if not b[1]:
            # XX 00 00 00 - utf-32-le
            # XX 00 00 XX - utf-16-le
            # XX 00 XX -- - utf-16-le
            return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
    elif size == 2:
        if not b[0]:
            # 00 XX - utf-16-be
            return 'utf-16-be'
        if not b[1]:
            # XX 00 - utf-16-le
            return 'utf-16-le'
    # No BOM and no telltale NUL bytes: treat the data as UTF-8.
    return 'utf-8'
| 271 | + |
| 272 | + |
243 | 273 | def load(fp, *, cls=None, object_hook=None, parse_float=None, |
244 | 274 | parse_int=None, parse_constant=None, object_pairs_hook=None, **kw): |
245 | 275 | """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing |
@@ -270,8 +300,8 @@ def load(fp, *, cls=None, object_hook=None, parse_float=None, |
270 | 300 |
|
271 | 301 | def loads(s, *, encoding=None, cls=None, object_hook=None, parse_float=None, |
272 | 302 | parse_int=None, parse_constant=None, object_pairs_hook=None, **kw): |
273 | | - """Deserialize ``s`` (a ``str`` instance containing a JSON |
274 | | - document) to a Python object. |
| 303 | + """Deserialize ``s`` (a ``str``, ``bytes`` or ``bytearray`` instance |
| 304 | + containing a JSON document) to a Python object. |
275 | 305 |
|
276 | 306 | ``object_hook`` is an optional function that will be called with the |
277 | 307 | result of any object literal decode (a ``dict``). The return value of |
@@ -307,12 +337,16 @@ def loads(s, *, encoding=None, cls=None, object_hook=None, parse_float=None, |
307 | 337 | The ``encoding`` argument is ignored and deprecated. |
308 | 338 |
|
309 | 339 | """ |
310 | | - if not isinstance(s, str): |
311 | | - raise TypeError('the JSON object must be str, not {!r}'.format( |
312 | | - s.__class__.__name__)) |
313 | | - if s.startswith(u'\ufeff'): |
314 | | - raise JSONDecodeError("Unexpected UTF-8 BOM (decode using utf-8-sig)", |
315 | | - s, 0) |
| 340 | + if isinstance(s, str): |
| 341 | + if s.startswith('\ufeff'): |
| 342 | + raise JSONDecodeError("Unexpected UTF-8 BOM (decode using utf-8-sig)", |
| 343 | + s, 0) |
| 344 | + else: |
| 345 | + if not isinstance(s, (bytes, bytearray)): |
| 346 | + raise TypeError('the JSON object must be str, bytes or bytearray, ' |
| 347 | + 'not {!r}'.format(s.__class__.__name__)) |
| 348 | + s = s.decode(detect_encoding(s), 'surrogatepass') |
| 349 | + |
316 | 350 | if (cls is None and object_hook is None and |
317 | 351 | parse_int is None and parse_float is None and |
318 | 352 | parse_constant is None and object_pairs_hook is None and not kw): |
|
0 commit comments