11/*
2- * This file is part of the Micro Python project, http://micropython.org/
2+ * This file is part of the MicroPython project, http://micropython.org/
33 *
44 * The MIT License (MIT)
55 *
6- * Copyright (c) 2014 Damien P. George
6+ * Copyright (c) 2014-2016 Damien P. George
77 *
88 * Permission is hereby granted, free of charge, to any person obtaining a copy
99 * of this software and associated documentation files (the "Software"), to deal
2828
2929#include "py/nlr.h"
3030#include "py/objlist.h"
31+ #include "py/objstringio.h"
3132#include "py/parsenum.h"
3233#include "py/runtime.h"
34+ #include "py/stream.h"
3335
3436#if MICROPY_PY_UJSON
3537
@@ -42,7 +44,7 @@ STATIC mp_obj_t mod_ujson_dumps(mp_obj_t obj) {
4244}
4345STATIC MP_DEFINE_CONST_FUN_OBJ_1 (mod_ujson_dumps_obj , mod_ujson_dumps );
4446
45- // This function implements a simple non-recursive JSON parser.
47+ // The function below implements a simple non-recursive JSON parser.
4648//
4749// The JSON specification is at http://www.ietf.org/rfc/rfc4627.txt
4850// The parser here will parse any valid JSON and return the correct
@@ -52,13 +54,35 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_1(mod_ujson_dumps_obj, mod_ujson_dumps);
5254// input is outside its specs.
5355//
5456// Most of the work is parsing the primitives (null, false, true, numbers,
55- // strings). It does 1 pass over the input string and so is easily extended to
56- // being able to parse from a non-seekable stream. It tries to be fast and
57+ // strings). It does 1 pass over the input stream. It tries to be fast and
5758// small in code size, while not using more RAM than necessary.
58- STATIC mp_obj_t mod_ujson_loads (mp_obj_t obj ) {
59- mp_uint_t len ;
60- const char * s = mp_obj_str_get_data (obj , & len );
61- const char * top = s + len ;
59+
// Reader state used by the JSON parser: wraps a stream object together with
// its read function and holds the single most recently read byte.
60+ typedef struct _ujson_stream_t {
61+ mp_obj_t stream_obj ; // stream object, passed as first argument to read()
62+ mp_uint_t (* read )(mp_obj_t obj , void * buf , mp_uint_t size , int * errcode ); // read function of the stream; called to fetch one byte at a time
63+ int errcode ; // error code written by read(); a non-zero value makes ujson_stream_next raise OSError
64+ byte cur ; // last byte read, or S_EOF once read() has returned 0 bytes
65+ } ujson_stream_t ;
66+
// Accessors for a ujson_stream_t value (passed by name, not by pointer).
// S_EOF is the value stored in .cur to mark end of input.
67+ #define S_EOF (0) // null is not allowed in json stream so is ok as EOF marker
// True once the current byte is the EOF marker.
68+ #define S_END (s ) ((s).cur == S_EOF)
// The current (most recently read) byte; does not advance the stream.
69+ #define S_CUR (s ) ((s).cur)
// Advance by one byte via ujson_stream_next() and evaluate to the new current byte.
70+ #define S_NEXT (s ) (ujson_stream_next(&(s)))
71+
72+ STATIC byte ujson_stream_next (ujson_stream_t * s ) {
73+ mp_uint_t ret = s -> read (s -> stream_obj , & s -> cur , 1 , & s -> errcode );
74+ if (s -> errcode != 0 ) {
75+ mp_raise_OSError (s -> errcode );
76+ }
77+ if (ret == 0 ) {
78+ s -> cur = S_EOF ;
79+ }
80+ return s -> cur ;
81+ }
82+
83+ STATIC mp_obj_t mod_ujson_load (mp_obj_t stream_obj ) {
84+ const mp_stream_p_t * stream_p = mp_get_stream_raise (stream_obj , MP_STREAM_OP_READ );
85+ ujson_stream_t s = {stream_obj , stream_p -> read , 0 , 0 };
6286 vstr_t vstr ;
6387 vstr_init (& vstr , 8 );
6488 mp_obj_list_t stack ; // we use a list as a simple stack for nested JSON
@@ -67,64 +91,64 @@ STATIC mp_obj_t mod_ujson_loads(mp_obj_t obj) {
6791 mp_obj_t stack_top = MP_OBJ_NULL ;
6892 mp_obj_type_t * stack_top_type = NULL ;
6993 mp_obj_t stack_key = MP_OBJ_NULL ;
94+ S_NEXT (s );
7095 for (;;) {
7196 cont :
72- if (s == top ) {
97+ if (S_END ( s ) ) {
7398 break ;
7499 }
75100 mp_obj_t next = MP_OBJ_NULL ;
76101 bool enter = false;
77- switch (* s ) {
102+ byte cur = S_CUR (s );
103+ S_NEXT (s );
104+ switch (cur ) {
78105 case ',' :
79106 case ':' :
80107 case ' ' :
81108 case '\t' :
82109 case '\n' :
83110 case '\r' :
84- s += 1 ;
85111 goto cont ;
86112 case 'n' :
87- if (s + 3 < top && s [ 1 ] == 'u' && s [ 2 ] == 'l' && s [ 3 ] == 'l' ) {
88- s += 4 ;
113+ if (S_CUR ( s ) == 'u' && S_NEXT ( s ) == 'l' && S_NEXT ( s ) == 'l' ) {
114+ S_NEXT ( s ) ;
89115 next = mp_const_none ;
90116 } else {
91117 goto fail ;
92118 }
93119 break ;
94120 case 'f' :
95- if (s + 4 < top && s [ 1 ] == 'a' && s [ 2 ] == 'l' && s [ 3 ] == 's' && s [ 4 ] == 'e' ) {
96- s += 5 ;
121+ if (S_CUR ( s ) == 'a' && S_NEXT ( s ) == 'l' && S_NEXT ( s ) == 's' && S_NEXT ( s ) == 'e' ) {
122+ S_NEXT ( s ) ;
97123 next = mp_const_false ;
98124 } else {
99125 goto fail ;
100126 }
101127 break ;
102128 case 't' :
103- if (s + 3 < top && s [ 1 ] == 'r' && s [ 2 ] == 'u' && s [ 3 ] == 'e' ) {
104- s += 4 ;
129+ if (S_CUR ( s ) == 'r' && S_NEXT ( s ) == 'u' && S_NEXT ( s ) == 'e' ) {
130+ S_NEXT ( s ) ;
105131 next = mp_const_true ;
106132 } else {
107133 goto fail ;
108134 }
109135 break ;
110136 case '"' :
111137 vstr_reset (& vstr );
112- for (s ++ ; s < top && * s != '"' ;) {
113- byte c = * s ;
138+ for (; ! S_END ( s ) && S_CUR ( s ) != '"' ;) {
139+ byte c = S_CUR ( s ) ;
114140 if (c == '\\' ) {
115- s ++ ;
116- c = * s ;
141+ c = S_NEXT (s );
117142 switch (c ) {
118143 case 'b' : c = 0x08 ; break ;
119144 case 'f' : c = 0x0c ; break ;
120145 case 'n' : c = 0x0a ; break ;
121146 case 'r' : c = 0x0d ; break ;
122147 case 't' : c = 0x09 ; break ;
123148 case 'u' : {
124- if (s + 4 >= top ) { goto fail ; }
125149 mp_uint_t num = 0 ;
126150 for (int i = 0 ; i < 4 ; i ++ ) {
127- c = (* ++ s | 0x20 ) - '0' ;
151+ c = (S_NEXT ( s ) | 0x20 ) - '0' ;
128152 if (c > 9 ) {
129153 c -= ('a' - ('9' + 1 ));
130154 }
@@ -137,27 +161,29 @@ STATIC mp_obj_t mod_ujson_loads(mp_obj_t obj) {
137161 }
138162 vstr_add_byte (& vstr , c );
139163 str_cont :
140- s ++ ;
164+ S_NEXT ( s ) ;
141165 }
142- if (s == top ) {
166+ if (S_END ( s ) ) {
143167 goto fail ;
144168 }
145- s ++ ;
169+ S_NEXT ( s ) ;
146170 next = mp_obj_new_str (vstr .buf , vstr .len , false);
147171 break ;
148172 case '-' :
149173 case '0' : case '1' : case '2' : case '3' : case '4' : case '5' : case '6' : case '7' : case '8' : case '9' : {
150174 bool flt = false;
151175 vstr_reset (& vstr );
152- for (; s < top ; s ++ ) {
153- if (* s == '.' || * s == 'E' || * s == 'e' ) {
176+ for (;;) {
177+ vstr_add_byte (& vstr , cur );
178+ cur = S_CUR (s );
179+ if (cur == '.' || cur == 'E' || cur == 'e' ) {
154180 flt = true;
155- } else if (* s == '-' || unichar_isdigit (* s )) {
181+ } else if (cur == '-' || unichar_isdigit (cur )) {
156182 // pass
157183 } else {
158184 break ;
159185 }
160- vstr_add_byte ( & vstr , * s );
186+ S_NEXT ( s );
161187 }
162188 if (flt ) {
163189 next = mp_parse_num_decimal (vstr .buf , vstr .len , false, false, NULL );
@@ -169,16 +195,13 @@ STATIC mp_obj_t mod_ujson_loads(mp_obj_t obj) {
169195 case '[' :
170196 next = mp_obj_new_list (0 , NULL );
171197 enter = true;
172- s += 1 ;
173198 break ;
174199 case '{' :
175200 next = mp_obj_new_dict (0 );
176201 enter = true;
177- s += 1 ;
178202 break ;
179203 case '}' :
180204 case ']' : {
181- s += 1 ;
182205 if (stack_top == MP_OBJ_NULL ) {
183206 // no object at all
184207 goto fail ;
@@ -231,10 +254,10 @@ STATIC mp_obj_t mod_ujson_loads(mp_obj_t obj) {
231254 }
232255 success :
233256 // eat trailing whitespace
234- while (s < top && unichar_isspace (* s )) {
235- s ++ ;
257+ while (unichar_isspace (S_CUR ( s ) )) {
258+ S_NEXT ( s ) ;
236259 }
237- if (s < top ) {
260+ if (! S_END ( s ) ) {
238261 // unexpected chars
239262 goto fail ;
240263 }
@@ -248,11 +271,21 @@ STATIC mp_obj_t mod_ujson_loads(mp_obj_t obj) {
248271 fail :
249272 nlr_raise (mp_obj_new_exception_msg (& mp_type_ValueError , "syntax error in JSON" ));
250273}
274+ STATIC MP_DEFINE_CONST_FUN_OBJ_1 (mod_ujson_load_obj , mod_ujson_load );
275+
276+ STATIC mp_obj_t mod_ujson_loads (mp_obj_t obj ) {
277+ mp_uint_t len ;
278+ const char * buf = mp_obj_str_get_data (obj , & len );
279+ vstr_t vstr = {len , len , (char * )buf , true};
280+ mp_obj_stringio_t sio = {{& mp_type_stringio }, & vstr , 0 };
281+ return mod_ujson_load (& sio );
282+ }
251283STATIC MP_DEFINE_CONST_FUN_OBJ_1 (mod_ujson_loads_obj , mod_ujson_loads );
252284
// Table of the ujson module's globals: maps each attribute name (as a qstr)
// to the module name or to the function object implementing it.
253285STATIC const mp_rom_map_elem_t mp_module_ujson_globals_table [] = {
254286 { MP_ROM_QSTR (MP_QSTR___name__ ), MP_ROM_QSTR (MP_QSTR_ujson ) },
255287 { MP_ROM_QSTR (MP_QSTR_dumps ), MP_ROM_PTR (& mod_ujson_dumps_obj ) },
288+ { MP_ROM_QSTR (MP_QSTR_load ), MP_ROM_PTR (& mod_ujson_load_obj ) },
256289 { MP_ROM_QSTR (MP_QSTR_loads ), MP_ROM_PTR (& mod_ujson_loads_obj ) },
257290};
258291
0 commit comments