@@ -14,9 +14,11 @@ typedef struct _mp_obj_str_t {
1414 mp_obj_base_t base ;
1515 machine_uint_t hash : 16 ; // XXX here we assume the hash size is 16 bits (it is at the moment; see qstr.c)
1616 machine_uint_t len : 16 ; // len == number of bytes used in data, alloc = len + 1 because (at the moment) we also append a null byte
17- byte data [] ;
17+ const byte * data ;
1818} mp_obj_str_t ;
1919
20+ const mp_obj_t mp_const_empty_bytes ;
21+
// Declares a local `uint str_hash` and fills it with the hash of str_obj_in.
// For a qstr-encoded object the hash comes from qstr_hash(); otherwise it is
// read from the hash field of the mp_obj_str_t the object points to.
#define GET_STR_HASH(str_obj_in, str_hash) \
    uint str_hash = MP_OBJ_IS_QSTR(str_obj_in) \
        ? qstr_hash(MP_OBJ_QSTR_VALUE(str_obj_in)) \
        : ((mp_obj_str_t*)str_obj_in)->hash;
2224
@@ -28,6 +30,7 @@ typedef struct _mp_obj_str_t {
2830
2931STATIC mp_obj_t mp_obj_new_str_iterator (mp_obj_t str );
3032STATIC mp_obj_t mp_obj_new_bytes_iterator (mp_obj_t str );
33+ STATIC mp_obj_t str_new (const mp_obj_type_t * type , const byte * data , uint len );
3134
3235/******************************************************************************/
3336/* str */
@@ -78,6 +81,109 @@ STATIC void str_print(void (*print)(void *env, const char *fmt, ...), void *env,
7881 }
7982}
8083
84+ STATIC mp_obj_t str_make_new (mp_obj_t type_in , uint n_args , uint n_kw , const mp_obj_t * args ) {
85+ switch (n_args ) {
86+ case 0 :
87+ return MP_OBJ_NEW_QSTR (MP_QSTR_ );
88+
89+ case 1 :
90+ {
91+ vstr_t * vstr = vstr_new ();
92+ mp_obj_print_helper ((void (* )(void * , const char * , ...))vstr_printf , vstr , args [0 ], PRINT_STR );
93+ mp_obj_t s = mp_obj_new_str ((byte * )vstr -> buf , vstr -> len , false);
94+ vstr_free (vstr );
95+ return s ;
96+ }
97+
98+ case 2 :
99+ case 3 :
100+ {
101+ // TODO: validate 2nd/3rd args
102+ if (!MP_OBJ_IS_TYPE (args [0 ], & bytes_type )) {
103+ nlr_jump (mp_obj_new_exception_msg (& mp_type_TypeError , "bytes expected" ));
104+ }
105+ GET_STR_DATA_LEN (args [0 ], str_data , str_len );
106+ GET_STR_HASH (args [0 ], str_hash );
107+ mp_obj_str_t * o = str_new (& str_type , NULL , str_len );
108+ o -> data = str_data ;
109+ o -> hash = str_hash ;
110+ return o ;
111+ }
112+
113+ default :
114+ nlr_jump (mp_obj_new_exception_msg (& mp_type_TypeError , "str takes at most 3 arguments" ));
115+ }
116+ }
117+
118+ STATIC mp_obj_t bytes_make_new (mp_obj_t type_in , uint n_args , uint n_kw , const mp_obj_t * args ) {
119+ if (n_args == 0 ) {
120+ return mp_const_empty_bytes ;
121+ }
122+
123+ if (MP_OBJ_IS_STR (args [0 ])) {
124+ if (n_args < 2 || n_args > 3 ) {
125+ goto wrong_args ;
126+ }
127+ GET_STR_DATA_LEN (args [0 ], str_data , str_len );
128+ GET_STR_HASH (args [0 ], str_hash );
129+ mp_obj_str_t * o = str_new (& bytes_type , NULL , str_len );
130+ o -> data = str_data ;
131+ o -> hash = str_hash ;
132+ return o ;
133+ }
134+
135+ if (n_args > 1 ) {
136+ goto wrong_args ;
137+ }
138+
139+ if (MP_OBJ_IS_SMALL_INT (args [0 ])) {
140+ uint len = MP_OBJ_SMALL_INT_VALUE (args [0 ]);
141+ byte * data ;
142+
143+ mp_obj_t o = mp_obj_str_builder_start (& bytes_type , len , & data );
144+ memset (data , 0 , len );
145+ return mp_obj_str_builder_end (o );
146+ }
147+
148+ int len ;
149+ byte * data ;
150+ vstr_t * vstr = NULL ;
151+ mp_obj_t o = NULL ;
152+ // Try to create array of exact len if initializer len is known
153+ mp_obj_t len_in = mp_obj_len_maybe (args [0 ]);
154+ if (len_in == MP_OBJ_NULL ) {
155+ len = -1 ;
156+ vstr = vstr_new ();
157+ } else {
158+ len = MP_OBJ_SMALL_INT_VALUE (len_in );
159+ o = mp_obj_str_builder_start (& bytes_type , len , & data );
160+ }
161+
162+ mp_obj_t iterable = rt_getiter (args [0 ]);
163+ mp_obj_t item ;
164+ while ((item = rt_iternext (iterable )) != mp_const_stop_iteration ) {
165+ if (len == -1 ) {
166+ vstr_add_char (vstr , MP_OBJ_SMALL_INT_VALUE (item ));
167+ } else {
168+ * data ++ = MP_OBJ_SMALL_INT_VALUE (item );
169+ }
170+ }
171+
172+ if (len == -1 ) {
173+ vstr_shrink (vstr );
174+ // TODO: Optimize, borrow buffer from vstr
175+ len = vstr_len (vstr );
176+ o = mp_obj_str_builder_start (& bytes_type , len , & data );
177+ memcpy (data , vstr_str (vstr ), len );
178+ vstr_free (vstr );
179+ }
180+
181+ return mp_obj_str_builder_end (o );
182+
183+ wrong_args :
184+ nlr_jump (mp_obj_new_exception_msg (& mp_type_TypeError , "wrong number of arguments" ));
185+ }
186+
81187// like strstr but with specified length and allows \0 bytes
82188// TODO replace with something more efficient/standard
83189STATIC const byte * find_subbytes (const byte * haystack , uint hlen , const byte * needle , uint nlen ) {
@@ -619,6 +725,7 @@ const mp_obj_type_t str_type = {
619725 { & mp_type_type },
620726 .name = MP_QSTR_str ,
621727 .print = str_print ,
728+ .make_new = str_make_new ,
622729 .binary_op = str_binary_op ,
623730 .getiter = mp_obj_new_str_iterator ,
624731 .methods = str_type_methods ,
@@ -630,34 +737,45 @@ const mp_obj_type_t bytes_type = {
630737 { & mp_type_type },
631738 .name = MP_QSTR_bytes ,
632739 .print = str_print ,
740+ .make_new = bytes_make_new ,
633741 .binary_op = str_binary_op ,
634742 .getiter = mp_obj_new_bytes_iterator ,
635743 .methods = str_type_methods ,
636744};
637745
746+ // the zero-length bytes
747+ STATIC const mp_obj_str_t empty_bytes_obj = {{& bytes_type }, 0 , 0 , NULL };
748+ const mp_obj_t mp_const_empty_bytes = (mp_obj_t )& empty_bytes_obj ;
749+
638750mp_obj_t mp_obj_str_builder_start (const mp_obj_type_t * type , uint len , byte * * data ) {
639- mp_obj_str_t * o = m_new_obj_var (mp_obj_str_t , byte , len + 1 );
751+ mp_obj_str_t * o = m_new_obj (mp_obj_str_t );
640752 o -> base .type = type ;
641753 o -> len = len ;
642- * data = o -> data ;
754+ byte * p = m_new (byte , len + 1 );
755+ o -> data = p ;
756+ * data = p ;
643757 return o ;
644758}
645759
646760mp_obj_t mp_obj_str_builder_end (mp_obj_t o_in ) {
647- assert (MP_OBJ_IS_STR (o_in ));
648761 mp_obj_str_t * o = o_in ;
649762 o -> hash = qstr_compute_hash (o -> data , o -> len );
650- o -> data [o -> len ] = '\0' ; // for now we add null for compatibility with C ASCIIZ strings
763+ byte * p = (byte * )o -> data ;
764+ p [o -> len ] = '\0' ; // for now we add null for compatibility with C ASCIIZ strings
651765 return o ;
652766}
653767
654768STATIC mp_obj_t str_new (const mp_obj_type_t * type , const byte * data , uint len ) {
655- mp_obj_str_t * o = m_new_obj_var (mp_obj_str_t , byte , len + 1 );
769+ mp_obj_str_t * o = m_new_obj (mp_obj_str_t );
656770 o -> base .type = type ;
657- o -> hash = qstr_compute_hash (data , len );
658771 o -> len = len ;
659- memcpy (o -> data , data , len * sizeof (byte ));
660- o -> data [len ] = '\0' ; // for now we add null for compatibility with C ASCIIZ strings
772+ if (data ) {
773+ o -> hash = qstr_compute_hash (data , len );
774+ byte * p = m_new (byte , len + 1 );
775+ o -> data = p ;
776+ memcpy (p , data , len * sizeof (byte ));
777+ p [len ] = '\0' ; // for now we add null for compatibility with C ASCIIZ strings
778+ }
661779 return o ;
662780}
663781
0 commit comments