@@ -28,16 +28,21 @@ typedef struct _mp_obj_str_t {
// Use this macro to extract the string data and length from a string object.
//
// It declares two new locals in the enclosing scope, `str_data` (const byte *)
// and `str_len` (uint), so it has to be placed where declarations are allowed
// and the chosen names must not already be in use there.
// For a qstr object the data and length come from qstr_data(); otherwise they
// are read from the `data` and `len` fields of the mp_obj_str_t.
//
// Note: str_obj_in is expanded more than once, so pass an expression without
// side effects.  There must be no space between the macro name and the opening
// parenthesis, otherwise the preprocessor treats it as an object-like macro.
#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) \
    const byte *str_data; \
    uint str_len; \
    if (MP_OBJ_IS_QSTR(str_obj_in)) { \
        str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); \
    } else { \
        str_len = ((mp_obj_str_t*)(str_obj_in))->len; \
        str_data = ((mp_obj_str_t*)(str_obj_in))->data; \
    }
3030
31- static mp_obj_t mp_obj_new_str_iterator (mp_obj_t str , int cur );
31+ static mp_obj_t mp_obj_new_str_iterator (mp_obj_t str );
32+ static mp_obj_t mp_obj_new_bytes_iterator (mp_obj_t str );
3233
3334/******************************************************************************/
3435/* str */
3536
3637void str_print (void (* print )(void * env , const char * fmt , ...), void * env , mp_obj_t self_in , mp_print_kind_t kind ) {
3738 GET_STR_DATA_LEN (self_in , str_data , str_len );
38- if (kind == PRINT_STR ) {
39+ bool is_bytes = MP_OBJ_IS_TYPE (self_in , & bytes_type );
40+ if (kind == PRINT_STR && !is_bytes ) {
3941 print (env , "%.*s" , str_len , str_data );
4042 } else {
43+ if (is_bytes ) {
44+ print (env , "b" );
45+ }
4146 // TODO need to escape chars etc
4247 print (env , "'%.*s'" , str_len , str_data );
4348 }
@@ -71,7 +76,11 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
7176 // ["no", "yes"][1 == 2] is common idiom
7277 if (MP_OBJ_IS_SMALL_INT (rhs_in )) {
7378 uint index = mp_get_index (mp_obj_get_type (lhs_in ), lhs_len , rhs_in );
74- return mp_obj_new_str (lhs_data + index , 1 , true);
79+ if (MP_OBJ_IS_TYPE (lhs_in , & bytes_type )) {
80+ return MP_OBJ_NEW_SMALL_INT (lhs_data [index ]);
81+ } else {
82+ return mp_obj_new_str (lhs_data + index , 1 , true);
83+ }
7584#if MICROPY_ENABLE_SLICE
7685 } else if (MP_OBJ_IS_TYPE (rhs_in , & slice_type )) {
7786 machine_int_t start , stop , step ;
@@ -120,7 +129,7 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
120129
121130 // code for non-qstr
122131 byte * data ;
123- mp_obj_t s = mp_obj_str_builder_start (alloc_len , & data );
132+ mp_obj_t s = mp_obj_str_builder_start (mp_obj_get_type ( lhs_in ), alloc_len , & data );
124133 memcpy (data , lhs_data , lhs_len );
125134 memcpy (data + lhs_len , rhs_data , rhs_len );
126135 return mp_obj_str_builder_end (s );
@@ -143,7 +152,7 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
143152 }
144153 int n = MP_OBJ_SMALL_INT_VALUE (rhs_in );
145154 byte * data ;
146- mp_obj_t s = mp_obj_str_builder_start (lhs_len * n , & data );
155+ mp_obj_t s = mp_obj_str_builder_start (mp_obj_get_type ( lhs_in ), lhs_len * n , & data );
147156 mp_seq_multiply (lhs_data , sizeof (* lhs_data ), lhs_len , n , data );
148157 return mp_obj_str_builder_end (s );
149158 }
@@ -152,10 +161,6 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
152161 return MP_OBJ_NULL ; // op not supported
153162}
154163
155- static mp_obj_t str_getiter (mp_obj_t o_in ) {
156- return mp_obj_new_str_iterator (o_in , 0 );
157- }
158-
159164mp_obj_t str_join (mp_obj_t self_in , mp_obj_t arg ) {
160165 assert (MP_OBJ_IS_STR (self_in ));
161166
@@ -188,7 +193,7 @@ mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
188193
189194 // make joined string
190195 byte * data ;
191- mp_obj_t joined_str = mp_obj_str_builder_start (required_len , & data );
196+ mp_obj_t joined_str = mp_obj_str_builder_start (mp_obj_get_type ( self_in ), required_len , & data );
192197 for (int i = 0 ; i < seq_len ; i ++ ) {
193198 if (i > 0 ) {
194199 memcpy (data , sep_str , sep_len );
@@ -393,13 +398,23 @@ const mp_obj_type_t str_type = {
393398 "str" ,
394399 .print = str_print ,
395400 .binary_op = str_binary_op ,
396- .getiter = str_getiter ,
401+ .getiter = mp_obj_new_str_iterator ,
402+ .methods = str_type_methods ,
403+ };
404+
405+ // Reuses most of methods from str
406+ const mp_obj_type_t bytes_type = {
407+ { & mp_const_type },
408+ "bytes" ,
409+ .print = str_print ,
410+ .binary_op = str_binary_op ,
411+ .getiter = mp_obj_new_bytes_iterator ,
397412 .methods = str_type_methods ,
398413};
399414
400- mp_obj_t mp_obj_str_builder_start (uint len , byte * * data ) {
415+ mp_obj_t mp_obj_str_builder_start (const mp_obj_type_t * type , uint len , byte * * data ) {
401416 mp_obj_str_t * o = m_new_obj_var (mp_obj_str_t , byte , len + 1 );
402- o -> base .type = & str_type ;
417+ o -> base .type = type ;
403418 o -> len = len ;
404419 * data = o -> data ;
405420 return o ;
@@ -413,6 +428,16 @@ mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in) {
413428 return o ;
414429}
415430
431+ static mp_obj_t str_new (const mp_obj_type_t * type , const byte * data , uint len ) {
432+ mp_obj_str_t * o = m_new_obj_var (mp_obj_str_t , byte , len + 1 );
433+ o -> base .type = type ;
434+ o -> hash = qstr_compute_hash (data , len );
435+ o -> len = len ;
436+ memcpy (o -> data , data , len * sizeof (byte ));
437+ o -> data [len ] = '\0' ; // for now we add null for compatibility with C ASCIIZ strings
438+ return o ;
439+ }
440+
416441mp_obj_t mp_obj_new_str (const byte * data , uint len , bool make_qstr_if_not_already ) {
417442 qstr q = qstr_find_strn (data , len );
418443 if (q != MP_QSTR_NULL ) {
@@ -423,16 +448,14 @@ mp_obj_t mp_obj_new_str(const byte* data, uint len, bool make_qstr_if_not_alread
423448 return MP_OBJ_NEW_QSTR (qstr_from_strn ((const char * )data , len ));
424449 } else {
425450 // no existing qstr, don't make one
426- mp_obj_str_t * o = m_new_obj_var (mp_obj_str_t , byte , len + 1 );
427- o -> base .type = & str_type ;
428- o -> hash = qstr_compute_hash (data , len );
429- o -> len = len ;
430- memcpy (o -> data , data , len * sizeof (byte ));
431- o -> data [len ] = '\0' ; // for now we add null for compatibility with C ASCIIZ strings
432- return o ;
451+ return str_new (& str_type , data , len );
433452 }
434453}
435454
455+ mp_obj_t mp_obj_new_bytes (const byte * data , uint len ) {
456+ return str_new (& bytes_type , data , len );
457+ }
458+
436459bool mp_obj_str_equal (mp_obj_t s1 , mp_obj_t s2 ) {
437460 if (MP_OBJ_IS_QSTR (s1 ) && MP_OBJ_IS_QSTR (s2 )) {
438461 return s1 == s2 ;
@@ -522,10 +545,36 @@ static const mp_obj_type_t str_it_type = {
522545 .iternext = str_it_iternext ,
523546};
524547
525- mp_obj_t mp_obj_new_str_iterator (mp_obj_t str , int cur ) {
548+ mp_obj_t bytes_it_iternext (mp_obj_t self_in ) {
549+ mp_obj_str_it_t * self = self_in ;
550+ GET_STR_DATA_LEN (self -> str , str , len );
551+ if (self -> cur < len ) {
552+ mp_obj_t o_out = MP_OBJ_NEW_SMALL_INT (str [self -> cur ]);
553+ self -> cur += 1 ;
554+ return o_out ;
555+ } else {
556+ return mp_const_stop_iteration ;
557+ }
558+ }
559+
560+ static const mp_obj_type_t bytes_it_type = {
561+ { & mp_const_type },
562+ "bytes_iterator" ,
563+ .iternext = bytes_it_iternext ,
564+ };
565+
566+ mp_obj_t mp_obj_new_str_iterator (mp_obj_t str ) {
526567 mp_obj_str_it_t * o = m_new_obj (mp_obj_str_it_t );
527568 o -> base .type = & str_it_type ;
528569 o -> str = str ;
529- o -> cur = cur ;
570+ o -> cur = 0 ;
571+ return o ;
572+ }
573+
574+ mp_obj_t mp_obj_new_bytes_iterator (mp_obj_t str ) {
575+ mp_obj_str_it_t * o = m_new_obj (mp_obj_str_it_t );
576+ o -> base .type = & bytes_it_type ;
577+ o -> str = str ;
578+ o -> cur = 0 ;
530579 return o ;
531580}
0 commit comments