@@ -186,19 +186,26 @@ STATIC mp_obj_t bytes_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const m
186186
187187// like strstr but with specified length and allows \0 bytes
188188// TODO replace with something more efficient/standard
189- STATIC const byte * find_subbytes (const byte * haystack , uint hlen , const byte * needle , uint nlen ) {
189+ STATIC const byte * find_subbytes (const byte * haystack , machine_uint_t hlen , const byte * needle , machine_uint_t nlen , machine_int_t direction ) {
190190 if (hlen >= nlen ) {
191- for (uint i = 0 ; i <= hlen - nlen ; i ++ ) {
192- bool found = true;
193- for (uint j = 0 ; j < nlen ; j ++ ) {
194- if (haystack [i + j ] != needle [j ]) {
195- found = false;
196- break ;
197- }
191+ machine_uint_t str_index , str_index_end ;
192+ if (direction > 0 ) {
193+ str_index = 0 ;
194+ str_index_end = hlen - nlen ;
195+ } else {
196+ str_index = hlen - nlen ;
197+ str_index_end = 0 ;
198+ }
199+ for (;;) {
200+ if (memcmp (& haystack [str_index ], needle , nlen ) == 0 ) {
201+ //found
202+ return haystack + str_index ;
198203 }
199- if (found ) {
200- return haystack + i ;
204+ if (str_index == str_index_end ) {
205+ //not found
206+ break ;
201207 }
208+ str_index += direction ;
202209 }
203210 }
204211 return NULL ;
@@ -260,7 +267,7 @@ STATIC mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
260267 /* NOTE `a in b` is `b.__contains__(a)` */
261268 if (MP_OBJ_IS_STR (rhs_in )) {
262269 GET_STR_DATA_LEN (rhs_in , rhs_data , rhs_len );
263- return MP_BOOL (find_subbytes (lhs_data , lhs_len , rhs_data , rhs_len ) != NULL );
270+ return MP_BOOL (find_subbytes (lhs_data , lhs_len , rhs_data , rhs_len , 1 ) != NULL );
264271 }
265272 break ;
266273
@@ -382,7 +389,7 @@ STATIC mp_obj_t str_split(uint n_args, const mp_obj_t *args) {
382389 return res ;
383390}
384391
385- STATIC mp_obj_t str_find (uint n_args , const mp_obj_t * args ) {
392+ STATIC mp_obj_t str_finder (uint n_args , const mp_obj_t * args , machine_int_t direction ) {
386393 assert (2 <= n_args && n_args <= 4 );
387394 assert (MP_OBJ_IS_STR (args [0 ]));
388395 assert (MP_OBJ_IS_STR (args [1 ]));
@@ -399,20 +406,24 @@ STATIC mp_obj_t str_find(uint n_args, const mp_obj_t *args) {
399406 end = mp_get_index (& str_type , haystack_len , args [3 ], true);
400407 }
401408
402- const byte * p = find_subbytes (haystack + start , haystack_len - start , needle , needle_len );
409+ const byte * p = find_subbytes (haystack + start , end - start , needle , needle_len , direction );
403410 if (p == NULL ) {
404411 // not found
405412 return MP_OBJ_NEW_SMALL_INT (-1 );
406413 } else {
407414 // found
408- machine_int_t pos = p - haystack ;
409- if (pos + needle_len > end ) {
410- pos = -1 ;
411- }
412- return MP_OBJ_NEW_SMALL_INT (pos );
415+ return MP_OBJ_NEW_SMALL_INT (p - haystack );
413416 }
414417}
415418
// str.find entry point: forward substring search.
// Thin wrapper that hands its arguments unchanged to str_finder with
// direction 1, so the scan starts at the beginning of the searched range.
// Returns whatever str_finder returns: the match index as a small int,
// or -1 when the needle is not found.
419+ STATIC mp_obj_t str_find (uint n_args , const mp_obj_t * args ) {
420+ return str_finder (n_args , args , 1 );
421+ }
422+
// str.rfind entry point: reverse substring search.
// Thin wrapper that hands its arguments unchanged to str_finder with
// direction -1, so the scan starts at the last possible match position and
// steps backwards (the first hit is therefore the right-most occurrence).
// Returns whatever str_finder returns: the match index as a small int,
// or -1 when the needle is not found.
423+ STATIC mp_obj_t str_rfind (uint n_args , const mp_obj_t * args ) {
424+ return str_finder (n_args , args , -1 );
425+ }
426+
416427// TODO: (Much) more variety in args
417428STATIC mp_obj_t str_startswith (mp_obj_t self_in , mp_obj_t arg ) {
418429 GET_STR_DATA_LEN (self_in , str , str_len );
@@ -423,15 +434,6 @@ STATIC mp_obj_t str_startswith(mp_obj_t self_in, mp_obj_t arg) {
423434 return MP_BOOL (memcmp (str , prefix , prefix_len ) == 0 );
424435}
425436
// Linear membership test: returns true if any of the first str_len bytes of
// str compares equal to c, false otherwise (including when str_len is 0).
// Works on an explicit length, so embedded \0 bytes are treated like any
// other byte.
426- STATIC bool chr_in_str (const byte * const str , const machine_uint_t str_len , int c ) {
427- for (machine_uint_t i = 0 ; i < str_len ; i ++ ) {
428- if (str [i ] == c ) {
429- return true;
430- }
431- }
432- return false;
433- }
434-
435437STATIC mp_obj_t str_strip (uint n_args , const mp_obj_t * args ) {
436438 assert (1 <= n_args && n_args <= 2 );
437439 assert (MP_OBJ_IS_STR (args [0 ]));
@@ -456,7 +458,7 @@ STATIC mp_obj_t str_strip(uint n_args, const mp_obj_t *args) {
456458 bool first_good_char_pos_set = false;
457459 machine_uint_t last_good_char_pos = 0 ;
458460 for (machine_uint_t i = 0 ; i < orig_str_len ; i ++ ) {
459- if (! chr_in_str (chars_to_del , chars_to_del_len , orig_str [i ]) ) {
461+ if (find_subbytes (chars_to_del , chars_to_del_len , & orig_str [i ], 1 , 1 ) == NULL ) {
460462 last_good_char_pos = i ;
461463 if (!first_good_char_pos_set ) {
462464 first_good_char_pos = i ;
@@ -546,7 +548,7 @@ STATIC mp_obj_t str_replace(uint n_args, const mp_obj_t *args) {
546548 const byte * old_occurrence ;
547549 const byte * offset_ptr = str ;
548550 machine_uint_t offset_num = 0 ;
549- while ((old_occurrence = find_subbytes (offset_ptr , str_len - offset_num , old , old_len )) != NULL ) {
551+ while ((old_occurrence = find_subbytes (offset_ptr , str_len - offset_num , old , old_len , 1 )) != NULL ) {
550552 // copy from just after end of last occurrence of to-be-replaced string to right before start of next occurrence
551553 if (data != NULL ) {
552554 memcpy (data + replaced_str_index , offset_ptr , old_occurrence - offset_ptr );
@@ -646,27 +648,12 @@ STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, machine_int_t di
646648 result [2 ] = self_in ;
647649 }
648650
649- if (str_len >= sep_len ) {
650- machine_uint_t str_index , str_index_end ;
651- if (direction > 0 ) {
652- str_index = 0 ;
653- str_index_end = str_len - sep_len ;
654- } else {
655- str_index = str_len - sep_len ;
656- str_index_end = 0 ;
657- }
658- for (;;) {
659- if (memcmp (& str [str_index ], sep , sep_len ) == 0 ) {
660- result [0 ] = mp_obj_new_str (str , str_index , false);
661- result [1 ] = arg ;
662- result [2 ] = mp_obj_new_str (str + str_index + sep_len , str_len - str_index - sep_len , false);
663- break ;
664- }
665- if (str_index == str_index_end ) {
666- break ;
667- }
668- str_index += direction ;
669- }
651+ const byte * position_ptr = find_subbytes (str , str_len , sep , sep_len , direction );
652+ if (position_ptr != NULL ) {
653+ machine_uint_t position = position_ptr - str ;
654+ result [0 ] = mp_obj_new_str (str , position , false);
655+ result [1 ] = arg ;
656+ result [2 ] = mp_obj_new_str (str + position + sep_len , str_len - position - sep_len , false);
670657 }
671658
672659 return mp_obj_new_tuple (3 , result );
@@ -695,6 +682,7 @@ STATIC machine_int_t str_get_buffer(mp_obj_t self_in, buffer_info_t *bufinfo, in
695682}
696683
// Function objects wrapping the C implementations of the str methods; the
// *_obj names defined here are the ones referenced from str_type_methods.
// NOTE(review): the two numbers passed to the VAR_BETWEEN form look like the
// minimum and maximum argument counts (2 and 4 match the assert at the top
// of str_finder) -- confirm against the macro definition.
697684STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN (str_find_obj , 2 , 4 , str_find );
685+ STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN (str_rfind_obj , 2 , 4 , str_rfind );
698686STATIC MP_DEFINE_CONST_FUN_OBJ_2 (str_join_obj , str_join );
699687STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN (str_split_obj , 1 , 3 , str_split );
700688STATIC MP_DEFINE_CONST_FUN_OBJ_2 (str_startswith_obj , str_startswith );
@@ -707,6 +695,7 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_rpartition_obj, str_rpartition);
707695
708696STATIC const mp_method_t str_type_methods [] = {
709697 { "find" , & str_find_obj },
698+ { "rfind" , & str_rfind_obj },
710699 { "join" , & str_join_obj },
711700 { "split" , & str_split_obj },
712701 { "startswith" , & str_startswith_obj },
0 commit comments