Skip to content

Commit 17a5a83

Browse files
committed
Implement str.rfind() and add tests for it.
1 parent 8562de6 commit 17a5a83

2 files changed

Lines changed: 62 additions & 50 deletions

File tree

py/objstr.c

Lines changed: 39 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -186,19 +186,26 @@ STATIC mp_obj_t bytes_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const m
186186

187187
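// like strstr but with specified length and allows \0 bytes; direction is the index step (+1 searches forward from the start, -1 searches backward from the end); returns a pointer to the match, or NULL if there is none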
188188
// TODO replace with something more efficient/standard
189-
STATIC const byte *find_subbytes(const byte *haystack, uint hlen, const byte *needle, uint nlen) {
189+
STATIC const byte *find_subbytes(const byte *haystack, machine_uint_t hlen, const byte *needle, machine_uint_t nlen, machine_int_t direction) {
190190
if (hlen >= nlen) {
191-
for (uint i = 0; i <= hlen - nlen; i++) {
192-
bool found = true;
193-
for (uint j = 0; j < nlen; j++) {
194-
if (haystack[i + j] != needle[j]) {
195-
found = false;
196-
break;
197-
}
191+
machine_uint_t str_index, str_index_end;
192+
if (direction > 0) {
193+
str_index = 0;
194+
str_index_end = hlen - nlen;
195+
} else {
196+
str_index = hlen - nlen;
197+
str_index_end = 0;
198+
}
199+
for (;;) {
200+
if (memcmp(&haystack[str_index], needle, nlen) == 0) {
201+
// found: return a pointer to the start of the match
202+
return haystack + str_index;
198203
}
199-
if (found) {
200-
return haystack + i;
204+
if (str_index == str_index_end) {
205+
// not found: the last candidate position has been checked
206+
break;
201207
}
208+
str_index += direction;
202209
}
203210
}
204211
return NULL;
@@ -260,7 +267,7 @@ STATIC mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
260267
/* NOTE `a in b` is `b.__contains__(a)` */
261268
if (MP_OBJ_IS_STR(rhs_in)) {
262269
GET_STR_DATA_LEN(rhs_in, rhs_data, rhs_len);
263-
return MP_BOOL(find_subbytes(lhs_data, lhs_len, rhs_data, rhs_len) != NULL);
270+
return MP_BOOL(find_subbytes(lhs_data, lhs_len, rhs_data, rhs_len, 1) != NULL);
264271
}
265272
break;
266273

@@ -382,7 +389,7 @@ STATIC mp_obj_t str_split(uint n_args, const mp_obj_t *args) {
382389
return res;
383390
}
384391

385-
STATIC mp_obj_t str_find(uint n_args, const mp_obj_t *args) {
392+
STATIC mp_obj_t str_finder(uint n_args, const mp_obj_t *args, machine_int_t direction) {
386393
assert(2 <= n_args && n_args <= 4);
387394
assert(MP_OBJ_IS_STR(args[0]));
388395
assert(MP_OBJ_IS_STR(args[1]));
@@ -399,20 +406,24 @@ STATIC mp_obj_t str_find(uint n_args, const mp_obj_t *args) {
399406
end = mp_get_index(&str_type, haystack_len, args[3], true);
400407
}
401408

402-
const byte *p = find_subbytes(haystack + start, haystack_len - start, needle, needle_len);
409+
const byte *p = find_subbytes(haystack + start, end - start, needle, needle_len, direction);
403410
if (p == NULL) {
404411
// not found
405412
return MP_OBJ_NEW_SMALL_INT(-1);
406413
} else {
407414
// found
408-
machine_int_t pos = p - haystack;
409-
if (pos + needle_len > end) {
410-
pos = -1;
411-
}
412-
return MP_OBJ_NEW_SMALL_INT(pos);
415+
return MP_OBJ_NEW_SMALL_INT(p - haystack);
413416
}
414417
}
415418

419+
STATIC mp_obj_t str_find(uint n_args, const mp_obj_t *args) {
420+
return str_finder(n_args, args, 1);
421+
}
422+
423+
STATIC mp_obj_t str_rfind(uint n_args, const mp_obj_t *args) {
424+
return str_finder(n_args, args, -1);
425+
}
426+
416427
// TODO: (Much) more variety in args
417428
STATIC mp_obj_t str_startswith(mp_obj_t self_in, mp_obj_t arg) {
418429
GET_STR_DATA_LEN(self_in, str, str_len);
@@ -423,15 +434,6 @@ STATIC mp_obj_t str_startswith(mp_obj_t self_in, mp_obj_t arg) {
423434
return MP_BOOL(memcmp(str, prefix, prefix_len) == 0);
424435
}
425436

426-
STATIC bool chr_in_str(const byte* const str, const machine_uint_t str_len, int c) {
427-
for (machine_uint_t i = 0; i < str_len; i++) {
428-
if (str[i] == c) {
429-
return true;
430-
}
431-
}
432-
return false;
433-
}
434-
435437
STATIC mp_obj_t str_strip(uint n_args, const mp_obj_t *args) {
436438
assert(1 <= n_args && n_args <= 2);
437439
assert(MP_OBJ_IS_STR(args[0]));
@@ -456,7 +458,7 @@ STATIC mp_obj_t str_strip(uint n_args, const mp_obj_t *args) {
456458
bool first_good_char_pos_set = false;
457459
machine_uint_t last_good_char_pos = 0;
458460
for (machine_uint_t i = 0; i < orig_str_len; i++) {
459-
if (!chr_in_str(chars_to_del, chars_to_del_len, orig_str[i])) {
461+
if (find_subbytes(chars_to_del, chars_to_del_len, &orig_str[i], 1, 1) == NULL) {
460462
last_good_char_pos = i;
461463
if (!first_good_char_pos_set) {
462464
first_good_char_pos = i;
@@ -546,7 +548,7 @@ STATIC mp_obj_t str_replace(uint n_args, const mp_obj_t *args) {
546548
const byte *old_occurrence;
547549
const byte *offset_ptr = str;
548550
machine_uint_t offset_num = 0;
549-
while ((old_occurrence = find_subbytes(offset_ptr, str_len - offset_num, old, old_len)) != NULL) {
551+
while ((old_occurrence = find_subbytes(offset_ptr, str_len - offset_num, old, old_len, 1)) != NULL) {
550552
// copy from just after end of last occurrence of to-be-replaced string to right before start of next occurrence
551553
if (data != NULL) {
552554
memcpy(data + replaced_str_index, offset_ptr, old_occurrence - offset_ptr);
@@ -646,27 +648,12 @@ STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, machine_int_t di
646648
result[2] = self_in;
647649
}
648650

649-
if (str_len >= sep_len) {
650-
machine_uint_t str_index, str_index_end;
651-
if (direction > 0) {
652-
str_index = 0;
653-
str_index_end = str_len - sep_len;
654-
} else {
655-
str_index = str_len - sep_len;
656-
str_index_end = 0;
657-
}
658-
for (;;) {
659-
if (memcmp(&str[str_index], sep, sep_len) == 0) {
660-
result[0] = mp_obj_new_str(str, str_index, false);
661-
result[1] = arg;
662-
result[2] = mp_obj_new_str(str + str_index + sep_len, str_len - str_index - sep_len, false);
663-
break;
664-
}
665-
if (str_index == str_index_end) {
666-
break;
667-
}
668-
str_index += direction;
669-
}
651+
const byte *position_ptr = find_subbytes(str, str_len, sep, sep_len, direction);
652+
if (position_ptr != NULL) {
653+
machine_uint_t position = position_ptr - str;
654+
result[0] = mp_obj_new_str(str, position, false);
655+
result[1] = arg;
656+
result[2] = mp_obj_new_str(str + position + sep_len, str_len - position - sep_len, false);
670657
}
671658

672659
return mp_obj_new_tuple(3, result);
@@ -695,6 +682,7 @@ STATIC machine_int_t str_get_buffer(mp_obj_t self_in, buffer_info_t *bufinfo, in
695682
}
696683

697684
STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_find_obj, 2, 4, str_find);
685+
STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rfind_obj, 2, 4, str_rfind);
698686
STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_join_obj, str_join);
699687
STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_split_obj, 1, 3, str_split);
700688
STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_startswith_obj, str_startswith);
@@ -707,6 +695,7 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_rpartition_obj, str_rpartition);
707695

708696
STATIC const mp_method_t str_type_methods[] = {
709697
{ "find", &str_find_obj },
698+
{ "rfind", &str_rfind_obj },
710699
{ "join", &str_join_obj },
711700
{ "split", &str_split_obj },
712701
{ "startswith", &str_startswith_obj },

tests/basics/string_rfind.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
print("hello world".rfind("ll"))
2+
print("hello world".rfind("ll", None))
3+
print("hello world".rfind("ll", 1))
4+
print("hello world".rfind("ll", 1, None))
5+
print("hello world".rfind("ll", None, None))
6+
print("hello world".rfind("ll", 1, -1))
7+
print("hello world".rfind("ll", 1, 1))
8+
print("hello world".rfind("ll", 1, 2))
9+
print("hello world".rfind("ll", 1, 3))
10+
print("hello world".rfind("ll", 1, 4))
11+
print("hello world".rfind("ll", 1, 5))
12+
print("hello world".rfind("ll", -100))
13+
print("0000".rfind('0'))
14+
print("0000".rfind('0', 0))
15+
print("0000".rfind('0', 1))
16+
print("0000".rfind('0', 2))
17+
print("0000".rfind('0', 3))
18+
print("0000".rfind('0', 4))
19+
print("0000".rfind('0', 5))
20+
print("0000".rfind('-1', 3))
21+
print("0000".rfind('1', 3))
22+
print("0000".rfind('1', 4))
23+
print("0000".rfind('1', 5))

0 commit comments

Comments
 (0)