Skip to content

Commit cc80c4d

Browse files
committed
py/objstr: Make dedicated splitlines function, supporting different newlines.
It now supports \n, \r and \r\n as newline separators. Adds 56 bytes to stmhal and 80 bytes to unix x86-64. Fixes issue adafruit#1689.
1 parent 1e38807 commit cc80c4d

2 files changed

Lines changed: 64 additions & 30 deletions

File tree

py/objstr.c

Lines changed: 38 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -464,9 +464,7 @@ STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
464464
return mp_obj_new_str_from_vstr(self_type, &vstr);
465465
}
466466

467-
enum {SPLIT = 0, KEEP = 1, SPLITLINES = 2};
468-
469-
STATIC inline mp_obj_t str_split_internal(mp_uint_t n_args, const mp_obj_t *args, int type) {
467+
mp_obj_t mp_obj_str_split(size_t n_args, const mp_obj_t *args) {
470468
const mp_obj_type_t *self_type = mp_obj_get_type(args[0]);
471469
mp_int_t splits = -1;
472470
mp_obj_t sep = mp_const_none;
@@ -527,13 +525,7 @@ STATIC inline mp_obj_t str_split_internal(mp_uint_t n_args, const mp_obj_t *args
527525
}
528526
s++;
529527
}
530-
mp_uint_t sub_len = s - start;
531-
if (MP_LIKELY(!(sub_len == 0 && s == top && (type && SPLITLINES)))) {
532-
if (start + sub_len != top && (type & KEEP)) {
533-
sub_len++;
534-
}
535-
mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, start, sub_len));
536-
}
528+
mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, start, s - start));
537529
if (s >= top) {
538530
break;
539531
}
@@ -547,25 +539,49 @@ STATIC inline mp_obj_t str_split_internal(mp_uint_t n_args, const mp_obj_t *args
547539
return res;
548540
}
549541

550-
mp_obj_t mp_obj_str_split(size_t n_args, const mp_obj_t *args) {
551-
return str_split_internal(n_args, args, SPLIT);
552-
}
553-
554542
#if MICROPY_PY_BUILTINS_STR_SPLITLINES
555543
STATIC mp_obj_t str_splitlines(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
544+
enum { ARG_keepends };
556545
static const mp_arg_t allowed_args[] = {
557546
{ MP_QSTR_keepends, MP_ARG_BOOL, {.u_bool = false} },
558547
};
559548

560549
// parse args
561-
struct {
562-
mp_arg_val_t keepends;
563-
} args;
564-
mp_arg_parse_all(n_args - 1, pos_args + 1, kw_args,
565-
MP_ARRAY_SIZE(allowed_args), allowed_args, (mp_arg_val_t*)&args);
566-
567-
mp_obj_t new_args[2] = {pos_args[0], MP_OBJ_NEW_QSTR(MP_QSTR__0x0a_)};
568-
return str_split_internal(2, new_args, SPLITLINES | (args.keepends.u_bool ? KEEP : 0));
550+
mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
551+
mp_arg_parse_all(n_args - 1, pos_args + 1, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
552+
553+
const mp_obj_type_t *self_type = mp_obj_get_type(pos_args[0]);
554+
mp_obj_t res = mp_obj_new_list(0, NULL);
555+
556+
GET_STR_DATA_LEN(pos_args[0], s, len);
557+
const byte *top = s + len;
558+
559+
while (s < top) {
560+
const byte *start = s;
561+
size_t match = 0;
562+
while (s < top) {
563+
if (*s == '\n') {
564+
match = 1;
565+
break;
566+
} else if (*s == '\r') {
567+
if (s[1] == '\n') {
568+
match = 2;
569+
} else {
570+
match = 1;
571+
}
572+
break;
573+
}
574+
s++;
575+
}
576+
size_t sub_len = s - start;
577+
if (args[ARG_keepends].u_bool) {
578+
sub_len += match;
579+
}
580+
mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, start, sub_len));
581+
s += match;
582+
}
583+
584+
return res;
569585
}
570586
#endif
571587

tests/basics/string_splitlines.py

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,31 @@
1-
try:
2-
str.splitlines
3-
except:
4-
import sys
5-
print("SKIP")
6-
sys.exit()
1+
# test string.splitlines() method
72

3+
# test \n as newline
84
print("foo\nbar".splitlines())
95
print("foo\nbar\n".splitlines())
6+
print("foo and\nbar\n".splitlines())
7+
print("foo\nbar\n\n".splitlines())
8+
print("foo\n\nbar\n\n".splitlines())
9+
print("\nfoo\nbar\n".splitlines())
10+
11+
# test \r as newline
12+
print("foo\rbar\r".splitlines())
13+
print("\rfoo and\r\rbar\r".splitlines())
14+
15+
# test \r\n as newline
16+
print("foo\r\nbar\r\n".splitlines())
17+
print("\r\nfoo and\r\n\r\nbar\r\n".splitlines())
18+
19+
# test keepends arg
1020
print("foo\nbar".splitlines(True))
1121
print("foo\nbar\n".splitlines(True))
12-
print("foo\nbar".splitlines(keepends=True))
13-
print("foo\nbar\n".splitlines(keepends=True))
22+
print("foo\nbar\n\n".splitlines(True))
23+
print("foo\rbar".splitlines(keepends=True))
24+
print("foo\rbar\r\r".splitlines(keepends=True))
25+
print("foo\r\nbar".splitlines(keepends=True))
26+
print("foo\r\nbar\r\n\r\n".splitlines(keepends=True))
27+
28+
# test splitting bytes objects
29+
print(b"foo\nbar".splitlines())
30+
print(b"foo\nbar\n".splitlines())
31+
print(b"foo\r\nbar\r\n\r\n".splitlines(True))

0 commit comments

Comments
 (0)