Skip to content

Commit c55a4d8

Browse files
committed
py: Make bytes objs work with more str methods; add tests.
1 parent 7fdb8d7 commit c55a4d8

File tree

7 files changed

+183
-18
lines changed

7 files changed

+183
-18
lines changed

py/objstr.c

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@ STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, mp_uint_t n_args, const mp_o
4646
mp_obj_t mp_obj_new_str_iterator(mp_obj_t str);
4747
STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str);
4848
STATIC NORETURN void bad_implicit_conversion(mp_obj_t self_in);
49-
STATIC NORETURN void arg_type_mixup(void);
5049

5150
/******************************************************************************/
5251
/* str */
@@ -525,7 +524,7 @@ STATIC mp_obj_t str_split(mp_uint_t n_args, const mp_obj_t *args) {
525524
} else {
526525
// sep given
527526
if (mp_obj_get_type(sep) != self_type) {
528-
arg_type_mixup();
527+
bad_implicit_conversion(sep);
529528
}
530529

531530
mp_uint_t sep_len;
@@ -627,7 +626,7 @@ STATIC mp_obj_t str_finder(mp_uint_t n_args, const mp_obj_t *args, mp_int_t dire
627626
assert(MP_OBJ_IS_STR_OR_BYTES(args[0]));
628627

629628
// check argument type
630-
if (!MP_OBJ_IS_STR(args[1])) {
629+
if (mp_obj_get_type(args[1]) != self_type) {
631630
bad_implicit_conversion(args[1]);
632631
}
633632

@@ -720,7 +719,7 @@ STATIC mp_obj_t str_uni_strip(int type, mp_uint_t n_args, const mp_obj_t *args)
720719
chars_to_del_len = sizeof(whitespace);
721720
} else {
722721
if (mp_obj_get_type(args[1]) != self_type) {
723-
arg_type_mixup();
722+
bad_implicit_conversion(args[1]);
724723
}
725724
GET_STR_DATA_LEN(args[1], s, l);
726725
chars_to_del = s;
@@ -759,7 +758,11 @@ STATIC mp_obj_t str_uni_strip(int type, mp_uint_t n_args, const mp_obj_t *args)
759758

760759
if (!first_good_char_pos_set) {
761760
// every character was stripped; return an empty str or empty bytes to match self_type
762-
return MP_OBJ_NEW_QSTR(MP_QSTR_);
761+
if (self_type == &mp_type_str) {
762+
return MP_OBJ_NEW_QSTR(MP_QSTR_);
763+
} else {
764+
return mp_const_empty_bytes;
765+
}
763766
}
764767

765768
assert(last_good_char_pos >= first_good_char_pos);
@@ -1470,11 +1473,13 @@ STATIC mp_obj_t str_replace(mp_uint_t n_args, const mp_obj_t *args) {
14701473

14711474
// check argument types
14721475

1473-
if (!MP_OBJ_IS_STR(args[1])) {
1476+
const mp_obj_type_t *self_type = mp_obj_get_type(args[0]);
1477+
1478+
if (mp_obj_get_type(args[1]) != self_type) {
14741479
bad_implicit_conversion(args[1]);
14751480
}
14761481

1477-
if (!MP_OBJ_IS_STR(args[2])) {
1482+
if (mp_obj_get_type(args[2]) != self_type) {
14781483
bad_implicit_conversion(args[2]);
14791484
}
14801485

@@ -1543,7 +1548,7 @@ STATIC mp_obj_t str_replace(mp_uint_t n_args, const mp_obj_t *args) {
15431548
return args[0];
15441549
} else {
15451550
// substr found, allocate new string
1546-
replaced_str = mp_obj_str_builder_start(mp_obj_get_type(args[0]), replaced_str_index, &data);
1551+
replaced_str = mp_obj_str_builder_start(self_type, replaced_str_index, &data);
15471552
assert(data != NULL);
15481553
}
15491554
} else {
@@ -1561,7 +1566,7 @@ STATIC mp_obj_t str_count(mp_uint_t n_args, const mp_obj_t *args) {
15611566
assert(MP_OBJ_IS_STR_OR_BYTES(args[0]));
15621567

15631568
// check argument type
1564-
if (!MP_OBJ_IS_STR(args[1])) {
1569+
if (mp_obj_get_type(args[1]) != self_type) {
15651570
bad_implicit_conversion(args[1]);
15661571
}
15671572

@@ -1597,12 +1602,10 @@ STATIC mp_obj_t str_count(mp_uint_t n_args, const mp_obj_t *args) {
15971602
}
15981603

15991604
STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, mp_int_t direction) {
1600-
if (!MP_OBJ_IS_STR_OR_BYTES(self_in)) {
1601-
assert(0);
1602-
}
1605+
assert(MP_OBJ_IS_STR_OR_BYTES(self_in));
16031606
mp_obj_type_t *self_type = mp_obj_get_type(self_in);
16041607
if (self_type != mp_obj_get_type(arg)) {
1605-
arg_type_mixup();
1608+
bad_implicit_conversion(arg);
16061609
}
16071610

16081611
GET_STR_DATA_LEN(self_in, str, str_len);
@@ -1612,7 +1615,16 @@ STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, mp_int_t directi
16121615
nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "empty separator"));
16131616
}
16141617

1615-
mp_obj_t result[] = {MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_)};
1618+
mp_obj_t result[3];
1619+
if (self_type == &mp_type_str) {
1620+
result[0] = MP_OBJ_NEW_QSTR(MP_QSTR_);
1621+
result[1] = MP_OBJ_NEW_QSTR(MP_QSTR_);
1622+
result[2] = MP_OBJ_NEW_QSTR(MP_QSTR_);
1623+
} else {
1624+
result[0] = mp_const_empty_bytes;
1625+
result[1] = mp_const_empty_bytes;
1626+
result[2] = mp_const_empty_bytes;
1627+
}
16161628

16171629
if (direction > 0) {
16181630
result[0] = self_in;
@@ -1953,10 +1965,6 @@ STATIC void bad_implicit_conversion(mp_obj_t self_in) {
19531965
}
19541966
}
19551967

1956-
STATIC void arg_type_mixup(void) {
1957-
nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "Can't mix str and bytes arguments"));
1958-
}
1959-
19601968
mp_uint_t mp_obj_str_get_hash(mp_obj_t self_in) {
19611969
// TODO: This has too big overhead for hash accessor
19621970
if (MP_OBJ_IS_STR_OR_BYTES(self_in)) {

tests/basics/bytes_count.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
print(b"".count(b""))
2+
print(b"".count(b"a"))
3+
print(b"a".count(b""))
4+
print(b"a".count(b"a"))
5+
print(b"a".count(b"b"))
6+
print(b"b".count(b"a"))
7+
8+
print(b"aaa".count(b""))
9+
print(b"aaa".count(b"a"))
10+
print(b"aaa".count(b"aa"))
11+
print(b"aaa".count(b"aaa"))
12+
print(b"aaa".count(b"aaaa"))
13+
14+
print(b"aaaa".count(b""))
15+
print(b"aaaa".count(b"a"))
16+
print(b"aaaa".count(b"aa"))
17+
print(b"aaaa".count(b"aaa"))
18+
print(b"aaaa".count(b"aaaa"))
19+
print(b"aaaa".count(b"aaaaa"))
20+
21+
print(b"aaa".count(b"", 1))
22+
print(b"aaa".count(b"", 2))
23+
print(b"aaa".count(b"", 3))
24+
25+
print(b"aaa".count(b"", 1, 2))
26+
27+
print(b"asdfasdfaaa".count(b"asdf", -100))
28+
print(b"asdfasdfaaa".count(b"asdf", -8))
29+
print(b"asdf".count(b's', True))
30+
print(b"asdf".count(b'a', True))
31+
print(b"asdf".count(b'a', False))
32+
print(b"asdf".count(b'a', 1 == 2))
33+
print(b"hello world".count(b'l'))
34+
print(b"hello world".count(b'l', 5))
35+
print(b"hello world".count(b'l', 3))
36+
print(b"hello world".count(b'z', 3, 6))
37+
print(b"aaaa".count(b'a'))
38+
print(b"aaaa".count(b'a', 0, 3))
39+
print(b"aaaa".count(b'a', 0, 4))
40+
print(b"aaaa".count(b'a', 0, 5))
41+
print(b"aaaa".count(b'a', 1, 5))
42+
print(b"aaaa".count(b'a', -1, 5))
43+
print(b"abbabba".count(b"abba"))
44+
45+
def t():
46+
return True
47+
48+
print(b"0000".count(b'0', t()))

tests/basics/bytes_find.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
print(b"hello world".find(b"ll"))
2+
print(b"hello world".find(b"ll", None))
3+
print(b"hello world".find(b"ll", 1))
4+
print(b"hello world".find(b"ll", 1, None))
5+
print(b"hello world".find(b"ll", None, None))
6+
print(b"hello world".find(b"ll", 1, -1))
7+
print(b"hello world".find(b"ll", 1, 1))
8+
print(b"hello world".find(b"ll", 1, 2))
9+
print(b"hello world".find(b"ll", 1, 3))
10+
print(b"hello world".find(b"ll", 1, 4))
11+
print(b"hello world".find(b"ll", 1, 5))
12+
print(b"hello world".find(b"ll", -100))
13+
print(b"0000".find(b'0'))
14+
print(b"0000".find(b'0', 0))
15+
print(b"0000".find(b'0', 1))
16+
print(b"0000".find(b'0', 2))
17+
print(b"0000".find(b'0', 3))
18+
print(b"0000".find(b'0', 4))
19+
print(b"0000".find(b'0', 5))
20+
print(b"0000".find(b'-1', 3))
21+
print(b"0000".find(b'1', 3))
22+
print(b"0000".find(b'1', 4))
23+
print(b"0000".find(b'1', 5))

tests/basics/bytes_partition.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
print(b"asdf".partition(b'g'))
2+
print(b"asdf".partition(b'a'))
3+
print(b"asdf".partition(b's'))
4+
print(b"asdf".partition(b'f'))
5+
print(b"asdf".partition(b'd'))
6+
print(b"asdf".partition(b'asd'))
7+
print(b"asdf".partition(b'sdf'))
8+
print(b"asdf".partition(b'as'))
9+
print(b"asdf".partition(b'df'))
10+
print(b"asdf".partition(b'asdf'))
11+
print(b"asdf".partition(b'asdfa'))
12+
print(b"asdf".partition(b'fasdf'))
13+
print(b"asdf".partition(b'fasdfa'))
14+
print(b"abba".partition(b'a'))
15+
print(b"abba".partition(b'b'))
16+
17+
try:
18+
print(b"asdf".partition(1))
19+
except TypeError:
20+
print("Raised TypeError")
21+
else:
22+
print("Did not raise TypeError")
23+
24+
try:
25+
print(b"asdf".partition(b''))
26+
except ValueError:
27+
print("Raised ValueError")
28+
else:
29+
print("Did not raise ValueError")

tests/basics/bytes_replace.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
print(b"".replace(b"a", b"b"))
2+
print(b"aaa".replace(b"a", b"b", 0))
3+
print(b"aaa".replace(b"a", b"b", -5))
4+
print(b"asdfasdf".replace(b"a", b"b"))
5+
print(b"aabbaabbaabbaa".replace(b"aa", b"cc", 3))
6+
print(b"a".replace(b"aa", b"bb"))
7+
print(b"testingtesting".replace(b"ing", b""))
8+
print(b"testINGtesting".replace(b"ing", b"ING!"))
9+
10+
print(b"".replace(b"", b"1"))
11+
print(b"A".replace(b"", b"1"))
12+
print(b"AB".replace(b"", b"1"))
13+
print(b"AB".replace(b"", b"12"))

tests/basics/bytes_split.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# default separator (whitespace)
2+
print(b"a b".split())
3+
print(b" a b ".split(None))
4+
print(b" a b ".split(None, 1))
5+
print(b" a b ".split(None, 2))
6+
print(b" a b c ".split(None, 1))
7+
print(b" a b c ".split(None, 0))
8+
print(b" a b c ".split(None, -1))
9+
10+
# empty separator should fail
11+
try:
12+
b"abc".split(b'')
13+
except ValueError:
14+
print("ValueError")
15+
16+
# non-empty separator
17+
print(b"abc".split(b"a"))
18+
print(b"abc".split(b"b"))
19+
print(b"abc".split(b"c"))
20+
print(b"abc".split(b"z"))
21+
print(b"abc".split(b"ab"))
22+
print(b"abc".split(b"bc"))
23+
print(b"abc".split(b"abc"))
24+
print(b"abc".split(b"abcd"))
25+
print(b"abcabc".split(b"bc"))
26+
print(b"abcabc".split(b"bc", 0))
27+
print(b"abcabc".split(b"bc", 1))
28+
print(b"abcabc".split(b"bc", 2))

tests/basics/bytes_strip.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
print(b"".strip())
2+
print(b" \t\n\r\v\f".strip())
3+
print(b" T E S T".strip())
4+
print(b"abcabc".strip(b"ce"))
5+
print(b"aaa".strip(b"b"))
6+
print(b"abc efg ".strip(b"g a"))
7+
8+
print(b' spacious '.lstrip())
9+
print(b'www.example.com'.lstrip(b'cmowz.'))
10+
11+
print(b' spacious '.rstrip())
12+
print(b'mississippi'.rstrip(b'ipz'))
13+
14+
# Test that stripping unstrippable string returns original object
15+
s = b"abc"
16+
print(id(s.strip()) == id(s))

0 commit comments

Comments
 (0)