Skip to content

Commit b32db4e

Browse files
committed
Merge branch 'master' of github.com:micropython/micropython
2 parents 8913c04 + 093b8a5 commit b32db4e

5 files changed

Lines changed: 164 additions & 33 deletions

File tree

py/builtin.c

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -375,28 +375,6 @@ STATIC mp_obj_t mp_builtin_sorted(uint n_args, const mp_obj_t *args, mp_map_t *k
375375

376376
MP_DEFINE_CONST_FUN_OBJ_KW(mp_builtin_sorted_obj, 1, mp_builtin_sorted);
377377

378-
STATIC mp_obj_t mp_builtin_str(mp_obj_t o_in) {
379-
vstr_t *vstr = vstr_new();
380-
mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, vstr, o_in, PRINT_STR);
381-
mp_obj_t s = mp_obj_new_str((byte*)vstr->buf, vstr->len, false);
382-
vstr_free(vstr);
383-
return s;
384-
}
385-
386-
MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_str_obj, mp_builtin_str);
387-
388-
// TODO: This should be type, this is just quick CPython compat hack
389-
STATIC mp_obj_t mp_builtin_bytes(uint n_args, const mp_obj_t *args) {
390-
if (!MP_OBJ_IS_QSTR(args[0]) && !MP_OBJ_IS_TYPE(args[0], &str_type)) {
391-
assert(0);
392-
}
393-
// Currently, MicroPython strings are mix between CPython byte and unicode
394-
// strings. So, conversion is null so far.
395-
return args[0];
396-
}
397-
398-
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_builtin_bytes_obj, 1, 3, mp_builtin_bytes);
399-
400378
STATIC mp_obj_t mp_builtin_id(mp_obj_t o_in) {
401379
return mp_obj_new_int((machine_int_t)o_in);
402380
}

py/objstr.c

Lines changed: 127 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,11 @@ typedef struct _mp_obj_str_t {
1414
mp_obj_base_t base;
1515
machine_uint_t hash : 16; // XXX here we assume the hash size is 16 bits (it is at the moment; see qstr.c)
1616
machine_uint_t len : 16; // len == number of bytes used in data, alloc = len + 1 because (at the moment) we also append a null byte
17-
byte data[];
17+
const byte *data;
1818
} mp_obj_str_t;
1919

20+
const mp_obj_t mp_const_empty_bytes;
21+
2022
// use this macro to extract the string hash
2123
#define GET_STR_HASH(str_obj_in, str_hash) uint str_hash; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_hash = qstr_hash(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_hash = ((mp_obj_str_t*)str_obj_in)->hash; }
2224

@@ -28,6 +30,7 @@ typedef struct _mp_obj_str_t {
2830

2931
STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str);
3032
STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str);
33+
STATIC mp_obj_t str_new(const mp_obj_type_t *type, const byte* data, uint len);
3134

3235
/******************************************************************************/
3336
/* str */
@@ -78,6 +81,109 @@ STATIC void str_print(void (*print)(void *env, const char *fmt, ...), void *env,
7881
}
7982
}
8083

84+
// Constructor for the `str` type (installed as str_type.make_new).
//
//   str()                          -> the qstr object for MP_QSTR_
//   str(obj)                       -> obj rendered with PRINT_STR
//   str(bytes, enc[, errors])      -> str object built from a bytes object
//
// More than 3 positional arguments raises TypeError.  type_in and n_kw are
// not consulted.
STATIC mp_obj_t str_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
    if (n_args == 0) {
        return MP_OBJ_NEW_QSTR(MP_QSTR_);
    }

    if (n_args == 1) {
        // Print the object into a temporary vstr, then copy that buffer
        // into a freshly created string object.
        vstr_t *out = vstr_new();
        mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, out, args[0], PRINT_STR);
        mp_obj_t result = mp_obj_new_str((byte*)out->buf, out->len, false);
        vstr_free(out);
        return result;
    }

    if (n_args > 3) {
        nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "str takes at most 3 arguments"));
    }

    // Two or three arguments: the first one has to be a bytes object.
    // TODO: validate 2nd/3rd args
    mp_obj_t src = args[0];
    if (!MP_OBJ_IS_TYPE(src, &bytes_type)) {
        nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "bytes expected"));
    }

    GET_STR_DATA_LEN(src, src_data, src_len);
    GET_STR_HASH(src, src_hash);

    // Passing NULL data makes str_new skip the copy; the new str then simply
    // points at the bytes object's buffer and reuses its hash.
    mp_obj_str_t *decoded = str_new(&str_type, NULL, src_len);
    decoded->hash = src_hash;
    decoded->data = src_data;
    return decoded;
}
117+
118+
// Constructor for the `bytes` type (installed as bytes_type.make_new).
//
//   bytes()                        -> mp_const_empty_bytes
//   bytes(str, enc[, errors])      -> bytes object pointing at the str's data
//   bytes(n)                       -> n zero bytes (n is a small int, n >= 0)
//   bytes(iterable)                -> one byte per item of the iterable
//
// Raises TypeError for a wrong number of arguments, for a negative count and
// for an iterable item that is not a small int.  type_in and n_kw are not
// consulted.
STATIC mp_obj_t bytes_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
    if (n_args == 0) {
        return mp_const_empty_bytes;
    }

    if (MP_OBJ_IS_STR(args[0])) {
        // A str source requires an encoding argument (and optionally an
        // errors argument); neither is validated yet.
        if (n_args < 2 || n_args > 3) {
            goto wrong_args;
        }
        GET_STR_DATA_LEN(args[0], str_data, str_len);
        GET_STR_HASH(args[0], str_hash);
        // NULL data makes str_new skip the copy; data and hash are then
        // taken over from the source string.
        mp_obj_str_t *o = str_new(&bytes_type, NULL, str_len);
        o->data = str_data;
        o->hash = str_hash;
        return o;
    }

    if (n_args > 1) {
        goto wrong_args;
    }

    if (MP_OBJ_IS_SMALL_INT(args[0])) {
        int count = MP_OBJ_SMALL_INT_VALUE(args[0]);
        if (count < 0) {
            // A negative count would otherwise be converted to a huge
            // unsigned length and handed to the allocator and memset.
            // NOTE(review): CPython raises ValueError here; TypeError is
            // used because it is the only exception type this file is known
            // to reference.
            nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "negative count"));
        }
        byte *zeros;
        mp_obj_t o = mp_obj_str_builder_start(&bytes_type, count, &zeros);
        memset(zeros, 0, count);
        return mp_obj_str_builder_end(o);
    }

    // Generic iterable.  If the object reports a length, allocate exactly
    // that much up front; the reported length is only treated as a hint, so
    // writes are bounded by `cap` and any surplus spills into a vstr.
    mp_obj_t o = NULL;
    byte *buf = NULL;
    vstr_t *vstr = NULL;
    uint cap = 0;   // capacity of buf (exact-length path only)
    uint used = 0;  // bytes stored in buf so far

    mp_obj_t len_in = mp_obj_len_maybe(args[0]);
    if (len_in == MP_OBJ_NULL) {
        vstr = vstr_new();
    } else {
        int hint = MP_OBJ_SMALL_INT_VALUE(len_in);
        cap = hint > 0 ? (uint)hint : 0;
        o = mp_obj_str_builder_start(&bytes_type, cap, &buf);
    }

    mp_obj_t iterable = rt_getiter(args[0]);
    mp_obj_t item;
    while ((item = rt_iternext(iterable)) != mp_const_stop_iteration) {
        if (!MP_OBJ_IS_SMALL_INT(item)) {
            nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "bytes items must be integers"));
        }
        // NOTE(review): values outside 0..255 are still truncated to a byte,
        // as before; CPython rejects them.
        byte b = MP_OBJ_SMALL_INT_VALUE(item);

        if (vstr == NULL && used < cap) {
            buf[used++] = b;
            continue;
        }

        if (vstr == NULL) {
            // The iterator produced more items than the reported length:
            // move what has been collected into a growable buffer instead of
            // writing past the end of buf.  The first allocation is simply
            // abandoned here (presumably reclaimed by the GC -- confirm).
            vstr = vstr_new();
            for (uint i = 0; i < used; i++) {
                vstr_add_char(vstr, buf[i]);
            }
        }
        vstr_add_char(vstr, b);
    }

    if (vstr != NULL) {
        vstr_shrink(vstr);
        // TODO: Optimize, borrow buffer from vstr
        uint total = vstr_len(vstr);
        o = mp_obj_str_builder_start(&bytes_type, total, &buf);
        memcpy(buf, vstr_str(vstr), total);
        vstr_free(vstr);
    } else if (used < cap) {
        // Fewer items than announced: record the real length so that the
        // uninitialised tail is neither hashed nor exposed.
        ((mp_obj_str_t*)o)->len = used;
    }

    return mp_obj_str_builder_end(o);

wrong_args:
    nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "wrong number of arguments"));
}
186+
81187
// like strstr but with specified length and allows \0 bytes
82188
// TODO replace with something more efficient/standard
83189
STATIC const byte *find_subbytes(const byte *haystack, uint hlen, const byte *needle, uint nlen) {
@@ -619,6 +725,7 @@ const mp_obj_type_t str_type = {
619725
{ &mp_type_type },
620726
.name = MP_QSTR_str,
621727
.print = str_print,
728+
.make_new = str_make_new,
622729
.binary_op = str_binary_op,
623730
.getiter = mp_obj_new_str_iterator,
624731
.methods = str_type_methods,
@@ -630,34 +737,45 @@ const mp_obj_type_t bytes_type = {
630737
{ &mp_type_type },
631738
.name = MP_QSTR_bytes,
632739
.print = str_print,
740+
.make_new = bytes_make_new,
633741
.binary_op = str_binary_op,
634742
.getiter = mp_obj_new_bytes_iterator,
635743
.methods = str_type_methods,
636744
};
637745

746+
// the zero-length bytes
747+
STATIC const mp_obj_str_t empty_bytes_obj = {{&bytes_type}, 0, 0, NULL};
748+
const mp_obj_t mp_const_empty_bytes = (mp_obj_t)&empty_bytes_obj;
749+
638750
mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, uint len, byte **data) {
639-
mp_obj_str_t *o = m_new_obj_var(mp_obj_str_t, byte, len + 1);
751+
mp_obj_str_t *o = m_new_obj(mp_obj_str_t);
640752
o->base.type = type;
641753
o->len = len;
642-
*data = o->data;
754+
byte *p = m_new(byte, len + 1);
755+
o->data = p;
756+
*data = p;
643757
return o;
644758
}
645759

646760
mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in) {
647-
assert(MP_OBJ_IS_STR(o_in));
648761
mp_obj_str_t *o = o_in;
649762
o->hash = qstr_compute_hash(o->data, o->len);
650-
o->data[o->len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
763+
byte *p = (byte*)o->data;
764+
p[o->len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
651765
return o;
652766
}
653767

654768
STATIC mp_obj_t str_new(const mp_obj_type_t *type, const byte* data, uint len) {
655-
mp_obj_str_t *o = m_new_obj_var(mp_obj_str_t, byte, len + 1);
769+
mp_obj_str_t *o = m_new_obj(mp_obj_str_t);
656770
o->base.type = type;
657-
o->hash = qstr_compute_hash(data, len);
658771
o->len = len;
659-
memcpy(o->data, data, len * sizeof(byte));
660-
o->data[len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
772+
if (data) {
773+
o->hash = qstr_compute_hash(data, len);
774+
byte *p = m_new(byte, len + 1);
775+
o->data = p;
776+
memcpy(p, data, len * sizeof(byte));
777+
p[len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
778+
}
661779
return o;
662780
}
663781

py/runtime.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ STATIC const mp_builtin_elem_t builtin_table[] = {
8989

9090
// built-in types
9191
{ MP_QSTR_bool, (mp_obj_t)&bool_type },
92+
{ MP_QSTR_bytes, (mp_obj_t)&bytes_type },
9293
#if MICROPY_ENABLE_FLOAT
9394
{ MP_QSTR_complex, (mp_obj_t)&mp_type_complex },
9495
#endif
@@ -102,6 +103,7 @@ STATIC const mp_builtin_elem_t builtin_table[] = {
102103
{ MP_QSTR_list, (mp_obj_t)&list_type },
103104
{ MP_QSTR_map, (mp_obj_t)&map_type },
104105
{ MP_QSTR_set, (mp_obj_t)&set_type },
106+
{ MP_QSTR_str, (mp_obj_t)&str_type },
105107
{ MP_QSTR_super, (mp_obj_t)&super_type },
106108
{ MP_QSTR_tuple, (mp_obj_t)&tuple_type },
107109
{ MP_QSTR_type, (mp_obj_t)&mp_type_type },
@@ -114,7 +116,6 @@ STATIC const mp_builtin_elem_t builtin_table[] = {
114116
{ MP_QSTR_abs, (mp_obj_t)&mp_builtin_abs_obj },
115117
{ MP_QSTR_all, (mp_obj_t)&mp_builtin_all_obj },
116118
{ MP_QSTR_any, (mp_obj_t)&mp_builtin_any_obj },
117-
{ MP_QSTR_bytes, (mp_obj_t)&mp_builtin_bytes_obj },
118119
{ MP_QSTR_callable, (mp_obj_t)&mp_builtin_callable_obj },
119120
{ MP_QSTR_chr, (mp_obj_t)&mp_builtin_chr_obj },
120121
{ MP_QSTR_dir, (mp_obj_t)&mp_builtin_dir_obj },
@@ -137,7 +138,6 @@ STATIC const mp_builtin_elem_t builtin_table[] = {
137138
{ MP_QSTR_repr, (mp_obj_t)&mp_builtin_repr_obj },
138139
{ MP_QSTR_sorted, (mp_obj_t)&mp_builtin_sorted_obj },
139140
{ MP_QSTR_sum, (mp_obj_t)&mp_builtin_sum_obj },
140-
{ MP_QSTR_str, (mp_obj_t)&mp_builtin_str_obj },
141141
{ MP_QSTR_bytearray, (mp_obj_t)&mp_builtin_bytearray_obj },
142142

143143
// built-in exceptions

tests/basics/bytes.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,36 @@
44
print(repr(a))
55
print(a[0], a[2])
66
print(a[-1])
7+
print(str(a, "utf-8"))
8+
print(str(a, "utf-8", "ignore"))
9+
try:
10+
str(a, "utf-8", "ignore", "toomuch")
11+
except TypeError:
12+
print("TypeError")
713

814
s = 0
915
for i in a:
1016
s += i
1117
print(s)
18+
19+
20+
print(bytes("abc", "utf-8"))
21+
print(bytes("abc", "utf-8", "replace"))
22+
try:
23+
bytes("abc")
24+
except TypeError:
25+
print("TypeError")
26+
try:
27+
bytes("abc", "utf-8", "replace", "toomuch")
28+
except TypeError:
29+
print("TypeError")
30+
31+
print(bytes(3))
32+
33+
print(bytes([3, 2, 1]))
34+
print(bytes(range(5)))
35+
36+
def gen():
37+
for i in range(4):
38+
yield i
39+
print(bytes(gen()))

tests/basics/int-long.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,10 @@
3737
print(a)
3838
a >>= 1
3939
print(a)
40+
41+
# Test referential integrity of long ints
42+
a = 0x1ffffffff
43+
b = a
44+
a += 1
45+
print(a)
46+
print(b)

0 commit comments

Comments
 (0)