Skip to content

Commit 91fb1c9

Browse files
committed
Add basic implementation of bytes type, piggybacking on str.
This reuses as much of the str implementation as possible; from here we can make them more separate as needed.
1 parent 2b2cb7b commit 91fb1c9

6 files changed

Lines changed: 96 additions & 24 deletions

File tree

py/obj.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ mp_obj_t mp_obj_new_int(machine_int_t value);
208208
mp_obj_t mp_obj_new_int_from_uint(machine_uint_t value);
209209
mp_obj_t mp_obj_new_int_from_long_str(const char *s);
210210
mp_obj_t mp_obj_new_str(const byte* data, uint len, bool make_qstr_if_not_already);
211+
mp_obj_t mp_obj_new_bytes(const byte* data, uint len);
211212
#if MICROPY_ENABLE_FLOAT
212213
mp_obj_t mp_obj_new_float(mp_float_t val);
213214
mp_obj_t mp_obj_new_complex(mp_float_t real, mp_float_t imag);
@@ -280,14 +281,17 @@ void mp_obj_exception_get_traceback(mp_obj_t self_in, machine_uint_t *n, machine
280281

281282
// str
282283
extern const mp_obj_type_t str_type;
283-
mp_obj_t mp_obj_str_builder_start(uint len, byte **data);
284+
mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, uint len, byte **data);
284285
mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in);
285286
bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2);
286287
uint mp_obj_str_get_hash(mp_obj_t self_in);
287288
uint mp_obj_str_get_len(mp_obj_t self_in);
288289
const char *mp_obj_str_get_str(mp_obj_t self_in); // use this only if you need the string to be null terminated
289290
const byte *mp_obj_str_get_data(mp_obj_t self_in, uint *len);
290291

292+
// bytes
293+
extern const mp_obj_type_t bytes_type;
294+
291295
#if MICROPY_ENABLE_FLOAT
292296
// float
293297
extern const mp_obj_type_t float_type;

py/objstr.c

Lines changed: 71 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,21 @@ typedef struct _mp_obj_str_t {
2828
// use this macro to extract the string data and length
2929
#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) const byte *str_data; uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); } else { str_len = ((mp_obj_str_t*)str_obj_in)->len; str_data = ((mp_obj_str_t*)str_obj_in)->data; }
3030

31-
static mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, int cur);
31+
static mp_obj_t mp_obj_new_str_iterator(mp_obj_t str);
32+
static mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str);
3233

3334
/******************************************************************************/
3435
/* str */
3536

3637
void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) {
3738
GET_STR_DATA_LEN(self_in, str_data, str_len);
38-
if (kind == PRINT_STR) {
39+
bool is_bytes = MP_OBJ_IS_TYPE(self_in, &bytes_type);
40+
if (kind == PRINT_STR && !is_bytes) {
3941
print(env, "%.*s", str_len, str_data);
4042
} else {
43+
if (is_bytes) {
44+
print(env, "b");
45+
}
4146
// TODO need to escape chars etc
4247
print(env, "'%.*s'", str_len, str_data);
4348
}
@@ -71,7 +76,11 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
7176
// ["no", "yes"][1 == 2] is common idiom
7277
if (MP_OBJ_IS_SMALL_INT(rhs_in)) {
7378
uint index = mp_get_index(mp_obj_get_type(lhs_in), lhs_len, rhs_in);
74-
return mp_obj_new_str(lhs_data + index, 1, true);
79+
if (MP_OBJ_IS_TYPE(lhs_in, &bytes_type)) {
80+
return MP_OBJ_NEW_SMALL_INT(lhs_data[index]);
81+
} else {
82+
return mp_obj_new_str(lhs_data + index, 1, true);
83+
}
7584
#if MICROPY_ENABLE_SLICE
7685
} else if (MP_OBJ_IS_TYPE(rhs_in, &slice_type)) {
7786
machine_int_t start, stop, step;
@@ -120,7 +129,7 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
120129

121130
// code for non-qstr
122131
byte *data;
123-
mp_obj_t s = mp_obj_str_builder_start(alloc_len, &data);
132+
mp_obj_t s = mp_obj_str_builder_start(mp_obj_get_type(lhs_in), alloc_len, &data);
124133
memcpy(data, lhs_data, lhs_len);
125134
memcpy(data + lhs_len, rhs_data, rhs_len);
126135
return mp_obj_str_builder_end(s);
@@ -143,7 +152,7 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
143152
}
144153
int n = MP_OBJ_SMALL_INT_VALUE(rhs_in);
145154
byte *data;
146-
mp_obj_t s = mp_obj_str_builder_start(lhs_len * n, &data);
155+
mp_obj_t s = mp_obj_str_builder_start(mp_obj_get_type(lhs_in), lhs_len * n, &data);
147156
mp_seq_multiply(lhs_data, sizeof(*lhs_data), lhs_len, n, data);
148157
return mp_obj_str_builder_end(s);
149158
}
@@ -152,10 +161,6 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
152161
return MP_OBJ_NULL; // op not supported
153162
}
154163

155-
static mp_obj_t str_getiter(mp_obj_t o_in) {
156-
return mp_obj_new_str_iterator(o_in, 0);
157-
}
158-
159164
mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
160165
assert(MP_OBJ_IS_STR(self_in));
161166

@@ -188,7 +193,7 @@ mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
188193

189194
// make joined string
190195
byte *data;
191-
mp_obj_t joined_str = mp_obj_str_builder_start(required_len, &data);
196+
mp_obj_t joined_str = mp_obj_str_builder_start(mp_obj_get_type(self_in), required_len, &data);
192197
for (int i = 0; i < seq_len; i++) {
193198
if (i > 0) {
194199
memcpy(data, sep_str, sep_len);
@@ -393,13 +398,23 @@ const mp_obj_type_t str_type = {
393398
"str",
394399
.print = str_print,
395400
.binary_op = str_binary_op,
396-
.getiter = str_getiter,
401+
.getiter = mp_obj_new_str_iterator,
402+
.methods = str_type_methods,
403+
};
404+
405+
// Reuses most of the methods from str
406+
const mp_obj_type_t bytes_type = {
407+
{ &mp_const_type },
408+
"bytes",
409+
.print = str_print,
410+
.binary_op = str_binary_op,
411+
.getiter = mp_obj_new_bytes_iterator,
397412
.methods = str_type_methods,
398413
};
399414

400-
mp_obj_t mp_obj_str_builder_start(uint len, byte **data) {
415+
mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, uint len, byte **data) {
401416
mp_obj_str_t *o = m_new_obj_var(mp_obj_str_t, byte, len + 1);
402-
o->base.type = &str_type;
417+
o->base.type = type;
403418
o->len = len;
404419
*data = o->data;
405420
return o;
@@ -413,6 +428,16 @@ mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in) {
413428
return o;
414429
}
415430

431+
static mp_obj_t str_new(const mp_obj_type_t *type, const byte* data, uint len) {
432+
mp_obj_str_t *o = m_new_obj_var(mp_obj_str_t, byte, len + 1);
433+
o->base.type = type;
434+
o->hash = qstr_compute_hash(data, len);
435+
o->len = len;
436+
memcpy(o->data, data, len * sizeof(byte));
437+
o->data[len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
438+
return o;
439+
}
440+
416441
mp_obj_t mp_obj_new_str(const byte* data, uint len, bool make_qstr_if_not_already) {
417442
qstr q = qstr_find_strn(data, len);
418443
if (q != MP_QSTR_NULL) {
@@ -423,16 +448,14 @@ mp_obj_t mp_obj_new_str(const byte* data, uint len, bool make_qstr_if_not_alread
423448
return MP_OBJ_NEW_QSTR(qstr_from_strn((const char*)data, len));
424449
} else {
425450
// no existing qstr, don't make one
426-
mp_obj_str_t *o = m_new_obj_var(mp_obj_str_t, byte, len + 1);
427-
o->base.type = &str_type;
428-
o->hash = qstr_compute_hash(data, len);
429-
o->len = len;
430-
memcpy(o->data, data, len * sizeof(byte));
431-
o->data[len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
432-
return o;
451+
return str_new(&str_type, data, len);
433452
}
434453
}
435454

455+
mp_obj_t mp_obj_new_bytes(const byte* data, uint len) {
456+
return str_new(&bytes_type, data, len);
457+
}
458+
436459
bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2) {
437460
if (MP_OBJ_IS_QSTR(s1) && MP_OBJ_IS_QSTR(s2)) {
438461
return s1 == s2;
@@ -522,10 +545,36 @@ static const mp_obj_type_t str_it_type = {
522545
.iternext = str_it_iternext,
523546
};
524547

525-
mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, int cur) {
548+
mp_obj_t bytes_it_iternext(mp_obj_t self_in) {
549+
mp_obj_str_it_t *self = self_in;
550+
GET_STR_DATA_LEN(self->str, str, len);
551+
if (self->cur < len) {
552+
mp_obj_t o_out = MP_OBJ_NEW_SMALL_INT(str[self->cur]);
553+
self->cur += 1;
554+
return o_out;
555+
} else {
556+
return mp_const_stop_iteration;
557+
}
558+
}
559+
560+
static const mp_obj_type_t bytes_it_type = {
561+
{ &mp_const_type },
562+
"bytes_iterator",
563+
.iternext = bytes_it_iternext,
564+
};
565+
566+
mp_obj_t mp_obj_new_str_iterator(mp_obj_t str) {
526567
mp_obj_str_it_t *o = m_new_obj(mp_obj_str_it_t);
527568
o->base.type = &str_it_type;
528569
o->str = str;
529-
o->cur = cur;
570+
o->cur = 0;
571+
return o;
572+
}
573+
574+
mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str) {
575+
mp_obj_str_it_t *o = m_new_obj(mp_obj_str_it_t);
576+
o->base.type = &bytes_it_type;
577+
o->str = str;
578+
o->cur = 0;
530579
return o;
531580
}

py/runtime.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,13 @@ mp_obj_t rt_load_const_str(qstr qstr) {
408408
return MP_OBJ_NEW_QSTR(qstr);
409409
}
410410

411+
mp_obj_t rt_load_const_bytes(qstr qstr) {
412+
DEBUG_OP_printf("load b'%s'\n", qstr_str(qstr));
413+
uint len;
414+
const byte *data = qstr_data(qstr, &len);
415+
return mp_obj_new_bytes(data, len);
416+
}
417+
411418
mp_obj_t rt_load_name(qstr qstr) {
412419
// logic: search locals, globals, builtins
413420
DEBUG_OP_printf("load name %s\n", qstr_str(qstr));

py/runtime.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ int rt_is_true(mp_obj_t arg);
22

33
mp_obj_t rt_load_const_dec(qstr qstr);
44
mp_obj_t rt_load_const_str(qstr qstr);
5+
mp_obj_t rt_load_const_bytes(qstr qstr);
56
mp_obj_t rt_load_name(qstr qstr);
67
mp_obj_t rt_load_global(qstr qstr);
78
mp_obj_t rt_load_build_class(void);

py/vm.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ bool mp_execute_byte_code_2(const byte *code_info, const byte **ip_in_out, mp_ob
138138

139139
case MP_BC_LOAD_CONST_BYTES:
140140
DECODE_QSTR;
141-
PUSH(rt_load_const_str(qst)); // TODO
141+
PUSH(rt_load_const_bytes(qst));
142142
break;
143143

144144
case MP_BC_LOAD_CONST_STRING:

tests/basics/bytes.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
a = b"123"
2+
print(a)
3+
print(str(a))
4+
print(repr(a))
5+
print(a[0], a[2])
6+
print(a[-1])
7+
8+
s = 0
9+
for i in a:
10+
s += i
11+
print(s)

0 commit comments

Comments
 (0)