Skip to content

Commit 2ec38a1

Browse files
committed
objstr: Be 8-bit clean even for repr().
This allows roughly the same behavior as Python 3 for non-ASCII strings: for example, print("<phrase in non-Latin script>".split()) will print a list of words rather than a weird hex dump (as Python 2 does). (Of course, it will print a list of words only if the phrase contains "words" at all, i.e. parts separated by ASCII-compatible whitespace; that surely won't apply to every human language in existence.)
1 parent e9036c2 commit 2ec38a1

4 files changed

Lines changed: 11 additions & 6 deletions

File tree

py/obj.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,7 @@ qstr mp_obj_str_get_qstr(mp_obj_t self_in); // use this if you will anyway conve
469469
const char *mp_obj_str_get_str(mp_obj_t self_in); // use this only if you need the string to be null terminated
470470
const char *mp_obj_str_get_data(mp_obj_t self_in, uint *len);
471471
mp_obj_t mp_obj_str_intern(mp_obj_t str);
472-
void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *env, const byte *str_data, uint str_len);
472+
void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *env, const byte *str_data, uint str_len, bool is_bytes);
473473

474474
#if MICROPY_PY_BUILTINS_FLOAT
475475
// float

py/objarray.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ STATIC void array_print(void (*print)(void *env, const char *fmt, ...), void *en
5858
mp_obj_array_t *o = o_in;
5959
if (o->typecode == BYTEARRAY_TYPECODE) {
6060
print(env, "bytearray(b", o->typecode);
61-
mp_str_print_quoted(print, env, o->items, o->len);
61+
mp_str_print_quoted(print, env, o->items, o->len, true);
6262
} else {
6363
print(env, "array('%c'", o->typecode);
6464
if (o->len > 0) {

py/objstr.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ STATIC bool is_str_or_bytes(mp_obj_t o) {
6464
/******************************************************************************/
6565
/* str */
6666

67-
void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *env, const byte *str_data, uint str_len) {
67+
void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *env,
68+
const byte *str_data, uint str_len, bool is_bytes) {
6869
// this escapes characters, but it will be very slow to print (calling print many times)
6970
bool has_single_quote = false;
7071
bool has_double_quote = false;
@@ -85,7 +86,10 @@ void mp_str_print_quoted(void (*print)(void *env, const char *fmt, ...), void *e
8586
print(env, "\\%c", quote_char);
8687
} else if (*s == '\\') {
8788
print(env, "\\\\");
88-
} else if (32 <= *s && *s <= 126) {
89+
} else if (*s >= 0x20 && *s != 0x7f && (!is_bytes || *s < 0x80)) {
90+
// In strings, anything which is not ascii control character
91+
// is printed as is, this includes characters in range 0x80-0xff
92+
// (which can be non-Latin letters, etc.)
8993
print(env, "%c", *s);
9094
} else if (*s == '\n') {
9195
print(env, "\\n");
@@ -109,7 +113,7 @@ STATIC void str_print(void (*print)(void *env, const char *fmt, ...), void *env,
109113
if (is_bytes) {
110114
print(env, "b");
111115
}
112-
mp_str_print_quoted(print, env, str_data, str_len);
116+
mp_str_print_quoted(print, env, str_data, str_len, is_bytes);
113117
}
114118
}
115119

tests/basics/string-repr.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
# anything above 0xa0 is printed as Unicode by CPython
2-
for c in range(0xa1):
2+
# the above is a CPython implementation detail, stick to ASCII
3+
for c in range(0x80):
34
print("0x%02x: %s" % (c, repr(chr(c))))

0 commit comments

Comments
 (0)