Skip to content

Commit 16677ce

Browse files
committed
py: Be more precise about unicode type and disabled unicode behaviour.
1 parent 0ecd598 commit 16677ce

File tree

3 files changed

+20
-4
lines changed

3 files changed

+20
-4
lines changed

py/lexer.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -492,11 +492,19 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) {
492492
}
493493
}
494494
if (c != MP_LEXER_EOF) {
495+
#if MICROPY_PY_BUILTINS_STR_UNICODE
495496
if (c < 0x110000 && !is_bytes) {
496497
vstr_add_char(&lex->vstr, c);
497498
} else if (c < 0x100 && is_bytes) {
498499
vstr_add_byte(&lex->vstr, c);
499-
} else {
500+
}
501+
#else
502+
// without unicode everything is just added as an 8-bit byte
503+
if (c < 0x100) {
504+
vstr_add_byte(&lex->vstr, c);
505+
}
506+
#endif
507+
else {
500508
assert(!"TODO: Throw an error, invalid escape code probably");
501509
}
502510
}

py/misc.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,15 @@ size_t m_get_peak_bytes_allocated(void);
9292

9393
/** unichar / UTF-8 *********************************************/
9494

95-
typedef int unichar; // TODO
95+
#if MICROPY_PY_BUILTINS_STR_UNICODE
96+
#include <stdint.h> // only include if we need it
97+
// with unicode enabled we need a type which can fit chars up to 0x10ffff
98+
typedef uint32_t unichar;
99+
#else
100+
// without unicode enabled we only need to fit chars up to 0xff
101+
// (on 16-bit archs, uint is 16 bits wide and more efficient than uint32_t)
102+
typedef uint unichar;
103+
#endif
96104

97105
unichar utf8_get_char(const byte *s);
98106
const byte *utf8_next_char(const byte *s);

py/modbuiltins.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -182,11 +182,11 @@ STATIC mp_obj_t mp_builtin_chr(mp_obj_t o_in) {
182182
return mp_obj_new_str(str, len, true);
183183
#else
184184
mp_int_t ord = mp_obj_get_int(o_in);
185-
if (0 <= ord && ord <= 0x10ffff) {
185+
if (0 <= ord && ord <= 0xff) {
186186
char str[1] = {ord};
187187
return mp_obj_new_str(str, 1, true);
188188
} else {
189-
nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "chr() arg not in range(0x110000)"));
189+
nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "chr() arg not in range(256)"));
190190
}
191191
#endif
192192
}

0 commit comments

Comments
 (0)