@@ -41,6 +41,13 @@ STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_bu
4141/******************************************************************************/
4242/* str */
4343
44+
45+ // These settings approximate CPython's printability. They are not
46+ // exhaustive and may print "unprintable" characters. All ASCII control codes
47+ // are escaped, along with variable-width spaces and line/paragraph separators.
48+ // Unlike CPython, we do not escape private use codes or reserved characters.
49+ // We assume that the input is well-formed UTF-8.
50+ // CPython policy is documented here: https://github.com/python/cpython/blob/bb3e0c240bc60fe08d332ff5955d54197f79751c/Objects/unicodectype.c#L147-L159
4451STATIC void uni_print_quoted (const mp_print_t * print , const byte * str_data , uint str_len ) {
4552 // this escapes characters, but it will be very slow to print (calling print many times)
4653 bool has_single_quote = false;
@@ -61,25 +68,26 @@ STATIC void uni_print_quoted(const mp_print_t *print, const byte *str_data, uint
6168 while (s < top ) {
6269 unichar ch ;
6370 ch = utf8_get_char (s );
71+ const byte * start = s ;
6472 s = utf8_next_char (s );
6573 if (ch == quote_char ) {
6674 mp_printf (print , "\\%c" , quote_char );
6775 } else if (ch == '\\' ) {
6876 mp_print_str (print , "\\\\" );
69- } else if (32 <= ch && ch <= 126 ) {
70- mp_printf (print , "%c" , ch );
7177 } else if (ch == '\n' ) {
7278 mp_print_str (print , "\\n" );
7379 } else if (ch == '\r' ) {
7480 mp_print_str (print , "\\r" );
7581 } else if (ch == '\t' ) {
7682 mp_print_str (print , "\\t" );
77- } else if (ch < 0x100 ) {
83+ } else if (ch <= 0x1f || ( 0x7f <= ch && ch <= 0xa0 ) || ch == 0xad ) {
7884 mp_printf (print , "\\x%02x" , ch );
79- } else if (ch < 0x10000 ) {
85+ } else if (( 0x2000 <= ch && ch <= 0x200f ) || ch == 0x2028 || ch == 0x2029 ) {
8086 mp_printf (print , "\\u%04x" , ch );
8187 } else {
82- mp_printf (print , "\\U%08x" , ch );
88+ // Print the full character out.
89+ int width = s - start ;
90+ mp_print_strn (print , (const char * )start , width , 0 , ' ' , width );
8391 }
8492 }
8593 mp_printf (print , "%c" , quote_char );
0 commit comments