@@ -62,6 +62,12 @@ STATIC bool is_char_or3(mp_lexer_t *lex, byte c1, byte c2, byte c3) {
6262 return lex -> chr0 == c1 || lex -> chr0 == c2 || lex -> chr0 == c3 ;
6363}
6464
#if MICROPY_PY_FSTRINGS
// Four-way variant of is_char_or3: true when the current character (chr0)
// equals any one of c1, c2, c3 or c4.  Only compiled in when f-string
// support is enabled.
STATIC bool is_char_or4(mp_lexer_t *lex, byte c1, byte c2, byte c3, byte c4) {
    // the first three candidates are covered by the existing helper;
    // c4 is only compared if none of them matched
    if (is_char_or3(lex, c1, c2, c3)) {
        return true;
    }
    return lex->chr0 == c4;
}
#endif
70+
6571STATIC bool is_char_following (mp_lexer_t * lex , byte c ) {
6672 return lex -> chr1 == c ;
6773}
@@ -105,7 +111,13 @@ STATIC bool is_following_odigit(mp_lexer_t *lex) {
105111
106112STATIC bool is_string_or_bytes (mp_lexer_t * lex ) {
107113 return is_char_or (lex , '\'' , '\"' )
114+ #if MICROPY_PY_FSTRINGS
115+ || (is_char_or4 (lex , 'r' , 'u' , 'b' , 'f' ) && is_char_following_or (lex , '\'' , '\"' ))
116+ || (((is_char_and (lex , 'r' , 'f' ) || is_char_and (lex , 'f' , 'r' ))
117+ && is_char_following_following_or (lex , '\'' , '\"' )))
118+ #else
108119 || (is_char_or3 (lex , 'r' , 'u' , 'b' ) && is_char_following_or (lex , '\'' , '\"' ))
120+ #endif
109121 || ((is_char_and (lex , 'r' , 'b' ) || is_char_and (lex , 'b' , 'r' ))
110122 && is_char_following_following_or (lex , '\'' , '\"' ));
111123}
@@ -132,9 +144,35 @@ STATIC void next_char(mp_lexer_t *lex) {
132144 ++ lex -> column ;
133145 }
134146
147+ // shift the input queue forward
135148 lex -> chr0 = lex -> chr1 ;
136149 lex -> chr1 = lex -> chr2 ;
137- lex -> chr2 = lex -> reader .readbyte (lex -> reader .data );
150+
151+ // and add the next byte from either the fstring args or the reader
152+ #if MICROPY_PY_FSTRINGS
153+ if (lex -> fstring_args_idx ) {
154+ // if there are saved chars, then we're currently injecting fstring args
155+ if (lex -> fstring_args_idx < lex -> fstring_args .len ) {
156+ lex -> chr2 = lex -> fstring_args .buf [lex -> fstring_args_idx ++ ];
157+ } else {
158+ // no more fstring arg bytes
159+ lex -> chr2 = '\0' ;
160+ }
161+
162+ if (lex -> chr0 == '\0' ) {
163+ // consumed all fstring data, restore saved input queue
164+ lex -> chr0 = lex -> chr0_saved ;
165+ lex -> chr1 = lex -> chr1_saved ;
166+ lex -> chr2 = lex -> chr2_saved ;
167+ // stop consuming fstring arg data
168+ vstr_reset (& lex -> fstring_args );
169+ lex -> fstring_args_idx = 0 ;
170+ }
171+ } else
172+ #endif
173+ {
174+ lex -> chr2 = lex -> reader .readbyte (lex -> reader .data );
175+ }
138176
139177 if (lex -> chr1 == '\r' ) {
140178 // CR is a new line, converted to LF
@@ -272,7 +310,7 @@ STATIC bool get_hex(mp_lexer_t *lex, size_t num_digits, mp_uint_t *result) {
272310 return true;
273311}
274312
275- STATIC void parse_string_literal (mp_lexer_t * lex , bool is_raw ) {
313+ STATIC void parse_string_literal (mp_lexer_t * lex , bool is_raw , bool is_fstring ) {
276314 // get first quoting character
277315 char quote_char = '\'' ;
278316 if (is_char (lex , '\"' )) {
@@ -293,12 +331,57 @@ STATIC void parse_string_literal(mp_lexer_t *lex, bool is_raw) {
293331 }
294332
295333 size_t n_closing = 0 ;
334+ #if MICROPY_PY_FSTRINGS
335+ if (is_fstring ) {
336+ // assume there's going to be interpolation, so prep the injection data
337+ // fstring_args_idx==0 && len(fstring_args)>0 means we're extracting the args.
338+ // only when fstring_args_idx>0 will we consume the arg data
339+ // note: lex->fstring_args will be empty already (it's reset when finished)
340+ vstr_add_str (& lex -> fstring_args , ".format(" );
341+ }
342+ #endif
343+
296344 while (!is_end (lex ) && (num_quotes > 1 || !is_char (lex , '\n' )) && n_closing < num_quotes ) {
297345 if (is_char (lex , quote_char )) {
298346 n_closing += 1 ;
299347 vstr_add_char (& lex -> vstr , CUR_CHAR (lex ));
300348 } else {
301349 n_closing = 0 ;
350+
351+ #if MICROPY_PY_FSTRINGS
352+ while (is_fstring && is_char (lex , '{' )) {
353+ next_char (lex );
354+ if (is_char (lex , '{' )) {
355+ // "{{" is passed through unchanged to be handled by str.format
356+ vstr_add_byte (& lex -> vstr , '{' );
357+ next_char (lex );
358+ } else {
359+ // remember the start of this argument (if we need it for f'{a=}').
360+ size_t i = lex -> fstring_args .len ;
361+ // extract characters inside the { until we reach the
362+ // format specifier or closing }.
363+ // (MicroPython limitation) note: this is completely unaware of
364+ // Python syntax and will not handle any expression containing '}' or ':'.
365+ // e.g. f'{"}"}' or f'{foo({})}'.
366+ while (!is_end (lex ) && !is_char_or (lex , ':' , '}' )) {
367+ // like the default case at the end of this function, stay 8-bit clean
368+ vstr_add_byte (& lex -> fstring_args , CUR_CHAR (lex ));
369+ next_char (lex );
370+ }
371+ if (lex -> fstring_args .buf [lex -> fstring_args .len - 1 ] == '=' ) {
372+ // if the last character of the arg was '=', then inject "arg=" before the '{'.
373+ // f'{a=}' --> 'a={}'.format(a)
374+ vstr_add_strn (& lex -> vstr , lex -> fstring_args .buf + i , lex -> fstring_args .len - i );
375+ // remove the trailing '='
376+ lex -> fstring_args .len -- ;
377+ }
378+ // comma-separate args
379+ vstr_add_byte (& lex -> fstring_args , ',' );
380+ }
381+ vstr_add_byte (& lex -> vstr , '{' );
382+ }
383+ #endif
384+
302385 if (is_char (lex , '\\' )) {
303386 next_char (lex );
304387 unichar c = CUR_CHAR (lex );
@@ -451,6 +534,23 @@ STATIC bool skip_whitespace(mp_lexer_t *lex, bool stop_at_newline) {
451534}
452535
453536void mp_lexer_to_next (mp_lexer_t * lex ) {
537+ #if MICROPY_PY_FSTRINGS
538+ if (lex -> fstring_args .len && lex -> fstring_args_idx == 0 ) {
539+ // moving onto the next token means the literal string is complete.
540+ // switch into injecting the format args.
541+ vstr_add_byte (& lex -> fstring_args , ')' );
542+ lex -> chr0_saved = lex -> chr0 ;
543+ lex -> chr1_saved = lex -> chr1 ;
544+ lex -> chr2_saved = lex -> chr2 ;
545+ lex -> chr0 = lex -> fstring_args .buf [0 ];
546+ lex -> chr1 = lex -> fstring_args .buf [1 ];
547+ lex -> chr2 = lex -> fstring_args .buf [2 ];
548+ // we've already extracted 3 chars, but setting this non-zero also
549+ // means we'll start consuming the fstring data
550+ lex -> fstring_args_idx = 3 ;
551+ }
552+ #endif
553+
454554 // start new token text
455555 vstr_reset (& lex -> vstr );
456556
@@ -506,6 +606,7 @@ void mp_lexer_to_next(mp_lexer_t *lex) {
506606 do {
507607 // parse type codes
508608 bool is_raw = false;
609+ bool is_fstring = false;
509610 mp_token_kind_t kind = MP_TOKEN_STRING ;
510611 int n_char = 0 ;
511612 if (is_char (lex , 'u' )) {
@@ -524,7 +625,25 @@ void mp_lexer_to_next(mp_lexer_t *lex) {
524625 kind = MP_TOKEN_BYTES ;
525626 n_char = 2 ;
526627 }
628+ #if MICROPY_PY_FSTRINGS
629+ if (is_char_following (lex , 'f' )) {
630+ // raw-f-strings unsupported, immediately return (invalid) token.
631+ lex -> tok_kind = MP_TOKEN_FSTRING_RAW ;
632+ break ;
633+ }
634+ #endif
635+ }
636+ #if MICROPY_PY_FSTRINGS
637+ else if (is_char (lex , 'f ')) {
638+ if (is_char_following (lex , 'r' )) {
639+ // raw-f-strings unsupported, immediately return (invalid) token.
640+ lex -> tok_kind = MP_TOKEN_FSTRING_RAW ;
641+ break ;
642+ }
643+ n_char = 1 ;
644+ is_fstring = true;
527645 }
646+ #endif
528647
529648 // Set or check token kind
530649 if (lex -> tok_kind == MP_TOKEN_END ) {
@@ -543,7 +662,7 @@ void mp_lexer_to_next(mp_lexer_t *lex) {
543662 }
544663
545664 // Parse the literal
546- parse_string_literal (lex , is_raw );
665+ parse_string_literal (lex , is_raw , is_fstring );
547666
548667 // Skip whitespace so we can check if there's another string following
549668 skip_whitespace (lex , true);
@@ -703,6 +822,9 @@ mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader) {
703822 lex -> num_indent_level = 1 ;
704823 lex -> indent_level = m_new (uint16_t , lex -> alloc_indent_level );
705824 vstr_init (& lex -> vstr , 32 );
825+ #if MICROPY_PY_FSTRINGS
826+ vstr_init (& lex -> fstring_args , 0 );
827+ #endif
706828
707829 // store sentinel for first indentation level
708830 lex -> indent_level [0 ] = 0 ;
0 commit comments