@@ -13,6 +13,7 @@ use std::collections::HashMap;
1313use std:: str:: FromStr ;
1414use unic_emoji_char:: is_emoji_presentation;
1515use unicode_xid:: UnicodeXID ;
16+ use wtf8;
1617
1718#[ derive( Clone , Copy , PartialEq , Debug ) ]
1819struct IndentationLevel {
@@ -67,6 +68,7 @@ pub struct LexicalError {
6768#[ derive( Debug ) ]
6869pub enum LexicalErrorType {
6970 StringError ,
71+ UnicodeError ,
7072 NestingError ,
7173 UnrecognizedToken { tok : char } ,
7274 OtherError ( String ) ,
@@ -456,6 +458,27 @@ where
456458 }
457459 }
458460
461+ fn unicode_literal ( & mut self , literal_number : usize ) -> Result < char , LexicalError > {
462+ let mut p: u32 = 0u32 ;
463+ let unicode_error = Err ( LexicalError {
464+ error : LexicalErrorType :: UnicodeError ,
465+ location : self . get_pos ( ) ,
466+ } ) ;
467+ for i in 1 ..=literal_number {
468+ match self . next_char ( ) {
469+ Some ( c) => match c. to_digit ( 16 ) {
470+ Some ( d) => p += d << ( literal_number - i) * 4 ,
471+ None => return unicode_error,
472+ } ,
473+ None => return unicode_error,
474+ }
475+ }
476+ match wtf8:: CodePoint :: from_u32 ( p) {
477+ Some ( cp) => return Ok ( cp. to_char_lossy ( ) ) ,
478+ None => return unicode_error,
479+ }
480+ }
481+
459482 fn lex_string (
460483 & mut self ,
461484 is_bytes : bool ,
@@ -513,6 +536,9 @@ where
513536 Some ( 't' ) => {
514537 string_content. push ( '\t' ) ;
515538 }
539+ Some ( 'u' ) => string_content. push ( self . unicode_literal ( 4 ) ?) ,
540+ Some ( 'U' ) => string_content. push ( self . unicode_literal ( 8 ) ?) ,
541+ Some ( 'x' ) if !is_bytes => string_content. push ( self . unicode_literal ( 2 ) ?) ,
516542 Some ( 'v' ) => string_content. push ( '\x0b' ) ,
517543 Some ( c) => {
518544 string_content. push ( '\\' ) ;
0 commit comments