@@ -6,12 +6,49 @@ use num_bigint::BigInt;
66use num_traits:: Num ;
77use std:: collections:: HashMap ;
88use std:: str:: FromStr ;
9+ use std:: cmp:: Ordering ;
10+
/// Indentation of a line, counted separately in tabs and in spaces.
///
/// The two counts are kept apart because the width of a tab is not known to
/// the lexer: two levels can only be ordered when the answer would be the same
/// for every possible tab width (see `compare_strict`).
#[derive(Clone, Copy, PartialEq, Eq, Debug, Default)]
struct IndentationLevel {
    /// Number of tabs in the indentation.
    tabs: usize,
    /// Number of spaces in the indentation.
    spaces: usize,
}

impl IndentationLevel {
    /// Returns the zero indentation level (no tabs, no spaces).
    fn new() -> IndentationLevel {
        IndentationLevel::default()
    }

    /// Compares `self` with `other` without assuming any tab width.
    ///
    /// * Equal tab counts: the levels are ordered by their space counts.
    /// * Fewer tabs: `Some(Less)` as long as `self` does not have more spaces.
    /// * More tabs: `Some(Greater)` as long as `self` does not have fewer spaces.
    ///
    /// Returns `None` when the tab count and the space count point in opposite
    /// directions, because the result would then depend on the size of a tab.
    fn compare_strict(&self, other: &IndentationLevel) -> Option<Ordering> {
        match self.tabs.cmp(&other.tabs) {
            Ordering::Equal => Some(self.spaces.cmp(&other.spaces)),
            Ordering::Less if self.spaces <= other.spaces => Some(Ordering::Less),
            Ordering::Greater if self.spaces >= other.spaces => Some(Ordering::Greater),
            // Tabs and spaces disagree: ambiguous without a known tab width.
            _ => None,
        }
    }
}
946
1047pub struct Lexer < T : Iterator < Item = char > > {
1148 chars : T ,
1249 at_begin_of_line : bool ,
1350 nesting : usize , // Amount of parenthesis
14- indentation_stack : Vec < usize > ,
51+ indentation_stack : Vec < IndentationLevel > ,
1552 pending : Vec < Spanned < Tok > > ,
1653 chr0 : Option < char > ,
1754 chr1 : Option < char > ,
@@ -218,7 +255,7 @@ where
218255 chars : input,
219256 at_begin_of_line : true ,
220257 nesting : 0 ,
221- indentation_stack : vec ! [ 0 ] ,
258+ indentation_stack : vec ! [ IndentationLevel :: new ( ) ] ,
222259 pending : Vec :: new ( ) ,
223260 chr0 : None ,
224261 location : Location :: new ( 0 , 0 ) ,
@@ -576,13 +613,24 @@ where
576613 self . at_begin_of_line = false ;
577614
578615 // Determine indentation:
579- let mut col: usize = 0 ;
616+ let mut spaces: usize = 0 ;
617+ let mut tabs: usize = 0 ;
580618 loop {
581619 match self . chr0 {
582620 Some ( ' ' ) => {
583621 self . next_char ( ) ;
584- col += 1 ;
585- }
622+ spaces += 1 ;
623+ } ,
624+ Some ( '\t' ) => {
625+ if spaces != 0 {
626+ // Don't allow tabs after spaces as part of indentation.
627+ // This is technically stricter than python3 but spaces before
628+ // tabs is even more insane than mixing spaces and tabs.
629+ panic ! ( "Tabs not allowed as part of indentation after spaces" ) ;
630+ }
631+ self . next_char ( ) ;
632+ tabs += 1 ;
633+ } ,
586634 Some ( '#' ) => {
587635 self . lex_comment ( ) ;
588636 self . at_begin_of_line = true ;
@@ -601,34 +649,54 @@ where
601649 }
602650 }
603651
652+ let indentation_level = IndentationLevel {
653+ spaces,
654+ tabs,
655+ } ;
656+
604657 if self . nesting == 0 {
605658 // Determine indent or dedent:
606659 let current_indentation = * self . indentation_stack . last ( ) . unwrap ( ) ;
607- if col == current_indentation {
608- // Same same
609- } else if col > current_indentation {
610- // New indentation level:
611- self . indentation_stack . push ( col) ;
612- let tok_start = self . get_pos ( ) ;
613- let tok_end = tok_start. clone ( ) ;
614- return Some ( Ok ( ( tok_start, Tok :: Indent , tok_end) ) ) ;
615- } else if col < current_indentation {
616- // One or more dedentations
617- // Pop off other levels until col is found:
618-
619- while col < * self . indentation_stack . last ( ) . unwrap ( ) {
620- self . indentation_stack . pop ( ) . unwrap ( ) ;
660+ let ordering = indentation_level. compare_strict ( & current_indentation) ;
661+ match ordering {
662+ Some ( Ordering :: Equal ) => {
663+ // Same same
664+ } ,
665+ Some ( Ordering :: Greater ) => {
666+ // New indentation level:
667+ self . indentation_stack . push ( indentation_level) ;
621668 let tok_start = self . get_pos ( ) ;
622669 let tok_end = tok_start. clone ( ) ;
623- self . pending . push ( Ok ( ( tok_start, Tok :: Dedent , tok_end) ) ) ;
670+ return Some ( Ok ( ( tok_start, Tok :: Indent , tok_end) ) ) ;
624671 }
672+ Some ( Ordering :: Less ) => {
673+ // One or more dedentations
674+ // Pop off other levels until col is found:
675+
676+ loop {
677+ let ordering = indentation_level. compare_strict ( self . indentation_stack . last ( ) . unwrap ( ) ) ;
678+ match ordering {
679+ Some ( Ordering :: Less ) => {
680+ self . indentation_stack . pop ( ) ;
681+ let tok_start = self . get_pos ( ) ;
682+ let tok_end = tok_start. clone ( ) ;
683+ self . pending . push ( Ok ( ( tok_start, Tok :: Dedent , tok_end) ) ) ;
684+ } ,
685+ None => panic ! ( "inconsistent use of tabs and spaces in indentation" ) ,
686+ _ => {
687+ break ;
688+ } ,
689+ } ;
690+ }
625691
626- if col != * self . indentation_stack . last ( ) . unwrap ( ) {
627- // TODO: handle wrong indentations
628- panic ! ( "Non matching indentation levels!" ) ;
629- }
692+ if indentation_level != * self . indentation_stack . last ( ) . unwrap ( ) {
693+ // TODO: handle wrong indentations
694+ panic ! ( "Non matching indentation levels!" ) ;
695+ }
630696
631- return Some ( self . pending . remove ( 0 ) ) ;
697+ return Some ( self . pending . remove ( 0 ) ) ;
698+ }
699+ None => panic ! ( "inconsistent use of tabs and spaces in indentation" ) ,
632700 }
633701 }
634702 }
@@ -1233,12 +1301,56 @@ mod tests {
12331301 }
12341302 }
12351303
// Generates one `#[test]` per `name: eol` pair. Each test lexes a function
// whose nested blocks are indented with tabs, using `eol` as the line
// terminator, and checks the full token stream.
//
// The `if` line is indented with one tab and the `return` line with one tab
// plus one space, so the second line is strictly deeper for any tab width and
// must produce a second `Indent`.
macro_rules! test_double_dedent_with_tabs {
    ($($name:ident: $eol:expr,)*) => {
        $(
            #[test]
            fn $name() {
                let source = format!(
                    "def foo():{eol}\tif x:{eol}{eol}\t return 99{eol}{eol}",
                    eol = $eol
                );
                let tokens = lex_source(&source);
                assert_eq!(
                    tokens,
                    vec![
                        Tok::Def,
                        Tok::Name {
                            name: String::from("foo"),
                        },
                        Tok::Lpar,
                        Tok::Rpar,
                        Tok::Colon,
                        Tok::Newline,
                        Tok::Indent,
                        Tok::If,
                        Tok::Name {
                            name: String::from("x"),
                        },
                        Tok::Colon,
                        Tok::Newline,
                        Tok::Indent,
                        Tok::Return,
                        Tok::Int { value: BigInt::from(99) },
                        Tok::Newline,
                        Tok::Dedent,
                        Tok::Dedent,
                    ]
                );
            }
        )*
    }
}
1341+
12361342 test_double_dedent_with_eol ! {
12371343 test_double_dedent_windows_eol: WINDOWS_EOL ,
12381344 test_double_dedent_mac_eol: MAC_EOL ,
12391345 test_double_dedent_unix_eol: UNIX_EOL ,
12401346 }
12411347
1348+ test_double_dedent_with_tabs ! {
1349+ test_double_dedent_tabs_windows_eol: WINDOWS_EOL ,
1350+ test_double_dedent_tabs_mac_eol: MAC_EOL ,
1351+ test_double_dedent_tabs_unix_eol: UNIX_EOL ,
1352+ }
1353+
12421354 macro_rules! test_newline_in_brackets {
12431355 ( $( $name: ident: $eol: expr, ) * ) => {
12441356 $(
0 commit comments