@@ -26,18 +26,23 @@ const eof = 0
2626// Token value that signals eof together with an error (distinct from plain eof, which is 0)
2727const eofError = - 1
2828
29+ // Standard Python definition of a tab: a tab advances the indent to the next multiple of tabSize. Must stay a power of two because countIndent masks with tabSize-1.
30+ const tabSize = 8
31+
2932// The parser uses the type <prefix>Lex as a lexer. It must provide
3033// the methods Lex(*<prefix>SymType) int and Error(string).
3134type yyLex struct {
3235 reader * bufio.Reader
3336 line string // current line being parsed
3437 eof bool // flag to show EOF was read
38+ error bool // set by Error when a parse error has occurred; checked by Parse and Lex
3539 indentStack []int // indent stack to control INDENT / DEDENT tokens
3640 state int // current state of state machine
3741 currentIndent string // whitespace at start of current line
38- indentSpace bool // whether we are indenting with spaces
39- indentTab bool // whether we are indenting with tabs
4042 interactive bool // set if reading interactive input
43+ bracket int // number of open [ ]: incremented on '[' and decremented on ']'; while non-zero, NEWLINE / INDENT / DEDENT are suppressed
44+ parenthesis int // number of open ( ): incremented on '(' and decremented on ')'; same suppression as bracket
45+ brace int // number of open { }: incremented on '{' and decremented on '}'; same suppression as bracket
4146}
4247
4348func NewLex (r io.Reader ) * yyLex {
@@ -76,28 +81,29 @@ func (x *yyLex) countIndent(s string) int {
7681 // mixes tabs and spaces in a way that makes the meaning
7782 // dependent on the worth of a tab in spaces; a TabError is
7883 // raised in that case
79- if ! x .indentSpace && ! x .indentTab {
80- switch s [0 ] {
84+ indent := 0
85+ for _ , c := range s {
86+ switch c {
8187 case ' ' :
82- x . indentSpace = true
88+ indent ++
8389 case '\t' :
84- x .indentTab = true
90+ // 012345678901234567
91+ // a       b
92+ //  a      b
93+ //   a     b
94+ //    a    b
95+ //     a   b
96+ //      a  b
97+ //       a b
98+ //        ab
99+ //         a       b
100+ indent += tabSize - (indent & (tabSize - 1 ))
85101 default :
86102 panic ("bad indent" )
87103 }
104+
88105 }
89- if x .indentSpace {
90- if strings .ContainsRune (s , '\t' ) {
91- x .Error ("Inconsistent indent" )
92- }
93- } else if x .indentTab {
94- if strings .ContainsRune (s , ' ' ) {
95- x .Error ("Inconsistent indent" )
96- }
97- } else {
98- panic ("indent not set" )
99- }
100- return len (s )
106+ return indent
101107}
102108
103109var operators = map [string ]int {
@@ -213,6 +219,11 @@ func init() {
213219 tokenToString [NUMBER ] = "NUMBER"
214220}
215221
222+ // openBrackets reports whether any of the bracket, parenthesis or brace counters is non-zero; a negative count (more closers than openers) also counts as open
223+ func (x * yyLex ) openBrackets () bool {
224+ return x .bracket != 0 || x .parenthesis != 0 || x .brace != 0
225+ }
226+
216227// States
217228const (
218229 readString = iota
@@ -243,11 +254,15 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
243254 case readString :
244255 // Read x.line
245256 x .refill ()
257+ x .state ++
246258 // an empty line while reading interactive input should return a NEWLINE
247259 if x .interactive && (x .line == "" || x .line == "\n " ) {
260+ // Don't output NEWLINE if brackets are open
261+ if x .openBrackets () {
262+ continue
263+ }
248264 return NEWLINE
249265 }
250- x .state ++
251266 case readIndent :
252267 // Read the initial indent and get rid of it
253268 trimmed := strings .TrimLeft (x .line , " \t " )
@@ -262,6 +277,11 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
262277 }
263278 x .state ++
264279 case checkIndent :
280+ // Don't output INDENT or DEDENT if brackets are open
281+ if x .openBrackets () {
282+ x .state ++
283+ continue
284+ }
265285 // See if indent has changed and issue INDENT / DEDENT
266286 indent := x .countIndent (x .currentIndent )
267287 indentStackTop := x .indentStack [len (x .indentStack )- 1 ]
@@ -293,9 +313,13 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
293313 continue
294314 }
295315
296- // Check if newline reached
297- if x .line [0 ] == '\n' {
316+ // Check if newline or comment reached
317+ if x .line [0 ] == '\n' || x . line [ 0 ] == '#' {
298318 x .state = checkEof
319+ // Don't output NEWLINE if brackets are open
320+ if x .openBrackets () {
321+ continue
322+ }
299323 return NEWLINE
300324 }
301325
@@ -329,6 +353,21 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
329353 // Read an operator if available
330354 token = x .readOperator ()
331355 if token != eof {
356+ // implement implicit line joining rules
357+ switch token {
358+ case '[' :
359+ x .bracket ++
360+ case ']' :
361+ x .bracket --
362+ case '(' :
363+ x .parenthesis ++
364+ case ')' :
365+ x .parenthesis --
366+ case '{' :
367+ x .brace ++
368+ case '}' :
369+ x .brace --
370+ }
332371 return token
333372 }
334373
@@ -645,6 +684,7 @@ foundEndOfString:
645684
646685// The parser calls this method on a parse error.
647686func (x * yyLex ) Error (s string ) {
687+ x .error = true
648688 log .Printf ("Parse error: %s" , s )
649689 log .Printf ("Parse buffer %q" , x .line )
650690 log .Printf ("State %#v" , x )
@@ -656,12 +696,17 @@ func SetDebug(level int) {
656696}
657697
658698// Parse runs yyParse over a lexer built from in. It returns a py.SyntaxError exception if the lexer's error flag was set (see Error), and nil otherwise.
659- func Parse (in io.Reader ) {
660- yyParse (NewLex (in ))
699+ func Parse (in io.Reader ) error {
700+ lex := NewLex (in )
701+ yyParse (lex )
702+ if lex .error {
703+ return py .ExceptionNewf (py .SyntaxError , "Syntax Error" )
704+ }
705+ return nil
661706}
662707
663708// Lex a file only
664- func Lex (in io.Reader ) {
709+ func Lex (in io.Reader ) error {
665710 lex := NewLex (in )
666711 yylval := yySymType {}
667712 for {
@@ -670,4 +715,8 @@ func Lex(in io.Reader) {
670715 break
671716 }
672717 }
718+ if lex .error {
719+ return py .ExceptionNewf (py .SyntaxError , "Syntax Error" )
720+ }
721+ return nil
673722}
0 commit comments