@@ -35,6 +35,7 @@ type yyLex struct {
3535 line string // current line being parsed
3636 eof bool // flag to show EOF was read
3737 error bool // set if an error has ocurred
38+ errorString string // the string of the error
3839 indentStack []int // indent stack to control INDENT / DEDENT tokens
3940 state int // current state of state machine
4041 currentIndent string // whitespace at start of current line
@@ -71,7 +72,7 @@ func (x *yyLex) refill() {
7172}
7273
7374// Finds the length of a space and tab separated string
74- func ( x * yyLex ) countIndent (s string ) int {
75+ func countIndent (s string ) int {
7576 if len (s ) == 0 {
7677 return 0
7778 }
@@ -236,20 +237,71 @@ const (
236237 isEof
237238)
238239
240+ // A Token with value
241+ type LexToken struct {
242+ token int
243+ value py.Object
244+ }
245+
246+ // Convert the yySymType and token into a LexToken
247+ func newLexToken (token int , yylval * yySymType ) (lt LexToken ) {
248+ lt .token = token
249+ if token == NAME {
250+ lt .value = py .String (yylval .str )
251+ } else if token == STRING || token == NUMBER {
252+ lt .value = yylval .obj
253+ } else {
254+ lt .value = nil
255+ }
256+ return
257+ }
258+
259+ // String a LexToken
260+ func (lt * LexToken ) String () string {
261+ name := tokenToString [lt .token ]
262+ if lt .value == nil {
263+ return fmt .Sprintf ("%q (%d)" , name , lt .token )
264+ }
265+ return fmt .Sprintf ("%q (%d) = %T{%v}" , name , lt .token , lt .value , lt .value )
266+ }
267+
268+ // An slice of LexToken~s
269+ type LexTokens []LexToken
270+
271+ // Compare two LexTokens
272+ func (as LexTokens ) Eq (bs []LexToken ) bool {
273+ if len (as ) != len (bs ) {
274+ return false
275+ }
276+ for i := range as {
277+ a := as [i ]
278+ b := bs [i ]
279+ if a != b {
280+ return false
281+ }
282+ }
283+ return true
284+ }
285+
286+ // String a LexTokens
287+ func (lts LexTokens ) String () string {
288+ buf := new (bytes.Buffer )
289+ buf .WriteString ("[" )
290+ for i := range lts {
291+ lt := lts [i ]
292+ buf .WriteString ("{" )
293+ buf .WriteString (lt .String ())
294+ buf .WriteString ("}, " )
295+ }
296+ buf .WriteString ("]" )
297+ return buf .String ()
298+ }
299+
239300// The parser calls this method to get each new token. This
240301// implementation returns operators and NUM.
241302func (x * yyLex ) Lex (yylval * yySymType ) (ret int ) {
242303 if yyDebug >= 2 {
243- defer func () {
244- name := tokenToString [ret ]
245- if ret == NAME {
246- fmt .Printf ("LEX> %q (%d) = %q\n " , name , ret , yylval .str )
247- } else if ret == STRING || ret == NUMBER {
248- fmt .Printf ("LEX> %q (%d) = %T{%v}\n " , name , ret , yylval .obj , yylval .obj )
249- } else {
250- fmt .Printf ("LEX> %q (%d) \n " , name , ret )
251- }
252- }()
304+ defer func () { fmt .Printf ("LEX> %v\n " , newLexToken (ret , yylval )) }()
253305 }
254306
255307 for {
@@ -286,7 +338,7 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
286338 continue
287339 }
288340 // See if indent has changed and issue INDENT / DEDENT
289- indent := x . countIndent (x .currentIndent )
341+ indent := countIndent (x .currentIndent )
290342 indentStackTop := x .indentStack [len (x .indentStack )- 1 ]
291343 switch {
292344 case indent > indentStackTop :
@@ -329,7 +381,8 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
329381 // Check if continuation character
330382 if x .line [0 ] == '\\' && (len (x .line ) <= 1 || x .line [1 ] == '\n' ) {
331383 if x .eof {
332- return eof
384+ x .state = checkEof
385+ continue
333386 }
334387 x .refill ()
335388 x .state = parseTokens
@@ -385,7 +438,7 @@ func (x *yyLex) Lex(yylval *yySymType) (ret int) {
385438 }
386439
387440 // Nothing we recognise found
388- x .Error ("Syntax error " )
441+ x .Error ("invalid syntax " )
389442 return eof
390443 case checkEof :
391444 if x .eof {
@@ -567,6 +620,7 @@ isNumber:
567620 } else {
568621 // Discard numbers with leading 0 except all 0s
569622 if illegalDecimalInteger .FindString (x .line ) != "" {
623+ x .Error ("illegal decimal with leading zero" )
570624 return eofError , nil
571625 }
572626 value = py .IntNew (py .IntType , py.Tuple {py .String (s ), py .Int (10 )}, nil )
@@ -707,6 +761,7 @@ foundEndOfString:
707761// The parser calls this method on a parse error.
708762func (x * yyLex ) Error (s string ) {
709763 x .error = true
764+ x .errorString = s
710765 if yyDebug >= 1 {
711766 log .Printf ("Parse error: %s" , s )
712767 log .Printf ("Parse buffer %q" , x .line )
@@ -719,6 +774,14 @@ func (x *yyLex) Errorf(format string, a ...interface{}) {
719774 x .Error (fmt .Sprintf (format , a ... ))
720775}
721776
777+ // Returns an python error for the current yyLex
778+ func (x * yyLex ) ErrorReturn () error {
779+ if x .error {
780+ return py .ExceptionNewf (py .SyntaxError , "Syntax Error: %s" , x .errorString )
781+ }
782+ return nil
783+ }
784+
722785// Set the debug level 0 = off, 4 = max
723786func SetDebug (level int ) {
724787 yyDebug = level
@@ -728,24 +791,31 @@ func SetDebug(level int) {
728791func Parse (in io.Reader ) error {
729792 lex := NewLex (in )
730793 yyParse (lex )
731- if lex .error {
732- return py .ExceptionNewf (py .SyntaxError , "Syntax Error" )
733- }
734- return nil
794+ return lex .ErrorReturn ()
735795}
736796
737- // Lex a file only
738- func Lex (in io.Reader ) error {
797+ // Parse a string
798+ func ParseString (in string ) error {
799+ return Parse (bytes .NewBufferString (in ))
800+ }
801+
802+ // Lex a file only, returning a sequence of tokens
803+ func Lex (in io.Reader ) (lts LexTokens , err error ) {
739804 lex := NewLex (in )
740805 yylval := yySymType {}
741806 for {
742807 ret := lex .Lex (& yylval )
743808 if ret == eof {
744809 break
745810 }
811+ lt := newLexToken (ret , & yylval )
812+ lts = append (lts , lt )
746813 }
747- if lex .error {
748- return py .ExceptionNewf (py .SyntaxError , "Syntax Error" )
749- }
750- return nil
814+ err = lex .ErrorReturn ()
815+ return
816+ }
817+
818+ // Lex a string
819+ func LexString (in string ) (lts LexTokens , err error ) {
820+ return Lex (bytes .NewBufferString (in ))
751821}
0 commit comments