@@ -24,21 +24,11 @@ class Lexer(object):
2424 flags = re .IGNORECASE | re .UNICODE
2525
2626 def __init__ (self ):
27- self ._tokens = {}
28-
29- for state in SQL_REGEX :
30- self ._tokens [state ] = []
31-
32- for tdef in SQL_REGEX [state ]:
33- rex = re .compile (tdef [0 ], self .flags ).match
34- new_state = None
35- if len (tdef ) > 2 :
36- # Only Multiline comments
37- if tdef [2 ] == '#pop' :
38- new_state = - 1
39- elif tdef [2 ] in SQL_REGEX :
40- new_state = (tdef [2 ],)
41- self ._tokens [state ].append ((rex , tdef [1 ], new_state ))
27+ self ._tokens = []
28+
29+ for tdef in SQL_REGEX ['root' ]:
30+ rex = re .compile (tdef [0 ], self .flags ).match
31+ self ._tokens .append ((rex , tdef [1 ]))
4232
4333 def get_tokens (self , text , encoding = None ):
4434 """
@@ -54,8 +44,6 @@ def get_tokens(self, text, encoding=None):
5444 ``stack`` is the initial stack (default: ``['root']``)
5545 """
5646 encoding = encoding or 'utf-8'
57- statestack = ['root' , ]
58- statetokens = self ._tokens ['root' ]
5947
6048 if isinstance (text , string_types ):
6149 text = StringIO (text )
@@ -69,7 +57,7 @@ def get_tokens(self, text, encoding=None):
6957
7058 iterable = enumerate (text )
7159 for pos , char in iterable :
72- for rexmatch , action , new_state in statetokens :
60+ for rexmatch , action in self . _tokens :
7361 m = rexmatch (text , pos )
7462
7563 if not m :
@@ -79,16 +67,6 @@ def get_tokens(self, text, encoding=None):
7967 elif callable (action ):
8068 yield action (m .group ())
8169
82- if isinstance (new_state , tuple ):
83- for state in new_state :
84- # fixme: multiline-comments not stackable
85- if not (state == 'multiline-comments'
86- and statestack [- 1 ] == 'multiline-comments' ):
87- statestack .append (state )
88- elif isinstance (new_state , int ):
89- del statestack [new_state :]
90- statetokens = self ._tokens [statestack [- 1 ]]
91-
9270 consume (iterable , m .end () - pos - 1 )
9371 break
9472 else :
0 commit comments