Skip to content

Commit 5ee6aed

Browse files
committed
Improved parsing of identifier lists (targets issue2).
1 parent 118d6bb commit 5ee6aed

5 files changed

Lines changed: 61 additions & 25 deletions

File tree

CHANGES

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ In Development
33
* Lexer preserves original line breaks (issue1).
44
* Improved identifier parsing: backtick quotes, wildcards, T-SQL variables
55
prefixed with @.
6+
* Improved parsing of identifier lists (issue2).
67
* Recursive recognition of AS (issue4) and CASE.
78

89

sqlparse/engine/grouping.py

Lines changed: 37 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -132,31 +132,44 @@ def group_identifier_list(tlist):
132132
[group_identifier_list(sgroup) for sgroup in tlist.get_sublists()
133133
if not isinstance(sgroup, (Identifier, IdentifierList))]
134134
idx = 0
135-
token = tlist.token_next_by_instance(idx, Identifier)
136-
while token:
137-
tidx = tlist.token_index(token)
138-
end = tlist.token_not_matching(tidx+1,
139-
[lambda t: isinstance(t, Identifier),
140-
lambda t: t.is_whitespace(),
141-
lambda t: t.match(T.Punctuation,
142-
',')
143-
])
144-
if end is None:
145-
end = tlist.tokens[-1]
146-
exclude_end = False
135+
# Allowed list items
136+
fend1_funcs = [lambda t: isinstance(t, Identifier),
137+
lambda t: t.is_whitespace(),
138+
lambda t: t.ttype == T.Wildcard,
139+
lambda t: t.match(T.Keyword, 'null'),
140+
lambda t: t.ttype == T.Number.Integer,
141+
lambda t: t.ttype == T.String.Single,
142+
]
143+
tcomma = tlist.token_next_match(idx, T.Punctuation, ',')
144+
start = None
145+
while tcomma is not None:
146+
before = tlist.token_prev(tcomma)
147+
after = tlist.token_next(tcomma)
148+
# Check if the tokens around tcomma belong to a list
149+
bpassed = apassed = False
150+
for func in fend1_funcs:
151+
if before is not None and func(before):
152+
bpassed = True
153+
if after is not None and func(after):
154+
apassed = True
155+
if not bpassed or not apassed:
156+
# Something's wrong here, skip ahead to next ","
157+
start = None
158+
tcomma = tlist.token_next_match(tlist.token_index(tcomma)+1,
159+
T.Punctuation, ',')
147160
else:
148-
exclude_end = True
149-
grp_tokens = tlist.tokens_between(token, end,
150-
exclude_end=exclude_end)
151-
while grp_tokens and (grp_tokens[-1].is_whitespace()
152-
or grp_tokens[-1].match(T.Punctuation, ',')):
153-
grp_tokens.pop()
154-
if len(grp_tokens) <= 1:
155-
idx = tidx + 1
156-
else:
157-
group = tlist.group_tokens(IdentifierList, grp_tokens)
158-
idx = tlist.token_index(group)
159-
token = tlist.token_next_by_instance(idx, Identifier)
161+
if start is None:
162+
start = before
163+
next_ = tlist.token_next(after)
164+
if next_ is None or not next_.match(T.Punctuation, ','):
165+
# Reached the end of the list
166+
tokens = tlist.tokens_between(start, after)
167+
group = tlist.group_tokens(IdentifierList, tokens)
168+
start = None
169+
tcomma = tlist.token_next_match(tlist.token_index(group)+1,
170+
T.Punctuation, ',')
171+
else:
172+
tcomma = next_
160173

161174

162175
def group_parenthesis(tlist):

sqlparse/keywords.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
'BREADTH': Keyword,
4040
'BY': Keyword,
4141

42-
'C': Keyword,
42+
# 'C': Keyword, # most likely this is an alias
4343
'CACHE': Keyword,
4444
'CALL': Keyword,
4545
'CALLED': Keyword,

sqlparse/sql.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,12 +204,24 @@ def token_not_matching(self, idx, funcs):
204204
return token
205205
return None
206206

207+
def token_matching(self, idx, funcs):
    """Returns the first token at or after *idx* accepted by any of *funcs*.

    *idx* is the position in ``self.tokens`` where the search starts.
    *funcs* is an iterable of callables; each one is called with a single
    token and a truthy result means the token matches. The callables are
    tried in order and evaluation stops at the first one that accepts.

    ``None`` is returned if no token matches.
    """
    for token in self.tokens[idx:]:
        # No debugging output here: this is a plain lookup helper and must
        # not write to stdout while parsing.
        if any(func(token) for func in funcs):
            return token
    return None
214+
207215
def token_prev(self, idx, skip_ws=True):
208216
"""Returns the previous token relative to *idx*.
209217
210218
If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
211219
``None`` is returned if there's no previous token.
212220
"""
221+
if idx is None:
222+
return None
223+
if not isinstance(idx, int):
224+
idx = self.token_index(idx)
213225
while idx != 0:
214226
idx -= 1
215227
if self.tokens[idx].is_whitespace() and skip_ws:
@@ -222,6 +234,10 @@ def token_next(self, idx, skip_ws=True):
222234
If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
223235
``None`` is returned if there's no next token.
224236
"""
237+
if idx is None:
238+
return None
239+
if not isinstance(idx, int):
240+
idx = self.token_index(idx)
225241
while idx < len(self.tokens)-1:
226242
idx += 1
227243
if self.tokens[idx].is_whitespace() and skip_ws:

tests/test_grouping.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,12 @@ def test_identifier_list_case(self):
7575
p = sqlparse.parse('(a, case when 1 then 2 else 3 end as b, c)')[0]
7676
self.assert_(isinstance(p.tokens[0].tokens[1], IdentifierList))
7777

78+
def test_identifier_list_other(self):  # issue2
    """Wildcards, NULL, integers and strings are grouped as list items."""
    p = sqlparse.parse("select *, null, 1, 'foo', bar from mytable, x")[0]
    id_list = p.tokens[2]
    # assertTrue replaces the deprecated assert_ alias.
    self.assertTrue(isinstance(id_list, IdentifierList))
    # 5 items + 4 commas + 4 whitespace tokens between "select" and "from".
    self.assertEqual(len(id_list.tokens), 13)
83+
7884
def test_where(self):
7985
s = 'select * from foo where bar = 1 order by id desc'
8086
p = sqlparse.parse(s)[0]

0 commit comments

Comments
 (0)