Improved parsing of identifier lists (targets issue2).

andialbrecht · andialbrecht · commit 5ee6aed6aef8 · 2009-05-03T21:32:27.000+02:00
diff --git a/CHANGES b/CHANGES
@@ -3,6 +3,7 @@ In Development
  * Lexers preserves original line breaks (issue1).
  * Improved identifier parsing: backtick quotes, wildcards, T-SQL variables
    prefixed with @.
+ * Improved parsing of identifier lists (issue2).
  * Recursive recognition of AS (issue4) and CASE.
 
 
diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
@@ -132,31 +132,44 @@ def group_identifier_list(tlist):
     [group_identifier_list(sgroup) for sgroup in tlist.get_sublists()
      if not isinstance(sgroup, (Identifier, IdentifierList))]
     idx = 0
-    token = tlist.token_next_by_instance(idx, Identifier)
-    while token:
-        tidx = tlist.token_index(token)
-        end = tlist.token_not_matching(tidx+1,
-                                       [lambda t: isinstance(t, Identifier),
-                                        lambda t: t.is_whitespace(),
-                                        lambda t: t.match(T.Punctuation,
-                                                          ',')
-                                        ])
-        if end is None:
-            end = tlist.tokens[-1]
-            exclude_end = False
+    # Allowed list items
+    fend1_funcs = [lambda t: isinstance(t, Identifier),
+                   lambda t: t.is_whitespace(),
+                   lambda t: t.ttype == T.Wildcard,
+                   lambda t: t.match(T.Keyword, 'null'),
+                   lambda t: t.ttype == T.Number.Integer,
+                   lambda t: t.ttype == T.String.Single,
+                   ]
+    tcomma = tlist.token_next_match(idx, T.Punctuation, ',')
+    start = None
+    while tcomma is not None:
+        before = tlist.token_prev(tcomma)
+        after = tlist.token_next(tcomma)
+        # Check if the tokens around tcomma belong to a list
+        bpassed = apassed = False
+        for func in fend1_funcs:
+            if before is not None and func(before):
+                bpassed = True
+            if after is not None and func(after):
+                apassed = True
+        if not bpassed or not apassed:
+            # Something's wrong here, skip ahead to next ","
+            start = None
+            tcomma = tlist.token_next_match(tlist.token_index(tcomma)+1,
+                                            T.Punctuation, ',')
         else:
-            exclude_end = True
-        grp_tokens = tlist.tokens_between(token, end,
-                                          exclude_end=exclude_end)
-        while grp_tokens and (grp_tokens[-1].is_whitespace()
-                              or grp_tokens[-1].match(T.Punctuation, ',')):
-            grp_tokens.pop()
-        if len(grp_tokens) <= 1:
-            idx = tidx + 1
-        else:
-            group = tlist.group_tokens(IdentifierList, grp_tokens)
-            idx = tlist.token_index(group)
-        token = tlist.token_next_by_instance(idx, Identifier)
+            if start is None:
+                start = before
+            next_ = tlist.token_next(after)
+            if next_ is None or not next_.match(T.Punctuation, ','):
+                # Reached the end of the list
+                tokens = tlist.tokens_between(start, after)
+                group = tlist.group_tokens(IdentifierList, tokens)
+                start = None
+                tcomma = tlist.token_next_match(tlist.token_index(group)+1,
+                                                T.Punctuation, ',')
+            else:
+                tcomma = next_
 
 
 def group_parenthesis(tlist):
diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
@@ -39,7 +39,7 @@
     'BREADTH': Keyword,
     'BY': Keyword,
 
-    'C': Keyword,
+#    'C': Keyword,  # most likely this is an alias
     'CACHE': Keyword,
     'CALL': Keyword,
     'CALLED': Keyword,
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
@@ -204,12 +204,24 @@ def token_not_matching(self, idx, funcs):
                 return token
         return None
 
+    def token_matching(self, idx, funcs):
+        """Returns the first token from index *idx* on that satisfies any
+        function in *funcs*, or ``None`` if there is none."""
+        for token in self.tokens[idx:]:
+            if any(func(token) for func in funcs):
+                return token
+        return None
+
     def token_prev(self, idx, skip_ws=True):
         """Returns the previous token relative to *idx*.
 
         If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
         ``None`` is returned if there's no previous token.
         """
+        if idx is None:
+            return None
+        if not isinstance(idx, int):
+            idx = self.token_index(idx)
         while idx != 0:
             idx -= 1
             if self.tokens[idx].is_whitespace() and skip_ws:
@@ -222,6 +234,10 @@ def token_next(self, idx, skip_ws=True):
         If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
         ``None`` is returned if there's no next token.
         """
+        if idx is None:
+            return None
+        if not isinstance(idx, int):
+            idx = self.token_index(idx)
         while idx < len(self.tokens)-1:
             idx += 1
             if self.tokens[idx].is_whitespace() and skip_ws:
diff --git a/tests/test_grouping.py b/tests/test_grouping.py
@@ -75,6 +75,12 @@ def test_identifier_list_case(self):
         p = sqlparse.parse('(a, case when 1 then 2 else 3 end as b, c)')[0]
         self.assert_(isinstance(p.tokens[0].tokens[1], IdentifierList))
 
+    def test_identifier_list_other(self):  # issue2
+        sql = "select *, null, 1, 'foo', bar from mytable, x"
+        id_list = sqlparse.parse(sql)[0].tokens[2]
+        self.assert_(isinstance(id_list, IdentifierList))
+        self.assertEqual(len(id_list.tokens), 13)
+
     def test_where(self):
         s = 'select * from foo where bar = 1 order by id desc'
         p = sqlparse.parse(s)[0]