diff --git a/pyproject.toml b/pyproject.toml
index 3a28ce93..4cb45ef4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,6 +51,14 @@ doc = [
 [tool.hatch.version]
 path = "sqlparse/__init__.py"
 
+[tool.hatch.build.targets.wheel]
+packages = ["sqlparse"]
+
+[tool.hatch.build.targets.sdist]
+include = [
+    "/sqlparse",
+]
+
 [tool.coverage.run]
 source_pkgs = ["sqlparse", "tests"]
 branch = true
diff --git a/sqlparse/filters/others.py b/sqlparse/filters/others.py
index cdb63f0d..ef717c68 100644
--- a/sqlparse/filters/others.py
+++ b/sqlparse/filters/others.py
@@ -97,17 +97,22 @@ def _stripws_default(tlist):
             is_first_char = False
 
     def _stripws_identifierlist(self, tlist):
-        # Removes newlines before commas, see issue140
-        last_nl = None
-        for token in list(tlist.tokens):
-            if last_nl and token.ttype is T.Punctuation and token.value == ',':
-                tlist.tokens.remove(last_nl)
-            last_nl = token if token.is_whitespace else None
-
-            # next_ = tlist.token_next(token, skip_ws=False)
-            # if (next_ and not next_.is_whitespace and
-            # token.ttype is T.Punctuation and token.value == ','):
-            # tlist.insert_after(token, sql.Token(T.Whitespace, ' '))
+        # Removes whitespace before commas, see issue140 and issue823
+        # First pass: identify all whitespace tokens that appear before commas
+        tokens_to_remove = []
+        ws_before_comma = []
+        for token in tlist.tokens:
+            if token.is_whitespace:
+                ws_before_comma.append(token)
+            elif token.ttype is T.Punctuation and token.value == ',':
+                # Mark all collected whitespace for removal
+                tokens_to_remove.extend(ws_before_comma)
+                ws_before_comma = []
+            else:
+                ws_before_comma = []
+        # Second pass: remove marked tokens
+        for token in tokens_to_remove:
+            tlist.tokens.remove(token)
         return self._stripws_default(tlist)
 
     def _stripws_parenthesis(self, tlist):
@@ -124,7 +129,10 @@ def _stripws_parenthesis(self, tlist):
     def process(self, stmt, depth=0):
         [self.process(sgroup, depth + 1) for sgroup in stmt.get_sublists()]
         self._stripws(stmt)
-        if depth == 0 and stmt.tokens and stmt.tokens[-1].is_whitespace:
+        # Strip trailing whitespace from all groups (not just root level).
+        # This is needed when strip_comments removes comments from inside
+        # groups like Identifier, leaving trailing whitespace behind.
+        while stmt.tokens and stmt.tokens[-1].is_whitespace:
             stmt.tokens.pop(-1)
         return stmt
 
diff --git a/sqlparse/keywords.py b/sqlparse/keywords.py
index d0cb5fbf..c43a9a89 100644
--- a/sqlparse/keywords.py
+++ b/sqlparse/keywords.py
@@ -59,9 +59,9 @@
     (r'(?![_A-ZÀ-Ü])-?(\d+(\.\d*)|\.\d+)(?![_A-ZÀ-Ü])',
      tokens.Number.Float),
     (r'(?![_A-ZÀ-Ü])-?\d+(?![_A-ZÀ-Ü])', tokens.Number.Integer),
-    (r"'(''|\\'|[^'])*'", tokens.String.Single),
+    (r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single),
     # not a real string literal in ANSI SQL:
-    (r'"(""|\\"|[^"])*"', tokens.String.Symbol),
+    (r'"(""|\\\\|\\"|[^"])*"', tokens.String.Symbol),
     (r'(""|".*?[^\\]")', tokens.String.Symbol),
     # sqlite names can be escaped with [square brackets]. left bracket
     # cannot be preceded by word character or a right bracket --