From 2051de9b55b490f8983b8f87860b8eb54985fb3d Mon Sep 17 00:00:00 2001 From: funcpp Date: Mon, 30 Mar 2026 11:04:42 +0900 Subject: [PATCH 1/5] Support optional AS keyword in CTE definitions for Databricks Databricks allows omitting the AS keyword in CTE definitions: `WITH cte (SELECT ...) SELECT * FROM cte` Add `supports_cte_without_as()` dialect method and enable it for Databricks and Generic dialects. --- src/dialect/databricks.rs | 5 +++ src/dialect/generic.rs | 4 ++ src/dialect/mod.rs | 11 +++++ src/parser/mod.rs | 81 ++++++++++++++++++++++++----------- tests/sqlparser_databricks.rs | 27 ++++++++++++ 5 files changed, 104 insertions(+), 24 deletions(-) diff --git a/src/dialect/databricks.rs b/src/dialect/databricks.rs index 55e4f56cc5..1a2040e891 100644 --- a/src/dialect/databricks.rs +++ b/src/dialect/databricks.rs @@ -90,4 +90,9 @@ impl Dialect for DatabricksDialect { fn supports_optimize_table(&self) -> bool { true } + + /// See <https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-qry-select-cte> + fn supports_cte_without_as(&self) -> bool { + true + } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 1d5461fec1..c7f17351b3 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -288,4 +288,8 @@ impl Dialect for GenericDialect { fn supports_comma_separated_trim(&self) -> bool { true } + + fn supports_cte_without_as(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index fed81b60a4..626aeba958 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1664,6 +1664,17 @@ pub trait Dialect: Debug + Any { fn supports_comma_separated_trim(&self) -> bool { false } + + /// Returns true if the dialect supports the `AS` keyword being + /// optional in a CTE definition. For example: + /// ```sql + /// WITH cte_name (SELECT ...) + /// ``` + /// + /// [Databricks](https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-qry-select-cte) + fn supports_cte_without_as(&self) -> bool { + false + } } /// Operators for which precedence must be defined. 
diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6282ed3d72..9fa581b213 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -14060,7 +14060,7 @@ impl<'a> Parser<'a> { }) } - /// Parse a CTE (`alias [( col1, col2, ... )] AS (subquery)`) + /// Parse a CTE (`alias [( col1, col2, ... )] [AS] (subquery)`) pub fn parse_cte(&mut self) -> Result<Cte, ParserError> { let name = self.parse_identifier()?; @@ -14091,32 +14091,65 @@ impl<'a> Parser<'a> { closing_paren_token: closing_paren_token.into(), } } else { - let columns = self.parse_table_alias_column_defs()?; - self.expect_keyword_is(Keyword::AS)?; - let mut is_materialized = None; - if dialect_of!(self is PostgreSqlDialect) { - if self.parse_keyword(Keyword::MATERIALIZED) { - is_materialized = Some(CteAsMaterialized::Materialized); - } else if self.parse_keywords(&[Keyword::NOT, Keyword::MATERIALIZED]) { - is_materialized = Some(CteAsMaterialized::NotMaterialized); + let as_optional = self.dialect.supports_cte_without_as(); + let opt_query = if as_optional { + self.maybe_parse(|p| { + p.expect_token(&Token::LParen)?; + let query = p.parse_query()?; + let closing_paren_token = p.expect_token(&Token::RParen)?; + Ok((query, closing_paren_token)) + })? 
+ } else { + None + }; + match opt_query { + Some((query, closing_paren_token)) => { + let alias = TableAlias { + explicit: false, + name, + columns: vec![], + }; + Cte { + alias, + query, + from: None, + materialized: None, + closing_paren_token: closing_paren_token.into(), + } } - } - self.expect_token(&Token::LParen)?; + None => { + let columns = self.parse_table_alias_column_defs()?; + if as_optional { + let _ = self.parse_keyword(Keyword::AS); + } else { + self.expect_keyword_is(Keyword::AS)?; + } + let mut is_materialized = None; + if dialect_of!(self is PostgreSqlDialect) { + if self.parse_keyword(Keyword::MATERIALIZED) { + is_materialized = Some(CteAsMaterialized::Materialized); + } else if self.parse_keywords(&[Keyword::NOT, Keyword::MATERIALIZED]) { + is_materialized = Some(CteAsMaterialized::NotMaterialized); + } + } + self.expect_token(&Token::LParen)?; - let query = self.parse_query()?; - let closing_paren_token = self.expect_token(&Token::RParen)?; + let query = self.parse_query()?; + let closing_paren_token = self.expect_token(&Token::RParen)?; - let alias = TableAlias { - explicit: false, - name, - columns, - }; - Cte { - alias, - query, - from: None, - materialized: is_materialized, - closing_paren_token: closing_paren_token.into(), + let alias = TableAlias { + explicit: false, + name, + columns, + }; + Cte { + alias, + query, + from: None, + materialized: is_materialized, + closing_paren_token: closing_paren_token.into(), + } + } } }; if self.parse_keyword(Keyword::FROM) { diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 79b3d0654d..ff44bf610f 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -644,3 +644,30 @@ fn parse_databricks_json_accessor() { "SELECT raw:store.bicycle.price::DOUBLE FROM store_data", ); } + +#[test] +fn parse_cte_without_as() { + databricks_and_generic().one_statement_parses_to( + "WITH cte (SELECT 1) SELECT * FROM cte", + "WITH cte AS (SELECT 1) SELECT * FROM 
cte", + ); + + databricks_and_generic().one_statement_parses_to( + "WITH a AS (SELECT 1), b (SELECT 2) SELECT * FROM a, b", + "WITH a AS (SELECT 1), b AS (SELECT 2) SELECT * FROM a, b", + ); + + databricks_and_generic().one_statement_parses_to( + "WITH cte (col1, col2) (SELECT 1, 2) SELECT * FROM cte", + "WITH cte (col1, col2) AS (SELECT 1, 2) SELECT * FROM cte", + ); + + databricks_and_generic().verified_query("WITH cte AS (SELECT 1) SELECT * FROM cte"); + + databricks_and_generic() + .verified_query("WITH cte (col1, col2) AS (SELECT 1, 2) SELECT * FROM cte"); + + assert!(all_dialects_where(|d| !d.supports_cte_without_as()) + .parse_sql_statements("WITH cte (SELECT 1) SELECT * FROM cte") + .is_err()); +} From d86acabae1817c6f53c581fc4a58daccdea0c1a1 Mon Sep 17 00:00:00 2001 From: funcpp Date: Tue, 31 Mar 2026 11:30:04 +0900 Subject: [PATCH 2/5] Enable `!` as NOT operator for Databricks dialect --- src/dialect/databricks.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/dialect/databricks.rs b/src/dialect/databricks.rs index 55e4f56cc5..876eef22f8 100644 --- a/src/dialect/databricks.rs +++ b/src/dialect/databricks.rs @@ -90,4 +90,9 @@ impl Dialect for DatabricksDialect { fn supports_optimize_table(&self) -> bool { true } + + /// See + fn supports_bang_not_operator(&self) -> bool { + true + } } From 4d0f3be9be4350c2666d43edd158f60a35460b18 Mon Sep 17 00:00:00 2001 From: funcpp Date: Tue, 31 Mar 2026 11:38:19 +0900 Subject: [PATCH 3/5] Allow bare columns in GROUPING SETS expressions GROUPING SETS used parse_tuple(false, true) which required each element to be parenthesized, while CUBE and ROLLUP already used parse_tuple(true, true) allowing bare columns. This inconsistency meant GROUPING SETS (a, b, c) failed to parse despite being valid syntax in PostgreSQL, Databricks, and other dialects. 
--- src/parser/mod.rs | 2 +- tests/sqlparser_common.rs | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6282ed3d72..8497dc2ec2 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2592,7 +2592,7 @@ impl<'a> Parser<'a> { if self.dialect.supports_group_by_expr() { if self.parse_keywords(&[Keyword::GROUPING, Keyword::SETS]) { self.expect_token(&Token::LParen)?; - let result = self.parse_comma_separated(|p| p.parse_tuple(false, true))?; + let result = self.parse_comma_separated(|p| p.parse_tuple(true, true))?; self.expect_token(&Token::RParen)?; Ok(Expr::GroupingSets(result)) } else if self.parse_keyword(Keyword::CUBE) { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 17f368bbb7..5c46f3cf26 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -13109,6 +13109,19 @@ fn test_group_by_grouping_sets() { ); } +#[test] +fn test_group_by_grouping_sets_bare_columns() { + all_dialects_where(|d| d.supports_group_by_expr()).one_statement_parses_to( + "SELECT a, b FROM t GROUP BY GROUPING SETS (a, b, c)", + "SELECT a, b FROM t GROUP BY GROUPING SETS ((a), (b), (c))", + ); + + all_dialects_where(|d| d.supports_group_by_expr()).one_statement_parses_to( + "SELECT a, b FROM t GROUP BY GROUPING SETS ((a, b), c)", + "SELECT a, b FROM t GROUP BY GROUPING SETS ((a, b), (c))", + ); +} + #[test] fn test_xmltable() { all_dialects() From 156b6ffeedb067f58b66b5e8dfa0e4992cf7c051 Mon Sep 17 00:00:00 2001 From: funcpp Date: Tue, 31 Mar 2026 13:04:08 +0900 Subject: [PATCH 4/5] Support multi-column aliases in SELECT items for Databricks Spark SQL grammar allows parenthesized identifier lists as SELECT item aliases: namedExpression: expression (AS? (identifier | identifierList))? 
identifierList: '(' identifier (',' identifier)* ')' This enables syntax like: SELECT stack(2, 'a', 'b', 'c', 'd') AS (col1, col2) --- src/ast/query.rs | 15 +++++++++++++++ src/ast/spans.rs | 3 +++ src/dialect/databricks.rs | 4 ++++ src/dialect/generic.rs | 4 ++++ src/dialect/mod.rs | 11 +++++++++++ src/parser/mod.rs | 13 +++++++++++++ tests/sqlparser_databricks.rs | 14 ++++++++++++++ 7 files changed, 64 insertions(+) diff --git a/src/ast/query.rs b/src/ast/query.rs index a52d518b1f..49ba86f1f7 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -872,6 +872,15 @@ pub enum SelectItem { /// The alias for the expression. alias: Ident, }, + /// An expression, followed by `[ AS ] (alias1, alias2, ...)` + /// + /// [Spark SQL](https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select.html) + ExprWithAliases { + /// The expression being projected. + expr: Expr, + /// The list of aliases for the expression. + aliases: Vec<Ident>, + }, /// An expression, followed by a wildcard expansion. /// e.g. 
`alias.*`, `STRUCT('foo').*` QualifiedWildcard(SelectItemQualifiedWildcardKind, WildcardAdditionalOptions), @@ -1175,6 +1184,12 @@ impl fmt::Display for SelectItem { f.write_str(" AS ")?; alias.fmt(f) } + SelectItem::ExprWithAliases { expr, aliases } => { + expr.fmt(f)?; + f.write_str(" AS (")?; + display_comma_separated(aliases).fmt(f)?; + f.write_str(")") + } SelectItem::QualifiedWildcard(kind, additional_options) => { kind.fmt(f)?; additional_options.fmt(f) diff --git a/src/ast/spans.rs b/src/ast/spans.rs index d80a3f4d54..90fa2b8b50 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1821,6 +1821,9 @@ impl Spanned for SelectItem { match self { SelectItem::UnnamedExpr(expr) => expr.span(), SelectItem::ExprWithAlias { expr, alias } => expr.span().union(&alias.span), + SelectItem::ExprWithAliases { expr, aliases } => { + union_spans(iter::once(expr.span()).chain(aliases.iter().map(|i| i.span))) + } SelectItem::QualifiedWildcard(kind, wildcard_additional_options) => union_spans( [kind.span()] .into_iter() diff --git a/src/dialect/databricks.rs b/src/dialect/databricks.rs index 55e4f56cc5..c115a110ef 100644 --- a/src/dialect/databricks.rs +++ b/src/dialect/databricks.rs @@ -90,4 +90,8 @@ impl Dialect for DatabricksDialect { fn supports_optimize_table(&self) -> bool { true } + + fn supports_select_item_multi_column_alias(&self) -> bool { + true + } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 1d5461fec1..08f10d5436 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -288,4 +288,8 @@ impl Dialect for GenericDialect { fn supports_comma_separated_trim(&self) -> bool { true } + + fn supports_select_item_multi_column_alias(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index fed81b60a4..2d5474db3d 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1664,6 +1664,17 @@ pub trait Dialect: Debug + Any { fn supports_comma_separated_trim(&self) -> bool { false } + + /// Returns true if the 
dialect supports parenthesized multi-column + /// aliases in SELECT items. For example: + /// ```sql + /// SELECT stack(2, 'a', 'b') AS (col1, col2) + /// ``` + /// + /// [Spark SQL](https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select.html) + fn supports_select_item_multi_column_alias(&self) -> bool { + false + } } /// Operators for which precedence must be defined. diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6282ed3d72..b3e69c6e27 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -18074,6 +18074,19 @@ impl<'a> Parser<'a> { self.parse_wildcard_additional_options(wildcard_token)?, )) } + expr if self.dialect.supports_select_item_multi_column_alias() + && self.peek_keyword(Keyword::AS) + && self.peek_nth_token(1).token == Token::LParen => + { + self.expect_keyword(Keyword::AS)?; + self.expect_token(&Token::LParen)?; + let aliases = self.parse_comma_separated(|p| p.parse_identifier())?; + self.expect_token(&Token::RParen)?; + Ok(SelectItem::ExprWithAliases { + expr: maybe_prefixed_expr(expr, prefix), + aliases, + }) + } expr => self .maybe_parse_select_item_alias() .map(|alias| match alias { diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 79b3d0654d..74ec9427e3 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -644,3 +644,17 @@ fn parse_databricks_json_accessor() { "SELECT raw:store.bicycle.price::DOUBLE FROM store_data", ); } + +#[test] +fn parse_select_item_multi_column_alias() { + databricks_and_generic().verified_stmt("SELECT stack(2, 'a', 'b', 'c', 'd') AS (col1, col2)"); + + databricks_and_generic() + .verified_stmt("SELECT stack(2, 'a', 'b', 'c', 'd') AS (col1, col2) FROM t"); + + assert!( + all_dialects_where(|d| !d.supports_select_item_multi_column_alias()) + .parse_sql_statements("SELECT stack(2, 'a', 'b') AS (col1, col2)") + .is_err() + ); +} From ae901ac218e3bade9b0ba8c64e7cbe3f486a67ac Mon Sep 17 00:00:00 2001 From: funcpp Date: Tue, 31 Mar 2026 13:31:15 +0900 
Subject: [PATCH 5/5] Enable numeric-prefix identifiers for Databricks dialect Databricks, built on Spark SQL, allows identifiers that start with digits. The Spark SQL ANTLR lexer defines IDENTIFIER as (UNICODE_LETTER | DIGIT | '_')+, with no restriction on the first character. Enable the existing supports_numeric_prefix() for DatabricksDialect. --- src/dialect/databricks.rs | 4 ++++ tests/sqlparser_databricks.rs | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/src/dialect/databricks.rs b/src/dialect/databricks.rs index 55e4f56cc5..64c40211eb 100644 --- a/src/dialect/databricks.rs +++ b/src/dialect/databricks.rs @@ -39,6 +39,10 @@ impl Dialect for DatabricksDialect { matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_') } + fn supports_numeric_prefix(&self) -> bool { + true + } + fn supports_filter_during_aggregation(&self) -> bool { true } diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 79b3d0654d..e9b19fcce5 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -644,3 +644,10 @@ fn parse_databricks_json_accessor() { "SELECT raw:store.bicycle.price::DOUBLE FROM store_data", ); } + +#[test] +fn parse_numeric_prefix_identifier() { + databricks().verified_stmt("SELECT * FROM catalog.schema.1st_table"); + + databricks().verified_stmt("SELECT * FROM a.b.1c"); +}