// REVIEW SUMMARY -- commentary only. Everything from the first `diff --git` line onward is
// reproduced unchanged.
//
// This is a unified git diff touching eight files of a Rust SQL parser crate:
//
//   src/ast/query.rs
//     Adds the `SelectItem::ExprWithAliases { expr, aliases }` variant, plus a `Display` arm
//     that writes the expression, then ` AS (`, the comma-separated aliases, then `)`.
//   src/ast/spans.rs
//     `Spanned for SelectItem`: the span of the new variant is the union of the expression
//     span and the span of every alias.
//   src/dialect/mod.rs
//     Adds two `Dialect` hooks, both returning `false` by default:
//     `supports_cte_without_as` and `supports_select_item_multi_column_alias`.
//   src/dialect/databricks.rs
//     Returns `true` for `supports_numeric_prefix`, `supports_bang_not_operator`,
//     `supports_cte_without_as` and `supports_select_item_multi_column_alias`.
//   src/dialect/generic.rs
//     Returns `true` for the two new hooks.
//   src/parser/mod.rs
//     * GROUPING SETS: the first argument passed to `parse_tuple` changes from `false` to
//       `true`. The accompanying test expects `GROUPING SETS (a, b, c)` to print back as
//       `GROUPING SETS ((a), (b), (c))`.
//     * `parse_cte`: when the dialect reports `supports_cte_without_as()`, the parser first
//       tries, through `maybe_parse`, to read `( <query> )` directly after the CTE name; on
//       success the CTE is built with an empty column list and `materialized: None`.
//       Otherwise it parses the column list, consumes `AS` (optional for dialects with the
//       hook enabled, required for all others), handles the `[NOT] MATERIALIZED` keywords
//       for `PostgreSqlDialect` only, and then parses the parenthesized query.
//     * Select items: when the dialect reports `supports_select_item_multi_column_alias()`
//       and the upcoming tokens are `AS` followed by `(`, a comma-separated identifier list
//       is parsed and stored in `SelectItem::ExprWithAliases`.
//   tests/sqlparser_common.rs
//     Adds `test_group_by_grouping_sets_bare_columns`.
//   tests/sqlparser_databricks.rs
//     Adds `parse_numeric_prefix_identifier`, `parse_cte_without_as` and
//     `parse_select_item_multi_column_alias`.
//
// NOTE(review): items to confirm before merging
//   1. The doc comment on `ExprWithAliases` advertises `[ AS ] (alias1, alias2, ...)`, but the
//      parser arm in this patch only fires when the `AS` keyword is present, and `Display`
//      always prints `AS`. The doc and the parser should be made to agree.
//   2. Several added lines look mis-formatted:
//        - the `/// See` line above `DatabricksDialect` is replaced by the same text with
//          leading whitespace;
//        - `/// Returns true if the dialect supports parenthesized multi-column` in
//          src/dialect/mod.rs starts at column 0 and follows two empty added lines;
//        - the Databricks `supports_select_item_multi_column_alias` override follows two
//          empty added lines and has no doc comment, while its two new siblings each carry a
//          `/// See` line;
//        - `parse_cte_without_as` has an empty line right before its closing brace.
//      Presumably `cargo fmt` would rewrite these -- TODO confirm.
//   3. `aliases: Vec,`, `-> Result {` and the bare `/// See .` lines look as if text inside
//      angle brackets was lost when this diff was captured. TODO: restore from the original
//      patch; the missing text cannot be recovered with certainty from this copy alone.
//   4. The physical line breaks in this copy do not follow the diff's own line structure
//      (one falls inside a doc comment, another inside a SQL string literal in a test), so
//      this text presumably cannot be applied with `git apply` as-is -- TODO confirm.
//
diff --git a/src/ast/query.rs b/src/ast/query.rs index a52d518b1f..49ba86f1f7 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -872,6 +872,15 @@ pub enum SelectItem { /// The alias for the expression. alias: Ident, }, + /// An expression, followed by `[ AS ] (alias1, alias2, ...)` + /// + /// [Spark SQL](https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select.html) + ExprWithAliases { + /// The expression being projected. + expr: Expr, + /// The list of aliases for the expression. + aliases: Vec, + }, /// An expression, followed by a wildcard expansion. /// e.g. `alias.*`, `STRUCT('foo').*` QualifiedWildcard(SelectItemQualifiedWildcardKind, WildcardAdditionalOptions), @@ -1175,6 +1184,12 @@ impl fmt::Display for SelectItem { f.write_str(" AS ")?; alias.fmt(f) } + SelectItem::ExprWithAliases { expr, aliases } => { + expr.fmt(f)?; + f.write_str(" AS (")?; + display_comma_separated(aliases).fmt(f)?; + f.write_str(")") + } SelectItem::QualifiedWildcard(kind, additional_options) => { kind.fmt(f)?; additional_options.fmt(f) diff --git a/src/ast/spans.rs b/src/ast/spans.rs index d80a3f4d54..90fa2b8b50 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1821,6 +1821,9 @@ impl Spanned for SelectItem { match self { SelectItem::UnnamedExpr(expr) => expr.span(), SelectItem::ExprWithAlias { expr, alias } => expr.span().union(&alias.span), + SelectItem::ExprWithAliases { expr, aliases } => { + union_spans(iter::once(expr.span()).chain(aliases.iter().map(|i| i.span))) + } SelectItem::QualifiedWildcard(kind, wildcard_additional_options) => union_spans( [kind.span()] .into_iter() diff --git a/src/dialect/databricks.rs b/src/dialect/databricks.rs index 55e4f56cc5..7f5e27ac95 100644 --- a/src/dialect/databricks.rs +++ b/src/dialect/databricks.rs @@ -19,7 +19,7 @@ use crate::dialect::Dialect; /// A [`Dialect`] for [Databricks SQL](https://www.databricks.com/) /// -/// See . + /// See . 
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct DatabricksDialect; @@ -39,6 +39,10 @@ impl Dialect for DatabricksDialect { matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_') } + fn supports_numeric_prefix(&self) -> bool { + true + } + + fn supports_filter_during_aggregation(&self) -> bool { true } @@ -90,4 +94,19 @@ impl Dialect for DatabricksDialect { fn supports_optimize_table(&self) -> bool { true } + + /// See + fn supports_bang_not_operator(&self) -> bool { + true + } + + /// See + fn supports_cte_without_as(&self) -> bool { + true + } + + + fn supports_select_item_multi_column_alias(&self) -> bool { + true + } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 1d5461fec1..12e873afd1 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -288,4 +288,12 @@ impl Dialect for GenericDialect { fn supports_comma_separated_trim(&self) -> bool { true } + + fn supports_cte_without_as(&self) -> bool { + true + } + + fn supports_select_item_multi_column_alias(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index fed81b60a4..3b2c512d4b 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1664,6 +1664,29 @@ pub trait Dialect: Debug + Any { fn supports_comma_separated_trim(&self) -> bool { false } + + /// Returns true if the dialect supports the `AS` keyword being + /// optional in a CTE definition. For example: + /// ```sql + /// WITH cte_name (SELECT ...) + /// ``` + /// + /// [Databricks](https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-qry-select-cte) + fn supports_cte_without_as(&self) -> bool { + false + } + + +/// Returns true if the dialect supports parenthesized multi-column + /// aliases in SELECT items. 
For example: + /// ```sql + /// SELECT stack(2, 'a', 'b') AS (col1, col2) + /// ``` + /// + /// [Spark SQL](https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select.html) + fn supports_select_item_multi_column_alias(&self) -> bool { + false + } } /// Operators for which precedence must be defined. diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6282ed3d72..ef88ea5fb1 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2592,7 +2592,7 @@ impl<'a> Parser<'a> { if self.dialect.supports_group_by_expr() { if self.parse_keywords(&[Keyword::GROUPING, Keyword::SETS]) { self.expect_token(&Token::LParen)?; - let result = self.parse_comma_separated(|p| p.parse_tuple(false, true))?; + let result = self.parse_comma_separated(|p| p.parse_tuple(true, true))?; self.expect_token(&Token::RParen)?; Ok(Expr::GroupingSets(result)) } else if self.parse_keyword(Keyword::CUBE) { @@ -14060,7 +14060,7 @@ impl<'a> Parser<'a> { }) } - /// Parse a CTE (`alias [( col1, col2, ... )] AS (subquery)`) + /// Parse a CTE (`alias [( col1, col2, ... )] [AS] (subquery)`) pub fn parse_cte(&mut self) -> Result { let name = self.parse_identifier()?; @@ -14091,32 +14091,65 @@ impl<'a> Parser<'a> { closing_paren_token: closing_paren_token.into(), } } else { - let columns = self.parse_table_alias_column_defs()?; - self.expect_keyword_is(Keyword::AS)?; - let mut is_materialized = None; - if dialect_of!(self is PostgreSqlDialect) { - if self.parse_keyword(Keyword::MATERIALIZED) { - is_materialized = Some(CteAsMaterialized::Materialized); - } else if self.parse_keywords(&[Keyword::NOT, Keyword::MATERIALIZED]) { - is_materialized = Some(CteAsMaterialized::NotMaterialized); + let as_optional = self.dialect.supports_cte_without_as(); + let opt_query = if as_optional { + self.maybe_parse(|p| { + p.expect_token(&Token::LParen)?; + let query = p.parse_query()?; + let closing_paren_token = p.expect_token(&Token::RParen)?; + Ok((query, closing_paren_token)) + })? 
+ } else { + None + }; + match opt_query { + Some((query, closing_paren_token)) => { + let alias = TableAlias { + explicit: false, + name, + columns: vec![], + }; + Cte { + alias, + query, + from: None, + materialized: None, + closing_paren_token: closing_paren_token.into(), + } } - } - self.expect_token(&Token::LParen)?; + None => { + let columns = self.parse_table_alias_column_defs()?; + if as_optional { + let _ = self.parse_keyword(Keyword::AS); + } else { + self.expect_keyword_is(Keyword::AS)?; + } + let mut is_materialized = None; + if dialect_of!(self is PostgreSqlDialect) { + if self.parse_keyword(Keyword::MATERIALIZED) { + is_materialized = Some(CteAsMaterialized::Materialized); + } else if self.parse_keywords(&[Keyword::NOT, Keyword::MATERIALIZED]) { + is_materialized = Some(CteAsMaterialized::NotMaterialized); + } + } + self.expect_token(&Token::LParen)?; - let query = self.parse_query()?; - let closing_paren_token = self.expect_token(&Token::RParen)?; + let query = self.parse_query()?; + let closing_paren_token = self.expect_token(&Token::RParen)?; - let alias = TableAlias { - explicit: false, - name, - columns, - }; - Cte { - alias, - query, - from: None, - materialized: is_materialized, - closing_paren_token: closing_paren_token.into(), + let alias = TableAlias { + explicit: false, + name, + columns, + }; + Cte { + alias, + query, + from: None, + materialized: is_materialized, + closing_paren_token: closing_paren_token.into(), + } + } } }; if self.parse_keyword(Keyword::FROM) { @@ -18074,6 +18107,19 @@ impl<'a> Parser<'a> { self.parse_wildcard_additional_options(wildcard_token)?, )) } + expr if self.dialect.supports_select_item_multi_column_alias() + && self.peek_keyword(Keyword::AS) + && self.peek_nth_token(1).token == Token::LParen => + { + self.expect_keyword(Keyword::AS)?; + self.expect_token(&Token::LParen)?; + let aliases = self.parse_comma_separated(|p| p.parse_identifier())?; + self.expect_token(&Token::RParen)?; + 
Ok(SelectItem::ExprWithAliases { + expr: maybe_prefixed_expr(expr, prefix), + aliases, + }) + } expr => self .maybe_parse_select_item_alias() .map(|alias| match alias { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 17f368bbb7..5c46f3cf26 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -13109,6 +13109,19 @@ fn test_group_by_grouping_sets() { ); } +#[test] +fn test_group_by_grouping_sets_bare_columns() { + all_dialects_where(|d| d.supports_group_by_expr()).one_statement_parses_to( + "SELECT a, b FROM t GROUP BY GROUPING SETS (a, b, c)", + "SELECT a, b FROM t GROUP BY GROUPING SETS ((a), (b), (c))", + ); + + all_dialects_where(|d| d.supports_group_by_expr()).one_statement_parses_to( + "SELECT a, b FROM t GROUP BY GROUPING SETS ((a, b), c)", + "SELECT a, b FROM t GROUP BY GROUPING SETS ((a, b), (c))", + ); +} + #[test] fn test_xmltable() { all_dialects() diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 79b3d0654d..0444ff966a 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -644,3 +644,52 @@ fn parse_databricks_json_accessor() { "SELECT raw:store.bicycle.price::DOUBLE FROM store_data", ); } + +#[test] +fn parse_numeric_prefix_identifier() { + databricks().verified_stmt("SELECT * FROM catalog.schema.1st_table"); + + databricks().verified_stmt("SELECT * FROM a.b.1c"); +} + +#[test] +fn parse_cte_without_as() { + databricks_and_generic().one_statement_parses_to( + "WITH cte (SELECT 1) SELECT * FROM cte", + "WITH cte AS (SELECT 1) SELECT * FROM cte", + ); + + databricks_and_generic().one_statement_parses_to( + "WITH a AS (SELECT 1), b (SELECT 2) SELECT * FROM a, b", + "WITH a AS (SELECT 1), b AS (SELECT 2) SELECT * FROM a, b", + ); + + databricks_and_generic().one_statement_parses_to( + "WITH cte (col1, col2) (SELECT 1, 2) SELECT * FROM cte", + "WITH cte (col1, col2) AS (SELECT 1, 2) SELECT * FROM cte", + ); + + databricks_and_generic().verified_query("WITH 
cte AS (SELECT 1) SELECT * FROM cte"); + + databricks_and_generic() + .verified_query("WITH cte (col1, col2) AS (SELECT 1, 2) SELECT * FROM cte"); + + assert!(all_dialects_where(|d| !d.supports_cte_without_as()) + .parse_sql_statements("WITH cte (SELECT 1) SELECT * FROM cte") + .is_err()); + +} + +#[test] +fn parse_select_item_multi_column_alias() { + databricks_and_generic().verified_stmt("SELECT stack(2, 'a', 'b', 'c', 'd') AS (col1, col2)"); + + databricks_and_generic() + .verified_stmt("SELECT stack(2, 'a', 'b', 'c', 'd') AS (col1, col2) FROM t"); + + assert!( + all_dialects_where(|d| !d.supports_select_item_multi_column_alias()) + .parse_sql_statements("SELECT stack(2, 'a', 'b') AS (col1, col2)") + .is_err() + ); +}