From bffa3fd222b3ba236d5617cc0e0fcb73b60817d9 Mon Sep 17 00:00:00 2001 From: finchxxia <13153363548@163.com> Date: Thu, 18 Jun 2026 12:51:31 +0800 Subject: [PATCH 1/3] MySQL/Generic: Structure AUTO_INCREMENT column option --- src/ast/ddl.rs | 13 +++++++++++++ src/ast/spans.rs | 2 ++ src/parser/mod.rs | 4 +--- tests/sqlparser_mysql.rs | 9 ++------- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 444fc1c01..298976729 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -2001,6 +2001,12 @@ pub enum ColumnOption { /// ``` /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/invisible-columns.html Invisible, + /// Column-level auto-increment option with an optional start value. + /// + /// MySQL and Generic use this unified AST node without a start value. + /// Dialects that support a start value can use `Some`. + /// Syntax: `AUTO_INCREMENT` or `AUTO_INCREMENT()`. + AutoIncrement(Option), } impl From for ColumnOption { @@ -2149,6 +2155,13 @@ impl fmt::Display for ColumnOption { Invisible => { write!(f, "INVISIBLE") } + AutoIncrement(start) => { + f.write_str("AUTO_INCREMENT")?; + if let Some(start) = start { + write!(f, "({start})")?; + } + Ok(()) + } } } } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index cd4034c7d..963265586 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -822,6 +822,7 @@ impl Spanned for RaiseStatementValue { /// - [ColumnOption::PrimaryKey] /// - [ColumnOption::Unique] /// - [ColumnOption::DialectSpecific] +/// - [ColumnOption::AutoIncrement] /// - [ColumnOption::Generated] impl Spanned for ColumnOption { fn span(&self) -> Span { @@ -849,6 +850,7 @@ impl Spanned for ColumnOption { ColumnOption::Tags(..) => Span::empty(), ColumnOption::Srid(..) => Span::empty(), ColumnOption::Invisible => Span::empty(), + ColumnOption::AutoIncrement(_) => Span::empty(), } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d36e1adcd..98e0747e0 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9657,9 +9657,7 @@ impl<'a> Parser<'a> { && dialect_of!(self is MySqlDialect | GenericDialect) { // Support AUTO_INCREMENT for MySQL - Ok(Some(ColumnOption::DialectSpecific(vec![ - Token::make_keyword("AUTO_INCREMENT"), - ]))) + Ok(Some(ColumnOption::AutoIncrement(None))) } else if self.parse_keyword(Keyword::AUTOINCREMENT) && dialect_of!(self is SQLiteDialect | GenericDialect) { diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index f513d3670..f4773f293 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -27,7 +27,6 @@ use sqlparser::ast::*; use sqlparser::dialect::{GenericDialect, MySqlDialect}; use sqlparser::parser::{ParserError, ParserOptions}; use sqlparser::tokenizer::Span; -use sqlparser::tokenizer::Token; use test_utils::*; #[macro_use] @@ -661,9 +660,7 @@ fn parse_create_table_auto_increment() { }, ColumnOptionDef { name: None, - option: ColumnOption::DialectSpecific(vec![Token::make_keyword( - "AUTO_INCREMENT" - )]), + option: ColumnOption::AutoIncrement(None), }, ], }], @@ -770,9 +767,7 @@ fn parse_create_table_primary_and_unique_key() { }, ColumnOptionDef { name: None, - option: ColumnOption::DialectSpecific(vec![ - Token::make_keyword("AUTO_INCREMENT") - ]), + option: ColumnOption::AutoIncrement(None), }, ], }, From 8f0906f4f8399db5e93f80d73729fc29c3b19f65 Mon Sep 17 00:00:00 2001 From: finchxxia <13153363548@163.com> Date: Thu, 18 Jun 2026 12:53:44 +0800 Subject: [PATCH 2/3] Doris: Add dialect skeleton --- examples/cli.rs | 1 + src/dialect/doris.rs | 53 ++++++++++++++++++++++++++++++++++++++ src/dialect/mod.rs | 5 ++++ src/test_utils.rs | 1 + tests/sqlparser_doris.rs | 55 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 115 insertions(+) create mode 100644 src/dialect/doris.rs create mode 100644 tests/sqlparser_doris.rs diff --git a/examples/cli.rs b/examples/cli.rs index 3c4299b20..e5643dc0b 100644 --- a/examples/cli.rs +++ b/examples/cli.rs @@ -52,6 +52,7 @@ $ cargo run --example cli - [--dialectname] "--postgres" => Box::new(PostgreSqlDialect {}), "--ms" => Box::new(MsSqlDialect {}), "--mysql" => Box::new(MySqlDialect {}), + "--doris" => Box::new(DorisDialect {}), "--snowflake" => Box::new(SnowflakeDialect {}), "--hive" => Box::new(HiveDialect {}), "--redshift" => Box::new(RedshiftSqlDialect {}), diff --git a/src/dialect/doris.rs b/src/dialect/doris.rs new file mode 100644 index 000000000..17911fd7a --- /dev/null +++ b/src/dialect/doris.rs @@ -0,0 +1,53 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::dialect::Dialect; + +/// A [`Dialect`] for [Apache Doris](https://doris.apache.org/). +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct DorisDialect {} + +impl Dialect for DorisDialect { + fn is_delimited_identifier_start(&self, ch: char) -> bool { + ch == '`' + } + + fn identifier_quote_style(&self, _identifier: &str) -> Option { + Some('`') + } + + fn is_identifier_start(&self, ch: char) -> bool { + ch.is_ascii_alphabetic() || ch == '_' || !ch.is_ascii() + } + + fn is_identifier_part(&self, ch: char) -> bool { + self.is_identifier_start(ch) || ch.is_ascii_digit() + } + + fn supports_string_literal_backslash_escape(&self) -> bool { + true + } + + fn ignores_wildcard_escapes(&self) -> bool { + true + } + + fn supports_numeric_prefix(&self) -> bool { + true + } +} diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 4b791d8ed..deda44583 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -19,6 +19,7 @@ mod ansi; mod bigquery; mod clickhouse; mod databricks; +mod doris; mod duckdb; mod generic; mod hive; @@ -43,6 +44,7 @@ pub use self::ansi::AnsiDialect; pub use self::bigquery::BigQueryDialect; pub use self::clickhouse::ClickHouseDialect; pub use self::databricks::DatabricksDialect; +pub use self::doris::DorisDialect; pub use self::duckdb::DuckDbDialect; pub use self::generic::GenericDialect; pub use self::hive::HiveDialect; @@ -1883,6 +1885,7 @@ pub fn dialect_from_str(dialect_name: impl AsRef) -> Option Some(Box::new(BigQueryDialect)), "ansi" => Some(Box::new(AnsiDialect {})), "duckdb" => Some(Box::new(DuckDbDialect {})), + "doris" => Some(Box::new(DorisDialect {})), "databricks" => Some(Box::new(DatabricksDialect {})), "spark" | "sparksql" => Some(Box::new(SparkSqlDialect {})), "oracle" => Some(Box::new(OracleDialect {})), @@ -1938,6 +1941,8 @@ mod tests { assert!(parse_dialect("ANSI").is::()); assert!(parse_dialect("duckdb").is::()); assert!(parse_dialect("DuckDb").is::()); + assert!(parse_dialect("doris").is::()); + assert!(parse_dialect("Doris").is::()); assert!(parse_dialect("DataBricks").is::()); assert!(parse_dialect("databricks").is::()); assert!(parse_dialect("teradata").is::()); diff --git a/src/test_utils.rs b/src/test_utils.rs index c4d1d0db2..e19f28d20 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -286,6 +286,7 @@ pub fn all_dialects() -> TestedDialects { Box::new(HiveDialect {}), Box::new(RedshiftSqlDialect {}), Box::new(MySqlDialect {}), + Box::new(DorisDialect {}), Box::new(BigQueryDialect {}), Box::new(SQLiteDialect {}), Box::new(DuckDbDialect {}), diff --git a/tests/sqlparser_doris.rs b/tests/sqlparser_doris.rs new file mode 100644 index 000000000..31ed99864 --- /dev/null +++ b/tests/sqlparser_doris.rs @@ -0,0 +1,55 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#![warn(clippy::all)] +//! Test SQL syntax specific to Apache Doris. + +#[macro_use] +mod test_utils; + +use sqlparser::dialect::{Dialect, DorisDialect, GenericDialect}; +use test_utils::*; + +fn doris() -> TestedDialects { + TestedDialects::new(vec![Box::new(DorisDialect {})]) +} + +fn doris_and_generic() -> TestedDialects { + TestedDialects::new(vec![Box::new(DorisDialect {}), Box::new(GenericDialect {})]) +} + +#[test] +fn doris_identifier_and_string_literal_gates() { + let dialect = DorisDialect {}; + assert_eq!(dialect.identifier_quote_style("identifier"), Some('`')); + assert!(dialect.is_delimited_identifier_start('`')); + assert!(dialect.supports_string_literal_backslash_escape()); + assert!(dialect.ignores_wildcard_escapes()); + assert!(dialect.supports_numeric_prefix()); +} + +#[test] +fn parse_doris_strings_and_identifiers() { + doris().verified_stmt( + r#"SELECT "double quoted string", 'single quoted string', `select` FROM `db`.`table`"#, + ); +} + +#[test] +fn doris_and_generic_parse_common_sql_identically() { + doris_and_generic().verified_stmt("SELECT 1 AS properties FROM t"); +} From b4e4adf8852f32fa0f9dd98641f9952ac3ad74bc Mon Sep 17 00:00:00 2001 From: finchxxia <13153363548@163.com> Date: Thu, 18 Jun 2026 12:59:39 +0800 Subject: [PATCH 3/3] Doris: Add generalized CREATE TABLE table model --- src/ast/ddl.rs | 373 +++++++++- src/ast/helpers/stmt_create_table.rs | 68 +- src/ast/mod.rs | 6 +- src/ast/spans.rs | 111 ++- src/dialect/doris.rs | 28 + src/dialect/generic.rs | 28 + src/dialect/mod.rs | 44 ++ src/keywords.rs | 11 + src/parser/mod.rs | 467 ++++++++++++- tests/sqlparser_clickhouse.rs | 28 + tests/sqlparser_doris.rs | 991 ++++++++++++++++++++++++++- tests/sqlparser_duckdb.rs | 1 + tests/sqlparser_mssql.rs | 2 + tests/sqlparser_postgres.rs | 2 + 14 files changed, 2119 insertions(+), 41 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 298976729..be8217dd9 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -1928,7 +1928,7 @@ pub enum ColumnOption { /// `CHECK ()` Check(CheckConstraint), /// Dialect-specific options, such as: - /// - MySQL's `AUTO_INCREMENT` or SQLite's `AUTOINCREMENT` + /// - SQLite's `AUTOINCREMENT` /// - ... DialectSpecific(Vec), /// `CHARACTER SET ` column option @@ -2003,8 +2003,8 @@ pub enum ColumnOption { Invisible, /// Column-level auto-increment option with an optional start value. /// - /// MySQL and Generic use this unified AST node without a start value. - /// Dialects that support a start value can use `Some`. + /// MySQL, Generic, and Doris use this unified AST node. Doris also supports + /// the optional parenthesized start value. /// Syntax: `AUTO_INCREMENT` or `AUTO_INCREMENT()`. AutoIncrement(Option), } @@ -2156,11 +2156,11 @@ impl fmt::Display for ColumnOption { write!(f, "INVISIBLE") } AutoIncrement(start) => { - f.write_str("AUTO_INCREMENT")?; if let Some(start) = start { - write!(f, "({start})")?; + write!(f, "AUTO_INCREMENT({start})") + } else { + write!(f, "AUTO_INCREMENT") } - Ok(()) } } } @@ -2907,6 +2907,361 @@ impl fmt::Display for CreateIndex { } } +/// Table key model kind. +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TableKeyModelKind { + /// `DUPLICATE KEY`. + Duplicate, + /// `UNIQUE KEY`. + Unique, + /// `AGGREGATE KEY`. + Aggregate, +} + +impl fmt::Display for TableKeyModelKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(match self { + TableKeyModelKind::Duplicate => "DUPLICATE", + TableKeyModelKind::Unique => "UNIQUE", + TableKeyModelKind::Aggregate => "AGGREGATE", + }) + } +} + +/// `DUPLICATE|UNIQUE|AGGREGATE KEY (...)` table model. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TableKeyModel { + /// The key model kind. + pub kind: TableKeyModelKind, + /// Columns named in the `KEY` clause. + pub columns: Vec, + /// Optional `ORDER BY (...)` clause for local sort order. + /// + /// The parser accepts this clause for all Doris key models and leaves + /// model-specific semantic validation to Doris or downstream consumers. + pub order_by: Option>, +} + +impl fmt::Display for TableKeyModel { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{} KEY({})", + self.kind, + display_comma_separated(&self.columns) + )?; + if let Some(order_by) = &self.order_by { + write!(f, " ORDER BY({})", display_comma_separated(order_by))?; + } + Ok(()) + } +} + +/// Bucket count declaration. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum BucketCount { + /// A fixed bucket count. + Count(u64), + /// `BUCKETS AUTO`. + Auto, +} + +impl fmt::Display for BucketCount { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + BucketCount::Count(value) => write!(f, "{value}"), + BucketCount::Auto => f.write_str("AUTO"), + } + } +} + +/// `DISTRIBUTED BY ...` clause. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TableDistribution { + /// `DISTRIBUTED BY HASH(col[, ...]) [BUCKETS n|AUTO]`. + Hash { + /// Hash distribution columns. + columns: Vec, + /// Optional bucket count. + buckets: Option, + }, + /// `DISTRIBUTED BY RANDOM [BUCKETS n|AUTO]`. + Random { + /// Optional bucket count. + buckets: Option, + }, +} + +impl fmt::Display for TableDistribution { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + TableDistribution::Hash { columns, buckets } => { + write!( + f, + "DISTRIBUTED BY HASH({})", + display_comma_separated(columns) + )?; + if let Some(buckets) = buckets { + write!(f, " BUCKETS {buckets}")?; + } + Ok(()) + } + TableDistribution::Random { buckets } => { + f.write_str("DISTRIBUTED BY RANDOM")?; + if let Some(buckets) = buckets { + write!(f, " BUCKETS {buckets}")?; + } + Ok(()) + } + } + } +} + +/// Table partitioning strategy. +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TablePartitioningKind { + /// `PARTITION BY RANGE`. + Range, + /// `PARTITION BY LIST`. + List, +} + +impl fmt::Display for TablePartitioningKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(match self { + TablePartitioningKind::Range => "RANGE", + TablePartitioningKind::List => "LIST", + }) + } +} + +/// Table partitioning value specification. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TablePartitioningValues { + /// `VALUES LESS THAN (...)`. + LessThan(Vec), + /// `VALUES LESS THAN MAXVALUE`. + LessThanMaxValue, + /// `VALUES IN (...)`. + In(Vec>), + /// `VALUES [("start"), ("end"))` — fixed range with half-open interval. + FixedRange { + /// Lower bound (inclusive). + start: Vec, + /// Upper bound (exclusive). + end: Vec, + }, +} + +impl fmt::Display for TablePartitioningValues { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + TablePartitioningValues::LessThan(values) => { + write!(f, "VALUES LESS THAN ({})", display_comma_separated(values)) + } + TablePartitioningValues::LessThanMaxValue => f.write_str("VALUES LESS THAN MAXVALUE"), + TablePartitioningValues::In(values) => { + let values = values + .iter() + .map(|value| format!("({})", display_comma_separated(value))) + .collect::>(); + write!(f, "VALUES IN ({})", display_comma_separated(&values)) + } + TablePartitioningValues::FixedRange { start, end } => { + write!( + f, + "VALUES [({}), ({}))", + display_comma_separated(start), + display_comma_separated(end) + ) + } + } + } +} + +/// One partitioning definition inside `PARTITION BY ... (...)`. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TablePartitioningDefinition { + /// `IF NOT EXISTS` clause. + pub if_not_exists: bool, + /// Partition name. + pub name: Ident, + /// Partition values. + pub values: TablePartitioningValues, + /// Optional per-partition properties. + pub properties: Vec, +} + +impl fmt::Display for TablePartitioningDefinition { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("PARTITION ")?; + if self.if_not_exists { + f.write_str("IF NOT EXISTS ")?; + } + write!(f, "{} {}", self.name, self.values)?; + if !self.properties.is_empty() { + write!( + f, + " PROPERTIES ({})", + display_comma_separated(&self.properties) + )?; + } + Ok(()) + } +} + +/// A single entry in a partitioning definition list. +/// +/// Can be either a named partition or a batch range definition. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum TablePartitioningEntry { + /// Named partition: `PARTITION VALUES ...` + Definition(TablePartitioningDefinition), + /// Batch range: `FROM (...) TO (...) INTERVAL []` + BatchRange { + /// Lower bound values. + from: Vec, + /// Upper bound values. + to: Vec, + /// Interval numeric value. + interval_value: u64, + /// Optional interval unit (e.g. DAY, MONTH). + interval_unit: Option, + }, +} + +impl fmt::Display for TablePartitioningEntry { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + TablePartitioningEntry::Definition(def) => def.fmt(f), + TablePartitioningEntry::BatchRange { + from, + to, + interval_value, + interval_unit, + } => { + write!( + f, + "FROM ({}) TO ({}) INTERVAL {}", + display_comma_separated(from), + display_comma_separated(to), + interval_value + )?; + if let Some(unit) = interval_unit { + write!(f, " {unit}")?; + } + Ok(()) + } + } + } +} + +/// `PARTITION BY RANGE|LIST` clause. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TablePartitioning { + /// Whether the clause starts with `AUTO PARTITION`. + pub auto: bool, + /// Partition strategy. + pub kind: TablePartitioningKind, + /// Partition columns/expressions (supports multiple columns). + pub columns: Vec, + /// Explicit partition definitions. + pub partitions: Vec, +} + +impl fmt::Display for TablePartitioning { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.auto { + f.write_str("AUTO ")?; + } + write!( + f, + "PARTITION BY {}({})", + self.kind, + display_comma_separated(&self.columns) + )?; + if !self.partitions.is_empty() { + write!(f, " ({})", display_comma_separated(&self.partitions))?; + } + Ok(()) + } +} + +/// Grouped table model clauses such as engine, key model, partitioning, distribution, and properties. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct TableModel { + /// `ENGINE = ` clause (e.g. `OLAP`). + pub engine: Option, + /// Table key model clause. + pub key_model: Option, + /// Table-level `COMMENT ''`. + pub comment: Option, + /// Table partitioning clause. + pub partitioning: Option, + /// Table distribution clause. + pub distribution: Option, + /// Table model properties. + pub properties: Vec, +} + +impl fmt::Display for TableModel { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut separator = ""; + if let Some(engine) = &self.engine { + write!(f, "{separator}ENGINE = {engine}")?; + separator = " "; + } + if let Some(key_model) = &self.key_model { + write!(f, "{separator}{key_model}")?; + separator = " "; + } + if let Some(comment) = &self.comment { + write!( + f, + "{separator}COMMENT '{}'", + escape_single_quote_string(comment) + )?; + separator = " "; + } + if let Some(partitioning) = &self.partitioning { + write!(f, "{separator}{partitioning}")?; + separator = " "; + } + if let Some(distribution) = &self.distribution { + write!(f, "{separator}{distribution}")?; + separator = " "; + } + if !self.properties.is_empty() { + write!( + f, + "{separator}PROPERTIES ({})", + display_comma_separated(&self.properties) + )?; + } + Ok(()) + } +} + /// CREATE TABLE statement. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -2985,6 +3340,8 @@ pub struct CreateTable { /// Snowflake: Table clustering list which contains base column, expressions on base columns. /// pub cluster_by: Option>>, + /// Grouped table model clauses such as engine, key model, partitioning, distribution, and properties. + pub table_model: Option, /// Hive: Table clustering column list. /// pub clustered_by: Option, @@ -3172,6 +3529,10 @@ impl fmt::Display for CreateTable { write!(f, " COMMENT '{comment}'")?; } + if let Some(table_model) = &self.table_model { + write!(f, " {table_model}")?; + } + // Only for SQLite if self.without_rowid { write!(f, " WITHOUT ROWID")?; diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index 9ec9ab28c..9de1f3724 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -29,7 +29,7 @@ use crate::ast::{ DistStyle, Expr, FileFormat, ForValues, HiveDistributionStyle, HiveFormat, Ident, InitializeKind, ObjectName, OnCommit, OneOrManyWithParens, Query, RefreshModeKind, RowAccessPolicy, Statement, StorageLifecyclePolicy, StorageSerializationPolicy, - TableConstraint, TableVersion, Tag, WithData, WrappedCollection, + TableConstraint, TableModel, TableVersion, Tag, WithData, WrappedCollection, }; use crate::parser::ParserError; @@ -123,6 +123,8 @@ pub struct CreateTableBuilder { pub partition_by: Option>, /// Optional `CLUSTER BY` expressions. pub cluster_by: Option>>, + /// Optional grouped table model clauses. + pub table_model: Option, /// Optional `CLUSTERED BY` clause. pub clustered_by: Option, /// Optional parent tables (`INHERITS`). @@ -227,6 +229,7 @@ impl CreateTableBuilder { order_by: None, partition_by: None, cluster_by: None, + table_model: None, clustered_by: None, inherits: None, partition_of: None, @@ -404,6 +407,11 @@ impl CreateTableBuilder { self.cluster_by = cluster_by; self } + /// Set grouped table model clauses. + pub fn table_model(mut self, table_model: Option) -> Self { + self.table_model = table_model; + self + } /// Set `CLUSTERED BY` clause. pub fn clustered_by(mut self, clustered_by: Option) -> Self { self.clustered_by = clustered_by; @@ -622,6 +630,7 @@ impl CreateTableBuilder { order_by: self.order_by, partition_by: self.partition_by, cluster_by: self.cluster_by, + table_model: self.table_model, clustered_by: self.clustered_by, inherits: self.inherits, partition_of: self.partition_of, @@ -707,6 +716,7 @@ impl From for CreateTableBuilder { order_by: table.order_by, partition_by: table.partition_by, cluster_by: table.cluster_by, + table_model: table.table_model, clustered_by: table.clustered_by, inherits: table.inherits, partition_of: table.partition_of, @@ -757,7 +767,12 @@ pub(crate) struct CreateTableConfiguration { #[cfg(test)] mod tests { use crate::ast::helpers::stmt_create_table::CreateTableBuilder; - use crate::ast::{Ident, ObjectName, Statement}; + use crate::ast::{ + BucketCount, Expr, Ident, ObjectName, SqlOption, Statement, TableDistribution, + TableKeyModel, TableKeyModelKind, TableModel, TablePartitioning, + TablePartitioningDefinition, TablePartitioningEntry, TablePartitioningKind, + TablePartitioningValues, Value, + }; use crate::parser::ParserError; #[test] @@ -785,4 +800,53 @@ mod tests { ) ); } + + #[test] + fn test_table_model_builder_fields_round_trip() { + let key = Ident::new("k"); + let partition_expr = Expr::Identifier(Ident::new("dt")); + let partition_value = Expr::Value(Value::SingleQuotedString("2024-01-01".into()).into()); + let property_value = Expr::Value(Value::SingleQuotedString("1".into()).into()); + + let builder = CreateTableBuilder::new(ObjectName::from(vec![Ident::new("t")])).table_model( + Some(TableModel { + engine: Some(Ident::new("OLAP")), + key_model: Some(TableKeyModel { + kind: TableKeyModelKind::Aggregate, + columns: vec![key.clone()], + order_by: None, + }), + comment: None, + partitioning: Some(TablePartitioning { + auto: true, + kind: TablePartitioningKind::Range, + columns: vec![partition_expr], + partitions: vec![TablePartitioningEntry::Definition( + TablePartitioningDefinition { + if_not_exists: false, + name: Ident::new("p1"), + values: TablePartitioningValues::LessThan(vec![partition_value]), + properties: vec![], + }, + )], + }), + distribution: Some(TableDistribution::Hash { + columns: vec![key.clone()], + buckets: Some(BucketCount::Auto), + }), + properties: vec![SqlOption::KeyValue { + key: Ident::new("replication_num"), + value: property_value, + }], + }), + ); + + let create_table = builder.clone().build(); + + assert_eq!( + create_table.to_string(), + "CREATE TABLE t () ENGINE = OLAP AGGREGATE KEY(k) AUTO PARTITION BY RANGE(dt) (PARTITION p1 VALUES LESS THAN ('2024-01-01')) DISTRIBUTED BY HASH(k) BUCKETS AUTO PROPERTIES (replication_num = '1')" + ); + assert_eq!(builder, CreateTableBuilder::from(create_table)); + } } diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 23944c459..71fc23d35 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -67,7 +67,7 @@ pub use self::ddl::{ AlterPolicyOperation, AlterSchema, AlterSchemaOperation, AlterTable, AlterTableAlgorithm, AlterTableLock, AlterTableOperation, AlterTableType, AlterTextSearch, AlterTextSearchOperation, AlterTextSearchOption, AlterType, AlterTypeAddValue, AlterTypeAddValuePosition, - AlterTypeOperation, AlterTypeRename, AlterTypeRenameValue, ClusteredBy, ColumnDef, + AlterTypeOperation, AlterTypeRename, AlterTypeRenameValue, BucketCount, ClusteredBy, ColumnDef, ColumnOption, ColumnOptionDef, ColumnOptions, ColumnPolicy, ColumnPolicyProperty, ConstraintCharacteristics, CreateCollation, CreateCollationDefinition, CreateConnector, CreateDomain, CreateExtension, CreateFunction, CreateIndex, CreateOperator, @@ -80,7 +80,9 @@ pub use self::ddl::{ IndexOption, IndexType, KeyOrIndexDisplay, Msck, NullsDistinctOption, OperatorArgTypes, OperatorClassItem, OperatorFamilyDropItem, OperatorFamilyItem, OperatorOption, OperatorPurpose, Owner, Partition, PartitionBoundValue, ProcedureParam, ReferentialAction, RenameTableNameKind, - ReplicaIdentity, TagsColumnOption, TextSearchObjectType, TriggerObjectKind, Truncate, + ReplicaIdentity, TableDistribution, TableKeyModel, TableKeyModelKind, TableModel, + TablePartitioning, TablePartitioningDefinition, TablePartitioningEntry, TablePartitioningKind, + TablePartitioningValues, TagsColumnOption, TextSearchObjectType, TriggerObjectKind, Truncate, UserDefinedTypeCompositeAttributeDef, UserDefinedTypeInternalLength, UserDefinedTypeRangeOption, UserDefinedTypeRepresentation, UserDefinedTypeSqlDefinitionOption, UserDefinedTypeStorage, ViewColumnDef, WithData, diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 963265586..6c391cea4 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -45,9 +45,11 @@ use super::{ PartitionBoundValue, PivotValueSource, ProjectionSelect, Query, RaiseStatement, RaiseStatementValue, ReferentialAction, RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto, SelectItem, SetExpr, SqlOption, Statement, Subscript, - SymbolDefinition, TableAlias, TableAliasColumnDef, TableConstraint, TableFactor, TableObject, - TableOptionsClustered, TableWithJoins, Update, UpdateTableFromKind, Use, Values, ViewColumnDef, - WhileStatement, WildcardAdditionalOptions, With, WithFill, + SymbolDefinition, TableAlias, TableAliasColumnDef, TableConstraint, TableDistribution, + TableFactor, TableKeyModel, TableModel, TableObject, TableOptionsClustered, TablePartitioning, + TablePartitioningDefinition, TablePartitioningEntry, TablePartitioningValues, TableWithJoins, + Update, UpdateTableFromKind, Use, Values, ViewColumnDef, WhileStatement, + WildcardAdditionalOptions, With, WithFill, }; /// Given an iterator of spans, return the [Span::union] of all spans. @@ -546,6 +548,103 @@ impl Spanned for Use { } } +impl Spanned for TableKeyModel { + fn span(&self) -> Span { + union_spans( + self.columns.iter().map(|i| i.span).chain( + self.order_by + .iter() + .flat_map(|cols| cols.iter().map(|i| i.span)), + ), + ) + } +} + +impl Spanned for TableDistribution { + fn span(&self) -> Span { + match self { + TableDistribution::Hash { + columns, + buckets: _, + } => union_spans(columns.iter().map(|i| i.span)), + TableDistribution::Random { buckets: _ } => Span::empty(), + } + } +} + +impl Spanned for TablePartitioningValues { + fn span(&self) -> Span { + match self { + TablePartitioningValues::LessThan(values) => { + union_spans(values.iter().map(|i| i.span())) + } + TablePartitioningValues::LessThanMaxValue => Span::empty(), + TablePartitioningValues::In(values) => union_spans( + values + .iter() + .flat_map(|values| values.iter().map(|i| i.span())), + ), + TablePartitioningValues::FixedRange { start, end } => { + union_spans(start.iter().chain(end.iter()).map(|i| i.span())) + } + } + } +} + +impl Spanned for TablePartitioningDefinition { + fn span(&self) -> Span { + union_spans( + core::iter::once(self.name.span) + .chain(core::iter::once(self.values.span())) + .chain(self.properties.iter().map(|i| i.span())), + ) + } +} + +impl Spanned for TablePartitioningEntry { + fn span(&self) -> Span { + match self { + TablePartitioningEntry::Definition(def) => def.span(), + TablePartitioningEntry::BatchRange { + from, + to, + interval_unit, + .. + } => union_spans( + from.iter() + .chain(to.iter()) + .map(|i| i.span()) + .chain(interval_unit.iter().map(|i| i.span)), + ), + } + } +} + +impl Spanned for TablePartitioning { + fn span(&self) -> Span { + union_spans( + self.columns + .iter() + .map(|i| i.span()) + .chain(self.partitions.iter().map(|i| i.span())), + ) + } +} + +impl Spanned for TableModel { + fn span(&self) -> Span { + union_spans( + self.engine + .iter() + .map(|i| i.span) + .chain(self.key_model.iter().map(|i| i.span())) + .chain(self.partitioning.iter().map(|i| i.span())) + .chain(self.distribution.iter().map(|i| i.span())) + .chain(self.properties.iter().map(|i| i.span())), + ) + } +} + impl Spanned for CreateTable { fn span(&self) -> Span { let CreateTable { @@ -577,6 +676,7 @@ impl Spanned for CreateTable { order_by: _, // todo, clickhouse specific partition_by: _, // todo, BigQuery specific cluster_by: _, // todo, BigQuery specific + table_model, clustered_by: _, // todo, Hive specific inherits: _, // todo, PostgreSQL specific partition_of, @@ -622,7 +722,8 @@ impl Spanned for CreateTable { .chain(query.iter().map(|i| i.span())) .chain(clone.iter().map(|i| i.span())) .chain(partition_of.iter().map(|i| i.span())) - .chain(for_values.iter().map(|i| i.span())), + .chain(for_values.iter().map(|i| i.span())) + .chain(table_model.iter().map(|i| i.span())), ) } } @@ -822,8 +923,8 @@ impl Spanned for RaiseStatementValue { /// - [ColumnOption::PrimaryKey] /// - [ColumnOption::Unique] /// - [ColumnOption::DialectSpecific] -/// - [ColumnOption::AutoIncrement] /// - [ColumnOption::Generated] +/// - [ColumnOption::AutoIncrement] impl Spanned for ColumnOption { fn span(&self) -> Span { match self { diff --git a/src/dialect/doris.rs b/src/dialect/doris.rs index 17911fd7a..71d21bf7a 100644 --- a/src/dialect/doris.rs +++ b/src/dialect/doris.rs @@ -50,4 +50,32 @@ impl Dialect for DorisDialect { fn supports_numeric_prefix(&self) -> bool { true } + + fn supports_create_table_key_model_clause(&self) -> bool { + true + } + + fn supports_create_table_range_list_partitioning_clause(&self) -> bool { + true + } + + fn supports_create_table_distribution_clause(&self) -> bool { + true + } + + fn supports_create_table_properties_clause(&self) -> bool { + true + } + + fn supports_create_table_model_clause_without_marker(&self) -> bool { + true + } + + fn supports_parenthesized_auto_increment_column_option(&self) -> bool { + true + } + + fn supports_column_aggregation_function_option(&self) -> bool { + true + } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 7ee826c25..6cde91e2e 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -173,6 +173,34 @@ impl Dialect for GenericDialect { true } + // GenericDialect intentionally enables unambiguous Doris-compatible parser + // gates for permissive parsing. Ambiguous clauses such as ENGINE and + // COMMENT are still only committed to a table model when an unambiguous + // marker is present. + fn supports_create_table_key_model_clause(&self) -> bool { + true + } + + fn supports_create_table_range_list_partitioning_clause(&self) -> bool { + true + } + + fn supports_create_table_distribution_clause(&self) -> bool { + true + } + + fn supports_create_table_properties_clause(&self) -> bool { + true + } + + fn supports_create_table_model_clause_without_marker(&self) -> bool { + false + } + + fn supports_column_aggregation_function_option(&self) -> bool { + true + } + fn supports_named_fn_args_with_assignment_operator(&self) -> bool { true } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index deda44583..f7b803937 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1238,6 +1238,50 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if the dialect supports table key model clauses after a + /// `CREATE TABLE` column list, such as `DUPLICATE KEY (...)`, + /// `UNIQUE KEY (...)`, or `AGGREGATE KEY (...)`. + fn supports_create_table_key_model_clause(&self) -> bool { + false + } + + /// Returns true if the dialect supports range/list partitioning clauses in + /// `CREATE TABLE`, such as `PARTITION BY RANGE(dt) (...)` or + /// `AUTO PARTITION BY RANGE(...)`. + fn supports_create_table_range_list_partitioning_clause(&self) -> bool { + false + } + + /// Returns true if the dialect supports table distribution clauses such as + /// `DISTRIBUTED BY HASH(col) BUCKETS 8` or `DISTRIBUTED BY RANDOM`. + fn supports_create_table_distribution_clause(&self) -> bool { + false + } + + /// Returns true if the dialect supports a `PROPERTIES (...)` clause in + /// `CREATE TABLE`. + fn supports_create_table_properties_clause(&self) -> bool { + false + } + + /// Returns true if `ENGINE = ...` or table-level `COMMENT '...'` can by + /// itself create a grouped table model after a `CREATE TABLE` column list. + fn supports_create_table_model_clause_without_marker(&self) -> bool { + false + } + + /// Returns true if the dialect supports `AUTO_INCREMENT` with an optional + /// parenthesized start value in column definitions. + fn supports_parenthesized_auto_increment_column_option(&self) -> bool { + false + } + + /// Returns true if the dialect supports aggregate column option functions in + /// `CREATE TABLE`, such as `SUM`, `REPLACE`, or `BITMAP_UNION`. + fn supports_column_aggregation_function_option(&self) -> bool { + false + } + /// Returns true if the dialect supports PartiQL for querying semi-structured data /// fn supports_partiql(&self) -> bool { diff --git a/src/keywords.rs b/src/keywords.rs index 0ff32948f..9ae054fb9 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -163,6 +163,7 @@ define_keywords!( BIND, BINDING, BIT, + BITMAP_UNION, BLANKSASNULL, BLOB, BLOCK, @@ -345,6 +346,8 @@ define_keywords!( DISTINCTROW, DISTKEY, DISTRIBUTE, + DISTRIBUTED, + DISTRIBUTION, DISTSTYLE, DIV, DO, @@ -489,6 +492,7 @@ define_keywords!( HIGH_PRIORITY, HISTORY, HIVEVAR, + HLL_UNION, HOLD, HOSTS, HOUR, @@ -549,6 +553,7 @@ define_keywords!( INTERSECTION, INTERVAL, INTO, + INVERTED, INVISIBLE, INVOKER, IO, @@ -573,6 +578,7 @@ define_keywords!( LAMBDA, LANGUAGE, LARGE, + LARGEINT, LAST, LAST_VALUE, LATERAL, @@ -582,6 +588,7 @@ define_keywords!( LEAST, LEFT, LEFTARG, + LESS, LEVEL, LIFECYCLE, LIKE, @@ -823,17 +830,20 @@ define_keywords!( PROFILE, PROGRAM, PROJECTION, + PROPERTIES, PUBLIC, PURCHASE, PURGE, PUT, QUALIFY, + QUANTILE_UNION, QUARTER, QUERIES, QUERY, QUOTE, RAISE, RAISERROR, + RANDOM, RANGE, RANK, RAW, @@ -1049,6 +1059,7 @@ define_keywords!( TERSE, TEXT, TEXTFILE, + THAN, THEN, THROW, TIES, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 98e0747e0..fb3cc2cd2 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -392,6 +392,44 @@ impl From<&ParserError> for ExprPrefixError { } } +/// Intermediate container for table model clauses parsed between the column +/// list and trailing table options. +#[derive(Default)] +struct TableModelClauses { + engine: Option, + key_model: Option, + comment: Option, + partitioning: Option, + distribution: Option, + properties: Vec, +} + +impl TableModelClauses { + fn is_empty(&self) -> bool { + self.engine.is_none() + && self.key_model.is_none() + && self.comment.is_none() + && self.partitioning.is_none() + && self.distribution.is_none() + && self.properties.is_empty() + } + + fn into_table_model(self) -> Option { + if self.is_empty() { + None + } else { + Some(TableModel { + engine: self.engine, + key_model: self.key_model, + comment: self.comment, + partitioning: self.partitioning, + distribution: self.distribution, + properties: self.properties, + }) + } + } +} + impl<'a> Parser<'a> { /// Create a parser for a [`Dialect`] /// @@ -8743,6 +8781,8 @@ impl<'a> Parser<'a> { let clustered_by = self.parse_optional_clustered_by()?; let hive_formats = self.parse_hive_formats()?; + let table_model = self.parse_optional_doris_create_table_clauses()?; + let create_table_config = self.parse_optional_create_table_config()?; // ClickHouse supports `PRIMARY KEY`, before `ORDER BY` @@ -8864,6 +8904,7 @@ impl<'a> Parser<'a> { .clustered_by(clustered_by) .partition_by(partition_by) .cluster_by(create_table_config.cluster_by) + .table_model(table_model) .inherits(create_table_config.inherits) .partition_of(partition_of) .for_values(for_values) @@ -9653,11 +9694,25 @@ impl<'a> Parser<'a> { } .into(), )) - } else if self.parse_keyword(Keyword::AUTO_INCREMENT) - && dialect_of!(self is MySqlDialect | GenericDialect) - { - // Support AUTO_INCREMENT for MySQL - Ok(Some(ColumnOption::AutoIncrement(None))) + } else if self.parse_keyword(Keyword::AUTO_INCREMENT) { + if self + .dialect + .supports_parenthesized_auto_increment_column_option() + { + let start = if self.consume_token(&Token::LParen) { + let value = self.parse_literal_uint()?; + self.expect_token(&Token::RParen)?; + Some(value) + } else { + None + }; + Ok(Some(ColumnOption::AutoIncrement(start))) + } else if dialect_of!(self is MySqlDialect | GenericDialect) { + Ok(Some(ColumnOption::AutoIncrement(None))) + } else { + self.prev_token(); + Ok(None) + } } else if self.parse_keyword(Keyword::AUTOINCREMENT) && dialect_of!(self is SQLiteDialect | GenericDialect) { @@ -9738,10 +9793,42 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::INVISIBLE) { Ok(Some(ColumnOption::Invisible)) } else { - Ok(None) + self.parse_optional_doris_aggregate_column_option() } } + fn parse_optional_doris_aggregate_column_option( + &mut self, + ) -> Result, ParserError> { + if !self.dialect.supports_column_aggregation_function_option() { + return Ok(None); + } + + let token = self.peek_token(); + let option_name = match token.token { + Token::Word(word) + if matches!( + word.keyword, + Keyword::SUM + | Keyword::MAX + | Keyword::MIN + | Keyword::REPLACE + | Keyword::HLL_UNION + | Keyword::BITMAP_UNION + | Keyword::QUANTILE_UNION + ) => + { + word.value + } + _ => return Ok(None), + }; + + self.next_token(); + Ok(Some(ColumnOption::DialectSpecific(vec![ + Token::make_keyword(&option_name), + ]))) + } + pub(crate) fn parse_tag(&mut self) -> Result { let name = self.parse_object_name(false)?; self.expect_token(&Token::Eq)?; @@ -9873,6 +9960,356 @@ impl<'a> Parser<'a> { Ok(clustered_by) } + /// Parse Doris-compatible `CREATE TABLE` table model clauses in official order: + /// `[ENGINE = ...] [KEY_MODEL] [COMMENT '...'] [PARTITION] [DISTRIBUTION] [PROPERTIES]` + /// + /// Uses save/restore to avoid consuming tokens when no table model + /// clause is found, so the generic parser can handle MySQL/ClickHouse + /// ENGINE etc. + fn parse_optional_doris_create_table_clauses( + &mut self, + ) -> Result, ParserError> { + if !self.dialect.supports_create_table_key_model_clause() + && !self + .dialect + .supports_create_table_range_list_partitioning_clause() + && !self.dialect.supports_create_table_distribution_clause() + && !self.dialect.supports_create_table_properties_clause() + && !self + .dialect + .supports_create_table_model_clause_without_marker() + { + return Ok(None); + } + + let save_index = self.index; + + let engine = self.parse_optional_doris_engine()?; + let key_model = self.parse_optional_doris_key_model()?; + let comment = self.parse_optional_doris_table_comment()?; + let partitioning = self.parse_optional_doris_partition()?; + let distribution = self.parse_optional_doris_distribution()?; + let properties = self.parse_optional_doris_properties()?; + + // Key model, partition, distribution and PROPERTIES (the keyword, + // not WITH/OPTIONS) are unambiguous table model markers. GenericDialect + // only commits to the table model path for those markers, so + // MySQL/ClickHouse-style ENGINE and COMMENT remain plain table options. + let has_unambiguous_marker = key_model.is_some() + || partitioning.is_some() + || distribution.is_some() + || !properties.is_empty(); + let has_markerless_model_clause = self + .dialect + .supports_create_table_model_clause_without_marker() + && (engine.is_some() || comment.is_some()); + + if !has_unambiguous_marker && !has_markerless_model_clause { + self.index = save_index; + return Ok(None); + } + + Ok(TableModelClauses { + engine, + key_model, + comment, + partitioning, + distribution, + properties, + } + .into_table_model()) + } + + fn parse_optional_doris_engine(&mut self) -> Result, ParserError> { + if !self.parse_keyword(Keyword::ENGINE) { + return Ok(None); + } + let _ = self.consume_token(&Token::Eq); + Ok(Some(self.parse_identifier()?)) + } + + fn parse_optional_doris_table_comment(&mut self) -> Result, ParserError> { + if !self.parse_keyword(Keyword::COMMENT) { + return Ok(None); + } + let _ = self.consume_token(&Token::Eq); + let next = self.next_token(); + match next.token { + Token::SingleQuotedString(s) => Ok(Some(s)), + Token::DoubleQuotedString(s) => Ok(Some(s)), + _ => self.expected("a string literal after COMMENT", next), + } + } + + fn parse_optional_doris_key_model(&mut self) -> Result, ParserError> { + if !self.dialect.supports_create_table_key_model_clause() { + return Ok(None); + } + + let kind = if self.parse_keyword(Keyword::DUPLICATE) { + Some(TableKeyModelKind::Duplicate) + } else if self.parse_keyword(Keyword::UNIQUE) { + Some(TableKeyModelKind::Unique) + } else if self.parse_keyword(Keyword::AGGREGATE) { + Some(TableKeyModelKind::Aggregate) + } else { + None + }; + + let Some(kind) = kind else { + return Ok(None); + }; + + self.expect_keyword_is(Keyword::KEY)?; + let columns = self.parse_parenthesized_column_list(IsOptional::Mandatory, false)?; + + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + Some(self.parse_parenthesized_column_list(IsOptional::Mandatory, false)?) + } else { + None + }; + + Ok(Some(TableKeyModel { + kind, + columns, + order_by, + })) + } + + fn parse_optional_doris_buckets(&mut self) -> Result, ParserError> { + if !self.parse_keyword(Keyword::BUCKETS) { + return Ok(None); + } + + if self.parse_keyword(Keyword::AUTO) { + return Ok(Some(BucketCount::Auto)); + } + + let value = self.parse_literal_uint()?; + Ok(Some(BucketCount::Count(value))) + } + + fn parse_optional_doris_distribution( + &mut self, + ) -> Result, ParserError> { + if !self.dialect.supports_create_table_distribution_clause() + || !self.parse_keywords(&[Keyword::DISTRIBUTED, Keyword::BY]) + { + return Ok(None); + } + + if self.parse_keyword(Keyword::HASH) { + let columns = self.parse_parenthesized_column_list(IsOptional::Mandatory, false)?; + let buckets = self.parse_optional_doris_buckets()?; + Ok(Some(TableDistribution::Hash { columns, buckets })) + } else if self.parse_keyword(Keyword::RANDOM) { + let buckets = self.parse_optional_doris_buckets()?; + Ok(Some(TableDistribution::Random { buckets })) + } else { + self.expected("HASH or RANDOM after DISTRIBUTED BY", self.peek_token()) + } + } + + fn parse_optional_doris_partition(&mut self) -> Result, ParserError> { + if !self + .dialect + .supports_create_table_range_list_partitioning_clause() + { + return Ok(None); + } + + let index = self.index; + let auto = self.parse_keyword(Keyword::AUTO); + if !self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) { + if auto { + return self.expected("PARTITION BY after AUTO", self.peek_token()); + } + return Ok(None); + } + + let kind = if self.parse_keyword(Keyword::RANGE) { + TablePartitioningKind::Range + } else if self.parse_keyword(Keyword::LIST) { + TablePartitioningKind::List + } else { + self.index = index; + return Ok(None); + }; + + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + + let partitions = if self.consume_token(&Token::LParen) { + let partitions = + self.parse_comma_separated0(Parser::parse_doris_partition_entry, Token::RParen)?; + self.expect_token(&Token::RParen)?; + partitions + } else { + vec![] + }; + + // When there are no explicit partition definitions, we must + // distinguish Doris `PARTITION BY RANGE(col) DISTRIBUTED BY ...` + // from PostgreSQL `PARTITION BY RANGE(col)`. Only commit to the + // Doris path when a recognisable Doris follow-up keyword is next. + if !auto && partitions.is_empty() { + let is_doris_follow_up = + self.peek_keyword(Keyword::DISTRIBUTED) || self.peek_keyword(Keyword::PROPERTIES); + if !is_doris_follow_up { + self.index = index; + return Ok(None); + } + } + + Ok(Some(TablePartitioning { + auto, + kind, + columns, + partitions, + })) + } + + fn parse_doris_partition_entry(&mut self) -> Result { + if self.parse_keyword(Keyword::FROM) { + self.expect_token(&Token::LParen)?; + let from = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + self.expect_keyword_is(Keyword::TO)?; + self.expect_token(&Token::LParen)?; + let to = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + self.expect_keyword_is(Keyword::INTERVAL)?; + let interval_token = self.next_token(); + let interval_value = match interval_token.token { + Token::Number(ref n, _) => n.parse::().map_err(|e| { + ParserError::ParserError(format!("Expected integer for INTERVAL value: {e}")) + })?, + _ => { + return self.expected("integer value after INTERVAL", interval_token); + } + }; + let interval_unit = if self.peek_token().token != Token::Comma + && self.peek_token().token != Token::RParen + { + Some(self.parse_identifier()?) + } else { + None + }; + Ok(TablePartitioningEntry::BatchRange { + from, + to, + interval_value, + interval_unit, + }) + } else { + Ok(TablePartitioningEntry::Definition( + self.parse_doris_partition_definition()?, + )) + } + } + + fn parse_doris_partition_definition( + &mut self, + ) -> Result { + self.expect_keyword_is(Keyword::PARTITION)?; + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let name = self.parse_identifier()?; + self.expect_keyword_is(Keyword::VALUES)?; + + let values = if self.parse_keywords(&[Keyword::LESS, Keyword::THAN]) { + if self.parse_doris_maxvalue_token() { + TablePartitioningValues::LessThanMaxValue + } else { + self.expect_token(&Token::LParen)?; + let values = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + Self::normalize_doris_less_than(values) + } + } else if self.parse_keyword(Keyword::IN) { + self.expect_token(&Token::LParen)?; + let values = self.parse_comma_separated(Parser::parse_doris_partition_value_tuple)?; + self.expect_token(&Token::RParen)?; + TablePartitioningValues::In(values) + } else if self.consume_token(&Token::LBracket) { + self.expect_token(&Token::LParen)?; + let start = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + self.expect_token(&Token::Comma)?; + self.expect_token(&Token::LParen)?; + let end = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + self.expect_token(&Token::RParen)?; + TablePartitioningValues::FixedRange { start, end } + } else { + return self.expected( + "LESS THAN, IN, or [ after PARTITION VALUES", + self.peek_token(), + ); + }; + + let properties = if self.peek_keyword(Keyword::PROPERTIES) { + self.parse_options(Keyword::PROPERTIES)? + } else { + vec![] + }; + + Ok(TablePartitioningDefinition { + if_not_exists, + name, + values, + properties, + }) + } + + /// Try to consume `MAXVALUE` or `MAX_VALUE` (the Doris alternative spelling). + fn parse_doris_maxvalue_token(&mut self) -> bool { + if self.parse_keyword(Keyword::MAXVALUE) { + return true; + } + if let Token::Word(w) = &self.peek_token().token { + if w.value.eq_ignore_ascii_case("MAX_VALUE") && w.quote_style.is_none() { + self.next_token(); + return true; + } + } + false + } + + /// Normalize a single `MAXVALUE` or `MAX_VALUE` identifier inside parentheses + /// to `LessThanMaxValue`, e.g. `VALUES LESS THAN (MAXVALUE)`. + fn normalize_doris_less_than(values: Vec) -> TablePartitioningValues { + if values.len() == 1 { + if let Expr::Identifier(ref ident) = values[0] { + let upper = ident.value.to_uppercase(); + if (upper == "MAXVALUE" || upper == "MAX_VALUE") && ident.quote_style.is_none() { + return TablePartitioningValues::LessThanMaxValue; + } + } + } + TablePartitioningValues::LessThan(values) + } + + fn parse_doris_partition_value_tuple(&mut self) -> Result, ParserError> { + if self.consume_token(&Token::LParen) { + let values = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + Ok(values) + } else { + Ok(vec![self.parse_expr()?]) + } + } + + fn parse_optional_doris_properties(&mut self) -> Result, ParserError> { + if !self.dialect.supports_create_table_properties_clause() + || !self.peek_keyword(Keyword::PROPERTIES) + { + return Ok(vec![]); + } + + self.parse_options(Keyword::PROPERTIES) + } + /// Parse a referential action used in foreign key clauses. /// /// Recognized forms: `RESTRICT`, `CASCADE`, `SET NULL`, `NO ACTION`, `SET DEFAULT`. @@ -19893,11 +20330,10 @@ impl<'a> Parser<'a> { /// Parse a SQL LOAD statement pub fn parse_load(&mut self) -> Result { - if self.dialect.supports_load_extension() { - let extension_name = self.parse_identifier()?; - Ok(Statement::Load { extension_name }) - } else if self.parse_keyword(Keyword::DATA) && self.dialect.supports_load_data() { - let local = self.parse_one_of_keywords(&[Keyword::LOCAL]).is_some(); + if self.peek_keyword(Keyword::DATA) && self.dialect.supports_load_data() { + self.expect_keyword_is(Keyword::DATA)?; + let local = self.parse_keyword(Keyword::LOCAL); + self.expect_keyword_is(Keyword::INPATH)?; let inpath = self.parse_literal_string()?; let overwrite = self.parse_one_of_keywords(&[Keyword::OVERWRITE]).is_some(); @@ -19914,6 +20350,9 @@ impl<'a> Parser<'a> { partitioned, table_format, }) + } else if self.dialect.supports_load_extension() { + let extension_name = self.parse_identifier()?; + Ok(Statement::Load { extension_name }) } else { self.expected_ref( "`DATA` or an extension name after `LOAD`", @@ -20809,11 +21248,7 @@ impl<'a> Parser<'a> { return self.expected_ref(" another option or EOF", self.peek_token_ref()); } } - Token::EOF => break, - Token::SemiColon => { - self.prev_token(); - break; - } + Token::EOF | Token::SemiColon => break, Token::Comma => { delimiter = KeyValueOptionsDelimiter::Comma; continue; diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index cb2df1ff6..bf9bfc770 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -292,6 +292,34 @@ fn parse_create_table_partition_by_after_order_by() { .expect_err("PARTITION BY with no expression should fail"); } +#[test] +fn parse_create_table_engine_order_by_is_not_doris_engine() { + match clickhouse_and_generic() + .verified_stmt("CREATE TABLE x (a INT) ENGINE = MergeTree ORDER BY a") + { + Statement::CreateTable(CreateTable { + table_model, + table_options, + order_by, + .. + }) => { + assert!(table_model.is_none()); + assert!(order_by.is_some()); + match table_options { + CreateTableOptions::Plain(options) => assert!(options.contains( + &SqlOption::NamedParenthesizedList(NamedParenthesizedList { + key: Ident::new("ENGINE"), + name: Some(Ident::new("MergeTree")), + values: vec![], + }) + )), + other => panic!("Expected Plain table options, got {:?}", other), + } + } + _ => panic!("Expected CreateTable"), + } +} + #[test] fn parse_insert_into_function() { clickhouse().verified_stmt(r#"INSERT INTO TABLE FUNCTION remote('localhost', default.simple_table) VALUES (100, 'inserted via remote()')"#); diff --git a/tests/sqlparser_doris.rs b/tests/sqlparser_doris.rs index 31ed99864..6340451ce 100644 --- a/tests/sqlparser_doris.rs +++ b/tests/sqlparser_doris.rs @@ -21,7 +21,9 @@ #[macro_use] mod test_utils; -use sqlparser::dialect::{Dialect, DorisDialect, GenericDialect}; +use sqlparser::ast::*; +use sqlparser::dialect::{AnsiDialect, Dialect, DorisDialect, GenericDialect}; +use sqlparser::tokenizer::Token; use test_utils::*; fn doris() -> TestedDialects { @@ -32,16 +34,34 @@ fn doris_and_generic() -> TestedDialects { TestedDialects::new(vec![Box::new(DorisDialect {}), Box::new(GenericDialect {})]) } +// ============================================================ +// Dialect gate verification +// ============================================================ + +#[test] +fn doris_and_generic_enable_doris_create_table_gates() { + let dialects = doris_and_generic(); + for dialect in dialects.dialects { + assert!(dialect.supports_create_table_key_model_clause()); + assert!(dialect.supports_create_table_range_list_partitioning_clause()); + assert!(dialect.supports_create_table_distribution_clause()); + assert!(dialect.supports_create_table_properties_clause()); + assert!(dialect.supports_column_aggregation_function_option()); + } +} + #[test] -fn doris_identifier_and_string_literal_gates() { - let dialect = DorisDialect {}; - assert_eq!(dialect.identifier_quote_style("identifier"), Some('`')); - assert!(dialect.is_delimited_identifier_start('`')); - assert!(dialect.supports_string_literal_backslash_escape()); - assert!(dialect.ignores_wildcard_escapes()); - assert!(dialect.supports_numeric_prefix()); +fn doris_only_parenthesized_auto_increment_gate() { + let doris_dialect = DorisDialect {}; + assert!(doris_dialect.supports_parenthesized_auto_increment_column_option()); + let generic_dialect = GenericDialect {}; + assert!(!generic_dialect.supports_parenthesized_auto_increment_column_option()); } +// ============================================================ +// Strings and identifiers +// ============================================================ + #[test] fn parse_doris_strings_and_identifiers() { doris().verified_stmt( @@ -49,7 +69,958 @@ fn parse_doris_strings_and_identifiers() { ); } +// ============================================================ +// CREATE TABLE - key model +// ============================================================ + +#[test] +fn tokenize_doris_create_table_keywords() { + doris().verified_stmt( + "CREATE TABLE t (k LARGEINT, v BIGINT SUM) AGGREGATE KEY(k) DISTRIBUTED BY HASH(k) BUCKETS AUTO PROPERTIES ('replication_num' = '1')", + ); +} + +#[test] +fn parse_doris_duplicate_key_hash_distribution() { + doris_and_generic().verified_stmt( + "CREATE TABLE t (k BIGINT, v STRING) DUPLICATE KEY(k) DISTRIBUTED BY HASH(k) BUCKETS 8", + ); +} + +#[test] +fn parse_doris_unique_key_random_distribution() { + doris_and_generic() + .verified_stmt("CREATE TABLE t (k BIGINT, v STRING) UNIQUE KEY(k) DISTRIBUTED BY RANDOM"); +} + +#[test] +fn parse_doris_buckets_auto() { + doris_and_generic().verified_stmt( + "CREATE TABLE t (k BIGINT, v STRING) DUPLICATE KEY(k) DISTRIBUTED BY HASH(k) BUCKETS AUTO", + ); +} + +#[test] +fn parse_doris_table_properties() { + doris_and_generic().verified_stmt( + "CREATE TABLE t (k BIGINT, v STRING) DUPLICATE KEY(k) DISTRIBUTED BY HASH(k) BUCKETS 8 PROPERTIES ('replication_num' = '1')", + ); +} + +#[test] +fn parse_doris_table_properties_with_double_quoted_values() { + doris().verified_stmt( + r#"CREATE TABLE t (k BIGINT, v STRING) DUPLICATE KEY(k) DISTRIBUTED BY HASH(k) BUCKETS 8 PROPERTIES ("storage_medium" = "SSD", "replication_num" = "1")"#, + ); +} + +#[test] +fn parse_doris_properties_only() { + doris_and_generic() + .verified_stmt("CREATE TABLE t (k BIGINT) PROPERTIES ('replication_num' = '1')"); +} + +#[test] +fn parse_doris_engine_with_properties_only() { + doris_and_generic().verified_stmt( + "CREATE TABLE t (k BIGINT) ENGINE = OLAP PROPERTIES ('replication_num' = '1')", + ); +} + +#[test] +fn parse_doris_engine_comment_properties_only() { + doris_and_generic().verified_stmt( + "CREATE TABLE t (k BIGINT) ENGINE = OLAP COMMENT 'my table' PROPERTIES ('replication_num' = '1')", + ); +} + +// ============================================================ +// CREATE TABLE - ENGINE clause +// ============================================================ + +#[test] +fn parse_doris_engine_before_key_model() { + doris_and_generic().verified_stmt( + "CREATE TABLE t (k BIGINT) ENGINE = OLAP DUPLICATE KEY(k) DISTRIBUTED BY HASH(k) BUCKETS 8", + ); +} + +#[test] +fn parse_doris_engine_with_comment_and_properties() { + doris_and_generic().verified_stmt( + "CREATE TABLE t (k BIGINT, v STRING) ENGINE = OLAP DUPLICATE KEY(k) COMMENT 'my table' DISTRIBUTED BY HASH(k) BUCKETS 8 PROPERTIES ('replication_num' = '1')", + ); +} + +#[test] +fn ast_doris_engine_is_captured() { + let sql = + "CREATE TABLE t (k BIGINT) ENGINE = OLAP DUPLICATE KEY(k) DISTRIBUTED BY HASH(k) BUCKETS 8"; + let stmt = doris().verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { + table_model: + Some(TableModel { + engine: Some(engine), + key_model: Some(km), + .. + }), + .. + }) => { + assert_eq!(engine, Ident::new("OLAP")); + assert_eq!(km.kind, TableKeyModelKind::Duplicate); + } + _ => panic!("Expected CreateTable with engine"), + } +} + +#[test] +fn ast_doris_engine_only_is_captured() { + let sql = "CREATE TABLE t (k BIGINT) ENGINE = OLAP"; + let stmt = doris().verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { + table_model: + Some(TableModel { + engine: Some(engine), + .. + }), + table_options, + .. + }) => { + assert_eq!(engine, Ident::new("OLAP")); + assert_eq!(table_options, CreateTableOptions::None); + } + _ => panic!("Expected CreateTable with structured engine"), + } +} + +#[test] +fn ast_generic_engine_only_stays_plain_table_option() { + let generic = TestedDialects::new(vec![Box::new(GenericDialect {})]); + let sql = "CREATE TABLE t (k BIGINT) ENGINE = InnoDB"; + let stmt = generic.verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { + table_model, + table_options, + .. + }) => { + assert!(table_model.is_none()); + match table_options { + CreateTableOptions::Plain(options) => assert!(options.contains( + &SqlOption::NamedParenthesizedList(NamedParenthesizedList { + key: Ident::new("ENGINE"), + name: Some(Ident::new("InnoDB")), + values: vec![], + }) + )), + other => panic!("Expected Plain table options, got {:?}", other), + } + } + _ => panic!("Expected CreateTable"), + } +} + +// ============================================================ +// CREATE TABLE - table COMMENT +// ============================================================ + +#[test] +fn parse_doris_table_comment_after_key_model() { + doris_and_generic().verified_stmt( + "CREATE TABLE t (k BIGINT) DUPLICATE KEY(k) COMMENT 'table comment' DISTRIBUTED BY HASH(k) BUCKETS 8", + ); +} + +#[test] +fn ast_doris_table_comment_is_captured() { + let sql = + "CREATE TABLE t (k BIGINT) DUPLICATE KEY(k) COMMENT 'table comment' DISTRIBUTED BY HASH(k) BUCKETS 8"; + let stmt = doris().verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { + table_model: + Some(TableModel { + comment: Some(comment), + .. + }), + .. + }) => { + assert_eq!(comment, "table comment"); + } + _ => panic!("Expected CreateTable with table_comment"), + } +} + +#[test] +fn ast_doris_table_comment_only_is_captured() { + let sql = "CREATE TABLE t (k BIGINT) COMMENT 'table comment'"; + let stmt = doris().verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { + table_model: + Some(TableModel { + comment: Some(comment), + .. + }), + table_options, + .. + }) => { + assert_eq!(comment, "table comment"); + assert_eq!(table_options, CreateTableOptions::None); + } + _ => panic!("Expected CreateTable with structured table_comment"), + } +} + +#[test] +fn parse_doris_table_comment_escapes_single_quote() { + doris().one_statement_parses_to( + "CREATE TABLE t (k BIGINT) COMMENT 'it''s ok'", + "CREATE TABLE t (k BIGINT) COMMENT 'it''s ok'", + ); +} + +#[test] +fn ast_doris_table_comment_with_properties_is_captured() { + let sql = + "CREATE TABLE t (k BIGINT) COMMENT 'table comment' PROPERTIES ('replication_num' = '1')"; + let stmt = doris().verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { + table_model: + Some(TableModel { + comment: Some(comment), + properties, + .. + }), + .. + }) => { + assert_eq!(comment, "table comment"); + assert_eq!(properties.len(), 1); + } + _ => panic!("Expected CreateTable with table_comment and properties"), + } +} + +// ============================================================ +// CREATE TABLE - key model ORDER BY +// ============================================================ + +#[test] +fn parse_doris_unique_key_order_by() { + doris_and_generic().verified_stmt( + "CREATE TABLE t (k BIGINT, c BIGINT) UNIQUE KEY(k) ORDER BY(c) DISTRIBUTED BY HASH(k) BUCKETS 8", + ); +} + +#[test] +fn parse_doris_key_model_order_by_is_parser_level_loose() { + for key_model in ["UNIQUE", "DUPLICATE", "AGGREGATE"] { + let sql = format!( + "CREATE TABLE t (k BIGINT, c BIGINT) {key_model} KEY(k) ORDER BY(c) DISTRIBUTED BY HASH(k) BUCKETS 8" + ); + let stmt = doris().verified_stmt(&sql); + match stmt { + Statement::CreateTable(CreateTable { + table_model: + Some(TableModel { + key_model: + Some(TableKeyModel { + order_by: Some(order_by), + .. + }), + .. + }), + .. + }) => assert_eq!(order_by, vec![Ident::new("c")]), + _ => panic!("Expected structured key model ORDER BY for {key_model}"), + } + } +} + +#[test] +fn doris_cluster_by_after_key_model_is_out_of_scope() { + let sql = + "CREATE TABLE t (k BIGINT) DUPLICATE KEY(k) CLUSTER BY(k) DISTRIBUTED BY HASH(k) BUCKETS 8"; + assert!(doris().parse_sql_statements(sql).is_err()); +} + +#[test] +fn ast_doris_key_model_order_by() { + let sql = + "CREATE TABLE t (k BIGINT, c BIGINT) UNIQUE KEY(k) ORDER BY(c) DISTRIBUTED BY HASH(k) BUCKETS 8"; + let stmt = doris().verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { + table_model: + Some(TableModel { + key_model: Some(km), + .. + }), + .. + }) => { + assert_eq!(km.kind, TableKeyModelKind::Unique); + assert_eq!(km.columns, vec![Ident::new("k")]); + assert_eq!(km.order_by, Some(vec![Ident::new("c")])); + } + _ => panic!("Expected CreateTable with key_model"), + } +} + +// ============================================================ +// CREATE TABLE - key model AST shape assertions +// ============================================================ + +#[test] +fn ast_doris_key_model_is_structured() { + let sql = + "CREATE TABLE t (k BIGINT, v STRING) DUPLICATE KEY(k) DISTRIBUTED BY HASH(k) BUCKETS 8"; + let stmt = doris().verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { + table_model: + Some(TableModel { + key_model: Some(km), + .. + }), + .. + }) => { + assert_eq!(km.kind, TableKeyModelKind::Duplicate); + assert_eq!(km.columns, vec![Ident::new("k")]); + } + _ => panic!("Expected CreateTable with key_model"), + } +} + +#[test] +fn ast_doris_aggregate_key_model() { + let sql = + "CREATE TABLE t (k BIGINT, v BIGINT SUM) AGGREGATE KEY(k) DISTRIBUTED BY HASH(k) BUCKETS 8"; + let stmt = doris().verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { + table_model: + Some(TableModel { + key_model: Some(km), + .. + }), + .. + }) => { + assert_eq!(km.kind, TableKeyModelKind::Aggregate); + assert_eq!(km.columns, vec![Ident::new("k")]); + } + _ => panic!("Expected CreateTable with key_model"), + } +} + +#[test] +fn ast_doris_unique_key_model() { + let sql = "CREATE TABLE t (k BIGINT, v STRING) UNIQUE KEY(k) DISTRIBUTED BY HASH(k) BUCKETS 8"; + let stmt = doris().verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { + table_model: + Some(TableModel { + key_model: Some(km), + .. + }), + .. + }) => { + assert_eq!(km.kind, TableKeyModelKind::Unique); + assert_eq!(km.columns, vec![Ident::new("k")]); + } + _ => panic!("Expected CreateTable with key_model"), + } +} + +// ============================================================ +// CREATE TABLE - distribution AST shape assertions +// ============================================================ + +#[test] +fn ast_doris_distribution_hash_is_structured() { + let sql = + "CREATE TABLE t (k BIGINT, v STRING) DUPLICATE KEY(k) DISTRIBUTED BY HASH(k) BUCKETS 8"; + let stmt = doris().verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { + table_model: + Some(TableModel { + distribution: Some(TableDistribution::Hash { columns, buckets }), + .. + }), + .. + }) => { + assert_eq!(columns, vec![Ident::new("k")]); + assert_eq!(buckets, Some(BucketCount::Count(8))); + } + _ => panic!("Expected CreateTable with Hash distribution"), + } +} + +#[test] +fn ast_doris_distribution_random_is_structured() { + let sql = "CREATE TABLE t (k BIGINT, v STRING) UNIQUE KEY(k) DISTRIBUTED BY RANDOM"; + let stmt = doris().verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { + table_model: + Some(TableModel { + distribution: Some(TableDistribution::Random { buckets }), + .. + }), + .. + }) => { + assert_eq!(buckets, None); + } + _ => panic!("Expected CreateTable with Random distribution"), + } +} + +// ============================================================ +// CREATE TABLE - partition +// ============================================================ + +#[test] +fn parse_doris_range_partition() { + doris_and_generic().verified_stmt( + "CREATE TABLE t (k BIGINT, dt DATE) DUPLICATE KEY(k) PARTITION BY RANGE(dt) (PARTITION p1 VALUES LESS THAN ('2024-01-01')) DISTRIBUTED BY HASH(k) BUCKETS 8", + ); +} + +#[test] +fn parse_doris_list_partition() { + doris_and_generic().verified_stmt( + "CREATE TABLE t (k BIGINT, dt DATE) DUPLICATE KEY(k) PARTITION BY LIST(dt) (PARTITION p1 VALUES IN (('2024-01-01'), ('2024-01-02'))) DISTRIBUTED BY HASH(k) BUCKETS 8", + ); +} + +#[test] +fn parse_doris_auto_partition_skeleton() { + doris_and_generic().verified_stmt( + "CREATE TABLE t (k BIGINT, dt DATE) DUPLICATE KEY(k) AUTO PARTITION BY RANGE(date_trunc(dt, 'day')) DISTRIBUTED BY RANDOM", + ); +} + +#[test] +fn parse_doris_partition_values_less_than_maxvalue() { + doris_and_generic().verified_stmt( + "CREATE TABLE t (k BIGINT, dt DATE) DUPLICATE KEY(k) PARTITION BY RANGE(dt) (PARTITION p1 VALUES LESS THAN ('2024-01-01'), PARTITION pmax VALUES LESS THAN MAXVALUE) DISTRIBUTED BY HASH(k) BUCKETS 8", + ); +} + +#[test] +fn parse_doris_partition_with_properties() { + doris_and_generic().verified_stmt( + "CREATE TABLE t (k BIGINT, dt DATE) DUPLICATE KEY(k) PARTITION BY RANGE(dt) (PARTITION p1 VALUES LESS THAN ('2024-01-01') PROPERTIES ('storage_medium' = 'SSD')) DISTRIBUTED BY HASH(k) BUCKETS 8", + ); +} + +#[test] +fn parse_doris_list_partition_single_values() { + doris_and_generic().one_statement_parses_to( + "CREATE TABLE t (k BIGINT, city STRING) DUPLICATE KEY(k) PARTITION BY LIST(city) (PARTITION p1 VALUES IN ('Beijing', 'Shanghai')) DISTRIBUTED BY HASH(k) BUCKETS 8", + "CREATE TABLE t (k BIGINT, city STRING) DUPLICATE KEY(k) PARTITION BY LIST(city) (PARTITION p1 VALUES IN (('Beijing'), ('Shanghai'))) DISTRIBUTED BY HASH(k) BUCKETS 8", + ); +} + +#[test] +fn parse_doris_multi_column_range_partition() { + doris_and_generic().verified_stmt( + "CREATE TABLE t (k1 INT, k2 INT, v INT) DUPLICATE KEY(k1, k2) PARTITION BY RANGE(k1, k2) (PARTITION p1 VALUES LESS THAN ('100', '200')) DISTRIBUTED BY HASH(k1) BUCKETS 8", + ); +} + +#[test] +fn parse_doris_multi_column_list_partition() { + doris_and_generic().verified_stmt( + "CREATE TABLE t (k1 INT, k2 INT, v INT) DUPLICATE KEY(k1, k2) PARTITION BY LIST(k1, k2) (PARTITION p1 VALUES IN (('1', '2'))) DISTRIBUTED BY HASH(k1) BUCKETS 8", + ); +} + +#[test] +fn parse_doris_auto_partition_by_list_multi_column() { + doris_and_generic().verified_stmt( + "CREATE TABLE t (k1 INT, k2 INT, v INT) DUPLICATE KEY(k1, k2) AUTO PARTITION BY LIST(k1, k2) DISTRIBUTED BY HASH(k1) BUCKETS 8", + ); +} + +#[test] +fn parse_doris_partition_fixed_range() { + doris_and_generic().verified_stmt( + "CREATE TABLE t (k BIGINT, dt DATE) DUPLICATE KEY(k) PARTITION BY RANGE(dt) (PARTITION p1 VALUES [('2024-01-01'), ('2024-02-01'))) DISTRIBUTED BY HASH(k) BUCKETS 8", + ); +} + +#[test] +fn parse_doris_partition_batch_range() { + doris_and_generic().verified_stmt( + "CREATE TABLE t (k BIGINT, dt DATE) DUPLICATE KEY(k) PARTITION BY RANGE(dt) (FROM ('2024-01-01') TO ('2024-02-01') INTERVAL 1 DAY) DISTRIBUTED BY HASH(k) BUCKETS 8", + ); +} + +#[test] +fn parse_doris_partition_batch_range_no_unit() { + doris_and_generic().verified_stmt( + "CREATE TABLE t (k BIGINT, dt INT) DUPLICATE KEY(k) PARTITION BY RANGE(dt) (FROM ('1') TO ('100') INTERVAL 10) DISTRIBUTED BY HASH(k) BUCKETS 8", + ); +} + +// ============================================================ +// CREATE TABLE - partition IF NOT EXISTS +// ============================================================ + +#[test] +fn parse_doris_partition_if_not_exists() { + doris_and_generic().verified_stmt( + "CREATE TABLE t (k BIGINT, dt DATE) DUPLICATE KEY(k) PARTITION BY RANGE(dt) (PARTITION IF NOT EXISTS p1 VALUES LESS THAN ('2024-01-01')) DISTRIBUTED BY HASH(k) BUCKETS 8", + ); +} + +#[test] +fn ast_doris_partition_if_not_exists() { + let sql = "CREATE TABLE t (k BIGINT, dt DATE) DUPLICATE KEY(k) PARTITION BY RANGE(dt) (PARTITION IF NOT EXISTS p1 VALUES LESS THAN ('2024-01-01')) DISTRIBUTED BY HASH(k) BUCKETS 8"; + let stmt = doris().verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { + table_model: + Some(TableModel { + partitioning: Some(dp), + .. + }), + .. + }) => { + assert_eq!(dp.partitions.len(), 1); + match &dp.partitions[0] { + TablePartitioningEntry::Definition(def) => { + assert!(def.if_not_exists); + assert_eq!(def.name, Ident::new("p1")); + } + _ => panic!("Expected Definition entry"), + } + } + _ => panic!("Expected CreateTable with doris_partition"), + } +} + +// ============================================================ +// CREATE TABLE - MAXVALUE / MAX_VALUE normalization +// ============================================================ + +#[test] +fn parse_doris_max_value_underscore() { + doris_and_generic().one_statement_parses_to( + "CREATE TABLE t (k BIGINT, dt DATE) DUPLICATE KEY(k) PARTITION BY RANGE(dt) (PARTITION pmax VALUES LESS THAN MAX_VALUE) DISTRIBUTED BY HASH(k) BUCKETS 8", + "CREATE TABLE t (k BIGINT, dt DATE) DUPLICATE KEY(k) PARTITION BY RANGE(dt) (PARTITION pmax VALUES LESS THAN MAXVALUE) DISTRIBUTED BY HASH(k) BUCKETS 8", + ); +} + +#[test] +fn parse_doris_maxvalue_parenthesized() { + doris_and_generic().one_statement_parses_to( + "CREATE TABLE t (k BIGINT, dt DATE) DUPLICATE KEY(k) PARTITION BY RANGE(dt) (PARTITION pmax VALUES LESS THAN (MAXVALUE)) DISTRIBUTED BY HASH(k) BUCKETS 8", + "CREATE TABLE t (k BIGINT, dt DATE) DUPLICATE KEY(k) PARTITION BY RANGE(dt) (PARTITION pmax VALUES LESS THAN MAXVALUE) DISTRIBUTED BY HASH(k) BUCKETS 8", + ); +} + +#[test] +fn parse_doris_max_value_parenthesized_underscore() { + doris_and_generic().one_statement_parses_to( + "CREATE TABLE t (k BIGINT, dt DATE) DUPLICATE KEY(k) PARTITION BY RANGE(dt) (PARTITION pmax VALUES LESS THAN (MAX_VALUE)) DISTRIBUTED BY HASH(k) BUCKETS 8", + "CREATE TABLE t (k BIGINT, dt DATE) DUPLICATE KEY(k) PARTITION BY RANGE(dt) (PARTITION pmax VALUES LESS THAN MAXVALUE) DISTRIBUTED BY HASH(k) BUCKETS 8", + ); +} + +// ============================================================ +// CREATE TABLE - partition AST shape assertions +// ============================================================ + +#[test] +fn ast_doris_partition_range_is_structured() { + let sql = "CREATE TABLE t (k BIGINT, dt DATE) DUPLICATE KEY(k) PARTITION BY RANGE(dt) (PARTITION p1 VALUES LESS THAN ('2024-01-01')) DISTRIBUTED BY HASH(k) BUCKETS 8"; + let stmt = doris().verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { + table_model: + Some(TableModel { + partitioning: Some(dp), + .. + }), + .. + }) => { + assert!(!dp.auto); + assert_eq!(dp.kind, TablePartitioningKind::Range); + assert_eq!(dp.columns.len(), 1); + assert_eq!(dp.partitions.len(), 1); + match &dp.partitions[0] { + TablePartitioningEntry::Definition(def) => { + assert_eq!(def.name, Ident::new("p1")); + match &def.values { + TablePartitioningValues::LessThan(values) => { + assert_eq!(values.len(), 1); + } + _ => panic!("Expected LessThan partition values"), + } + } + _ => panic!("Expected Definition entry"), + } + } + _ => panic!("Expected CreateTable with doris_partition"), + } +} + +#[test] +fn ast_doris_partition_maxvalue_is_structured() { + let sql = "CREATE TABLE t (k BIGINT, dt DATE) DUPLICATE KEY(k) PARTITION BY RANGE(dt) (PARTITION pmax VALUES LESS THAN MAXVALUE) DISTRIBUTED BY HASH(k) BUCKETS 8"; + let stmt = doris().verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { + table_model: + Some(TableModel { + partitioning: Some(dp), + .. + }), + .. + }) => { + assert_eq!(dp.partitions.len(), 1); + match &dp.partitions[0] { + TablePartitioningEntry::Definition(def) => { + assert_eq!(def.name, Ident::new("pmax")); + assert_eq!(def.values, TablePartitioningValues::LessThanMaxValue); + } + _ => panic!("Expected Definition entry"), + } + } + _ => panic!("Expected CreateTable with doris_partition"), + } +} + +#[test] +fn ast_doris_auto_partition_is_structured() { + let sql = "CREATE TABLE t (k BIGINT, dt DATE) DUPLICATE KEY(k) AUTO PARTITION BY RANGE(date_trunc(dt, 'day')) DISTRIBUTED BY RANDOM"; + let stmt = doris().verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { + table_model: + Some(TableModel { + partitioning: Some(dp), + .. + }), + .. + }) => { + assert!(dp.auto); + assert_eq!(dp.kind, TablePartitioningKind::Range); + assert!(dp.partitions.is_empty()); + } + _ => panic!("Expected CreateTable with auto doris_partition"), + } +} + +#[test] +fn ast_doris_multi_column_partition() { + let sql = "CREATE TABLE t (k1 INT, k2 INT, v INT) DUPLICATE KEY(k1, k2) PARTITION BY RANGE(k1, k2) (PARTITION p1 VALUES LESS THAN ('100', '200')) DISTRIBUTED BY HASH(k1) BUCKETS 8"; + let stmt = doris().verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { + table_model: + Some(TableModel { + partitioning: Some(dp), + .. + }), + .. + }) => { + assert!(!dp.auto); + assert_eq!(dp.kind, TablePartitioningKind::Range); + assert_eq!(dp.columns.len(), 2); + assert_eq!(dp.partitions.len(), 1); + } + _ => panic!("Expected CreateTable with multi-column doris_partition"), + } +} + +#[test] +fn ast_doris_fixed_range_partition() { + let sql = "CREATE TABLE t (k BIGINT, dt DATE) DUPLICATE KEY(k) PARTITION BY RANGE(dt) (PARTITION p1 VALUES [('2024-01-01'), ('2024-02-01'))) DISTRIBUTED BY HASH(k) BUCKETS 8"; + let stmt = doris().verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { + table_model: + Some(TableModel { + partitioning: Some(dp), + .. + }), + .. + }) => { + assert_eq!(dp.partitions.len(), 1); + match &dp.partitions[0] { + TablePartitioningEntry::Definition(def) => { + assert_eq!(def.name, Ident::new("p1")); + match &def.values { + TablePartitioningValues::FixedRange { start, end } => { + assert_eq!(start.len(), 1); + assert_eq!(end.len(), 1); + } + _ => panic!("Expected FixedRange partition values"), + } + } + _ => panic!("Expected Definition entry"), + } + } + _ => panic!("Expected CreateTable with doris_partition"), + } +} + +#[test] +fn ast_doris_batch_range_partition() { + let sql = "CREATE TABLE t (k BIGINT, dt DATE) DUPLICATE KEY(k) PARTITION BY RANGE(dt) (FROM ('2024-01-01') TO ('2024-02-01') INTERVAL 1 DAY) DISTRIBUTED BY HASH(k) BUCKETS 8"; + let stmt = doris().verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { + table_model: + Some(TableModel { + partitioning: Some(dp), + .. + }), + .. + }) => { + assert_eq!(dp.partitions.len(), 1); + match &dp.partitions[0] { + TablePartitioningEntry::BatchRange { + from, + to, + interval_value, + interval_unit, + } => { + assert_eq!(from.len(), 1); + assert_eq!(to.len(), 1); + assert_eq!(*interval_value, 1); + assert_eq!(interval_unit.as_ref().unwrap(), &Ident::new("DAY")); + } + _ => panic!("Expected BatchRange entry"), + } + } + _ => panic!("Expected CreateTable with doris_partition"), + } +} + +// ============================================================ +// CREATE TABLE - column options: AUTO_INCREMENT +// ============================================================ + +#[test] +fn parse_doris_auto_increment_column() { + doris().verified_stmt( + "CREATE TABLE t (id BIGINT AUTO_INCREMENT(100), name STRING) DUPLICATE KEY(id) DISTRIBUTED BY HASH(id) BUCKETS 8", + ); +} + +#[test] +fn parse_doris_auto_increment_no_start_value() { + doris().verified_stmt( + "CREATE TABLE t (id BIGINT AUTO_INCREMENT, name STRING) DUPLICATE KEY(id) DISTRIBUTED BY HASH(id) BUCKETS 8", + ); +} + +#[test] +fn parse_generic_auto_increment_uses_unified_ast() { + let generic = TestedDialects::new(vec![Box::new(GenericDialect {})]); + let sql = "CREATE TABLE t (id BIGINT AUTO_INCREMENT)"; + let stmt = generic.verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { columns, .. }) => { + assert_eq!( + columns[0].options[0].option, + ColumnOption::AutoIncrement(None) + ); + } + _ => panic!("Expected CreateTable"), + } +} + +#[test] +fn ast_doris_auto_increment_with_start() { + let sql = "CREATE TABLE t (id BIGINT AUTO_INCREMENT(100), name STRING) DUPLICATE KEY(id) DISTRIBUTED BY HASH(id) BUCKETS 8"; + let stmt = doris().verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { columns, .. }) => { + let id_col = &columns[0]; + assert_eq!(id_col.name, Ident::new("id")); + let auto_inc = id_col + .options + .iter() + .find(|o| matches!(o.option, ColumnOption::AutoIncrement(_))); + assert!(auto_inc.is_some()); + match &auto_inc.unwrap().option { + ColumnOption::AutoIncrement(Some(100)) => {} + other => panic!("Expected AutoIncrement(Some(100)), got {:?}", other), + } + } + _ => panic!("Expected CreateTable"), + } +} + +#[test] +fn ast_doris_auto_increment_without_start() { + let sql = "CREATE TABLE t (id BIGINT AUTO_INCREMENT, name STRING) DUPLICATE KEY(id) DISTRIBUTED BY HASH(id) BUCKETS 8"; + let stmt = doris().verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { columns, .. }) => { + let id_col = &columns[0]; + let auto_inc = id_col + .options + .iter() + .find(|o| matches!(o.option, ColumnOption::AutoIncrement(_))); + assert!(auto_inc.is_some()); + match &auto_inc.unwrap().option { + ColumnOption::AutoIncrement(None) => {} + other => panic!("Expected AutoIncrement(None), got {:?}", other), + } + } + _ => panic!("Expected CreateTable"), + } +} + +// ============================================================ +// CREATE TABLE - column options: aggregate functions +// ============================================================ + +#[test] +fn parse_doris_aggregate_column_options() { + doris_and_generic().verified_stmt( + "CREATE TABLE t (k BIGINT, v BIGINT SUM, bitmap_col BITMAP BITMAP_UNION) AGGREGATE KEY(k) DISTRIBUTED BY HASH(k) BUCKETS 8", + ); +} + +#[test] +fn parse_doris_all_aggregate_column_options() { + doris_and_generic().verified_stmt( + "CREATE TABLE t (k BIGINT, v1 BIGINT SUM, v2 BIGINT MAX, v3 BIGINT MIN, v4 BIGINT REPLACE, v5 HLL HLL_UNION, v6 BITMAP BITMAP_UNION, v7 QUANTILESTATE QUANTILE_UNION) AGGREGATE KEY(k) DISTRIBUTED BY HASH(k) BUCKETS 8", + ); +} + +#[test] +fn ast_doris_aggregate_column_option_is_dialect_specific() { + let sql = + "CREATE TABLE t (k BIGINT, v BIGINT SUM) AGGREGATE KEY(k) DISTRIBUTED BY HASH(k) BUCKETS 8"; + let stmt = doris().verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { columns, .. }) => { + let v_col = &columns[1]; + assert_eq!(v_col.name, Ident::new("v")); + let agg_opt = v_col + .options + .iter() + .find(|o| matches!(o.option, ColumnOption::DialectSpecific(_))); + assert!(agg_opt.is_some()); + match &agg_opt.unwrap().option { + ColumnOption::DialectSpecific(tokens) => { + assert_eq!(tokens.len(), 1); + assert_eq!(tokens[0], Token::make_keyword("SUM")); + } + other => panic!("Expected DialectSpecific, got {:?}", other), + } + } + _ => panic!("Expected CreateTable"), + } +} + +// ============================================================ +// CREATE TABLE - PROPERTIES AST shape +// ============================================================ + +#[test] +fn ast_doris_properties_is_structured() { + let sql = "CREATE TABLE t (k BIGINT, v STRING) DUPLICATE KEY(k) DISTRIBUTED BY HASH(k) BUCKETS 8 PROPERTIES ('replication_num' = '1', 'storage_medium' = 'SSD')"; + let stmt = doris().verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { + table_model: Some(TableModel { properties, .. }), + .. + }) => { + assert_eq!(properties.len(), 2); + } + _ => panic!("Expected CreateTable"), + } +} + +// ============================================================ +// CREATE TABLE - full official clause order +// ============================================================ + +#[test] +fn parse_doris_full_clause_order() { + doris_and_generic().verified_stmt( + "CREATE TABLE t (k BIGINT, dt DATE, v STRING) ENGINE = OLAP DUPLICATE KEY(k) COMMENT 'full example' PARTITION BY RANGE(dt) (PARTITION p1 VALUES LESS THAN ('2024-01-01')) DISTRIBUTED BY HASH(k) BUCKETS 8 PROPERTIES ('replication_num' = '1')", + ); +} + +#[test] +fn parse_doris_engine_without_key_model() { + doris_and_generic() + .verified_stmt("CREATE TABLE t (k BIGINT) ENGINE = OLAP DISTRIBUTED BY HASH(k) BUCKETS 8"); +} + +#[test] +fn doris_keywords_can_still_be_common_identifiers_and_aliases() { + let generic = TestedDialects::new(vec![Box::new(GenericDialect {})]); + generic.verified_stmt( + "SELECT 1 AS properties, 2 AS less, 3 AS than, 4 AS random, 5 AS largeint, 6 AS hll_union, 7 AS bitmap_union", + ); + generic.verified_stmt( + "CREATE TABLE properties (less INT, than INT, random INT, largeint INT, hll_union INT, bitmap_union INT)", + ); + generic.verified_stmt( + "SELECT \"PROPERTIES\", \"LESS\", \"THAN\", \"RANDOM\", \"LARGEINT\", \"HLL_UNION\", \"BITMAP_UNION\" FROM \"PROPERTIES\"", + ); +} + +// ============================================================ +// Negative tests: ANSI dialect rejects Doris-specific syntax +// ============================================================ + +#[test] +fn ansi_rejects_doris_key_model() { + let ansi = TestedDialects::new(vec![Box::new(AnsiDialect {})]); + let sql = + "CREATE TABLE t (k BIGINT, v STRING) DUPLICATE KEY(k) DISTRIBUTED BY HASH(k) BUCKETS 8"; + assert!(ansi.parse_sql_statements(sql).is_err()); +} + +#[test] +fn generic_engine_without_model_marker_remains_plain_options() { + let generic = TestedDialects::new(vec![Box::new(GenericDialect {})]); + let sql = "CREATE TABLE t (k BIGINT) ENGINE = InnoDB"; + match generic.verified_stmt(sql) { + Statement::CreateTable(CreateTable { + table_model, + table_options, + .. + }) => { + assert!(table_model.is_none()); + assert!(matches!(table_options, CreateTableOptions::Plain(_))); + } + _ => panic!("Expected CreateTable"), + } +} + #[test] -fn doris_and_generic_parse_common_sql_identically() { - doris_and_generic().verified_stmt("SELECT 1 AS properties FROM t"); +fn doris_engine_without_model_marker_creates_table_model() { + let sql = "CREATE TABLE t (k BIGINT) ENGINE = OLAP"; + match doris().verified_stmt(sql) { + Statement::CreateTable(CreateTable { + table_model: + Some(TableModel { + engine: Some(engine), + .. + }), + table_options, + .. + }) => { + assert_eq!(engine, Ident::new("OLAP")); + assert_eq!(table_options, CreateTableOptions::None); + } + _ => panic!("Expected CreateTable with table_model engine"), + } } diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 86c765813..ff2d269fa 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -763,6 +763,7 @@ fn test_duckdb_union_datatype() { order_by: Default::default(), partition_by: Default::default(), cluster_by: Default::default(), + table_model: Default::default(), clustered_by: Default::default(), inherits: Default::default(), partition_of: Default::default(), diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 6e866746d..290995721 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -1980,6 +1980,7 @@ fn parse_create_table_with_valid_options() { order_by: None, partition_by: None, cluster_by: None, + table_model: None, clustered_by: None, inherits: None, partition_of: None, @@ -2160,6 +2161,7 @@ fn parse_create_table_with_identity_column() { order_by: None, partition_by: None, cluster_by: None, + table_model: None, clustered_by: None, inherits: None, partition_of: None, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index a61dd9afe..04e829e0d 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -5692,6 +5692,7 @@ fn parse_create_table_with_partition_by() { match pg_and_generic().verified_stmt(sql) { Statement::CreateTable(create_table) => { assert_eq!("t1", create_table.name.to_string()); + assert!(create_table.table_model.is_none()); assert_eq!( vec![ ColumnDef { @@ -6721,6 +6722,7 @@ fn parse_trigger_related_functions() { order_by: None, partition_by: None, cluster_by: None, + table_model: None, clustered_by: None, inherits: None, partition_of: None,