From bffa3fd222b3ba236d5617cc0e0fcb73b60817d9 Mon Sep 17 00:00:00 2001 From: finchxxia <13153363548@163.com> Date: Thu, 18 Jun 2026 12:51:31 +0800 Subject: [PATCH 1/3] MySQL/Generic: Structure AUTO_INCREMENT column option --- src/ast/ddl.rs | 13 +++++++++++++ src/ast/spans.rs | 2 ++ src/parser/mod.rs | 4 +--- tests/sqlparser_mysql.rs | 9 ++------- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 444fc1c01..298976729 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -2001,6 +2001,12 @@ pub enum ColumnOption { /// ``` /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/invisible-columns.html Invisible, + /// Column-level auto-increment option with an optional start value. + /// + /// MySQL and Generic use this unified AST node without a start value. + /// Dialects that support a start value can use `Some`. + /// Syntax: `AUTO_INCREMENT` or `AUTO_INCREMENT(<start>)`. + AutoIncrement(Option), } impl From for ColumnOption { @@ -2149,6 +2155,13 @@ impl fmt::Display for ColumnOption { Invisible => { write!(f, "INVISIBLE") } + AutoIncrement(start) => { + f.write_str("AUTO_INCREMENT")?; + if let Some(start) = start { + write!(f, "({start})")?; + } + Ok(()) + } } } } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index cd4034c7d..963265586 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -822,6 +822,7 @@ impl Spanned for RaiseStatementValue { /// - [ColumnOption::PrimaryKey] /// - [ColumnOption::Unique] /// - [ColumnOption::DialectSpecific] +/// - [ColumnOption::AutoIncrement] /// - [ColumnOption::Generated] impl Spanned for ColumnOption { fn span(&self) -> Span { @@ -849,6 +850,7 @@ impl Spanned for ColumnOption { ColumnOption::Tags(..) => Span::empty(), ColumnOption::Srid(..) 
=> Span::empty(), ColumnOption::Invisible => Span::empty(), + ColumnOption::AutoIncrement(_) => Span::empty(), } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d36e1adcd..98e0747e0 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9657,9 +9657,7 @@ impl<'a> Parser<'a> { && dialect_of!(self is MySqlDialect | GenericDialect) { // Support AUTO_INCREMENT for MySQL - Ok(Some(ColumnOption::DialectSpecific(vec![ - Token::make_keyword("AUTO_INCREMENT"), - ]))) + Ok(Some(ColumnOption::AutoIncrement(None))) } else if self.parse_keyword(Keyword::AUTOINCREMENT) && dialect_of!(self is SQLiteDialect | GenericDialect) { diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index f513d3670..f4773f293 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -27,7 +27,6 @@ use sqlparser::ast::*; use sqlparser::dialect::{GenericDialect, MySqlDialect}; use sqlparser::parser::{ParserError, ParserOptions}; use sqlparser::tokenizer::Span; -use sqlparser::tokenizer::Token; use test_utils::*; #[macro_use] @@ -661,9 +660,7 @@ fn parse_create_table_auto_increment() { }, ColumnOptionDef { name: None, - option: ColumnOption::DialectSpecific(vec![Token::make_keyword( - "AUTO_INCREMENT" - )]), + option: ColumnOption::AutoIncrement(None), }, ], }], @@ -770,9 +767,7 @@ fn parse_create_table_primary_and_unique_key() { }, ColumnOptionDef { name: None, - option: ColumnOption::DialectSpecific(vec![ - Token::make_keyword("AUTO_INCREMENT") - ]), + option: ColumnOption::AutoIncrement(None), }, ], }, From 8f0906f4f8399db5e93f80d73729fc29c3b19f65 Mon Sep 17 00:00:00 2001 From: finchxxia <13153363548@163.com> Date: Thu, 18 Jun 2026 12:53:44 +0800 Subject: [PATCH 2/3] Doris: Add dialect skeleton --- examples/cli.rs | 1 + src/dialect/doris.rs | 53 ++++++++++++++++++++++++++++++++++++++ src/dialect/mod.rs | 5 ++++ src/test_utils.rs | 1 + tests/sqlparser_doris.rs | 55 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 115 insertions(+) create mode 100644 
src/dialect/doris.rs create mode 100644 tests/sqlparser_doris.rs diff --git a/examples/cli.rs b/examples/cli.rs index 3c4299b20..e5643dc0b 100644 --- a/examples/cli.rs +++ b/examples/cli.rs @@ -52,6 +52,7 @@ $ cargo run --example cli - [--dialectname] "--postgres" => Box::new(PostgreSqlDialect {}), "--ms" => Box::new(MsSqlDialect {}), "--mysql" => Box::new(MySqlDialect {}), + "--doris" => Box::new(DorisDialect {}), "--snowflake" => Box::new(SnowflakeDialect {}), "--hive" => Box::new(HiveDialect {}), "--redshift" => Box::new(RedshiftSqlDialect {}), diff --git a/src/dialect/doris.rs b/src/dialect/doris.rs new file mode 100644 index 000000000..17911fd7a --- /dev/null +++ b/src/dialect/doris.rs @@ -0,0 +1,53 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::dialect::Dialect; + +/// A [`Dialect`] for [Apache Doris](https://doris.apache.org/). 
+#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +pub struct DorisDialect {} + +impl Dialect for DorisDialect { + fn is_delimited_identifier_start(&self, ch: char) -> bool { + ch == '`' + } + + fn identifier_quote_style(&self, _identifier: &str) -> Option { + Some('`') + } + + fn is_identifier_start(&self, ch: char) -> bool { + ch.is_ascii_alphabetic() || ch == '_' || !ch.is_ascii() + } + + fn is_identifier_part(&self, ch: char) -> bool { + self.is_identifier_start(ch) || ch.is_ascii_digit() + } + + fn supports_string_literal_backslash_escape(&self) -> bool { + true + } + + fn ignores_wildcard_escapes(&self) -> bool { + true + } + + fn supports_numeric_prefix(&self) -> bool { + true + } +} diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 4b791d8ed..deda44583 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -19,6 +19,7 @@ mod ansi; mod bigquery; mod clickhouse; mod databricks; +mod doris; mod duckdb; mod generic; mod hive; @@ -43,6 +44,7 @@ pub use self::ansi::AnsiDialect; pub use self::bigquery::BigQueryDialect; pub use self::clickhouse::ClickHouseDialect; pub use self::databricks::DatabricksDialect; +pub use self::doris::DorisDialect; pub use self::duckdb::DuckDbDialect; pub use self::generic::GenericDialect; pub use self::hive::HiveDialect; @@ -1883,6 +1885,7 @@ pub fn dialect_from_str(dialect_name: impl AsRef) -> Option Some(Box::new(BigQueryDialect)), "ansi" => Some(Box::new(AnsiDialect {})), "duckdb" => Some(Box::new(DuckDbDialect {})), + "doris" => Some(Box::new(DorisDialect {})), "databricks" => Some(Box::new(DatabricksDialect {})), "spark" | "sparksql" => Some(Box::new(SparkSqlDialect {})), "oracle" => Some(Box::new(OracleDialect {})), @@ -1938,6 +1941,8 @@ mod tests { assert!(parse_dialect("ANSI").is::()); assert!(parse_dialect("duckdb").is::()); assert!(parse_dialect("DuckDb").is::()); + assert!(parse_dialect("doris").is::()); + 
assert!(parse_dialect("Doris").is::()); assert!(parse_dialect("DataBricks").is::()); assert!(parse_dialect("databricks").is::()); assert!(parse_dialect("teradata").is::()); diff --git a/src/test_utils.rs b/src/test_utils.rs index c4d1d0db2..e19f28d20 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -286,6 +286,7 @@ pub fn all_dialects() -> TestedDialects { Box::new(HiveDialect {}), Box::new(RedshiftSqlDialect {}), Box::new(MySqlDialect {}), + Box::new(DorisDialect {}), Box::new(BigQueryDialect {}), Box::new(SQLiteDialect {}), Box::new(DuckDbDialect {}), diff --git a/tests/sqlparser_doris.rs b/tests/sqlparser_doris.rs new file mode 100644 index 000000000..31ed99864 --- /dev/null +++ b/tests/sqlparser_doris.rs @@ -0,0 +1,55 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#![warn(clippy::all)] +//! Test SQL syntax specific to Apache Doris. 
+ +#[macro_use] +mod test_utils; + +use sqlparser::dialect::{Dialect, DorisDialect, GenericDialect}; +use test_utils::*; + +fn doris() -> TestedDialects { + TestedDialects::new(vec![Box::new(DorisDialect {})]) +} + +fn doris_and_generic() -> TestedDialects { + TestedDialects::new(vec![Box::new(DorisDialect {}), Box::new(GenericDialect {})]) +} + +#[test] +fn doris_identifier_and_string_literal_gates() { + let dialect = DorisDialect {}; + assert_eq!(dialect.identifier_quote_style("identifier"), Some('`')); + assert!(dialect.is_delimited_identifier_start('`')); + assert!(dialect.supports_string_literal_backslash_escape()); + assert!(dialect.ignores_wildcard_escapes()); + assert!(dialect.supports_numeric_prefix()); +} + +#[test] +fn parse_doris_strings_and_identifiers() { + doris().verified_stmt( + r#"SELECT "double quoted string", 'single quoted string', `select` FROM `db`.`table`"#, + ); +} + +#[test] +fn doris_and_generic_parse_common_sql_identically() { + doris_and_generic().verified_stmt("SELECT 1 AS properties FROM t"); +} From db1262f34dff4aeb2f718743124db07bf31e289a Mon Sep 17 00:00:00 2001 From: finchxxia <13153363548@163.com> Date: Thu, 18 Jun 2026 13:45:29 +0800 Subject: [PATCH 3/3] Doris: Add CREATE TABLE column options --- src/dialect/doris.rs | 8 +++ src/dialect/generic.rs | 4 ++ src/dialect/mod.rs | 12 ++++ src/keywords.rs | 4 ++ src/parser/mod.rs | 58 +++++++++++++++++-- tests/sqlparser_doris.rs | 118 +++++++++++++++++++++++++++++++++++++++ 6 files changed, 198 insertions(+), 6 deletions(-) diff --git a/src/dialect/doris.rs b/src/dialect/doris.rs index 17911fd7a..872ea6999 100644 --- a/src/dialect/doris.rs +++ b/src/dialect/doris.rs @@ -50,4 +50,12 @@ impl Dialect for DorisDialect { fn supports_numeric_prefix(&self) -> bool { true } + + fn supports_parenthesized_auto_increment_column_option(&self) -> bool { + true + } + + fn supports_column_aggregation_function_option(&self) -> bool { + true + } } diff --git a/src/dialect/generic.rs 
b/src/dialect/generic.rs index 7ee826c25..076f070c6 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -173,6 +173,10 @@ impl Dialect for GenericDialect { true } + fn supports_column_aggregation_function_option(&self) -> bool { + true + } + fn supports_named_fn_args_with_assignment_operator(&self) -> bool { true } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index deda44583..b5064b736 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1228,6 +1228,18 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if the dialect supports `AUTO_INCREMENT` with an optional + /// parenthesized start value in column definitions. + fn supports_parenthesized_auto_increment_column_option(&self) -> bool { + false + } + + /// Returns true if the dialect supports aggregate column option functions in + /// `CREATE TABLE`, such as `SUM`, `REPLACE`, or `BITMAP_UNION`. + fn supports_column_aggregation_function_option(&self) -> bool { + false + } + /// Returns true if the dialect accepts a comma-separated list of table-level /// options placed between the table name and the column-list parenthesis, e.g. 
/// diff --git a/src/keywords.rs b/src/keywords.rs index 0ff32948f..c12ab215b 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -163,6 +163,7 @@ define_keywords!( BIND, BINDING, BIT, + BITMAP_UNION, BLANKSASNULL, BLOB, BLOCK, @@ -489,6 +490,7 @@ define_keywords!( HIGH_PRIORITY, HISTORY, HIVEVAR, + HLL_UNION, HOLD, HOSTS, HOUR, @@ -573,6 +575,7 @@ define_keywords!( LAMBDA, LANGUAGE, LARGE, + LARGEINT, LAST, LAST_VALUE, LATERAL, @@ -828,6 +831,7 @@ define_keywords!( PURGE, PUT, QUALIFY, + QUANTILE_UNION, QUARTER, QUERIES, QUERY, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 98e0747e0..c3e07b811 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9653,11 +9653,25 @@ impl<'a> Parser<'a> { } .into(), )) - } else if self.parse_keyword(Keyword::AUTO_INCREMENT) - && dialect_of!(self is MySqlDialect | GenericDialect) - { - // Support AUTO_INCREMENT for MySQL - Ok(Some(ColumnOption::AutoIncrement(None))) + } else if self.parse_keyword(Keyword::AUTO_INCREMENT) { + if self + .dialect + .supports_parenthesized_auto_increment_column_option() + { + let start = if self.consume_token(&Token::LParen) { + let value = self.parse_literal_uint()?; + self.expect_token(&Token::RParen)?; + Some(value) + } else { + None + }; + Ok(Some(ColumnOption::AutoIncrement(start))) + } else if dialect_of!(self is MySqlDialect | GenericDialect) { + Ok(Some(ColumnOption::AutoIncrement(None))) + } else { + self.prev_token(); + Ok(None) + } } else if self.parse_keyword(Keyword::AUTOINCREMENT) && dialect_of!(self is SQLiteDialect | GenericDialect) { @@ -9738,8 +9752,40 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::INVISIBLE) { Ok(Some(ColumnOption::Invisible)) } else { - Ok(None) + self.parse_optional_doris_aggregate_column_option() + } + } + + fn parse_optional_doris_aggregate_column_option( + &mut self, + ) -> Result, ParserError> { + if !self.dialect.supports_column_aggregation_function_option() { + return Ok(None); } + + let token = self.peek_token(); + let 
option_name = match token.token { + Token::Word(word) + if matches!( + word.keyword, + Keyword::SUM + | Keyword::MAX + | Keyword::MIN + | Keyword::REPLACE + | Keyword::HLL_UNION + | Keyword::BITMAP_UNION + | Keyword::QUANTILE_UNION + ) => + { + word.value + } + _ => return Ok(None), + }; + + self.next_token(); + Ok(Some(ColumnOption::DialectSpecific(vec![ + Token::make_keyword(&option_name), + ]))) } pub(crate) fn parse_tag(&mut self) -> Result { diff --git a/tests/sqlparser_doris.rs b/tests/sqlparser_doris.rs index 31ed99864..89afab50c 100644 --- a/tests/sqlparser_doris.rs +++ b/tests/sqlparser_doris.rs @@ -21,7 +21,9 @@ #[macro_use] mod test_utils; +use sqlparser::ast::*; use sqlparser::dialect::{Dialect, DorisDialect, GenericDialect}; +use sqlparser::tokenizer::Token; use test_utils::*; fn doris() -> TestedDialects { @@ -40,6 +42,15 @@ fn doris_identifier_and_string_literal_gates() { assert!(dialect.supports_string_literal_backslash_escape()); assert!(dialect.ignores_wildcard_escapes()); assert!(dialect.supports_numeric_prefix()); + assert!(dialect.supports_parenthesized_auto_increment_column_option()); + assert!(dialect.supports_column_aggregation_function_option()); +} + +#[test] +fn generic_supports_doris_aggregate_column_options_only() { + let dialect = GenericDialect {}; + assert!(!dialect.supports_parenthesized_auto_increment_column_option()); + assert!(dialect.supports_column_aggregation_function_option()); } #[test] @@ -53,3 +64,110 @@ fn parse_doris_strings_and_identifiers() { fn doris_and_generic_parse_common_sql_identically() { doris_and_generic().verified_stmt("SELECT 1 AS properties FROM t"); } + +#[test] +fn parse_doris_auto_increment_column() { + doris().verified_stmt("CREATE TABLE t (id BIGINT AUTO_INCREMENT(100), name STRING)"); +} + +#[test] +fn parse_doris_auto_increment_no_start_value() { + doris().verified_stmt("CREATE TABLE t (id BIGINT AUTO_INCREMENT, name STRING)"); +} + +#[test] +fn parse_generic_auto_increment_uses_unified_ast() { + let 
generic = TestedDialects::new(vec![Box::new(GenericDialect {})]); + let sql = "CREATE TABLE t (id BIGINT AUTO_INCREMENT)"; + let stmt = generic.verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { columns, .. }) => { + assert_eq!( + columns[0].options[0].option, + ColumnOption::AutoIncrement(None) + ); + } + _ => panic!("Expected CreateTable"), + } +} + +#[test] +fn ast_doris_auto_increment_with_start() { + let sql = "CREATE TABLE t (id BIGINT AUTO_INCREMENT(100), name STRING)"; + let stmt = doris().verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { columns, .. }) => { + let id_col = &columns[0]; + assert_eq!(id_col.name, Ident::new("id")); + let auto_inc = id_col + .options + .iter() + .find(|o| matches!(o.option, ColumnOption::AutoIncrement(_))); + assert!(auto_inc.is_some()); + match &auto_inc.unwrap().option { + ColumnOption::AutoIncrement(Some(100)) => {} + other => panic!("Expected AutoIncrement(Some(100)), got {:?}", other), + } + } + _ => panic!("Expected CreateTable"), + } +} + +#[test] +fn ast_doris_auto_increment_without_start() { + let sql = "CREATE TABLE t (id BIGINT AUTO_INCREMENT, name STRING)"; + let stmt = doris().verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { columns, .. 
}) => { + let id_col = &columns[0]; + let auto_inc = id_col + .options + .iter() + .find(|o| matches!(o.option, ColumnOption::AutoIncrement(_))); + assert!(auto_inc.is_some()); + match &auto_inc.unwrap().option { + ColumnOption::AutoIncrement(None) => {} + other => panic!("Expected AutoIncrement(None), got {:?}", other), + } + } + _ => panic!("Expected CreateTable"), + } +} + +#[test] +fn parse_doris_aggregate_column_options() { + doris_and_generic() + .verified_stmt("CREATE TABLE t (k BIGINT, v BIGINT SUM, bitmap_col BITMAP BITMAP_UNION)"); +} + +#[test] +fn parse_doris_all_aggregate_column_options() { + doris_and_generic().verified_stmt( + "CREATE TABLE t (k LARGEINT, v1 BIGINT SUM, v2 BIGINT MAX, v3 BIGINT MIN, v4 BIGINT REPLACE, v5 HLL HLL_UNION, v6 BITMAP BITMAP_UNION, v7 QUANTILESTATE QUANTILE_UNION)", + ); +} + +#[test] +fn ast_doris_aggregate_column_option_is_dialect_specific() { + let sql = "CREATE TABLE t (k BIGINT, v BIGINT SUM)"; + let stmt = doris().verified_stmt(sql); + match stmt { + Statement::CreateTable(CreateTable { columns, .. }) => { + let v_col = &columns[1]; + assert_eq!(v_col.name, Ident::new("v")); + let agg_opt = v_col + .options + .iter() + .find(|o| matches!(o.option, ColumnOption::DialectSpecific(_))); + assert!(agg_opt.is_some()); + match &agg_opt.unwrap().option { + ColumnOption::DialectSpecific(tokens) => { + assert_eq!(tokens.len(), 1); + assert_eq!(tokens[0], Token::make_keyword("SUM")); + } + other => panic!("Expected DialectSpecific, got {:?}", other), + } + } + _ => panic!("Expected CreateTable"), + } +}