Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 63 additions & 1 deletion src/ast/ddl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ use serde::{Deserialize, Serialize};
use sqlparser_derive::{Visit, VisitMut};

use crate::ast::value::escape_single_quote_string;
use crate::ast::{display_comma_separated, display_separated, DataType, Expr, Ident, ObjectName};
use crate::ast::{
display_comma_separated, display_separated, DataType, Expr, Ident, ObjectName, SequenceOptions,
};
use crate::tokenizer::Token;

/// An `ALTER TABLE` (`Statement::AlterTable`) operation
Expand Down Expand Up @@ -575,6 +577,13 @@ pub enum ColumnOption {
CharacterSet(ObjectName),
Comment(String),
OnUpdate(Expr),
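/// A `GENERATED ...` clause that follows a column definition in a `CREATE
/// TABLE` statement: either an identity column
/// (`GENERATED { ALWAYS | BY DEFAULT } AS IDENTITY [ ( sequence_options ) ]`)
/// or a stored generated column (`GENERATED ALWAYS AS ( generation_expr ) STORED`).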
Generated {
generated_as: GeneratedAs,
sequence_options: Option<Vec<SequenceOptions>>,
generation_expr: Option<Expr>,
},
}

impl fmt::Display for ColumnOption {
Expand Down Expand Up @@ -610,10 +619,63 @@ impl fmt::Display for ColumnOption {
CharacterSet(n) => write!(f, "CHARACTER SET {n}"),
Comment(v) => write!(f, "COMMENT '{}'", escape_single_quote_string(v)),
OnUpdate(expr) => write!(f, "ON UPDATE {expr}"),
Generated {
generated_as,
sequence_options,
generation_expr,
} => match generated_as {
GeneratedAs::Always => {
write!(f, "GENERATED ALWAYS AS IDENTITY")?;
if sequence_options.is_some() {
let so = sequence_options.as_ref().unwrap();
if !so.is_empty() {
write!(f, " (")?;
}
for sequence_option in so {
write!(f, "{sequence_option}")?;
}
if !so.is_empty() {
write!(f, " )")?;
}
}
Ok(())
}
GeneratedAs::ByDefault => {
write!(f, "GENERATED BY DEFAULT AS IDENTITY")?;
if sequence_options.is_some() {
let so = sequence_options.as_ref().unwrap();
if !so.is_empty() {
write!(f, " (")?;
}
for sequence_option in so {
write!(f, "{sequence_option}")?;
}
if !so.is_empty() {
write!(f, " )")?;
}
}
Ok(())
}
GeneratedAs::ExpStored => {
let expr = generation_expr.as_ref().unwrap();
write!(f, "GENERATED ALWAYS AS ({expr}) STORED")
}
},
}
}
}

/// Selects which form of `GENERATED ...` clause a [`ColumnOption::Generated`]
/// represents.
///
/// `ExpStored` is PostgreSQL specific.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum GeneratedAs {
/// Displayed as `GENERATED ALWAYS AS IDENTITY`, optionally followed by a
/// parenthesised list of sequence options.
Always,
/// Displayed as `GENERATED BY DEFAULT AS IDENTITY`, optionally followed by a
/// parenthesised list of sequence options.
ByDefault,
/// Displayed as `GENERATED ALWAYS AS (<generation_expr>) STORED`.
ExpStored,
}

fn display_constraint_name(name: &'_ Option<Ident>) -> impl fmt::Display + '_ {
struct ConstraintName<'a>(&'a Option<Ident>);
impl<'a> fmt::Display for ConstraintName<'a> {
Expand Down
2 changes: 1 addition & 1 deletion src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ pub use self::data_type::{
};
pub use self::ddl::{
AlterColumnOperation, AlterIndexOperation, AlterTableOperation, ColumnDef, ColumnOption,
ColumnOptionDef, IndexType, KeyOrIndexDisplay, ReferentialAction, TableConstraint,
ColumnOptionDef, GeneratedAs, IndexType, KeyOrIndexDisplay, ReferentialAction, TableConstraint,
};
pub use self::operator::{BinaryOperator, UnaryOperator};
pub use self::query::{
Expand Down
7 changes: 2 additions & 5 deletions src/dialect/ansi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,10 @@ pub struct AnsiDialect {}

impl Dialect for AnsiDialect {
fn is_identifier_start(&self, ch: char) -> bool {
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch)
ch.is_ascii_lowercase() || ch.is_ascii_uppercase()
}

fn is_identifier_part(&self, ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ('0'..='9').contains(&ch)
|| ch == '_'
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch.is_ascii_digit() || ch == '_'
}
}
8 changes: 4 additions & 4 deletions src/dialect/bigquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ impl Dialect for BigQueryDialect {
}

fn is_identifier_start(&self, ch: char) -> bool {
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_'
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_'
}

fn is_identifier_part(&self, ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ('0'..='9').contains(&ch)
ch.is_ascii_lowercase()
|| ch.is_ascii_uppercase()
|| ch.is_ascii_digit()
|| ch == '_'
|| ch == '-'
}
Expand Down
4 changes: 2 additions & 2 deletions src/dialect/clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ pub struct ClickHouseDialect {}
impl Dialect for ClickHouseDialect {
fn is_identifier_start(&self, ch: char) -> bool {
// See https://clickhouse.com/docs/en/sql-reference/syntax/#syntax-identifiers
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_'
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_'
}

fn is_identifier_part(&self, ch: char) -> bool {
self.is_identifier_start(ch) || ('0'..='9').contains(&ch)
self.is_identifier_start(ch) || ch.is_ascii_digit()
}
}
12 changes: 4 additions & 8 deletions src/dialect/generic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,13 @@ pub struct GenericDialect;

impl Dialect for GenericDialect {
fn is_identifier_start(&self, ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ch == '_'
|| ch == '#'
|| ch == '@'
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_' || ch == '#' || ch == '@'
}

fn is_identifier_part(&self, ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ('0'..='9').contains(&ch)
ch.is_ascii_lowercase()
|| ch.is_ascii_uppercase()
|| ch.is_ascii_digit()
|| ch == '@'
|| ch == '$'
|| ch == '#'
Expand Down
11 changes: 4 additions & 7 deletions src/dialect/hive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,13 @@ impl Dialect for HiveDialect {
}

fn is_identifier_start(&self, ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ('0'..='9').contains(&ch)
|| ch == '$'
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch.is_ascii_digit() || ch == '$'
}

fn is_identifier_part(&self, ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ('0'..='9').contains(&ch)
ch.is_ascii_lowercase()
|| ch.is_ascii_uppercase()
|| ch.is_ascii_digit()
|| ch == '_'
|| ch == '$'
|| ch == '{'
Expand Down
12 changes: 4 additions & 8 deletions src/dialect/mssql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,13 @@ impl Dialect for MsSqlDialect {
fn is_identifier_start(&self, ch: char) -> bool {
// See https://docs.microsoft.com/en-us/sql/relational-databases/databases/database-identifiers?view=sql-server-2017#rules-for-regular-identifiers
// We don't support non-latin "letters" currently.
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ch == '_'
|| ch == '#'
|| ch == '@'
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_' || ch == '#' || ch == '@'
}

fn is_identifier_part(&self, ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ('0'..='9').contains(&ch)
ch.is_ascii_lowercase()
|| ch.is_ascii_uppercase()
|| ch.is_ascii_digit()
|| ch == '@'
|| ch == '$'
|| ch == '#'
Expand Down
6 changes: 3 additions & 3 deletions src/dialect/mysql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,16 @@ impl Dialect for MySqlDialect {
// See https://dev.mysql.com/doc/refman/8.0/en/identifiers.html.
// We don't yet support identifiers beginning with numbers, as that
// makes it hard to distinguish numeric literals.
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
ch.is_ascii_lowercase()
|| ch.is_ascii_uppercase()
|| ch == '_'
|| ch == '$'
|| ch == '@'
|| ('\u{0080}'..='\u{ffff}').contains(&ch)
}

fn is_identifier_part(&self, ch: char) -> bool {
self.is_identifier_start(ch) || ('0'..='9').contains(&ch)
self.is_identifier_start(ch) || ch.is_ascii_digit()
}

fn is_delimited_identifier_start(&self, ch: char) -> bool {
Expand Down
8 changes: 4 additions & 4 deletions src/dialect/postgresql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@ impl Dialect for PostgreSqlDialect {
// See https://www.postgresql.org/docs/11/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
// We don't yet support identifiers beginning with "letters with
// diacritical marks and non-Latin letters"
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_'
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_'
}

fn is_identifier_part(&self, ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ('0'..='9').contains(&ch)
ch.is_ascii_lowercase()
|| ch.is_ascii_uppercase()
|| ch.is_ascii_digit()
|| ch == '$'
|| ch == '_'
}
Expand Down
8 changes: 4 additions & 4 deletions src/dialect/snowflake.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@ pub struct SnowflakeDialect;
impl Dialect for SnowflakeDialect {
// see https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html
fn is_identifier_start(&self, ch: char) -> bool {
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_'
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_'
}

fn is_identifier_part(&self, ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ('0'..='9').contains(&ch)
ch.is_ascii_lowercase()
|| ch.is_ascii_uppercase()
|| ch.is_ascii_digit()
|| ch == '$'
|| ch == '_'
}
Expand Down
6 changes: 3 additions & 3 deletions src/dialect/sqlite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,15 @@ impl Dialect for SQLiteDialect {

fn is_identifier_start(&self, ch: char) -> bool {
// See https://www.sqlite.org/draft/tokenreq.html
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
ch.is_ascii_lowercase()
|| ch.is_ascii_uppercase()
|| ch == '_'
|| ch == '$'
|| ('\u{007f}'..='\u{ffff}').contains(&ch)
}

fn is_identifier_part(&self, ch: char) -> bool {
self.is_identifier_start(ch) || ('0'..='9').contains(&ch)
self.is_identifier_start(ch) || ch.is_ascii_digit()
}

fn parse_statement(&self, parser: &mut Parser) -> Option<Result<Statement, ParserError>> {
Expand Down
2 changes: 2 additions & 0 deletions src/keywords.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ define_keywords!(
ALL,
ALLOCATE,
ALTER,
ALWAYS,
ANALYZE,
AND,
ANTI,
Expand Down Expand Up @@ -270,6 +271,7 @@ define_keywords!(
FUNCTION,
FUNCTIONS,
FUSION,
GENERATED,
GET,
GLOBAL,
GRANT,
Expand Down
49 changes: 49 additions & 0 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3567,6 +3567,55 @@ impl<'a> Parser<'a> {
{
let expr = self.parse_expr()?;
Ok(Some(ColumnOption::OnUpdate(expr)))
} else if self.parse_keyword(Keyword::GENERATED) {
self.parse_optional_column_option_generated()
} else {
Ok(None)
}
}
/// Parses the remainder of a `GENERATED ...` column option. The caller has
/// already matched the `GENERATED` keyword before invoking this method.
///
/// Forms tried, in order:
/// - `ALWAYS AS IDENTITY [ ( sequence_options ) ]`
/// - `BY DEFAULT AS IDENTITY [ ( sequence_options ) ]`
/// - `ALWAYS AS ( expr ) [ STORED ]`
///
/// Returns `Ok(None)` when none of these forms matches. Errors from the nested
/// sequence-option / expression parsers and from a missing `)` are propagated.
fn parse_optional_column_option_generated(
&mut self,
) -> Result<Option<ColumnOption>, ParserError> {
if self.parse_keywords(&[Keyword::ALWAYS, Keyword::AS, Keyword::IDENTITY]) {
let mut sequence_options = vec![];
// The parenthesised option list is optional: `expect_token` is used only
// as a probe and its error is discarded.
// NOTE(review): presumably `expect_token` leaves the token stream
// untouched on failure — confirm, or use a non-erroring probe instead.
if self.expect_token(&Token::LParen).is_ok() {
sequence_options = self.parse_create_sequence_options()?;
self.expect_token(&Token::RParen)?;
}
Ok(Some(ColumnOption::Generated {
generated_as: GeneratedAs::Always,
// Always `Some`; the vector is empty when no `( ... )` list was given.
sequence_options: Some(sequence_options),
generation_expr: None,
}))
} else if self.parse_keywords(&[
Keyword::BY,
Keyword::DEFAULT,
Keyword::AS,
Keyword::IDENTITY,
]) {
let mut sequence_options = vec![];
// Same optional `( sequence_options )` handling as the `ALWAYS` branch.
if self.expect_token(&Token::LParen).is_ok() {
sequence_options = self.parse_create_sequence_options()?;
self.expect_token(&Token::RParen)?;
}
Ok(Some(ColumnOption::Generated {
generated_as: GeneratedAs::ByDefault,
// Always `Some`; the vector is empty when no `( ... )` list was given.
sequence_options: Some(sequence_options),
generation_expr: None,
}))
// Only reached when `ALWAYS AS IDENTITY` above did not match, so this is
// the generated-expression form.
} else if self.parse_keywords(&[Keyword::ALWAYS, Keyword::AS]) {
if self.expect_token(&Token::LParen).is_ok() {
let expr = self.parse_expr()?;
self.expect_token(&Token::RParen)?;
// `STORED` is optional on input: the match result is deliberately ignored.
let _ = self.parse_keywords(&[Keyword::STORED]);
Ok(Some(ColumnOption::Generated {
generated_as: GeneratedAs::ExpStored,
sequence_options: None,
generation_expr: Some(expr),
}))
} else {
// NOTE(review): `ALWAYS AS` has presumably been consumed at this point,
// yet `Ok(None)` is returned rather than an error — confirm this is
// what the caller expects.
Ok(None)
}
} else {
Ok(None)
}
Expand Down
2 changes: 1 addition & 1 deletion src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,7 @@ impl<'a> Tokenizer<'a> {
let word = self.tokenize_word(ch, chars);

// TODO: implement parsing of exponent here
if word.chars().all(|x| ('0'..='9').contains(&x) || x == '.') {
if word.chars().all(|x| x.is_ascii_digit() || x == '.') {
let mut inner_state = State {
peekable: word.chars().peekable(),
line: 0,
Expand Down
8 changes: 4 additions & 4 deletions tests/sqlparser_custom_dialect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,13 +126,13 @@ fn custom_statement_parser() -> Result<(), ParserError> {
}

fn is_identifier_start(ch: char) -> bool {
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_'
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_'
}

fn is_identifier_part(ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ('0'..='9').contains(&ch)
ch.is_ascii_lowercase()
|| ch.is_ascii_uppercase()
|| ch.is_ascii_digit()
|| ch == '$'
|| ch == '_'
}
Loading