From ef099f5eeec06afa9ca40a4cb517b8fa2a11aaf7 Mon Sep 17 00:00:00 2001 From: mashuai Date: Wed, 29 Jul 2020 14:26:34 +0800 Subject: [PATCH 1/3] support SQLite dialect --- src/dialect/mod.rs | 2 ++ src/dialect/sqlite.rs | 36 ++++++++++++++++++++++++++++++++ src/tokenizer.rs | 19 +++++++++-------- tests/sqlparser_sqlite.rs | 44 +++++++++++++++++++++++++++++++++++++-- 4 files changed, 90 insertions(+), 11 deletions(-) create mode 100644 src/dialect/sqlite.rs diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index c9ddbedd31..ff28314c89 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -16,6 +16,7 @@ pub mod keywords; mod mssql; mod mysql; mod postgresql; +mod sqlite; use std::fmt::Debug; @@ -24,6 +25,7 @@ pub use self::generic::GenericDialect; pub use self::mssql::MsSqlDialect; pub use self::mysql::MySqlDialect; pub use self::postgresql::PostgreSqlDialect; +pub use self::sqlite::SQLiteDialect; pub trait Dialect: Debug { /// Determine if a character starts a quoted identifier. The default diff --git a/src/dialect/sqlite.rs b/src/dialect/sqlite.rs new file mode 100644 index 0000000000..190b293e16 --- /dev/null +++ b/src/dialect/sqlite.rs @@ -0,0 +1,36 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use crate::dialect::Dialect; + +#[derive(Debug)] +pub struct SQLiteDialect {} + +impl Dialect for SQLiteDialect { + fn is_identifier_start(&self, ch: char) -> bool { + // See https://www.sqlite.org/draft/tokenreq.html + (ch >= 'a' && ch <= 'z') + || (ch >= 'A' && ch <= 'Z') + || ch == '_' + || ch == '$' + || (ch >= '\u{007f}' && ch <= '\u{ffff}') + } + + fn is_identifier_part(&self, ch: char) -> bool { + self.is_identifier_start(ch) || (ch >= '0' && ch <= '9') + } + + // see https://www.sqlite.org/lang_keywords.html + fn is_delimited_identifier_start(&self, ch: char) -> bool { + ch == '`' || ch == '\'' || ch == '"' || ch == '[' + } +} diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 1774025996..f24d624f8a 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -181,7 +181,7 @@ pub struct Word { impl fmt::Display for Word { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self.quote_style { - Some(s) if s == '"' || s == '[' || s == '`' => { + Some(s) if s == '"' || s == '[' || s == '`' || s == '\'' => { write!(f, "{}{}{}", s, self.value, Word::matching_end_quote(s)) } None => f.write_str(&self.value), @@ -192,9 +192,10 @@ impl fmt::Display for Word { impl Word { fn matching_end_quote(ch: char) -> char { match ch { - '"' => '"', // ANSI and most dialects - '[' => ']', // MS SQL - '`' => '`', // MySQL + '"' => '"', // ANSI and most dialects + '[' => ']', // MS SQL + '`' => '`', // MySQL + '\'' => '\'', // SQLite _ => panic!("unexpected quoting style!"), } } @@ -329,11 +330,6 @@ impl<'a> Tokenizer<'a> { let s = self.tokenize_word(ch, chars); Ok(Some(Token::make_word(&s, None))) } - // string - '\'' => { - let s = self.tokenize_single_quoted_string(chars)?; - Ok(Some(Token::SingleQuotedString(s))) - } // delimited (quoted) identifier quote_start if self.dialect.is_delimited_identifier_start(quote_start) => { chars.next(); // consume the opening quote @@ -348,6 +344,11 @@ impl<'a> Tokenizer<'a> { ) } } + // string + '\'' => { + let s = 
self.tokenize_single_quoted_string(chars)?; + Ok(Some(Token::SingleQuotedString(s))) + } // numbers '0'..='9' => { // TODO: https://jakewheat.github.io/sql-overview/sql-2011-foundation-grammar.html#unsigned-numeric-literal diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index ca6a677be8..77cd3b632b 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -15,7 +15,7 @@ //! generic dialect is also tested (on the inputs it can handle). use sqlparser::ast::*; -use sqlparser::dialect::GenericDialect; +use sqlparser::dialect::{GenericDialect, SQLiteDialect}; use sqlparser::test_utils::*; use sqlparser::tokenizer::Token; @@ -87,9 +87,49 @@ fn parse_create_table_auto_increment() { } } +#[test] +fn parse_create_sqlite_quote() { + let sql = "CREATE TABLE `foo` ('a' INT, \"b\" INT, [c] INT)"; + match sqlite().verified_stmt(sql) { + Statement::CreateTable { name, columns, .. } => { + assert_eq!(name.to_string(), "`foo`"); + assert_eq!( + vec![ + ColumnDef { + name: Ident::with_quote('\'', "a"), + data_type: DataType::Int, + collation: None, + options: vec![], + }, + ColumnDef { + name: Ident::with_quote('"', "b"), + data_type: DataType::Int, + collation: None, + options: vec![], + }, + ColumnDef { + name: Ident::with_quote('[', "c"), + data_type: DataType::Int, + collation: None, + options: vec![], + }, + ], + columns + ); + } + _ => unreachable!(), + } +} + +fn sqlite() -> TestedDialects { + TestedDialects { + dialects: vec![Box::new(SQLiteDialect {})], + } +} + fn sqlite_and_generic() -> TestedDialects { TestedDialects { // we don't have a separate SQLite dialect, so test only the generic dialect for now - dialects: vec![Box::new(GenericDialect {})], + dialects: vec![Box::new(SQLiteDialect {}), Box::new(GenericDialect {})], } } From 7bf0843b2476df0dfb18bbd80c6f95018208ddb2 Mon Sep 17 00:00:00 2001 From: mashuai Date: Thu, 30 Jul 2020 08:46:07 +0800 Subject: [PATCH 2/3] add doc comment to SQLiteDialect --- src/dialect/sqlite.rs | 13 
++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/dialect/sqlite.rs b/src/dialect/sqlite.rs index 190b293e16..0837062313 100644 --- a/src/dialect/sqlite.rs +++ b/src/dialect/sqlite.rs @@ -12,10 +12,18 @@ use crate::dialect::Dialect; +/// For resilience when confronted with historical SQL statements, SQLite will sometimes bend the quoting rules above: +/// * If a keyword in single quotes (ex: 'key' or 'glob') is used in a context where an identifier is allowed but where a string literal is not allowed, then the token is understood to be an identifier instead of a string literal. +/// * If a keyword in double quotes (ex: "key" or "glob") is used in a context where it cannot be resolved to an identifier but where a string literal is allowed, then the token is understood to be a string literal instead of an identifier. #[derive(Debug)] pub struct SQLiteDialect {} impl Dialect for SQLiteDialect { + // see https://www.sqlite.org/lang_keywords.html + fn is_delimited_identifier_start(&self, ch: char) -> bool { + ch == '`' || ch == '\'' || ch == '"' || ch == '[' + } + fn is_identifier_start(&self, ch: char) -> bool { // See https://www.sqlite.org/draft/tokenreq.html (ch >= 'a' && ch <= 'z') @@ -28,9 +36,4 @@ impl Dialect for SQLiteDialect { fn is_identifier_part(&self, ch: char) -> bool { self.is_identifier_start(ch) || (ch >= '0' && ch <= '9') } - - // see https://www.sqlite.org/lang_keywords.html - fn is_delimited_identifier_start(&self, ch: char) -> bool { - ch == '`' || ch == '\'' || ch == '"' || ch == '[' - } } From 0f850f492e8e33d0945d8ae9d4ab661a6af2681d Mon Sep 17 00:00:00 2001 From: mashuai Date: Fri, 31 Jul 2020 09:06:43 +0800 Subject: [PATCH 3/3] add comments to sqlite dialect --- src/dialect/sqlite.rs | 7 +++---- src/tokenizer.rs | 19 +++++++++---------- tests/sqlparser_sqlite.rs | 14 ++++---------- 3 files changed, 16 insertions(+), 24 deletions(-) diff --git a/src/dialect/sqlite.rs b/src/dialect/sqlite.rs index 
0837062313..16ec66ac25 100644 --- a/src/dialect/sqlite.rs +++ b/src/dialect/sqlite.rs @@ -12,16 +12,15 @@ use crate::dialect::Dialect; -/// For resilience when confronted with historical SQL statements, SQLite will sometimes bend the quoting rules above: -/// * If a keyword in single quotes (ex: 'key' or 'glob') is used in a context where an identifier is allowed but where a string literal is not allowed, then the token is understood to be an identifier instead of a string literal. -/// * If a keyword in double quotes (ex: "key" or "glob") is used in a context where it cannot be resolved to an identifier but where a string literal is allowed, then the token is understood to be a string literal instead of an identifier. #[derive(Debug)] pub struct SQLiteDialect {} impl Dialect for SQLiteDialect { // see https://www.sqlite.org/lang_keywords.html + // parse `...`, [...] and "..." as identifiers + // TODO: depending on the context, treat '...' as an identifier too. fn is_delimited_identifier_start(&self, ch: char) -> bool { - ch == '`' || ch == '\'' || ch == '"' || ch == '[' + ch == '`' || ch == '"' || ch == '[' } fn is_identifier_start(&self, ch: char) -> bool { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index f24d624f8a..1774025996 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -181,7 +181,7 @@ pub struct Word { impl fmt::Display for Word { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self.quote_style { - Some(s) if s == '"' || s == '[' || s == '`' || s == '\'' => { + Some(s) if s == '"' || s == '[' || s == '`' => { write!(f, "{}{}{}", s, self.value, Word::matching_end_quote(s)) } None => f.write_str(&self.value), @@ -192,10 +192,9 @@ impl fmt::Display for Word { impl Word { fn matching_end_quote(ch: char) -> char { match ch { - '"' => '"', // ANSI and most dialects - '[' => ']', // MS SQL - '`' => '`', // MySQL - '\'' => '\'', // SQLite + '"' => '"', // ANSI and most dialects + '[' => ']', // MS SQL + '`' => '`', // MySQL _ => 
panic!("unexpected quoting style!"), } } @@ -330,6 +329,11 @@ impl<'a> Tokenizer<'a> { let s = self.tokenize_word(ch, chars); Ok(Some(Token::make_word(&s, None))) } + // string + '\'' => { + let s = self.tokenize_single_quoted_string(chars)?; + Ok(Some(Token::SingleQuotedString(s))) + } // delimited (quoted) identifier quote_start if self.dialect.is_delimited_identifier_start(quote_start) => { chars.next(); // consume the opening quote @@ -344,11 +348,6 @@ impl<'a> Tokenizer<'a> { ) } } - // string - '\'' => { - let s = self.tokenize_single_quoted_string(chars)?; - Ok(Some(Token::SingleQuotedString(s))) - } // numbers '0'..='9' => { // TODO: https://jakewheat.github.io/sql-overview/sql-2011-foundation-grammar.html#unsigned-numeric-literal diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 77cd3b632b..2a421e94b2 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -89,26 +89,20 @@ fn parse_create_table_auto_increment() { #[test] fn parse_create_sqlite_quote() { - let sql = "CREATE TABLE `foo` ('a' INT, \"b\" INT, [c] INT)"; + let sql = "CREATE TABLE `PRIMARY` (\"KEY\" INT, [INDEX] INT)"; match sqlite().verified_stmt(sql) { Statement::CreateTable { name, columns, .. } => { - assert_eq!(name.to_string(), "`foo`"); + assert_eq!(name.to_string(), "`PRIMARY`"); assert_eq!( vec![ ColumnDef { - name: Ident::with_quote('\'', "a"), + name: Ident::with_quote('"', "KEY"), data_type: DataType::Int, collation: None, options: vec![], }, ColumnDef { - name: Ident::with_quote('"', "b"), - data_type: DataType::Int, - collation: None, - options: vec![], - }, - ColumnDef { - name: Ident::with_quote('[', "c"), + name: Ident::with_quote('[', "INDEX"), data_type: DataType::Int, collation: None, options: vec![],