Skip to content

Commit 5bac9fd

Browse files
committed
Remove some non ANSI SQL support
1 parent 751a6f0 commit 5bac9fd

3 files changed

Lines changed: 75 additions & 164 deletions

File tree

src/sqlast.rs

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -69,12 +69,6 @@ pub enum ASTNode {
6969
name: String,
7070
/// Optional schema
7171
columns: Vec<SQLColumnDef>,
72-
/// File type (CSV or Parquet)
73-
file_type: FileType,
74-
/// For CSV files, indicate whether the file has a header row or not
75-
header_row: bool,
76-
/// Path to file or directory containing files
77-
location: String,
7872
},
7973
}
8074

@@ -101,22 +95,29 @@ pub enum SQLType {
10195
Varbinary(usize),
10296
/// Large binary object e.g. BLOB(1000)
10397
Blob(usize),
104-
105-
106-
//TODO: remove these non ANSI sql types
107-
98+
/// Decimal type with precision and optional scale e.g. DECIMAL(10,2)
99+
Decimal(usize, Option<usize>),
100+
/// Small integer
101+
SmallInt,
102+
/// Integer
103+
Int,
104+
/// Big integer
105+
BigInt,
106+
/// Floating point with precision e.g. FLOAT(8)
107+
Float(usize),
108+
/// Floating point e.g. REAL
109+
Real,
110+
/// Double e.g. DOUBLE PRECISION
111+
Double,
112+
/// Boolean
108113
Boolean,
109-
UInt8,
110-
UInt16,
111-
UInt32,
112-
UInt64,
113-
Int8,
114-
Int16,
115-
Int32,
116-
Int64,
117-
Float32,
118-
Double64,
119-
Utf8(usize),
114+
/// Date
115+
Date,
116+
/// Time
117+
Time,
118+
/// Timestamp
119+
Timestamp,
120+
120121
}
121122

122123
/// SQL Operator

src/sqlparser.rs

Lines changed: 34 additions & 128 deletions
Original file line numberDiff line numberDiff line change
@@ -309,13 +309,8 @@ impl Parser {
309309
true
310310
}
311311

312-
// fn parse_identifier(&mut self) -> Result<ASTNode::SQLIdentifier, Err> {
313-
// let expr = self.parse_expr()?;
314-
// match expr {
315-
// Some(ASTNode::SQLIdentifier { .. }) => Ok(expr),
316-
// _ => parser_err!(format!("Expected identifier but found {:?}", expr)))
317-
// }
318-
// }
312+
313+
//TODO: this function is inconsistent and sometimes returns bool and sometimes fails
319314

320315
/// Consume the next token if it matches the expected token, otherwise return an error
321316
fn consume_token(&mut self, expected: &Token) -> Result<bool, ParserError> {
@@ -336,7 +331,7 @@ impl Parser {
336331

337332
/// Parse a SQL CREATE statement
338333
fn parse_create(&mut self) -> Result<ASTNode, ParserError> {
339-
if self.parse_keywords(vec!["EXTERNAL", "TABLE"]) {
334+
if self.parse_keywords(vec!["TABLE"]) {
340335
match self.next_token() {
341336
Some(Token::Identifier(id)) => {
342337
// parse optional column list (schema)
@@ -388,39 +383,9 @@ impl Parser {
388383
}
389384
}
390385

391-
//println!("Parsed {} column defs", columns.len());
392-
393-
let mut headers = true;
394-
let file_type: FileType = if self.parse_keywords(vec!["STORED", "AS", "CSV"]) {
395-
if self.parse_keywords(vec!["WITH", "HEADER", "ROW"]) {
396-
headers = true;
397-
} else if self.parse_keywords(vec!["WITHOUT", "HEADER", "ROW"]) {
398-
headers = false;
399-
}
400-
FileType::CSV
401-
} else if self.parse_keywords(vec!["STORED", "AS", "NDJSON"]) {
402-
FileType::NdJson
403-
} else if self.parse_keywords(vec!["STORED", "AS", "PARQUET"]) {
404-
FileType::Parquet
405-
} else {
406-
return parser_err!(format!(
407-
"Expected 'STORED AS' clause, found {:?}",
408-
self.peek_token()
409-
));
410-
};
411-
412-
let location: String = if self.parse_keywords(vec!["LOCATION"]) {
413-
self.parse_literal_string()?
414-
} else {
415-
return parser_err!("Missing 'LOCATION' clause");
416-
};
417-
418386
Ok(ASTNode::SQLCreateTable {
419387
name: id,
420388
columns,
421-
file_type,
422-
header_row: headers,
423-
location,
424389
})
425390
}
426391
_ => parser_err!(format!(
@@ -459,32 +424,35 @@ impl Parser {
459424
match self.next_token() {
460425
Some(Token::Keyword(k)) => match k.to_uppercase().as_ref() {
461426
"BOOLEAN" => Ok(SQLType::Boolean),
462-
"UINT8" => Ok(SQLType::UInt8),
463-
"UINT16" => Ok(SQLType::UInt16),
464-
"UINT32" => Ok(SQLType::UInt32),
465-
"UINT64" => Ok(SQLType::UInt64),
466-
"INT8" => Ok(SQLType::Int8),
467-
"INT16" => Ok(SQLType::Int16),
468-
"INT32" | "INT" | "INTEGER" => Ok(SQLType::Int32),
469-
"INT64" | "LONG" => Ok(SQLType::Int64),
470-
"FLOAT32" | "FLOAT" => Ok(SQLType::Float32),
471-
"FLOAT64" | "DOUBLE" => Ok(SQLType::Double64),
472-
"UTF8" | "VARCHAR" | "STRING" => {
473-
// optional length
474-
if self.consume_token(&Token::LParen)? {
475-
let n = self.parse_literal_int()?;
476-
self.consume_token(&Token::RParen)?;
477-
Ok(SQLType::Utf8(n as usize))
478-
} else {
479-
Ok(SQLType::Utf8(100 as usize))
480-
}
481-
}
427+
"FLOAT" => Ok(SQLType::Float(self.parse_precision()?)),
428+
"REAL" => Ok(SQLType::Real),
429+
"DOUBLE" => Ok(SQLType::Double),
430+
"SMALLINT" => Ok(SQLType::SmallInt),
431+
"INT" | "INTEGER" => Ok(SQLType::Int),
432+
"BIGINT" => Ok(SQLType::BigInt),
433+
"VARCHAR" => Ok(SQLType::Varchar(self.parse_precision()?)),
482434
_ => parser_err!(format!("Invalid data type '{:?}'", k)),
483435
},
484436
other => parser_err!(format!("Invalid data type: '{:?}'", other)),
485437
}
486438
}
487439

440+
fn parse_precision(&mut self) -> Result<usize, ParserError> {
441+
//TODO: error handling
442+
Ok(self.parse_optional_precision()?.unwrap())
443+
}
444+
445+
fn parse_optional_precision(&mut self) -> Result<Option<usize>, ParserError> {
446+
if self.consume_token(&Token::LParen)? {
447+
let n = self.parse_literal_int()?;
448+
//TODO: check return value of reading rparen
449+
self.consume_token(&Token::RParen)?;
450+
Ok(Some(n as usize))
451+
} else {
452+
Ok(None)
453+
}
454+
}
455+
488456
/// Parse a SELECT statement
489457
fn parse_select(&mut self) -> Result<ASTNode, ParserError> {
490458
let projection = self.parse_expr_list()?;
@@ -807,15 +775,15 @@ mod tests {
807775

808776
#[test]
809777
fn parse_cast() {
810-
let sql = String::from("SELECT CAST(id AS DOUBLE) FROM customer");
778+
let sql = String::from("SELECT CAST(id AS BIGINT) FROM customer");
811779
let ast = parse_sql(&sql);
812780
match ast {
813781
ASTNode::SQLSelect { projection, .. } => {
814782
assert_eq!(1, projection.len());
815783
assert_eq!(
816784
ASTNode::SQLCast {
817785
expr: Box::new(ASTNode::SQLIdentifier("id".to_string())),
818-
data_type: SQLType::Double64
786+
data_type: SQLType::BigInt
819787
},
820788
projection[0]
821789
);
@@ -825,103 +793,41 @@ mod tests {
825793
}
826794

827795
#[test]
828-
fn parse_create_external_table_csv_with_header_row() {
796+
fn parse_create_table() {
829797
let sql = String::from(
830-
"CREATE EXTERNAL TABLE uk_cities (\
798+
"CREATE TABLE uk_cities (\
831799
name VARCHAR(100) NOT NULL,\
832800
lat DOUBLE NULL,\
833-
lng DOUBLE NULL) \
834-
STORED AS CSV WITH HEADER ROW \
835-
LOCATION '/mnt/ssd/uk_cities.csv'",
801+
lng DOUBLE NULL)",
836802
);
837803
let ast = parse_sql(&sql);
838804
match ast {
839805
ASTNode::SQLCreateTable {
840806
name,
841807
columns,
842-
file_type,
843-
header_row,
844-
location,
845808
} => {
846809
assert_eq!("uk_cities", name);
847810
assert_eq!(3, columns.len());
848-
assert_eq!(FileType::CSV, file_type);
849-
assert_eq!(true, header_row);
850-
assert_eq!("/mnt/ssd/uk_cities.csv", location);
851811

852812
let c_name = &columns[0];
853813
assert_eq!("name", c_name.name);
854-
assert_eq!(SQLType::Utf8(100), c_name.data_type);
814+
assert_eq!(SQLType::Varchar(100), c_name.data_type);
855815
assert_eq!(false, c_name.allow_null);
856816

857817
let c_lat = &columns[1];
858818
assert_eq!("lat", c_lat.name);
859-
assert_eq!(SQLType::Double64, c_lat.data_type);
819+
assert_eq!(SQLType::Double, c_lat.data_type);
860820
assert_eq!(true, c_lat.allow_null);
861821

862822
let c_lng = &columns[2];
863823
assert_eq!("lng", c_lng.name);
864-
assert_eq!(SQLType::Double64, c_lng.data_type);
824+
assert_eq!(SQLType::Double, c_lng.data_type);
865825
assert_eq!(true, c_lng.allow_null);
866826
}
867827
_ => assert!(false),
868828
}
869829
}
870830

871-
#[test]
872-
fn parse_create_external_table_csv_without_header_row() {
873-
let sql = String::from(
874-
"CREATE EXTERNAL TABLE uk_cities (\
875-
name VARCHAR(100) NOT NULL,\
876-
lat DOUBLE NOT NULL,\
877-
lng DOUBLE NOT NULL) \
878-
STORED AS CSV WITHOUT HEADER ROW \
879-
LOCATION '/mnt/ssd/uk_cities.csv'",
880-
);
881-
let ast = parse_sql(&sql);
882-
match ast {
883-
ASTNode::SQLCreateTable {
884-
name,
885-
columns,
886-
file_type,
887-
header_row,
888-
location,
889-
} => {
890-
assert_eq!("uk_cities", name);
891-
assert_eq!(3, columns.len());
892-
assert_eq!(FileType::CSV, file_type);
893-
assert_eq!(false, header_row);
894-
assert_eq!("/mnt/ssd/uk_cities.csv", location);
895-
}
896-
_ => assert!(false),
897-
}
898-
}
899-
900-
#[test]
901-
fn parse_create_external_table_parquet() {
902-
let sql = String::from(
903-
"CREATE EXTERNAL TABLE uk_cities \
904-
STORED AS PARQUET \
905-
LOCATION '/mnt/ssd/uk_cities.parquet'",
906-
);
907-
let ast = parse_sql(&sql);
908-
match ast {
909-
ASTNode::SQLCreateTable {
910-
name,
911-
columns,
912-
file_type,
913-
location,
914-
..
915-
} => {
916-
assert_eq!("uk_cities", name);
917-
assert_eq!(0, columns.len());
918-
assert_eq!(FileType::Parquet, file_type);
919-
assert_eq!("/mnt/ssd/uk_cities.parquet", location);
920-
}
921-
_ => assert!(false),
922-
}
923-
}
924-
925831
#[test]
926832
fn parse_scalar_function_in_projection() {
927833
let sql = String::from("SELECT sqrt(id) FROM foo");

src/sqltokenizer.rs

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -108,27 +108,31 @@ lazy_static! {
108108
m.insert("ROW");
109109

110110
// SQL types
111-
m.insert("STRING");
111+
m.insert("CHAR");
112+
m.insert("CHARACTER");
113+
m.insert("VARYING");
114+
m.insert("LARGE");
115+
m.insert("OBJECT");
112116
m.insert("VARCHAR");
117+
m.insert("CLOB");
118+
m.insert("BINARY");
119+
m.insert("VARBINARY");
120+
m.insert("BLOB");
113121
m.insert("FLOAT");
122+
m.insert("REAL");
114123
m.insert("DOUBLE");
124+
m.insert("PRECISION");
115125
m.insert("INT");
116126
m.insert("INTEGER");
117-
m.insert("LONG");
118-
119-
// Arrow native types
127+
m.insert("SMALLINT");
128+
m.insert("BIGINT");
129+
m.insert("NUMERIC");
130+
m.insert("DECIMAL");
131+
m.insert("DEC");
120132
m.insert("BOOLEAN");
121-
m.insert("UINT8");
122-
m.insert("UINT16");
123-
m.insert("UINT32");
124-
m.insert("UINT64");
125-
m.insert("INT8");
126-
m.insert("INT16");
127-
m.insert("INT32");
128-
m.insert("INT64");
129-
m.insert("FLOAT32");
130-
m.insert("FLOAT64");
131-
m.insert("UTF8");
133+
m.insert("DATE");
134+
m.insert("TIME");
135+
m.insert("TIMESTAMP");
132136

133137
m
134138
};

0 commit comments

Comments
 (0)