Skip to content

Commit 72cff63

Browse files
committed
Roughing out pratt parser logic
1 parent e6e9c8d commit 72cff63

5 files changed

Lines changed: 91 additions & 46 deletions

File tree

examples/acme_parser.rs

Lines changed: 45 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,24 +7,21 @@ use datafusion_sql::ansi::tokenizer::ANSISQLTokenizer;
77
use datafusion_sql::tokenizer::*;
88
use datafusion_sql::parser::*;
99

10-
///
1110
/// This example demonstrates building a custom ACME parser that extends the generic parser
12-
/// by adding support for a factorial operator !!
13-
///
11+
/// by adding support for a factorial expression `!! expr`.
1412
13+
/// Custom SQLToken
1514
#[derive(Debug,PartialEq)]
1615
enum AcmeToken {
17-
/// Factorial operator `!!`
16+
/// Factorial token `!!`
1817
Factorial
1918
}
2019

20+
/// Custom SQLExpr
2121
#[derive(Debug)]
22-
enum AcmeOperator {
23-
Factorial
24-
}
25-
26-
#[derive(Debug)]
27-
enum AcmeTokenizerError {
22+
enum AcmeExpr {
23+
/// Factorial expression
24+
Factorial(Box<SQLExpr<AcmeExpr>>)
2825
}
2926

3027
struct AcmeTokenizer {
@@ -34,6 +31,10 @@ struct AcmeTokenizer {
3431
/// The ACME tokenizer looks for the factorial operator `!!` but delegates everything else to the wrapped generic tokenizer
3532
impl SQLTokenizer<AcmeToken> for AcmeTokenizer {
3633

34+
fn precedence(&self, token: &SQLToken<AcmeToken>) -> usize {
35+
unimplemented!()
36+
}
37+
3738
fn peek_token(&self, chars: &mut Peekable<Chars>) -> Result<Option<SQLToken<AcmeToken>>, TokenizerError<AcmeToken>> {
3839
unimplemented!()
3940
}
@@ -53,27 +54,55 @@ impl SQLTokenizer<AcmeToken> for AcmeTokenizer {
5354
},
5455
None => Ok(Some(SQLToken::Not))
5556
}
56-
},
57+
}
5758
_ => self.generic.next_token(chars)
5859
}
5960
_ => self.generic.next_token(chars)
6061
}
6162
}
6263
}
6364

65+
struct AcmeParser<'a> {
66+
chars: Peekable<Chars<'a>>
67+
}
68+
69+
impl<'a> AcmeParser<'a> {
70+
71+
pub fn new(sql: &'a str) -> Self {
72+
AcmeParser {
73+
chars: sql.chars().peekable()
74+
}
75+
}
76+
}
77+
78+
impl<'a> SQLParser<AcmeToken, AcmeExpr> for AcmeParser<'a> {
79+
80+
fn parse_prefix(&mut self) -> Result<Box<SQLExpr<AcmeExpr>>, ParserError<AcmeToken>> {
81+
unimplemented!()
82+
}
83+
84+
fn parse_infix(&mut self, left: &SQLExpr<AcmeExpr>, precedence: usize) -> Result<Option<Box<SQLExpr<AcmeExpr>>>, ParserError<AcmeToken>> {
85+
unimplemented!()
86+
}
87+
}
6488

6589

6690
fn main() {
6791

6892
let sql = "1 + !! 5 * 2";
6993

70-
let mut acme_tokenizer = AcmeTokenizer {
71-
generic: ANSISQLTokenizer { }
72-
};
94+
let acme_parser = AcmeParser::new(sql);
95+
7396

74-
let tokens = tokenize(&sql, &mut acme_tokenizer).unwrap();
97+
//acme_parser
7598

76-
println!("tokens = {:?}", tokens);
99+
// let mut acme_tokenizer = AcmeTokenizer {
100+
// generic: ANSISQLTokenizer { }
101+
// };
102+
//
103+
// let tokens = tokenize(&sql, &mut acme_tokenizer).unwrap();
104+
//
105+
// println!("tokens = {:?}", tokens);
77106

78107

79108

src/ansi/parser.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ impl<'a, TokenType, ExprType> SQLParser<TokenType, ExprType> for ANSISQLParser<'
2929
}
3030
}
3131

32-
fn parse_infix(&mut self, left: SQLExpr<ExprType>) -> Result<Option<Box<SQLExpr<ExprType>>>, ParserError<TokenType>> {
32+
fn parse_infix(&mut self, left: &SQLExpr<ExprType>, precedence: usize) -> Result<Option<Box<SQLExpr<ExprType>>>, ParserError<TokenType>> {
3333
unimplemented!()
3434
}
3535
}

src/ansi/tokenizer.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ pub struct ANSISQLTokenizer {}
1010
impl<TokenType> SQLTokenizer<TokenType> for ANSISQLTokenizer
1111
where TokenType: Debug + PartialEq {
1212

13+
fn precedence(&self, token: &SQLToken<TokenType>) -> usize {
14+
unimplemented!()
15+
}
16+
1317
fn peek_token(&self, chars: &mut Peekable<Chars>) -> Result<Option<SQLToken<TokenType>>, TokenizerError<TokenType>> {
1418
unimplemented!()
1519
}

src/parser.rs

Lines changed: 38 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
use std::cmp::PartialEq;
22
use std::fmt::Debug;
3+
use std::rc::Rc;
4+
use std::str::Chars;
5+
use std::iter::Peekable;
36

47
use super::tokenizer::*;
58

@@ -105,41 +108,47 @@ impl<TokenType> From<TokenizerError<TokenType>> for ParserError<TokenType>
105108

106109

107110
pub trait SQLParser<TokenType, ExprType>
108-
where TokenType: Debug + PartialEq, ExprType: Debug + PartialEq {
111+
where TokenType: Debug + PartialEq, ExprType: Debug {
109112

110113
/// parse the prefix and stop once an infix operator is reached
111114
fn parse_prefix(&mut self) -> Result<Box<SQLExpr<ExprType>>, ParserError<TokenType>> ;
112115
/// parse the next infix expression, returning None if the precedence has changed
113-
fn parse_infix(&mut self, left: SQLExpr<ExprType>) -> Result<Option<Box<SQLExpr<ExprType>>>, ParserError<TokenType>>;
116+
fn parse_infix(&mut self, left: &SQLExpr<ExprType>, precedence: usize) -> Result<Option<Box<SQLExpr<ExprType>>>, ParserError<TokenType>>;
117+
}
118+
119+
120+
121+
struct PrattParser<'a, TokenType, ExprType> {
122+
chars: Peekable<Chars<'a>>,
123+
tokenizer: Rc<SQLTokenizer<TokenType>>,
124+
parser: SQLParser<TokenType, ExprType>
125+
}
126+
127+
impl<'a, TokenType, ExprType> PrattParser<'a, TokenType, ExprType>
128+
where TokenType: Debug + PartialEq, ExprType: Debug {
129+
130+
fn parse_expr(&mut self) -> Result<Box<SQLExpr<ExprType>>, ParserError<TokenType>> {
131+
132+
let precedence: usize = 0;
133+
134+
let mut expr = self.parser.parse_prefix()?;
135+
136+
while let Some(token) = self.tokenizer.peek_token(&mut self.chars)? {
137+
138+
let next_precedence = self.tokenizer.precedence(&token);
139+
140+
if precedence >= next_precedence {
141+
break;
142+
}
143+
144+
expr = self.parser.parse_infix(&expr, next_precedence)?.unwrap(); //TODO: fix me
145+
}
146+
147+
Ok(expr)
148+
}
149+
114150
}
115151

116-
//
117-
//
118-
//struct GenericParser {
119-
// tokenizer: SQLTokenizer
120-
//}
121-
//
122-
//impl GenericParser {
123-
//
124-
// fn parse_expr(&mut self, precedence: u8) -> Result<Box<SQLExpr>, ParserError> {
125-
//
126-
// let mut expr = self.parse_prefix()?;
127-
//
128-
// // loop while there are more tokens and until the precedence changes
129-
// while let Some(token) = self.tokenizer.peek_token()? {
130-
//
131-
// let next_precedence = self.get_precedence(&token);
132-
//
133-
// if precedence >= next_precedence {
134-
// break;
135-
// }
136-
//
137-
// expr = self.parse_infix(expr, next_precedence)?;
138-
// }
139-
//
140-
// Ok(expr)
141-
// }
142-
//
143152
// fn parse_prefix(&mut self) -> Result<Box<SQLExpr>, ParserError> {
144153
//
145154
// match self.tokenizer.peek_token()? {

src/tokenizer.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ pub enum SQLToken<T: Debug + PartialEq> {
5050
pub trait SQLTokenizer<TokenType>
5151
where TokenType: Debug + PartialEq {
5252

53+
/// get the precedence of a token
54+
fn precedence(&self, token: &SQLToken<TokenType>) -> usize;
55+
5356
/// return a reference to the next token but do not advance the index
5457
fn peek_token(&self, chars: &mut Peekable<Chars>) -> Result<Option<SQLToken<TokenType>>, TokenizerError<TokenType>>;
5558

0 commit comments

Comments
 (0)