@@ -34,6 +34,7 @@ use serde::{Deserialize, Serialize};
3434#[ cfg( feature = "visitor" ) ]
3535use sqlparser_derive:: Visit ;
3636
37+ use crate :: ast:: DollarQuotedString ;
3738use crate :: dialect:: SnowflakeDialect ;
3839use crate :: dialect:: { Dialect , MySqlDialect } ;
3940use crate :: keywords:: { Keyword , ALL_KEYWORDS , ALL_KEYWORDS_INDEX } ;
@@ -55,6 +56,8 @@ pub enum Token {
5556 SingleQuotedString ( String ) ,
5657 /// Double quoted string: i.e: "string"
5758 DoubleQuotedString ( String ) ,
59+ /// Dollar quoted string: i.e: $$string$$ or $tag_name$string$tag_name$
60+ DollarQuotedString ( DollarQuotedString ) ,
5861 /// "National" string literal: i.e: N'string'
5962 NationalStringLiteral ( String ) ,
6063 /// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second'
@@ -149,8 +152,6 @@ pub enum Token {
149152 PGCubeRoot ,
150153 /// `?` or `$` , a prepared statement arg placeholder
151154 Placeholder ( String ) ,
152- /// `$$`, used for PostgreSQL create function definition
153- DoubleDollarQuoting ,
154155 /// ->, used as a operator to extract json field in PostgreSQL
155156 Arrow ,
156157 /// ->>, used as a operator to extract json field as text in PostgreSQL
@@ -184,6 +185,7 @@ impl fmt::Display for Token {
184185 Token :: Char ( ref c) => write ! ( f, "{}" , c) ,
185186 Token :: SingleQuotedString ( ref s) => write ! ( f, "'{}'" , s) ,
186187 Token :: DoubleQuotedString ( ref s) => write ! ( f, "\" {}\" " , s) ,
188+ Token :: DollarQuotedString ( ref s) => write ! ( f, "{}" , s) ,
187189 Token :: NationalStringLiteral ( ref s) => write ! ( f, "N'{}'" , s) ,
188190 Token :: EscapedStringLiteral ( ref s) => write ! ( f, "E'{}'" , s) ,
189191 Token :: HexStringLiteral ( ref s) => write ! ( f, "X'{}'" , s) ,
@@ -236,7 +238,6 @@ impl fmt::Display for Token {
236238 Token :: HashArrow => write ! ( f, "#>" ) ,
237239 Token :: HashLongArrow => write ! ( f, "#>>" ) ,
238240 Token :: AtArrow => write ! ( f, "@>" ) ,
239- Token :: DoubleDollarQuoting => write ! ( f, "$$" ) ,
240241 Token :: ArrowAt => write ! ( f, "<@" ) ,
241242 Token :: HashMinus => write ! ( f, "#-" ) ,
242243 Token :: AtQuestion => write ! ( f, "@?" ) ,
@@ -837,17 +838,8 @@ impl<'a> Tokenizer<'a> {
837838 let s = peeking_take_while ( chars, |ch| ch. is_numeric ( ) ) ;
838839 Ok ( Some ( Token :: Placeholder ( String :: from ( "?" ) + & s) ) )
839840 }
840- '$' => {
841- chars. next ( ) ;
842- match chars. peek ( ) {
843- Some ( '$' ) => self . consume_and_return ( chars, Token :: DoubleDollarQuoting ) ,
844- _ => {
845- let s =
846- peeking_take_while ( chars, |ch| ch. is_alphanumeric ( ) || ch == '_' ) ;
847- Ok ( Some ( Token :: Placeholder ( String :: from ( "$" ) + & s) ) )
848- }
849- }
850- }
841+ '$' => Ok ( Some ( self . tokenize_dollar_preceded_value ( chars) ?) ) ,
842+
851843 //whitespace check (including unicode chars) should be last as it covers some of the chars above
852844 ch if ch. is_whitespace ( ) => {
853845 self . consume_and_return ( chars, Token :: Whitespace ( Whitespace :: Space ) )
@@ -858,6 +850,97 @@ impl<'a> Tokenizer<'a> {
858850 }
859851 }
860852
853+ /// Tokenize dollar preceded value (i.e: a string/placeholder)
854+ fn tokenize_dollar_preceded_value ( & self , chars : & mut State ) -> Result < Token , TokenizerError > {
855+ let mut s = String :: new ( ) ;
856+ let mut value = String :: new ( ) ;
857+
858+ chars. next ( ) ;
859+
860+ if let Some ( '$' ) = chars. peek ( ) {
861+ chars. next ( ) ;
862+
863+ let mut is_terminated = false ;
864+ let mut prev: Option < char > = None ;
865+
866+ while let Some ( & ch) = chars. peek ( ) {
867+ if prev == Some ( '$' ) {
868+ if ch == '$' {
869+ chars. next ( ) ;
870+ is_terminated = true ;
871+ break ;
872+ } else {
873+ s. push ( '$' ) ;
874+ s. push ( ch) ;
875+ }
876+ } else if ch != '$' {
877+ s. push ( ch) ;
878+ }
879+
880+ prev = Some ( ch) ;
881+ chars. next ( ) ;
882+ }
883+
884+ return if chars. peek ( ) . is_none ( ) && !is_terminated {
885+ self . tokenizer_error ( chars. location ( ) , "Unterminated dollar-quoted string" )
886+ } else {
887+ Ok ( Token :: DollarQuotedString ( DollarQuotedString {
888+ value : s,
889+ tag : None ,
890+ } ) )
891+ } ;
892+ } else {
893+ value. push_str ( & peeking_take_while ( chars, |ch| {
894+ ch. is_alphanumeric ( ) || ch == '_'
895+ } ) ) ;
896+
897+ if let Some ( '$' ) = chars. peek ( ) {
898+ chars. next ( ) ;
899+ s. push_str ( & peeking_take_while ( chars, |ch| ch != '$' ) ) ;
900+
901+ match chars. peek ( ) {
902+ Some ( '$' ) => {
903+ chars. next ( ) ;
904+ for ( _, c) in value. chars ( ) . enumerate ( ) {
905+ let next_char = chars. next ( ) ;
906+ if Some ( c) != next_char {
907+ return self . tokenizer_error (
908+ chars. location ( ) ,
909+ format ! (
910+ "Unterminated dollar-quoted string at or near \" {}\" " ,
911+ value
912+ ) ,
913+ ) ;
914+ }
915+ }
916+
917+ if let Some ( '$' ) = chars. peek ( ) {
918+ chars. next ( ) ;
919+ } else {
920+ return self . tokenizer_error (
921+ chars. location ( ) ,
922+ "Unterminated dollar-quoted string, expected $" ,
923+ ) ;
924+ }
925+ }
926+ _ => {
927+ return self . tokenizer_error (
928+ chars. location ( ) ,
929+ "Unterminated dollar-quoted, expected $" ,
930+ ) ;
931+ }
932+ }
933+ } else {
934+ return Ok ( Token :: Placeholder ( String :: from ( "$" ) + & value) ) ;
935+ }
936+ }
937+
938+ Ok ( Token :: DollarQuotedString ( DollarQuotedString {
939+ value : s,
940+ tag : if value. is_empty ( ) { None } else { Some ( value) } ,
941+ } ) )
942+ }
943+
861944 fn tokenizer_error < R > (
862945 & self ,
863946 loc : Location ,
0 commit comments