From c262e5dd137fe40db73a31eb24c15085e7aaaadd Mon Sep 17 00:00:00 2001 From: Din Date: Thu, 18 Jun 2026 15:36:42 +0100 Subject: [PATCH] ClickHouse: Support unparenthesized IN right-hand side --- src/dialect/clickhouse.rs | 4 ++++ src/dialect/mod.rs | 13 ++++++++++++ src/parser/mod.rs | 11 ++++++++++ tests/sqlparser_clickhouse.rs | 38 +++++++++++++++++++++++++++++++++++ tests/sqlparser_common.rs | 11 +++++++--- 5 files changed, 74 insertions(+), 3 deletions(-) diff --git a/src/dialect/clickhouse.rs b/src/dialect/clickhouse.rs index 6ee60cc993..8da5def075 100644 --- a/src/dialect/clickhouse.rs +++ b/src/dialect/clickhouse.rs @@ -80,6 +80,10 @@ impl Dialect for ClickHouseDialect { true } + fn supports_in_unparenthesized_expr(&self) -> bool { + true + } + /// See fn supports_lambda_functions(&self) -> bool { true diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 8a963cd42c..40dba457c1 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -435,6 +435,15 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if the dialect supports a bare expression as the right-hand + /// side of `IN`, without a parenthesized list — as in `x IN 'a'` or the + /// ClickHouse `{name:Type}` query-parameter placeholder `x IN {ids:Array(UInt64)}`. + /// The expression is wrapped into a single-element list, matching ClickHouse, + /// which reformats `x IN 'a'` to `x IN ('a')`. 
+ fn supports_in_unparenthesized_expr(&self) -> bool { + false + } + /// Returns true if the dialect supports `BEGIN {DEFERRED | IMMEDIATE | EXCLUSIVE | TRY | CATCH} [TRANSACTION]` statements fn supports_start_transaction_modifier(&self) -> bool { false @@ -2051,6 +2060,10 @@ mod tests { self.0.supports_in_empty_list() } + fn supports_in_unparenthesized_expr(&self) -> bool { + self.0.supports_in_unparenthesized_expr() + } + fn convert_type_before_value(&self) -> bool { self.0.convert_type_before_value() } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6540cdc0d9..293526a464 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4392,6 +4392,17 @@ impl<'a> Parser<'a> { negated, }); } + // ClickHouse accepts a bare IN RHS (`x IN 'a'`, a `{name:Type}` placeholder) as a + // one-element list; IN precedence keeps a trailing `AND`/`OR` out of that list. + if self.dialect.supports_in_unparenthesized_expr() + && self.peek_token_ref().token != Token::LParen + { + return Ok(Expr::InList { + expr: Box::new(expr), + list: vec![self.parse_subexpr(self.dialect.prec_value(Precedence::Between))?], + negated, + }); + } self.expect_token(&Token::LParen)?; let in_op = match self.maybe_parse(|p| p.parse_query())? { Some(subquery) => Expr::InSubquery { diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index cb2df1ff6f..6c97407f81 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -1846,6 +1846,44 @@ fn parse_inner_array_join() { } } +#[test] +fn parse_in_unparenthesized_placeholder() { + // ClickHouse `{name:Type}` query-parameter placeholder as the IN RHS, without parens. + match clickhouse().expr_parses_to("x IN {ids:Array(UInt64)}", "x IN ({ids: Array(UInt64)})") { + Expr::InList { list, negated, .. } => { + assert!(!negated); + assert_eq!(list.len(), 1); + assert!(matches!(list[0], Expr::Dictionary(_))); + } + other => panic!("expected InList, got {other:?}"), + } + + // NOT IN sets negated. 
+ match clickhouse().expr_parses_to( + "x NOT IN {ids:Array(UInt64)}", + "x NOT IN ({ids: Array(UInt64)})", + ) { + Expr::InList { negated, .. } => assert!(negated), + other => panic!("expected InList, got {other:?}"), + } + + // A bare scalar is also wrapped, matching ClickHouse (`x IN 'a'` -> `x IN ('a')`). + clickhouse().expr_parses_to("x IN 'a'", "x IN ('a')"); + + // The new branch must not fire when the next token is `(` (regressions). + clickhouse().verified_expr("x IN ({ids: Array(UInt64)})"); + clickhouse().verified_expr("x IN (1, 2, 3)"); + clickhouse().verified_stmt("SELECT * FROM t WHERE x IN (SELECT y FROM u)"); + + // Precedence: a trailing `AND` is not swallowed into the unparenthesized RHS. + clickhouse().expr_parses_to("x IN {p:UInt64} AND y = 1", "x IN ({p: UInt64}) AND y = 1"); + + // Dialect-scoped: GenericDialect (capability defaults false) still errors. + assert!(TestedDialects::new(vec![Box::new(GenericDialect {})]) + .parse_sql_statements("SELECT * FROM t WHERE x IN {ids:Array(UInt64)}") + .is_err()); +} + fn clickhouse() -> TestedDialects { TestedDialects::new(vec![Box::new(ClickHouseDialect {})]) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index be3026f637..ef7d183dec 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2375,9 +2375,11 @@ fn parse_in_unnest() { #[test] fn parse_in_error() { - // IN is no valid + // `expr IN expr` is not valid, except in dialects that accept an + // unparenthesized expression as the IN right-hand side (e.g. ClickHouse). let sql = "SELECT * FROM customers WHERE segment in segment"; - let res = parse_sql_statements(sql); + let res = + all_dialects_except(|d| d.supports_in_unparenthesized_expr()).parse_sql_statements(sql); assert_eq!( ParserError::ParserError("Expected: (, found: segment".to_string()), res.unwrap_err() @@ -10834,8 +10836,11 @@ fn parse_position() { #[test] fn parse_position_negative() { + // Dialects that accept an unparenthesized IN right-hand side (e.g. 
ClickHouse) + // report a different error here, so exclude them. let sql = "SELECT POSITION(foo IN) from bar"; - let res = parse_sql_statements(sql); + let res = + all_dialects_except(|d| d.supports_in_unparenthesized_expr()).parse_sql_statements(sql); assert_eq!( ParserError::ParserError("Expected: (, found: )".to_string()), res.unwrap_err()