diff --git a/apps/sim/app/api/tools/clickhouse/utils.ts b/apps/sim/app/api/tools/clickhouse/utils.ts index 71853591048..ce4ad3afcbd 100644 --- a/apps/sim/app/api/tools/clickhouse/utils.ts +++ b/apps/sim/app/api/tools/clickhouse/utils.ts @@ -1,4 +1,7 @@ -import { validateDatabaseHost } from '@/lib/core/security/input-validation.server' +import { + validateDatabaseHost, + validateSqlWhereClause, +} from '@/lib/core/security/input-validation.server' import type { ClickHouseConnectionConfig } from '@/tools/clickhouse/types' const REQUEST_TIMEOUT_MS = 30_000 @@ -60,7 +63,8 @@ export interface ClickHouseIntrospectionResult { */ async function clickhouseRequest( config: ClickHouseConnectionConfig, - statement: string + statement: string, + options: { readOnly?: boolean } = {} ): Promise { const hostValidation = await validateDatabaseHost(config.host, 'host') if (!hostValidation.isValid) { @@ -70,6 +74,12 @@ async function clickhouseRequest( const protocol = config.secure ? 'https' : 'http' const url = new URL(`${protocol}://${config.host}:${config.port}/`) url.searchParams.set('database', config.database) + if (options.readOnly) { + // Server-enforced read-only: ClickHouse rejects any write/DDL and forbids the + // query from re-enabling writes via `SET readonly=0`. This is the real boundary + // for the query operation; the SQL-shape checks below are defense-in-depth. + url.searchParams.set('readonly', '1') + } const controller = new AbortController() const timeout = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS) @@ -290,7 +300,9 @@ export async function executeClickHouseQuery( ) } } - const result = await clickhouseRequest(config, ensureJsonFormat(query)) + const result = await clickhouseRequest(config, ensureJsonFormat(query), { + readOnly: options.enforceReadOnly, + }) return parseRowsResult(result) } @@ -448,37 +460,14 @@ function sanitizeSingleIdentifier(identifier: string): string { } /** - * Rejects WHERE clauses containing patterns commonly used in SQL injection so - * that user-supplied conditions cannot escape the intended mutation. + * Rejects WHERE clauses containing SQL-injection or always-true tautology + * patterns so user-supplied conditions cannot broaden a mutation to every row. + * Delegates to the shared {@link validateSqlWhereClause} guard (defense-in-depth). */ function validateWhereClause(where: string): void { - const dangerousPatterns = [ - /;\s*(drop|delete|insert|alter|create|truncate|rename|grant|revoke)/i, - /union\s+(all\s+)?select/i, - /into\s+outfile/i, - /--/, - /\/\*/, - /\*\//, - /\bor\s+(['"]?)(\w+)\1\s*=\s*\1\2\1/i, - /\bor\s+true\b/i, - /\bor\s+false\b/i, - /\band\s+(['"]?)(\w+)\1\s*=\s*\1\2\1/i, - /\band\s+true\b/i, - /\band\s+false\b/i, - /\bsleep\s*\(/i, - /;\s*\w+/, - // Constant / tautological conditions that don't reference columns and would - // broaden a mutation to all rows (e.g. "1=1", "1 < 2", "'a'='a'", bare "1"/"true"). - /\b\d+\s*(?:=|==|<>|!=|<=|>=|<|>)\s*\d+\b/, - /(['"])([^'"]*)\1\s*(?:=|==|<>|!=)\s*\1\2\1/, - /\b(\w+)\s*=\s*\1\b/i, - /^\s*(?:\d+|true|false)\s*$/i, - ] - - for (const pattern of dangerousPatterns) { - if (pattern.test(where)) { - throw new Error('WHERE clause contains potentially dangerous operation') - } + const result = validateSqlWhereClause(where, 'WHERE clause') + if (!result.isValid) { + throw new Error(result.error) } } diff --git a/apps/sim/lib/core/security/input-validation.server.ts b/apps/sim/lib/core/security/input-validation.server.ts index d76928ade83..cdf5f28a9a1 100644 --- a/apps/sim/lib/core/security/input-validation.server.ts +++ b/apps/sim/lib/core/security/input-validation.server.ts @@ -206,6 +206,112 @@ export async function validateDatabaseHost( } } +/** + * Patterns run against the WHERE clause with string/identifier literals masked + * out (so an attacker cannot smuggle `OR 1` or `; DROP` inside a quoted value). + * + * The connector-literal rules below are intentionally `OR`-only: only an + * `OR ` term broadens a mutation to every row. `AND ` is a no-op + * for broadening and is also exactly what `BETWEEN low AND high` produces, so + * matching it would reject legitimate range filters (e.g. `id BETWEEN 1 AND 10`). + */ +const SQL_WHERE_MASKED_PATTERNS: readonly RegExp[] = [ + /;\s*\w/, // stacked statement + /\bunion\s+(?:all\s+)?select\b/i, + /\binto\s+(?:out|dump)file\b/i, + /--/, + /\/\*/, + /\*\//, + /\b(?:sleep|pg_sleep|benchmark)\s*\(/i, + /\b(\w+)\s*=\s*\1\b/i, // same (unquoted) operand both sides: x=x, 1=1 + /\b\d+(?:\.\d+)?\s*(?:=|==|<>|!=|<=|>=|<|>)\s*\d+(?:\.\d+)?\b/, // constant vs constant: 1=1, 1<2, 2>1 + /\bor\s+(?:true|false)\b/i, // OR TRUE / OR FALSE + /\bor\s+\d+(?:\.\d+)?\b(?!\s*[=<>!+\-*/%])/i, // standalone truthy literal after OR: OR 1, OR 42 + /^\s*(?:\d+(?:\.\d+)?|true|false)\s*$/i, // bare constant: "1" / "true" / "false" +] + +/** + * Patterns run against the raw WHERE clause (need the literal contents intact), + * e.g. equality between two identical string literals. + */ +const SQL_WHERE_RAW_PATTERNS: readonly RegExp[] = [ + /(['"])([^'"]*)\1\s*(?:=|==|<>|!=)\s*\1\2\1/, // 'a'='a' / "x"="x" +] + +/** + * Replaces the contents of string literals ('...'), double-quoted and + * backtick-quoted identifiers with spaces (preserving length) so structural + * scans do not treat data inside quotes as SQL. Comments are intentionally left + * intact so comment-injection sequences are still detected. + */ +function maskSqlStringLiterals(sql: string): string { + let out = '' + let i = 0 + while (i < sql.length) { + const ch = sql[i] + if (ch === "'" || ch === '"' || ch === '`') { + out += ' ' + i++ + while (i < sql.length && sql[i] !== ch) { + if (ch !== '`' && sql[i] === '\\') { + out += ' ' + i += 2 + continue + } + out += ' ' + i++ + } + if (i < sql.length) { + out += ' ' + i++ + } + continue + } + out += ch + i++ + } + return out +} + +/** + * Validates a free-form SQL `WHERE` condition for injection and always-true + * tautology patterns. Returns a {@link ValidationResult}; callers decide whether + * to throw or surface the error. + * + * IMPORTANT: this is **defense-in-depth, not a security boundary**. A free-form + * SQL condition cannot be exhaustively validated against every always-true + * expression (e.g. `OR 2 > 1`, `OR (1)`, `OR NOT 0`, `OR length(x) >= 0`). The + * real boundary is that the caller supplies their own database credentials and + * could run equivalent SQL directly (e.g. via a raw-SQL/execute operation). This + * guard stops the easy, obvious ways an injected condition broadens a mutation + * to every row; it is not a substitute for constraining untrusted input upstream. + * + * @param where - The WHERE clause condition (without the `WHERE` keyword) + * @param paramName - Label used in the error message + */ +export function validateSqlWhereClause( + where: string | null | undefined, + paramName = 'WHERE clause' +): ValidationResult { + if (typeof where !== 'string' || where.trim().length === 0) { + return { isValid: false, error: `${paramName} is required` } + } + + const masked = maskSqlStringLiterals(where) + const matched = + SQL_WHERE_MASKED_PATTERNS.some((pattern) => pattern.test(masked)) || + SQL_WHERE_RAW_PATTERNS.some((pattern) => pattern.test(where)) + + if (matched) { + return { + isValid: false, + error: `${paramName} contains a disallowed or always-true expression`, + } + } + + return { isValid: true } +} + export interface SecureFetchOptions { method?: string headers?: Record