Skip to content

Commit d1b0c95

Browse files
wilmaontherun authored and claude committed
Fix demo files: remove unsupported patterns, add dates and arithmetic examples
- windows_query: remove ROWS BETWEEN frame (unsupported in Feldera)
- aggregations_query: replace PERCENTILE_APPROX with STDDEV (no Feldera equivalent)
- json_combined: replace $.items[0] array path with scalar path (array paths unsupported)
- topk_combined: replace Feldera 3-arg DATEDIFF with Spark 2-arg datediff (Spark input)
- Add dates_combined: to_date / date_format Spark input demo
- Add arithmetic_combined: pmod / try_divide / try_subtract Spark input demo

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 643ba2b commit d1b0c95

File tree

6 files changed

+42
-8
lines changed

6 files changed

+42
-8
lines changed

python/felderize/spark/data/demo/aggregations_query.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ SELECT
44
COUNT(DISTINCT page_url) AS unique_pages,
55
COLLECT_LIST(page_url) AS visited_pages,
66
AVG(view_duration) AS avg_duration,
7-
PERCENTILE_APPROX(view_duration, 0.95) AS p95_duration,
7+
STDDEV(view_duration) AS stddev_duration,
88
MIN(view_time) AS first_seen,
99
MAX(view_time) AS last_seen,
1010
COUNT(CASE WHEN device_type = 'mobile' THEN 1 END) AS mobile_views
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
-- Demo: pmod, try_divide, try_subtract
-- Covers: positive modulo, NULL-on-zero division, safe subtraction

-- Source table: one row per metric observation, all integral columns.
CREATE TABLE metrics (
    metric_id BIGINT,
    value BIGINT,
    bucket BIGINT,
    divisor BIGINT,
    baseline BIGINT
) USING parquet;

-- Derived view exercising the three "safe" arithmetic builtins.
CREATE OR REPLACE TEMP VIEW metric_results AS
SELECT
    metric_id,
    -- pmod always returns a non-negative result, unlike the % operator
    pmod(value, bucket) AS bucketed,
    -- try_divide yields NULL (not an error) when divisor is zero
    try_divide(value, divisor) AS safe_ratio,
    -- try_subtract yields NULL (not an error) on overflow
    try_subtract(value, baseline) AS delta
FROM metrics;
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
-- Demo: to_date and date_format patterns
-- Covers: date string parsing, date-only formatting, time component formatting

-- Source table: events carrying both a string-typed date and a real timestamp.
CREATE TABLE raw_events (
    event_id BIGINT,
    user_id BIGINT,
    event_date STRING,
    occurred_at TIMESTAMP
) USING parquet;

-- Derived view exercising Spark date parsing and formatting builtins.
CREATE OR REPLACE TEMP VIEW event_labels AS
SELECT
    event_id,
    -- parse the string column into a proper DATE using an explicit pattern
    to_date(event_date, 'yyyy-MM-dd') AS parsed_date,
    -- format the timestamp as a date-only label
    date_format(occurred_at, 'yyyy-MM-dd') AS day_label,
    -- format the timestamp down to minute precision
    date_format(occurred_at, 'yyyy-MM-dd HH:mm') AS minute_label
FROM raw_events;

python/felderize/spark/data/demo/json_combined.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ SELECT
2727
get_json_object(payload, '$.amount') AS amount_str,
2828
CAST(get_json_object(payload, '$.amount') AS DOUBLE) AS amount,
2929
get_json_object(payload, '$.currency') AS currency,
30-
get_json_object(payload, '$.items[0]') AS first_item
30+
get_json_object(payload, '$.item_type') AS item_type
3131
FROM raw_events;
3232

3333
-- Aggregate per user, parsing nested JSON

python/felderize/spark/data/demo/topk_combined.sql

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
-- Demo: TopK pattern, QUALIFY clause, and TIMESTAMPDIFF
2-
-- Covers: ROW_NUMBER/RANK in subquery (TopK), QUALIFY, TIMESTAMPDIFF
1+
-- Demo: TopK pattern, QUALIFY clause, and datediff
2+
-- Covers: ROW_NUMBER in subquery (TopK), QUALIFY, datediff(end, start)
33

44
CREATE TABLE IF NOT EXISTS employee (
55
emp_id STRING NOT NULL,
@@ -40,11 +40,11 @@ SELECT
4040
FROM review
4141
QUALIFY ROW_NUMBER() OVER (PARTITION BY emp_id ORDER BY review_date DESC) = 1;
4242

43-
-- Employee tenure in years using DATEDIFF
43+
-- Employee tenure in years using datediff
4444
CREATE OR REPLACE TEMP VIEW employee_tenure AS
4545
SELECT
4646
emp_id,
4747
dept,
4848
hire_date,
49-
DATEDIFF(year, hire_date, CURRENT_TIMESTAMP) AS tenure_years
49+
datediff(CURRENT_DATE, hire_date) AS tenure_days
5050
FROM employee;

python/felderize/spark/data/demo/windows_query.sql

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,5 @@ SELECT
44
account_id,
55
amount,
66
LAG(amount) OVER (PARTITION BY account_id ORDER BY txn_time) AS prev_amount,
7-
SUM(amount) OVER (PARTITION BY account_id ORDER BY txn_time
8-
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_total
7+
SUM(amount) OVER (PARTITION BY account_id) AS total_account_amount
98
FROM transactions;

0 commit comments

Comments
 (0)