Skip to content

Commit ad1848a

Browse files
committed
fix: Infer entity value types from dbt column types in multi-entity support
When creating entities from dbt models with integer columns, entities were being created with the default ValueType.STRING, causing validation errors such as: "Entity X has type ValueType.STRING, which does not match the inferred type Int64".

This fix adds value type inference in both the CLI and the mapper to properly detect column types (INT64, DOUBLE, BOOL, STRING) from dbt metadata and create entities with matching ValueTypes.

It also corrects schema generation to include entity columns, as FeatureView.__init__ expects to extract entity columns from the schema itself (lines 216-234 in feature_view.py).

Changes:
- feast/cli/dbt_import.py: Add value type inference when creating entities
- feast/dbt/mapper.py: Add value type inference in create_feature_view() and create_all_from_model(), and include entity columns in schema

Signed-off-by: yassinnouh21 <yassinnouh21@gmail.com>
1 parent 907e858 commit ad1848a

File tree

13 files changed

+238
-5
lines changed

13 files changed

+238
-5
lines changed

sdk/python/feast/cli/dbt_import.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,9 +225,31 @@ def import_command(
225225
model_entities: List[Any] = []
226226
for entity_col in entity_cols:
227227
if entity_col not in entities_created:
228+
# Infer entity value type from model column
229+
from feast.types import String, Int32, Int64, Float32, Float64, Bool
230+
from feast.value_type import ValueType
231+
from feast.dbt.mapper import map_dbt_type_to_feast_type
232+
233+
entity_value_type = ValueType.UNKNOWN
234+
for column in model.columns:
235+
if column.name == entity_col:
236+
feast_type = map_dbt_type_to_feast_type(column.data_type)
237+
if feast_type == String:
238+
entity_value_type = ValueType.STRING
239+
elif feast_type in [Int32, Int64]:
240+
entity_value_type = ValueType.INT64
241+
elif feast_type in [Float32, Float64]:
242+
entity_value_type = ValueType.DOUBLE
243+
elif feast_type == Bool:
244+
entity_value_type = ValueType.BOOL
245+
else:
246+
entity_value_type = ValueType.STRING
247+
break
248+
228249
entity = mapper.create_entity(
229250
name=entity_col,
230251
description="Entity key for dbt models",
252+
value_type=entity_value_type,
231253
)
232254
entities_created[entity_col] = entity
233255
all_objects.append(entity)

sdk/python/feast/dbt/mapper.py

Lines changed: 49 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -310,7 +310,9 @@ def create_feature_view(
310310
"""
311311
# Normalize to lists
312312
entity_cols: List[str] = (
313-
[entity_columns] if isinstance(entity_columns, str) else list(entity_columns)
313+
[entity_columns]
314+
if isinstance(entity_columns, str)
315+
else list(entity_columns)
314316
)
315317

316318
entity_objs: List[Entity] = []
@@ -330,12 +332,14 @@ def create_feature_view(
330332
ts_field = timestamp_field or self.timestamp_field
331333
ttl = timedelta(days=ttl_days if ttl_days is not None else self.ttl_days)
332334

333-
# Columns to exclude from features (all entity columns + timestamp)
334-
excluded = set(entity_cols) | {ts_field}
335+
# Columns to exclude from schema (timestamp + any explicitly excluded)
336+
# Note: entity columns should NOT be excluded - FeatureView.__init__
337+
# expects entity columns to be in the schema and will extract them
338+
excluded = {ts_field}
335339
if exclude_columns:
336340
excluded.update(exclude_columns)
337341

338-
# Create schema from model columns
342+
# Create schema from model columns (includes entity columns)
339343
schema: List[Field] = []
340344
for column in model.columns:
341345
if column.name not in excluded:
@@ -352,9 +356,28 @@ def create_feature_view(
352356
if not entity_objs:
353357
entity_objs = []
354358
for entity_col in entity_cols:
359+
# Infer entity value type from model column
360+
entity_value_type = ValueType.UNKNOWN
361+
for column in model.columns:
362+
if column.name == entity_col:
363+
feast_type = map_dbt_type_to_feast_type(column.data_type)
364+
# Convert Feast type to ValueType
365+
if feast_type == String:
366+
entity_value_type = ValueType.STRING
367+
elif feast_type in [Int32, Int64]:
368+
entity_value_type = ValueType.INT64
369+
elif feast_type in [Float32, Float64]:
370+
entity_value_type = ValueType.DOUBLE
371+
elif feast_type == Bool:
372+
entity_value_type = ValueType.BOOL
373+
else:
374+
entity_value_type = ValueType.STRING
375+
break
376+
355377
ent = self.create_entity(
356378
name=entity_col,
357379
description=f"Entity for {model.name}",
380+
value_type=entity_value_type,
358381
)
359382
entity_objs.append(ent)
360383

@@ -405,16 +428,37 @@ def create_all_from_model(
405428
"""
406429
# Normalize to list
407430
entity_cols: List[str] = (
408-
[entity_columns] if isinstance(entity_columns, str) else list(entity_columns)
431+
[entity_columns]
432+
if isinstance(entity_columns, str)
433+
else list(entity_columns)
409434
)
410435

411436
# Create entities (plural)
412437
entities_list = []
413438
for entity_col in entity_cols:
439+
# Infer entity value type from model column
440+
entity_value_type = ValueType.UNKNOWN
441+
for column in model.columns:
442+
if column.name == entity_col:
443+
feast_type = map_dbt_type_to_feast_type(column.data_type)
444+
# Convert Feast type to ValueType
445+
if feast_type == String:
446+
entity_value_type = ValueType.STRING
447+
elif feast_type in [Int32, Int64]:
448+
entity_value_type = ValueType.INT64
449+
elif feast_type in [Float32, Float64]:
450+
entity_value_type = ValueType.DOUBLE
451+
elif feast_type == Bool:
452+
entity_value_type = ValueType.BOOL
453+
else:
454+
entity_value_type = ValueType.STRING
455+
break
456+
414457
entity = self.create_entity(
415458
name=entity_col,
416459
description=f"Entity for {model.name}",
417460
tags={"dbt.model": model.name},
461+
value_type=entity_value_type,
418462
)
419463
entities_list.append(entity)
420464

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Ignore Python bytecode artifacts
2+
*.pyc
3+
__pycache__/
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
id: 1d11a921-165f-4a70-b512-4c78ceb835b8
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
name: 'feast_integration_test'
2+
version: '1.0.0'
3+
config-version: 2
4+
5+
# Project profile (for testing we don't need real connections)
6+
profile: 'test'
7+
8+
# Model directory
9+
model-paths: ["models"]
10+
11+
# Seed directory
12+
seed-paths: ["seeds"]
13+
14+
# Target directory where manifest.json will be generated
15+
target-path: "target"
16+
17+
# Configure models
18+
models:
19+
feast_integration_test:
20+
+materialized: table
21+
22+
# Configure seeds
23+
seeds:
24+
feast_integration_test:
25+
+schema: raw
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
-- Customer statistics feature model
2+
-- Features for customer behavior
3+
4+
SELECT
5+
customer_id,
6+
event_timestamp,
7+
total_orders,
8+
total_spent,
9+
avg_order_value
10+
FROM {{ ref('customer_stats') }}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
-- Driver statistics feature model
2+
-- This model aggregates driver-level features for ML
3+
4+
SELECT
5+
driver_id,
6+
event_timestamp,
7+
conv_rate,
8+
acc_rate,
9+
avg_daily_trips
10+
FROM {{ ref('driver_hourly_stats') }}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
-- Product recommendation features
2+
-- Tagged with 'feast' for filtering tests
3+
4+
SELECT
5+
product_id,
6+
event_timestamp,
7+
view_count,
8+
purchase_count,
9+
rating_avg
10+
FROM {{ ref('product_stats') }}
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
version: 2
2+
3+
# Seeds will be loaded as tables
4+
seeds:
5+
- name: driver_hourly_stats
6+
description: "Raw driver hourly statistics"
7+
- name: customer_stats
8+
description: "Raw customer statistics"
9+
- name: product_stats
10+
description: "Raw product statistics"
11+
12+
models:
13+
- name: driver_features
14+
description: "Driver hourly features for ML models"
15+
config:
16+
tags: ["feast", "ml", "driver"]
17+
columns:
18+
- name: driver_id
19+
description: "Unique driver identifier"
20+
data_type: int64
21+
tests:
22+
- not_null
23+
- name: event_timestamp
24+
description: "Event timestamp"
25+
data_type: timestamp
26+
tests:
27+
- not_null
28+
- name: conv_rate
29+
description: "Conversion rate"
30+
data_type: float64
31+
- name: acc_rate
32+
description: "Acceptance rate"
33+
data_type: float64
34+
- name: avg_daily_trips
35+
description: "Average daily trips"
36+
data_type: int32
37+
38+
- name: customer_features
39+
description: "Customer behavior features"
40+
config:
41+
tags: ["feast", "ml", "customer"]
42+
columns:
43+
- name: customer_id
44+
description: "Unique customer identifier"
45+
data_type: string
46+
tests:
47+
- not_null
48+
- name: event_timestamp
49+
description: "Event timestamp"
50+
data_type: timestamp
51+
tests:
52+
- not_null
53+
- name: total_orders
54+
description: "Total number of orders"
55+
data_type: int64
56+
- name: total_spent
57+
description: "Total amount spent"
58+
data_type: float64
59+
- name: avg_order_value
60+
description: "Average order value"
61+
data_type: float64
62+
63+
- name: product_features
64+
description: "Product recommendation features"
65+
config:
66+
tags: ["feast", "recommendations"]
67+
columns:
68+
- name: product_id
69+
description: "Unique product identifier"
70+
data_type: string
71+
tests:
72+
- not_null
73+
- name: event_timestamp
74+
description: "Event timestamp"
75+
data_type: timestamp
76+
tests:
77+
- not_null
78+
- name: view_count
79+
description: "Number of views"
80+
data_type: int64
81+
- name: purchase_count
82+
description: "Number of purchases"
83+
data_type: int64
84+
- name: rating_avg
85+
description: "Average rating"
86+
data_type: float32
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Profiles for testing dbt compilation
2+
# We use DuckDB for local testing as it doesn't require external services
3+
4+
test:
5+
target: dev
6+
outputs:
7+
dev:
8+
type: duckdb
9+
path: ':memory:'
10+
threads: 1

0 commit comments

Comments
 (0)