fix: Infer entity value types from dbt column types in multi-entity support

YassinNouh21 · YassinNouh21 · commit ad1848aef897 · 2026-01-24T16:36:46.000+02:00
When creating entities from dbt models with integer columns, entities were
being created with default ValueType.STRING, causing validation errors:
"Entity X has type ValueType.STRING, which does not match the inferred type Int64"

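This fix adds value type inference in both the CLI and the mapper. Each
entity column's dbt type is mapped to a ValueType (Int32/Int64 -> INT64,
Float32/Float64 -> DOUBLE, Bool -> BOOL, String and any other type -> STRING),
so entities are created with a ValueType that matches the column. If the
entity column is not found among the model's columns, the value type is
left as ValueType.UNKNOWN.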

Also corrects schema generation to include entity columns, as FeatureView.__init__
expects to extract entity columns from the schema itself (lines 216-234 in
feature_view.py).

Changes:
- feast/cli/dbt_import.py: Add value type inference when creating entities
- feast/dbt/mapper.py: Add value type inference in create_feature_view() and
  create_all_from_model(), and include entity columns in schema

Signed-off-by: yassinnouh21 <yassinnouh21@gmail.com>
diff --git a/sdk/python/feast/cli/dbt_import.py b/sdk/python/feast/cli/dbt_import.py
@@ -225,9 +225,31 @@ def import_command(
         model_entities: List[Any] = []
         for entity_col in entity_cols:
             if entity_col not in entities_created:
+                # Infer entity value type from model column
+                from feast.types import String, Int32, Int64, Float32, Float64, Bool
+                from feast.value_type import ValueType
+                from feast.dbt.mapper import map_dbt_type_to_feast_type
+
+                entity_value_type = ValueType.UNKNOWN
+                for column in model.columns:
+                    if column.name == entity_col:
+                        feast_type = map_dbt_type_to_feast_type(column.data_type)
+                        if feast_type == String:
+                            entity_value_type = ValueType.STRING
+                        elif feast_type in [Int32, Int64]:
+                            entity_value_type = ValueType.INT64
+                        elif feast_type in [Float32, Float64]:
+                            entity_value_type = ValueType.DOUBLE
+                        elif feast_type == Bool:
+                            entity_value_type = ValueType.BOOL
+                        else:
+                            entity_value_type = ValueType.STRING
+                        break
+
                 entity = mapper.create_entity(
                     name=entity_col,
                     description="Entity key for dbt models",
+                    value_type=entity_value_type,
                 )
                 entities_created[entity_col] = entity
                 all_objects.append(entity)
diff --git a/sdk/python/feast/dbt/mapper.py b/sdk/python/feast/dbt/mapper.py
@@ -310,7 +310,9 @@ def create_feature_view(
         """
         # Normalize to lists
         entity_cols: List[str] = (
-            [entity_columns] if isinstance(entity_columns, str) else list(entity_columns)
+            [entity_columns]
+            if isinstance(entity_columns, str)
+            else list(entity_columns)
         )
 
         entity_objs: List[Entity] = []
@@ -330,12 +332,14 @@ def create_feature_view(
         ts_field = timestamp_field or self.timestamp_field
         ttl = timedelta(days=ttl_days if ttl_days is not None else self.ttl_days)
 
-        # Columns to exclude from features (all entity columns + timestamp)
-        excluded = set(entity_cols) | {ts_field}
+        # Columns to exclude from schema (timestamp + any explicitly excluded)
+        # Note: entity columns should NOT be excluded - FeatureView.__init__
+        # expects entity columns to be in the schema and will extract them
+        excluded = {ts_field}
         if exclude_columns:
             excluded.update(exclude_columns)
 
-        # Create schema from model columns
+        # Create schema from model columns (includes entity columns)
         schema: List[Field] = []
         for column in model.columns:
             if column.name not in excluded:
@@ -352,9 +356,28 @@ def create_feature_view(
         if not entity_objs:
             entity_objs = []
             for entity_col in entity_cols:
+                # Infer entity value type from model column
+                entity_value_type = ValueType.UNKNOWN
+                for column in model.columns:
+                    if column.name == entity_col:
+                        feast_type = map_dbt_type_to_feast_type(column.data_type)
+                        # Convert Feast type to ValueType
+                        if feast_type == String:
+                            entity_value_type = ValueType.STRING
+                        elif feast_type in [Int32, Int64]:
+                            entity_value_type = ValueType.INT64
+                        elif feast_type in [Float32, Float64]:
+                            entity_value_type = ValueType.DOUBLE
+                        elif feast_type == Bool:
+                            entity_value_type = ValueType.BOOL
+                        else:
+                            entity_value_type = ValueType.STRING
+                        break
+
                 ent = self.create_entity(
                     name=entity_col,
                     description=f"Entity for {model.name}",
+                    value_type=entity_value_type,
                 )
                 entity_objs.append(ent)
 
@@ -405,16 +428,37 @@ def create_all_from_model(
         """
         # Normalize to list
         entity_cols: List[str] = (
-            [entity_columns] if isinstance(entity_columns, str) else list(entity_columns)
+            [entity_columns]
+            if isinstance(entity_columns, str)
+            else list(entity_columns)
         )
 
         # Create entities (plural)
         entities_list = []
         for entity_col in entity_cols:
+            # Infer entity value type from model column
+            entity_value_type = ValueType.UNKNOWN
+            for column in model.columns:
+                if column.name == entity_col:
+                    feast_type = map_dbt_type_to_feast_type(column.data_type)
+                    # Convert Feast type to ValueType
+                    if feast_type == String:
+                        entity_value_type = ValueType.STRING
+                    elif feast_type in [Int32, Int64]:
+                        entity_value_type = ValueType.INT64
+                    elif feast_type in [Float32, Float64]:
+                        entity_value_type = ValueType.DOUBLE
+                    elif feast_type == Bool:
+                        entity_value_type = ValueType.BOOL
+                    else:
+                        entity_value_type = ValueType.STRING
+                    break
+
             entity = self.create_entity(
                 name=entity_col,
                 description=f"Entity for {model.name}",
                 tags={"dbt.model": model.name},
+                value_type=entity_value_type,
             )
             entities_list.append(entity)
 
diff --git a/sdk/python/tests/integration/dbt/test_dbt_project/.gitignore b/sdk/python/tests/integration/dbt/test_dbt_project/.gitignore
@@ -0,0 +1,3 @@
+# Ignore Python bytecode artifacts
+*.pyc
+__pycache__/
diff --git a/sdk/python/tests/integration/dbt/test_dbt_project/.user.yml b/sdk/python/tests/integration/dbt/test_dbt_project/.user.yml
@@ -0,0 +1 @@
+id: 1d11a921-165f-4a70-b512-4c78ceb835b8
diff --git a/sdk/python/tests/integration/dbt/test_dbt_project/dbt_project.yml b/sdk/python/tests/integration/dbt/test_dbt_project/dbt_project.yml
@@ -0,0 +1,25 @@
+name: 'feast_integration_test'
+version: '1.0.0'
+config-version: 2
+
+# Project profile (for testing we don't need real connections)
+profile: 'test'
+
+# Model directory
+model-paths: ["models"]
+
+# Seed directory
+seed-paths: ["seeds"]
+
+# Target directory where manifest.json will be generated
+target-path: "target"
+
+# Configure models
+models:
+  feast_integration_test:
+    +materialized: table
+
+# Configure seeds
+seeds:
+  feast_integration_test:
+    +schema: raw
diff --git a/sdk/python/tests/integration/dbt/test_dbt_project/models/customer_features.sql b/sdk/python/tests/integration/dbt/test_dbt_project/models/customer_features.sql
@@ -0,0 +1,10 @@
+-- Customer statistics feature model
+-- Features for customer behavior
+
+SELECT
+    customer_id,
+    event_timestamp,
+    total_orders,
+    total_spent,
+    avg_order_value
+FROM {{ ref('customer_stats') }}
diff --git a/sdk/python/tests/integration/dbt/test_dbt_project/models/driver_features.sql b/sdk/python/tests/integration/dbt/test_dbt_project/models/driver_features.sql
@@ -0,0 +1,10 @@
+-- Driver statistics feature model
+-- This model aggregates driver-level features for ML
+
+SELECT
+    driver_id,
+    event_timestamp,
+    conv_rate,
+    acc_rate,
+    avg_daily_trips
+FROM {{ ref('driver_hourly_stats') }}
diff --git a/sdk/python/tests/integration/dbt/test_dbt_project/models/product_features.sql b/sdk/python/tests/integration/dbt/test_dbt_project/models/product_features.sql
@@ -0,0 +1,10 @@
+-- Product recommendation features
+-- Tagged with 'feast' for filtering tests
+
+SELECT
+    product_id,
+    event_timestamp,
+    view_count,
+    purchase_count,
+    rating_avg
+FROM {{ ref('product_stats') }}
diff --git a/sdk/python/tests/integration/dbt/test_dbt_project/models/schema.yml b/sdk/python/tests/integration/dbt/test_dbt_project/models/schema.yml
@@ -0,0 +1,86 @@
+version: 2
+
+# Seeds will be loaded as tables
+seeds:
+  - name: driver_hourly_stats
+    description: "Raw driver hourly statistics"
+  - name: customer_stats
+    description: "Raw customer statistics"
+  - name: product_stats
+    description: "Raw product statistics"
+
+models:
+  - name: driver_features
+    description: "Driver hourly features for ML models"
+    config:
+      tags: ["feast", "ml", "driver"]
+    columns:
+      - name: driver_id
+        description: "Unique driver identifier"
+        data_type: int64
+        tests:
+          - not_null
+      - name: event_timestamp
+        description: "Event timestamp"
+        data_type: timestamp
+        tests:
+          - not_null
+      - name: conv_rate
+        description: "Conversion rate"
+        data_type: float64
+      - name: acc_rate
+        description: "Acceptance rate"
+        data_type: float64
+      - name: avg_daily_trips
+        description: "Average daily trips"
+        data_type: int32
+
+  - name: customer_features
+    description: "Customer behavior features"
+    config:
+      tags: ["feast", "ml", "customer"]
+    columns:
+      - name: customer_id
+        description: "Unique customer identifier"
+        data_type: string
+        tests:
+          - not_null
+      - name: event_timestamp
+        description: "Event timestamp"
+        data_type: timestamp
+        tests:
+          - not_null
+      - name: total_orders
+        description: "Total number of orders"
+        data_type: int64
+      - name: total_spent
+        description: "Total amount spent"
+        data_type: float64
+      - name: avg_order_value
+        description: "Average order value"
+        data_type: float64
+
+  - name: product_features
+    description: "Product recommendation features"
+    config:
+      tags: ["feast", "recommendations"]
+    columns:
+      - name: product_id
+        description: "Unique product identifier"
+        data_type: string
+        tests:
+          - not_null
+      - name: event_timestamp
+        description: "Event timestamp"
+        data_type: timestamp
+        tests:
+          - not_null
+      - name: view_count
+        description: "Number of views"
+        data_type: int64
+      - name: purchase_count
+        description: "Number of purchases"
+        data_type: int64
+      - name: rating_avg
+        description: "Average rating"
+        data_type: float32
diff --git a/sdk/python/tests/integration/dbt/test_dbt_project/profiles.yml b/sdk/python/tests/integration/dbt/test_dbt_project/profiles.yml
@@ -0,0 +1,10 @@
+# Profiles for testing dbt compilation
+# We use DuckDB for local testing as it doesn't require external services
+
+test:
+  target: dev
+  outputs:
+    dev:
+      type: duckdb
+      path: ':memory:'
+      threads: 1
diff --git a/sdk/python/tests/integration/dbt/test_dbt_project/seeds/customer_stats.csv b/sdk/python/tests/integration/dbt/test_dbt_project/seeds/customer_stats.csv
@@ -0,0 +1,4 @@
+customer_id,event_timestamp,total_orders,total_spent,avg_order_value
+cust_001,2024-01-01 00:00:00,5,250.50,50.10
+cust_002,2024-01-01 00:00:00,3,180.75,60.25
+cust_003,2024-01-01 00:00:00,7,420.00,60.00
diff --git a/sdk/python/tests/integration/dbt/test_dbt_project/seeds/driver_hourly_stats.csv b/sdk/python/tests/integration/dbt/test_dbt_project/seeds/driver_hourly_stats.csv
@@ -0,0 +1,4 @@
+driver_id,event_timestamp,conv_rate,acc_rate,avg_daily_trips
+1001,2024-01-01 00:00:00,0.85,0.92,12
+1002,2024-01-01 00:00:00,0.78,0.88,15
+1003,2024-01-01 00:00:00,0.91,0.95,10
diff --git a/sdk/python/tests/integration/dbt/test_dbt_project/seeds/product_stats.csv b/sdk/python/tests/integration/dbt/test_dbt_project/seeds/product_stats.csv
@@ -0,0 +1,4 @@
+product_id,event_timestamp,view_count,purchase_count,rating_avg
+prod_001,2024-01-01 00:00:00,150,25,4.5
+prod_002,2024-01-01 00:00:00,200,30,4.2
+prod_003,2024-01-01 00:00:00,100,15,4.8

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+# Ignore dbt artifacts`
	`2`	`+*.pyc`
	`3`	`+__pycache__/`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+id: 1d11a921-165f-4a70-b512-4c78ceb835b8`