Skip to content

Commit 76917b7

Browse files
nquinn408, nickquinn408, and devin-ai-integration[bot]
authored
feat: Making feature view source optional (#6074) (#6075)
* feat: Making feature view source optional (#6074)
  Signed-off-by: Nick Quinn <nicholas_quinn@apple.com>
* Fixing when source is used
  Signed-off-by: Nick Quinn <nicholas_quinn@apple.com>
* Devin feedback response
  Signed-off-by: Nick Quinn <nicholas_quinn@apple.com>
* Fixing linter issues
  Signed-off-by: Nick Quinn <nicholas_quinn@apple.com>
* Fixing broken test
  Signed-off-by: Nick Quinn <nicholas_quinn@apple.com>
* More feedback
  Signed-off-by: Nick Quinn <nicholas_quinn@apple.com>
* Update sdk/python/feast/batch_feature_view.py
  Co-authored-by: devin-ai-integration[bot] <158243242+devin-ai-integration[bot]@users.noreply.github.com>
  Signed-off-by: Nick Quinn <nicholas_quinn@apple.com>
* Adding integration for feature view with no source
  Signed-off-by: Nick Quinn <nicholas_quinn@apple.com>
* Fix lint-python issue
  Signed-off-by: Nick Quinn <nicholas_quinn@apple.com>
---------
Signed-off-by: Nick Quinn <nicholas_quinn@apple.com>
Co-authored-by: Nick Quinn <nicholas_quinn@apple.com>
Co-authored-by: devin-ai-integration[bot] <158243242+devin-ai-integration[bot]@users.noreply.github.com>
1 parent bf4e3fa commit 76917b7

File tree

25 files changed

+222
-43
lines changed

25 files changed

+222
-43
lines changed

docs/getting-started/concepts/batch-feature-view.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ class BatchFeatureView(FeatureView):
2727
def __init__(
2828
*,
2929
name: str,
30-
source: Union[DataSource, FeatureView, List[FeatureView]],
30+
source: Optional[Union[DataSource, FeatureView, List[FeatureView]]] = None,
3131
sink_source: Optional[DataSource] = None,
3232
schema: Optional[List[Field]] = None,
3333
entities: Optional[List[Entity]] = None,
@@ -142,6 +142,7 @@ See:
142142
## 🛑 Gotchas
143143

144144
- `sink_source` is **required** when chaining views (i.e., `source` is another FeatureView or list of them).
145+
- `source` is optional; if omitted (`None`), the feature view has no associated batch data source.
145146
- Schema fields must be consistent with `sink_source` and, if field mappings exist, with `batch_source.field_mapping`.
146147
- Aggregation logic must reference columns present in the raw source or transformed inputs.
147148

protos/feast/core/FeatureView.proto

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,13 +61,15 @@ message FeatureViewSpec {
6161
google.protobuf.Duration ttl = 6;
6262

6363
// Batch/Offline DataSource where this view can retrieve offline feature data.
64+
// Optional: if not set, the feature view has no associated batch data source (e.g. purely derived views).
6465
DataSource batch_source = 7;
6566

6667
// Whether these features should be served online or not
6768
// This is also used to determine whether the features should be written to the online store
6869
bool online = 8;
6970

7071
// Streaming DataSource from where this view can consume "online" feature data.
72+
// Optional: only required for streaming feature views.
7173
DataSource stream_source = 9;
7274

7375
// Description of the feature view.

sdk/python/feast/base_feature_view.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,7 @@ def __init__(
9393
self.created_timestamp = None
9494
self.last_updated_timestamp = None
9595

96-
if source:
97-
self.source = source
96+
self.source = source
9897

9998
@property
10099
@abstractmethod

sdk/python/feast/batch_feature_view.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,9 @@ def __init__(
8282
*,
8383
name: str,
8484
mode: Union[TransformationMode, str] = TransformationMode.PYTHON,
85-
source: Union[DataSource, "BatchFeatureView", List["BatchFeatureView"]],
85+
source: Optional[
86+
Union[DataSource, "BatchFeatureView", List["BatchFeatureView"]]
87+
] = None,
8688
sink_source: Optional[DataSource] = None,
8789
entities: Optional[List[Entity]] = None,
8890
ttl: Optional[timedelta] = None,
@@ -115,6 +117,21 @@ def __init__(
115117
f"or CUSTOM_SOURCE, got {type(source).__name__}: {source.name} instead "
116118
)
117119

120+
if source is None and aggregations:
121+
raise ValueError(
122+
"BatchFeatureView with aggregations requires a source to aggregate from."
123+
)
124+
125+
if (
126+
source is None
127+
and not udf
128+
and not feature_transformation
129+
and not aggregations
130+
):
131+
raise ValueError(
132+
"BatchFeatureView requires at least one of: source, udf, feature_transformation, or aggregations."
133+
)
134+
118135
self.mode = mode
119136
self.udf = udf
120137
self.udf_string = udf_string

sdk/python/feast/feature_store.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -702,11 +702,21 @@ def _make_inferences(
702702
)
703703

704704
update_data_sources_with_inferred_event_timestamp_col(
705-
[view.batch_source for view in views_to_update], self.config
705+
[
706+
view.batch_source
707+
for view in views_to_update
708+
if view.batch_source is not None
709+
],
710+
self.config,
706711
)
707712

708713
update_data_sources_with_inferred_event_timestamp_col(
709-
[view.batch_source for view in sfvs_to_update], self.config
714+
[
715+
view.batch_source
716+
for view in sfvs_to_update
717+
if view.batch_source is not None
718+
],
719+
self.config,
710720
)
711721

712722
# New feature views may reference previously applied entities.
@@ -2416,6 +2426,8 @@ def write_to_offline_store(
24162426

24172427
provider = self._get_provider()
24182428
# Get columns of the batch source and the input dataframe.
2429+
if feature_view.batch_source is None:
2430+
raise ValueError(f"Feature view '{feature_view.name}' has no batch_source.")
24192431
column_names_and_types = (
24202432
provider.get_table_column_names_and_types_from_data_source(
24212433
self.config, feature_view.batch_source

sdk/python/feast/feature_view.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,8 @@ class FeatureView(BaseFeatureView):
7373
ttl: The amount of time this group of features lives. A ttl of 0 indicates that
7474
this group of features lives forever. Note that large ttl's or a ttl of 0
7575
can result in extremely computationally intensive queries.
76-
batch_source: The batch source of data where this group of features
77-
is stored. This is optional ONLY if a push source is specified as the
78-
stream_source, since push sources contain their own batch sources.
76+
batch_source: Optional batch source of data where this group of features
77+
is stored. If no source is provided, this will be None.
7978
stream_source: The stream source of data where this group of features is stored.
8079
schema: The schema of the feature view, including feature, timestamp, and entity
8180
columns. If not specified, can be inferred from the underlying data source.
@@ -97,7 +96,7 @@ class FeatureView(BaseFeatureView):
9796
name: str
9897
entities: List[str]
9998
ttl: Optional[timedelta]
100-
batch_source: DataSource
99+
batch_source: Optional[DataSource]
101100
stream_source: Optional[DataSource]
102101
source_views: Optional[List["FeatureView"]]
103102
entity_columns: List[Field]
@@ -115,7 +114,7 @@ def __init__(
115114
self,
116115
*,
117116
name: str,
118-
source: Union[DataSource, "FeatureView", List["FeatureView"]],
117+
source: Optional[Union[DataSource, "FeatureView", List["FeatureView"]]] = None,
119118
sink_source: Optional[DataSource] = None,
120119
schema: Optional[List[Field]] = None,
121120
entities: Optional[List[Entity]] = None,
@@ -133,8 +132,9 @@ def __init__(
133132
134133
Args:
135134
name: The unique name of the feature view.
136-
source: The source of data for this group of features. May be a stream source, or a batch source.
137-
If a stream source, the source should contain a batch_source for backfills & batch materialization.
135+
source (optional): The source of data for this group of features. May be a stream source,
136+
a batch source, a FeatureView, or a list of FeatureViews. If None, the feature view
137+
has no associated data source.
138138
schema (optional): The schema of the feature view, including feature, timestamp,
139139
and entity columns.
140140
# TODO: clarify that schema is only useful here...
@@ -170,7 +170,9 @@ def __init__(
170170
self.data_source: Optional[DataSource] = None
171171
self.source_views: List[FeatureView] = []
172172

173-
if isinstance(source, DataSource):
173+
if source is None:
174+
pass # data_source remains None, source_views remains []
175+
elif isinstance(source, DataSource):
174176
self.data_source = source
175177
elif isinstance(source, FeatureView):
176178
self.source_views = [source]
@@ -199,11 +201,14 @@ def __init__(
199201
elif self.data_source:
200202
# Batch source definition
201203
self.batch_source = self.data_source
202-
else:
204+
elif self.source_views:
203205
# Derived view source definition
204206
if not sink_source:
205207
raise ValueError("Derived FeatureView must specify `sink_source`.")
206208
self.batch_source = sink_source
209+
else:
210+
# source=None - no batch source
211+
self.batch_source = None
207212

208213
# Initialize features and entity columns.
209214
features: List[Field] = []

sdk/python/feast/feature_view_projection.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -98,19 +98,17 @@ def from_proto(proto: FeatureViewProjectionProto) -> "FeatureViewProjection":
9898
@staticmethod
9999
def from_feature_view_definition(feature_view: "FeatureView"):
100100
# TODO need to implement this for StreamFeatureViews
101-
if getattr(feature_view, "batch_source", None):
101+
batch_source = getattr(feature_view, "batch_source", None)
102+
if batch_source:
102103
return FeatureViewProjection(
103104
name=feature_view.name,
104105
name_alias=None,
105106
features=feature_view.features,
106107
desired_features=[],
107-
timestamp_field=feature_view.batch_source.created_timestamp_column
108-
or None,
109-
created_timestamp_column=feature_view.batch_source.created_timestamp_column
110-
or None,
111-
date_partition_column=feature_view.batch_source.date_partition_column
112-
or None,
113-
batch_source=feature_view.batch_source or None,
108+
timestamp_field=batch_source.created_timestamp_column or None,
109+
created_timestamp_column=batch_source.created_timestamp_column or None,
110+
date_partition_column=batch_source.date_partition_column or None,
111+
batch_source=batch_source or None,
114112
)
115113
else:
116114
return FeatureViewProjection(

sdk/python/feast/feature_view_utils.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,8 @@ def resolve_feature_view_source(
136136

137137
if not is_derived_view:
138138
# Regular feature view - use its batch_source directly
139+
if feature_view.batch_source is None:
140+
raise ValueError(f"Feature view '{feature_view.name}' has no batch_source.")
139141
return FeatureViewSourceInfo(
140142
data_source=feature_view.batch_source,
141143
source_type="batch_source",
@@ -178,8 +180,13 @@ def resolve_feature_view_source(
178180
if hasattr(parent_view, "source_views") and parent_view.source_views:
179181
# Parent is also a derived view - recursively find original source
180182
original_source_view = find_original_source_view(parent_view)
183+
original_batch_source = original_source_view.batch_source
184+
if original_batch_source is None:
185+
raise ValueError(
186+
f"Original source view '{original_source_view.name}' has no batch_source."
187+
)
181188
return FeatureViewSourceInfo(
182-
data_source=original_source_view.batch_source,
189+
data_source=original_batch_source,
183190
source_type="original_source",
184191
has_transformation=view_has_transformation,
185192
transformation_func=transformation_func,
@@ -229,8 +236,13 @@ def resolve_feature_view_source_with_fallback(
229236
elif hasattr(feature_view, "source_views") and feature_view.source_views:
230237
# Try the original source view as last resort
231238
original_view = find_original_source_view(feature_view)
239+
original_view_batch_source = original_view.batch_source
240+
if original_view_batch_source is None:
241+
raise ValueError(
242+
f"Original source view '{original_view.name}' has no batch_source."
243+
)
232244
return FeatureViewSourceInfo(
233-
data_source=original_view.batch_source,
245+
data_source=original_view_batch_source,
234246
source_type="fallback_original_source",
235247
has_transformation=has_transformation(feature_view),
236248
transformation_func=get_transformation_function(feature_view),

sdk/python/feast/inference.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,9 @@ def _infer_features_and_entities(
221221
fv, join_keys, run_inference_for_features, config
222222
)
223223

224+
if fv.batch_source is None:
225+
return
226+
224227
entity_columns: List[Field] = fv.entity_columns if fv.entity_columns else []
225228
columns_to_exclude = {
226229
fv.batch_source.timestamp_field,

sdk/python/feast/infra/compute_engines/aws_lambda/lambda_engine.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ def _materialize_one(
193193

194194
offline_job = self.offline_store.pull_latest_from_table_or_query(
195195
config=self.repo_config,
196-
data_source=feature_view.batch_source,
196+
data_source=feature_view.batch_source, # type: ignore[arg-type]
197197
join_key_columns=join_key_columns,
198198
feature_name_columns=feature_name_columns,
199199
timestamp_field=timestamp_field,

0 commit comments

Comments
 (0)