Skip to content

Commit 61b2f6a

Browse files
chore: Delete unused code in data source (feast-dev#3009)
* Fix data sources
  Signed-off-by: Felix Wang <wangfelix98@gmail.com>
* Fix tests
  Signed-off-by: Felix Wang <wangfelix98@gmail.com>
* Add back accidentally deleted test
  Signed-off-by: Felix Wang <wangfelix98@gmail.com>
* Fully deprecate date partition column
  Signed-off-by: Felix Wang <wangfelix98@gmail.com>
* Fix snowflake source error message
  Signed-off-by: Felix Wang <wangfelix98@gmail.com>
* Remove unused SourceType class
  Signed-off-by: Felix Wang <wangfelix98@gmail.com>
* Fix __eq__ checks
  Signed-off-by: Felix Wang <wangfelix98@gmail.com>
* Enforce kwargs
  Signed-off-by: Felix Wang <wangfelix98@gmail.com>
* Format
  Signed-off-by: Felix Wang <wangfelix98@gmail.com>
* Add back proto
  Signed-off-by: Felix Wang <wangfelix98@gmail.com>
1 parent e6745f9 commit 61b2f6a

File tree

18 files changed

+163
-621
lines changed

18 files changed

+163
-621
lines changed

sdk/python/feast/__init__.py

Lines changed: 1 addition & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -10,13 +10,7 @@
1010
from feast.infra.offline_stores.snowflake_source import SnowflakeSource
1111

1212
from .batch_feature_view import BatchFeatureView
13-
from .data_source import (
14-
KafkaSource,
15-
KinesisSource,
16-
PushSource,
17-
RequestSource,
18-
SourceType,
19-
)
13+
from .data_source import KafkaSource, KinesisSource, PushSource, RequestSource
2014
from .entity import Entity
2115
from .feature import Feature
2216
from .feature_service import FeatureService
@@ -47,7 +41,6 @@
4741
"FeatureView",
4842
"OnDemandFeatureView",
4943
"RepoConfig",
50-
"SourceType",
5144
"StreamFeatureView",
5245
"ValueType",
5346
"BigQuerySource",

sdk/python/feast/data_source.py

Lines changed: 63 additions & 292 deletions
Large diffs are not rendered by default.

sdk/python/feast/infra/offline_stores/bigquery_source.py

Lines changed: 14 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,10 @@
1-
import warnings
21
from typing import Callable, Dict, Iterable, List, Optional, Tuple
32

43
from typeguard import typechecked
54

65
from feast import type_map
76
from feast.data_source import DataSource
8-
from feast.errors import DataSourceNotFoundException
7+
from feast.errors import DataSourceNoNameException, DataSourceNotFoundException
98
from feast.feature_logging import LoggingDestination
109
from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto
1110
from feast.protos.feast.core.FeatureService_pb2 import (
@@ -24,36 +23,31 @@ class BigQuerySource(DataSource):
2423
def __init__(
2524
self,
2625
*,
27-
event_timestamp_column: Optional[str] = "",
26+
name: Optional[str] = None,
27+
timestamp_field: Optional[str] = None,
2828
table: Optional[str] = None,
2929
created_timestamp_column: Optional[str] = "",
3030
field_mapping: Optional[Dict[str, str]] = None,
31-
date_partition_column: Optional[str] = None,
3231
query: Optional[str] = None,
33-
name: Optional[str] = None,
3432
description: Optional[str] = "",
3533
tags: Optional[Dict[str, str]] = None,
3634
owner: Optional[str] = "",
37-
timestamp_field: Optional[str] = None,
3835
):
3936
"""Create a BigQuerySource from an existing table or query.
4037
4138
Args:
39+
name (optional): Name for the source. Defaults to the table if not specified.
40+
timestamp_field (optional): Event timestamp field used for point in time
41+
joins of feature values.
4242
table (optional): The BigQuery table where features can be found.
43-
event_timestamp_column (optional): (Deprecated in favor of timestamp_field) Event
44-
timestamp column used for point in time joins of feature values.
4543
created_timestamp_column (optional): Timestamp column when row was created, used for deduplicating rows.
46-
field_mapping: A dictionary mapping of column names in this data source to feature names in a feature table
44+
field_mapping (optional): A dictionary mapping of column names in this data source to feature names in a feature table
4745
or view. Only used for feature columns, not entities or timestamp columns.
48-
date_partition_column (deprecated): Timestamp column used for partitioning.
4946
query (optional): SQL query to execute to generate data for this data source.
50-
name (optional): Name for the source. Defaults to the table if not specified.
5147
description (optional): A human-readable description.
5248
tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
5349
owner (optional): The owner of the bigquery source, typically the email of the primary
5450
maintainer.
55-
timestamp_field (optional): Event timestamp field used for point in time
56-
joins of feature values.
5751
Example:
5852
>>> from feast import BigQuerySource
5953
>>> my_bigquery_source = BigQuerySource(table="gcp_project:bq_dataset.bq_table")
@@ -63,37 +57,20 @@ def __init__(
6357

6458
self.bigquery_options = BigQueryOptions(table=table, query=query)
6559

66-
if date_partition_column:
67-
warnings.warn(
68-
(
69-
"The argument 'date_partition_column' is not supported for BigQuery sources. "
70-
"It will be removed in Feast 0.24+"
71-
),
72-
DeprecationWarning,
73-
)
74-
75-
# If no name, use the table as the default name
76-
_name = name
77-
if not _name:
78-
if table:
79-
_name = table
80-
else:
81-
warnings.warn(
82-
(
83-
f"Starting in Feast 0.24, Feast will require either a name for a data source (if using query) or `table`: {self.query}"
84-
),
85-
DeprecationWarning,
86-
)
60+
# If no name, use the table as the default name.
61+
if name is None and table is None:
62+
raise DataSourceNoNameException()
63+
name = name or table
64+
assert name
8765

8866
super().__init__(
89-
name=_name if _name else "",
90-
event_timestamp_column=event_timestamp_column,
67+
name=name,
68+
timestamp_field=timestamp_field,
9169
created_timestamp_column=created_timestamp_column,
9270
field_mapping=field_mapping,
9371
description=description,
9472
tags=tags,
9573
owner=owner,
96-
timestamp_field=timestamp_field,
9774
)
9875

9976
# Note: Python requires redefining hash in child classes that override __eq__

sdk/python/feast/infra/offline_stores/contrib/postgres_offline_store/postgres_source.py

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,6 @@ def __init__(
1717
timestamp_field: Optional[str] = "",
1818
created_timestamp_column: Optional[str] = "",
1919
field_mapping: Optional[Dict[str, str]] = None,
20-
date_partition_column: Optional[str] = "",
2120
description: Optional[str] = "",
2221
tags: Optional[Dict[str, str]] = None,
2322
owner: Optional[str] = "",
@@ -29,7 +28,6 @@ def __init__(
2928
timestamp_field=timestamp_field,
3029
created_timestamp_column=created_timestamp_column,
3130
field_mapping=field_mapping,
32-
date_partition_column=date_partition_column,
3331
description=description,
3432
tags=tags,
3533
owner=owner,
@@ -45,7 +43,8 @@ def __eq__(self, other):
4543
)
4644

4745
return (
48-
self._postgres_options._query == other._postgres_options._query
46+
super().__eq__(other)
47+
and self._postgres_options._query == other._postgres_options._query
4948
and self.timestamp_field == other.timestamp_field
5049
and self.created_timestamp_column == other.created_timestamp_column
5150
and self.field_mapping == other.field_mapping
@@ -62,7 +61,6 @@ def from_proto(data_source: DataSourceProto):
6261
field_mapping=dict(data_source.field_mapping),
6362
timestamp_field=data_source.timestamp_field,
6463
created_timestamp_column=data_source.created_timestamp_column,
65-
date_partition_column=data_source.date_partition_column,
6664
description=data_source.description,
6765
tags=dict(data_source.tags),
6866
owner=data_source.owner,
@@ -82,7 +80,6 @@ def to_proto(self) -> DataSourceProto:
8280

8381
data_source_proto.timestamp_field = self.timestamp_field
8482
data_source_proto.created_timestamp_column = self.created_timestamp_column
85-
data_source_proto.date_partition_column = self.date_partition_column
8683

8784
return data_source_proto
8885

sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py

Lines changed: 9 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -41,44 +41,33 @@ def __init__(
4141
event_timestamp_column: Optional[str] = None,
4242
created_timestamp_column: Optional[str] = None,
4343
field_mapping: Optional[Dict[str, str]] = None,
44-
date_partition_column: Optional[str] = None,
4544
description: Optional[str] = "",
4645
tags: Optional[Dict[str, str]] = None,
4746
owner: Optional[str] = "",
4847
timestamp_field: Optional[str] = None,
4948
):
50-
# If no name, use the table_ref as the default name
51-
_name = name
52-
if not _name:
53-
if table:
54-
_name = table
55-
else:
56-
raise DataSourceNoNameException()
57-
58-
if date_partition_column:
59-
warnings.warn(
60-
(
61-
"The argument 'date_partition_column' is not supported for Spark sources."
62-
"It will be removed in Feast 0.24+"
63-
),
64-
DeprecationWarning,
65-
)
49+
# If no name, use the table as the default name.
50+
if name is None and table is None:
51+
raise DataSourceNoNameException()
52+
name = name or table
53+
assert name
6654

6755
super().__init__(
68-
name=_name,
69-
event_timestamp_column=event_timestamp_column,
56+
name=name,
57+
timestamp_field=timestamp_field,
7058
created_timestamp_column=created_timestamp_column,
7159
field_mapping=field_mapping,
7260
description=description,
7361
tags=tags,
7462
owner=owner,
75-
timestamp_field=timestamp_field,
7663
)
64+
7765
warnings.warn(
7866
"The spark data source API is an experimental feature in alpha development. "
7967
"This API is unstable and it could and most probably will be changed in the future.",
8068
RuntimeWarning,
8169
)
70+
8271
self.spark_options = SparkOptions(
8372
table=table,
8473
query=query,

sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/tests/data_source.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -93,7 +93,6 @@ def create_data_source(
9393
table=destination_name,
9494
timestamp_field=timestamp_field,
9595
created_timestamp_column=created_timestamp_column,
96-
date_partition_column="",
9796
field_mapping=field_mapping or {"ts_1": "ts"},
9897
)
9998

sdk/python/feast/infra/offline_stores/contrib/trino_offline_store/trino_source.py

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22

33
from feast import ValueType
44
from feast.data_source import DataSource
5+
from feast.errors import DataSourceNoNameException
56
from feast.infra.offline_stores.contrib.trino_offline_store.trino_queries import Trino
67
from feast.infra.offline_stores.contrib.trino_offline_store.trino_type_map import (
78
trino_to_feast_value_type,
@@ -86,26 +87,30 @@ class TrinoSource(DataSource):
8687
def __init__(
8788
self,
8889
*,
89-
event_timestamp_column: Optional[str] = "",
90+
name: Optional[str] = None,
91+
timestamp_field: Optional[str] = None,
9092
table: Optional[str] = None,
9193
created_timestamp_column: Optional[str] = "",
9294
field_mapping: Optional[Dict[str, str]] = None,
9395
query: Optional[str] = None,
94-
name: Optional[str] = None,
9596
description: Optional[str] = "",
9697
tags: Optional[Dict[str, str]] = None,
9798
owner: Optional[str] = "",
98-
timestamp_field: Optional[str] = None,
9999
):
100+
# If no name, use the table as the default name.
101+
if name is None and table is None:
102+
raise DataSourceNoNameException()
103+
name = name or table
104+
assert name
105+
100106
super().__init__(
101107
name=name if name else "",
102-
event_timestamp_column=event_timestamp_column,
108+
timestamp_field=timestamp_field,
103109
created_timestamp_column=created_timestamp_column,
104110
field_mapping=field_mapping,
105111
description=description,
106112
tags=tags,
107113
owner=owner,
108-
timestamp_field=timestamp_field,
109114
)
110115

111116
self._trino_options = TrinoOptions(table=table, query=query)
@@ -120,7 +125,8 @@ def __eq__(self, other):
120125
)
121126

122127
return (
123-
self.name == other.name
128+
super().__eq__(other)
129+
and self.name == other.name
124130
and self.trino_options.table == other.trino_options.table
125131
and self.trino_options.query == other.trino_options.query
126132
and self.timestamp_field == other.timestamp_field
@@ -183,7 +189,6 @@ def to_proto(self) -> DataSourceProto:
183189

184190
data_source_proto.timestamp_field = self.timestamp_field
185191
data_source_proto.created_timestamp_column = self.created_timestamp_column
186-
data_source_proto.date_partition_column = self.date_partition_column
187192

188193
return data_source_proto
189194

sdk/python/feast/infra/offline_stores/file_source.py

Lines changed: 8 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,3 @@
1-
import warnings
21
from typing import Callable, Dict, Iterable, List, Optional, Tuple
32

43
from pyarrow._fs import FileSystem
@@ -26,35 +25,33 @@
2625
class FileSource(DataSource):
2726
def __init__(
2827
self,
29-
*args,
30-
path: Optional[str] = None,
28+
*,
29+
path: str,
30+
name: Optional[str] = "",
3131
event_timestamp_column: Optional[str] = "",
3232
file_format: Optional[FileFormat] = None,
3333
created_timestamp_column: Optional[str] = "",
3434
field_mapping: Optional[Dict[str, str]] = None,
35-
date_partition_column: Optional[str] = "",
3635
s3_endpoint_override: Optional[str] = None,
37-
name: Optional[str] = "",
3836
description: Optional[str] = "",
3937
tags: Optional[Dict[str, str]] = None,
4038
owner: Optional[str] = "",
4139
timestamp_field: Optional[str] = "",
4240
):
43-
"""Create a FileSource from a file containing feature data. Only Parquet format supported.
41+
"""
42+
Creates a FileSource object.
4443
4544
Args:
46-
4745
path: File path to file containing feature data. Must contain an event_timestamp column, entity columns and
4846
feature columns.
47+
name (optional): Name for the file source. Defaults to the path.
4948
event_timestamp_column (optional): (Deprecated in favor of timestamp_field) Event
5049
timestamp column used for point in time joins of feature values.
5150
created_timestamp_column (optional): Timestamp column when row was created, used for deduplicating rows.
5251
file_format (optional): Explicitly set the file format. Allows Feast to bypass inferring the file format.
5352
field_mapping: A dictionary mapping of column names in this data source to feature names in a feature table
5453
or view. Only used for feature columns, not entities or timestamp columns.
55-
date_partition_column (optional): Timestamp column used for partitioning.
5654
s3_endpoint_override (optional): Overrides AWS S3 endpoint with custom S3 storage
57-
name (optional): Name for the file source. Defaults to the path.
5855
description (optional): A human-readable description.
5956
tags (optional): A dictionary of key-value pairs to store arbitrary metadata.
6057
owner (optional): The owner of the file source, typically the email of the primary
@@ -66,52 +63,20 @@ def __init__(
6663
>>> from feast import FileSource
6764
>>> file_source = FileSource(path="my_features.parquet", timestamp_field="event_timestamp")
6865
"""
69-
positional_attributes = ["path"]
70-
_path = path
71-
if args:
72-
if args:
73-
warnings.warn(
74-
(
75-
"File Source parameters should be specified as a keyword argument instead of a positional arg."
76-
"Feast 0.24+ will not support positional arguments to construct File sources"
77-
),
78-
DeprecationWarning,
79-
)
80-
if len(args) > len(positional_attributes):
81-
raise ValueError(
82-
f"Only {', '.join(positional_attributes)} are allowed as positional args when defining "
83-
f"File sources, for backwards compatibility."
84-
)
85-
if len(args) >= 1:
86-
_path = args[0]
87-
if _path is None:
88-
raise ValueError(
89-
'No "path" argument provided. Please set "path" to the location of your file source.'
90-
)
9166
self.file_options = FileOptions(
9267
file_format=file_format,
93-
uri=_path,
68+
uri=path,
9469
s3_endpoint_override=s3_endpoint_override,
9570
)
9671

97-
if date_partition_column:
98-
warnings.warn(
99-
(
100-
"The argument 'date_partition_column' is not supported for File sources."
101-
"It will be removed in Feast 0.24+"
102-
),
103-
DeprecationWarning,
104-
)
105-
10672
super().__init__(
10773
name=name if name else path,
108-
event_timestamp_column=event_timestamp_column,
74+
timestamp_field=timestamp_field,
10975
created_timestamp_column=created_timestamp_column,
11076
field_mapping=field_mapping,
11177
description=description,
11278
tags=tags,
11379
owner=owner,
114-
timestamp_field=timestamp_field,
11580
)
11681

11782
# Note: Python requires redefining hash in child classes that override __eq__

0 commit comments

Comments
 (0)