Skip to content

Commit a774cf3

Browse files
committed
Update provider docstrings
Signed-off-by: Felix Wang <wangfelix98@gmail.com>
1 parent 32d2039 commit a774cf3

File tree

2 files changed

+124
-66
lines changed

2 files changed

+124
-66
lines changed

sdk/python/feast/infra/passthrough_provider.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737

3838
class PassthroughProvider(Provider):
3939
"""
40-
The Passthrough provider delegates all operations to the underlying online and offline stores.
40+
The passthrough provider delegates all operations to the underlying online and offline stores.
4141
"""
4242

4343
def __init__(self, config: RepoConfig):

sdk/python/feast/infra/provider.py

Lines changed: 123 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import abc
1+
from abc import ABC, abstractmethod
22
from datetime import datetime
33
from pathlib import Path
44
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
@@ -27,12 +27,18 @@
2727
}
2828

2929

30-
class Provider(abc.ABC):
31-
@abc.abstractmethod
30+
class Provider(ABC):
31+
"""
32+
A provider defines an implementation of a feature store object. It orchestrates the various
33+
components of a feature store, such as the offline store, online store, and materialization
34+
engine. It is configured through a RepoConfig object.
35+
"""
36+
37+
@abstractmethod
3238
def __init__(self, config: RepoConfig):
33-
...
39+
pass
3440

35-
@abc.abstractmethod
41+
@abstractmethod
3642
def update_infra(
3743
self,
3844
project: str,
@@ -43,22 +49,20 @@ def update_infra(
4349
partial: bool,
4450
):
4551
"""
46-
Reconcile cloud resources with the objects declared in the feature repo.
52+
Reconciles cloud resources with the specified set of Feast objects.
4753
4854
Args:
49-
project: Project to which tables belong
50-
tables_to_delete: Tables that were deleted from the feature repo, so provider needs to
51-
clean up the corresponding cloud resources.
52-
tables_to_keep: Tables that are still in the feature repo. Depending on implementation,
53-
provider may or may not need to update the corresponding resources.
54-
entities_to_delete: Entities that were deleted from the feature repo, so provider needs to
55-
clean up the corresponding cloud resources.
56-
entities_to_keep: Entities that are still in the feature repo. Depending on implementation,
57-
provider may or may not need to update the corresponding resources.
58-
partial: if true, then tables_to_delete and tables_to_keep are *not* exhaustive lists.
59-
There may be other tables that are not touched by this update.
55+
project: Feast project to which the objects belong.
56+
tables_to_delete: Feature views whose corresponding infrastructure should be deleted.
57+
tables_to_keep: Feature views whose corresponding infrastructure should not be deleted, and
58+
may need to be updated.
59+
entities_to_delete: Entities whose corresponding infrastructure should be deleted.
60+
entities_to_keep: Entities whose corresponding infrastructure should not be deleted, and
61+
may need to be updated.
62+
partial: If true, tables_to_delete and tables_to_keep are not exhaustive lists, so
63+
infrastructure corresponding to other feature views should not be touched.
6064
"""
61-
...
65+
pass
6266

6367
def plan_infra(
6468
self, config: RepoConfig, desired_registry_proto: RegistryProto
@@ -72,24 +76,24 @@ def plan_infra(
7276
"""
7377
return Infra()
7478

75-
@abc.abstractmethod
79+
@abstractmethod
7680
def teardown_infra(
7781
self,
7882
project: str,
7983
tables: Sequence[FeatureView],
8084
entities: Sequence[Entity],
8185
):
8286
"""
83-
Tear down all cloud resources for a repo.
87+
Tears down all cloud resources for the specified set of Feast objects.
8488
8589
Args:
86-
project: Feast project to which tables belong
87-
tables: Tables that are declared in the feature repo.
88-
entities: Entities that are declared in the feature repo.
90+
project: Feast project to which the objects belong.
91+
tables: Feature views whose corresponding infrastructure should be deleted.
92+
entities: Entities whose corresponding infrastructure should be deleted.
8993
"""
90-
...
94+
pass
9195

92-
@abc.abstractmethod
96+
@abstractmethod
9397
def online_write_batch(
9498
self,
9599
config: RepoConfig,
@@ -100,21 +104,20 @@ def online_write_batch(
100104
progress: Optional[Callable[[int], Any]],
101105
) -> None:
102106
"""
103-
Write a batch of feature rows to the online store. This is a low level interface, not
104-
expected to be used by the users directly.
107+
Writes a batch of feature rows to the online store.
105108
106109
If a tz-naive timestamp is passed to this method, it is assumed to be UTC.
107110
108111
Args:
109-
config: The RepoConfig for the current FeatureStore.
110-
table: Feast FeatureView
111-
data: a list of quadruplets containing Feature data. Each quadruplet contains an Entity Key,
112-
a dict containing feature values, an event timestamp for the row, and
113-
the created timestamp for the row if it exists.
114-
progress: Optional function to be called once every mini-batch of rows is written to
115-
the online store. Can be used to display progress.
112+
config: The config for the current feature store.
113+
table: Feature view to which these feature rows correspond.
114+
data: A list of quadruplets containing feature data. Each quadruplet contains an entity
115+
key, a dict containing feature values, an event timestamp for the row, and the created
116+
timestamp for the row if it exists.
117+
progress: Function to be called once a batch of rows is written to the online store, used
118+
to show progress.
116119
"""
117-
...
120+
pass
118121

119122
def ingest_df(
120123
self,
@@ -123,7 +126,12 @@ def ingest_df(
123126
df: pd.DataFrame,
124127
):
125128
"""
126-
Ingests a DataFrame directly into the online store
129+
Persists a dataframe to the online store.
130+
131+
Args:
132+
feature_view: The feature view to which the dataframe corresponds.
133+
entities: The entities that are referenced by the dataframe.
134+
df: The dataframe to be persisted.
127135
"""
128136
pass
129137

@@ -133,11 +141,15 @@ def ingest_df_to_offline_store(
133141
df: pyarrow.Table,
134142
):
135143
"""
136-
Ingests a DataFrame directly into the offline store
144+
Persists a dataframe to the offline store.
145+
146+
Args:
147+
feature_view: The feature view to which the dataframe corresponds.
148+
df: The dataframe to be persisted.
137149
"""
138150
pass
139151

140-
@abc.abstractmethod
152+
@abstractmethod
141153
def materialize_single_feature_view(
142154
self,
143155
config: RepoConfig,
@@ -148,9 +160,21 @@ def materialize_single_feature_view(
148160
project: str,
149161
tqdm_builder: Callable[[int], tqdm],
150162
) -> None:
163+
"""
164+
Writes latest feature values in the specified time range to the online store.
165+
166+
Args:
167+
config: The config for the current feature store.
168+
feature_view: The feature view to materialize.
169+
start_date: The start of the time range.
170+
end_date: The end of the time range.
171+
registry: The registry for the current feature store.
172+
project: Feast project to which the objects belong.
173+
tqdm_builder: A function to monitor the progress of materialization.
174+
"""
151175
pass
152176

153-
@abc.abstractmethod
177+
@abstractmethod
154178
def get_historical_features(
155179
self,
156180
config: RepoConfig,
@@ -161,9 +185,28 @@ def get_historical_features(
161185
project: str,
162186
full_feature_names: bool,
163187
) -> RetrievalJob:
188+
"""
189+
Retrieves the point-in-time correct historical feature values for the specified entity rows.
190+
191+
Args:
192+
config: The config for the current feature store.
193+
feature_views: A list containing all feature views that are referenced in the entity rows.
194+
feature_refs: The features to be retrieved.
195+
entity_df: A collection of rows containing all entity columns on which features need to be joined,
196+
as well as the timestamp column used for point-in-time joins. Either a pandas dataframe or a
197+
SQL query can be provided.
198+
registry: The registry for the current feature store.
199+
project: Feast project to which the feature views belong.
200+
full_feature_names: If True, feature names will be prefixed with the corresponding feature view name,
201+
changing them from the format "feature" to "feature_view__feature" (e.g. "daily_transactions"
202+
changes to "customer_fv__daily_transactions").
203+
204+
Returns:
205+
A RetrievalJob that can be executed to get the features.
206+
"""
164207
pass
165208

166-
@abc.abstractmethod
209+
@abstractmethod
167210
def online_read(
168211
self,
169212
config: RepoConfig,
@@ -172,32 +215,38 @@ def online_read(
172215
requested_features: List[str] = None,
173216
) -> List[Tuple[Optional[datetime], Optional[Dict[str, ValueProto]]]]:
174217
"""
175-
Read feature values given an Entity Key. This is a low level interface, not
176-
expected to be used by the users directly.
218+
Reads feature values for the given entity keys.
219+
220+
Args:
221+
config: The config for the current feature store.
222+
table: The feature view whose feature values should be read.
223+
entity_keys: The list of entity keys for which feature values should be read.
224+
requested_features: The list of features that should be read.
177225
178226
Returns:
179-
Data is returned as a list, one item per entity key. Each item in the list is a tuple
180-
of event_ts for the row, and the feature data as a dict from feature names to values.
181-
Values are returned as Value proto message.
227+
A list of the same length as entity_keys. Each item in the list is a tuple where the first
228+
item is the event timestamp for the row, and the second item is a dict mapping feature names
229+
to values, which are returned in proto format.
182230
"""
183-
...
231+
pass
184232

185-
@abc.abstractmethod
233+
@abstractmethod
186234
def retrieve_saved_dataset(
187235
self, config: RepoConfig, dataset: SavedDataset
188236
) -> RetrievalJob:
189237
"""
190-
Read saved dataset from offline store.
191-
All parameters for retrieval (like path, datetime boundaries, column names for both keys and features, etc)
192-
are determined from SavedDataset object.
238+
Reads a saved dataset.
193239
194-
Returns:
195-
RetrievalJob object, which is lazy wrapper for actual query performed under the hood.
240+
Args:
241+
config: The config for the current feature store.
242+
dataset: A SavedDataset object containing all parameters necessary for retrieving the dataset.
196243
244+
Returns:
245+
A RetrievalJob that can be executed to get the saved dataset.
197246
"""
198-
...
247+
pass
199248

200-
@abc.abstractmethod
249+
@abstractmethod
201250
def write_feature_service_logs(
202251
self,
203252
feature_service: FeatureService,
@@ -206,16 +255,20 @@ def write_feature_service_logs(
206255
registry: BaseRegistry,
207256
):
208257
"""
209-
Write features and entities logged by a feature server to an offline store.
258+
Writes features and entities logged by a feature server to the offline store.
210259
211-
Schema of logs table is being inferred from the provided feature service.
212-
Only feature services with configured logging are accepted.
260+
The schema of the logs table is inferred from the specified feature service. Only feature
261+
services with configured logging are accepted.
213262
214-
Logs dataset can be passed as Arrow Table or path to parquet directory.
263+
Args:
264+
feature_service: The feature service whose logs are being written.
265+
logs: The logs, either as an arrow table or as a path to a parquet directory.
266+
config: The config for the current feature store.
267+
registry: The registry for the current feature store.
215268
"""
216-
...
269+
pass
217270

218-
@abc.abstractmethod
271+
@abstractmethod
219272
def retrieve_feature_service_logs(
220273
self,
221274
feature_service: FeatureService,
@@ -225,14 +278,19 @@ def retrieve_feature_service_logs(
225278
registry: BaseRegistry,
226279
) -> RetrievalJob:
227280
"""
228-
Read logged features from an offline store for a given time window [from, to).
229-
Target table is determined based on logging configuration from the feature service.
281+
Reads logged features for the specified time window [start_date, end_date).
230282
231-
Returns:
232-
RetrievalJob object, which wraps the query to the offline store.
283+
Args:
284+
feature_service: The feature service whose logs should be retrieved.
285+
start_date: The start of the window.
286+
end_date: The end of the window.
287+
config: The config for the current feature store.
288+
registry: The registry for the current feature store.
233289
290+
Returns:
291+
A RetrievalJob that can be executed to get the feature service logs.
234292
"""
235-
...
293+
pass
236294

237295
def get_feature_server_endpoint(self) -> Optional[str]:
238296
"""Returns endpoint for the feature server, if it exists."""

0 commit comments

Comments
 (0)