1- import abc
1+ from abc import ABC , abstractmethod
22from datetime import datetime
33from pathlib import Path
44from typing import Any , Callable , Dict , List , Optional , Sequence , Tuple , Union
2727}
2828
2929
30- class Provider (abc .ABC ):
31- @abc .abstractmethod
30+ class Provider (ABC ):
31+ """
32+ A provider defines an implementation of a feature store object. It orchestrates the various
33+ components of a feature store, such as the offline store, online store, and materialization
34+ engine. It is configured through a RepoConfig object.
35+ """
36+
37+ @abstractmethod
3238 def __init__ (self , config : RepoConfig ):
33- ...
39+ pass
3440
35- @abc . abstractmethod
41+ @abstractmethod
3642 def update_infra (
3743 self ,
3844 project : str ,
@@ -43,22 +49,20 @@ def update_infra(
4349 partial : bool ,
4450 ):
4551 """
46- Reconcile cloud resources with the objects declared in the feature repo .
52+ Reconciles cloud resources with the specified set of Feast objects .
4753
4854 Args:
49- project: Project to which tables belong
50- tables_to_delete: Tables that were deleted from the feature repo, so provider needs to
51- clean up the corresponding cloud resources.
52- tables_to_keep: Tables that are still in the feature repo. Depending on implementation,
53- provider may or may not need to update the corresponding resources.
54- entities_to_delete: Entities that were deleted from the feature repo, so provider needs to
55- clean up the corresponding cloud resources.
56- entities_to_keep: Entities that are still in the feature repo. Depending on implementation,
57- provider may or may not need to update the corresponding resources.
58- partial: if true, then tables_to_delete and tables_to_keep are *not* exhaustive lists.
59- There may be other tables that are not touched by this update.
55+ project: Feast project to which the objects belong.
56+ tables_to_delete: Feature views whose corresponding infrastructure should be deleted.
57+ tables_to_keep: Feature views whose corresponding infrastructure should not be deleted, and
58+ may need to be updated.
59+ entities_to_delete: Entities whose corresponding infrastructure should be deleted.
60+ entities_to_keep: Entities whose corresponding infrastructure should not be deleted, and
61+ may need to be updated.
62+ partial: If true, tables_to_delete and tables_to_keep are not exhaustive lists, so
63+ infrastructure corresponding to other feature views should not be touched.
6064 """
61- ...
65+ pass
6266
6367 def plan_infra (
6468 self , config : RepoConfig , desired_registry_proto : RegistryProto
@@ -72,24 +76,24 @@ def plan_infra(
7276 """
7377 return Infra ()
7478
75- @abc . abstractmethod
79+ @abstractmethod
7680 def teardown_infra (
7781 self ,
7882 project : str ,
7983 tables : Sequence [FeatureView ],
8084 entities : Sequence [Entity ],
8185 ):
8286 """
83- Tear down all cloud resources for a repo .
87+ Tears down all cloud resources for the specified set of Feast objects .
8488
8589 Args:
86- project: Feast project to which tables belong
87- tables: Tables that are declared in the feature repo .
88- entities: Entities that are declared in the feature repo .
90+ project: Feast project to which the objects belong.
91+ tables: Feature views whose corresponding infrastructure should be deleted .
92+ entities: Entities whose corresponding infrastructure should be deleted .
8993 """
90- ...
94+ pass
9195
92- @abc . abstractmethod
96+ @abstractmethod
9397 def online_write_batch (
9498 self ,
9599 config : RepoConfig ,
@@ -100,21 +104,20 @@ def online_write_batch(
100104 progress : Optional [Callable [[int ], Any ]],
101105 ) -> None :
102106 """
103- Write a batch of feature rows to the online store. This is a low level interface, not
104- expected to be used by the users directly.
107+ Writes a batch of feature rows to the online store.
105108
106109 If a tz-naive timestamp is passed to this method, it is assumed to be UTC.
107110
108111 Args:
109- config: The RepoConfig for the current FeatureStore .
110- table: Feast FeatureView
111- data: a list of quadruplets containing Feature data. Each quadruplet contains an Entity Key,
112- a dict containing feature values, an event timestamp for the row, and
113- the created timestamp for the row if it exists.
114- progress: Optional function to be called once every mini- batch of rows is written to
115- the online store. Can be used to display progress.
112+ config: The config for the current feature store .
113+ table: Feature view to which these feature rows correspond.
114+ data: A list of quadruplets containing feature data. Each quadruplet contains an entity
115+ key, a dict containing feature values, an event timestamp for the row, and the created
116+ timestamp for the row if it exists.
117+ progress: Function to be called once a batch of rows is written to the online store, used
118+ to show progress.
116119 """
117- ...
120+ pass
118121
119122 def ingest_df (
120123 self ,
@@ -123,7 +126,12 @@ def ingest_df(
123126 df : pd .DataFrame ,
124127 ):
125128 """
126- Ingests a DataFrame directly into the online store
129+ Persists a dataframe to the online store.
130+
131+ Args:
132+ feature_view: The feature view to which the dataframe corresponds.
133+ entities: The entities that are referenced by the dataframe.
134+ df: The dataframe to be persisted.
127135 """
128136 pass
129137
@@ -133,11 +141,15 @@ def ingest_df_to_offline_store(
133141 df : pyarrow .Table ,
134142 ):
135143 """
136- Ingests a DataFrame directly into the offline store
144+ Persists a dataframe to the offline store.
145+
146+ Args:
147+ feature_view: The feature view to which the dataframe corresponds.
148+ df: The dataframe to be persisted.
137149 """
138150 pass
139151
140- @abc . abstractmethod
152+ @abstractmethod
141153 def materialize_single_feature_view (
142154 self ,
143155 config : RepoConfig ,
@@ -148,9 +160,21 @@ def materialize_single_feature_view(
148160 project : str ,
149161 tqdm_builder : Callable [[int ], tqdm ],
150162 ) -> None :
163+ """
164+ Writes latest feature values in the specified time range to the online store.
165+
166+ Args:
167+ config: The config for the current feature store.
168+ feature_view: The feature view to materialize.
169+ start_date: The start of the time range.
170+ end_date: The end of the time range.
171+ registry: The registry for the current feature store.
172+ project: Feast project to which the objects belong.
173+ tqdm_builder: A function to monitor the progress of materialization.
174+ """
151175 pass
152176
153- @abc . abstractmethod
177+ @abstractmethod
154178 def get_historical_features (
155179 self ,
156180 config : RepoConfig ,
@@ -161,9 +185,28 @@ def get_historical_features(
161185 project : str ,
162186 full_feature_names : bool ,
163187 ) -> RetrievalJob :
188+ """
189+ Retrieves the point-in-time correct historical feature values for the specified entity rows.
190+
191+ Args:
192+ config: The config for the current feature store.
193+ feature_views: A list containing all feature views that are referenced in the entity rows.
194+ feature_refs: The features to be retrieved.
195+ entity_df: A collection of rows containing all entity columns on which features need to be joined,
196+ as well as the timestamp column used for point-in-time joins. Either a pandas dataframe
197+ or a SQL query can be provided.
198+ registry: The registry for the current feature store.
199+ project: Feast project to which the feature views belong.
200+ full_feature_names: If True, feature names will be prefixed with the corresponding feature view name,
201+ changing them from the format "feature" to "feature_view__feature" (e.g. "daily_transactions"
202+ changes to "customer_fv__daily_transactions").
203+
204+ Returns:
205+ A RetrievalJob that can be executed to get the features.
206+ """
164207 pass
165208
166- @abc . abstractmethod
209+ @abstractmethod
167210 def online_read (
168211 self ,
169212 config : RepoConfig ,
@@ -172,32 +215,38 @@ def online_read(
172215 requested_features : List [str ] = None ,
173216 ) -> List [Tuple [Optional [datetime ], Optional [Dict [str , ValueProto ]]]]:
174217 """
175- Read feature values given an Entity Key. This is a low level interface, not
176- expected to be used by the users directly.
218+ Reads feature values for the given entity keys.
219+
220+ Args:
221+ config: The config for the current feature store.
222+ table: The feature view whose feature values should be read.
223+ entity_keys: The list of entity keys for which feature values should be read.
224+ requested_features: The list of features that should be read.
177225
178226 Returns:
179- Data is returned as a list, one item per entity key . Each item in the list is a tuple
180- of event_ts for the row, and the feature data as a dict from feature names to values.
181- Values are returned as Value proto message .
227+ A list of the same length as entity_keys . Each item in the list is a tuple where the first
228+ item is the event timestamp for the row, and the second item is a dict mapping feature names
229+ to values, which are returned in proto format .
182230 """
183- ...
231+ pass
184232
185- @abc . abstractmethod
233+ @abstractmethod
186234 def retrieve_saved_dataset (
187235 self , config : RepoConfig , dataset : SavedDataset
188236 ) -> RetrievalJob :
189237 """
190- Read saved dataset from offline store.
191- All parameters for retrieval (like path, datetime boundaries, column names for both keys and features, etc)
192- are determined from SavedDataset object.
238+ Reads a saved dataset.
193239
194- Returns:
195- RetrievalJob object, which is lazy wrapper for actual query performed under the hood.
240+ Args:
241+ config: The config for the current feature store.
242+ dataset: A SavedDataset object containing all parameters necessary for retrieving the dataset.
196243
244+ Returns:
245+ A RetrievalJob that can be executed to get the saved dataset.
197246 """
198- ...
247+ pass
199248
200- @abc . abstractmethod
249+ @abstractmethod
201250 def write_feature_service_logs (
202251 self ,
203252 feature_service : FeatureService ,
@@ -206,16 +255,20 @@ def write_feature_service_logs(
206255 registry : BaseRegistry ,
207256 ):
208257 """
209- Write features and entities logged by a feature server to an offline store.
258+ Writes features and entities logged by a feature server to the offline store.
210259
211- Schema of logs table is being inferred from the provided feature service.
212- Only feature services with configured logging are accepted.
260+ The schema of the logs table is inferred from the specified feature service. Only feature
261+ services with configured logging are accepted.
213262
214- Logs dataset can be passed as Arrow Table or path to parquet directory.
263+ Args:
264+ feature_service: The feature service to be logged.
265+ logs: The logs, either as an arrow table or as a path to a parquet directory.
266+ config: The config for the current feature store.
267+ registry: The registry for the current feature store.
215268 """
216- ...
269+ pass
217270
218- @abc . abstractmethod
271+ @abstractmethod
219272 def retrieve_feature_service_logs (
220273 self ,
221274 feature_service : FeatureService ,
@@ -225,14 +278,19 @@ def retrieve_feature_service_logs(
225278 registry : BaseRegistry ,
226279 ) -> RetrievalJob :
227280 """
228- Read logged features from an offline store for a given time window [from, to).
229- Target table is determined based on logging configuration from the feature service.
281+ Reads logged features for the specified time window.
230282
231- Returns:
232- RetrievalJob object, which wraps the query to the offline store.
283+ Args:
284+ feature_service: The feature service whose logs should be retrieved.
285+ start_date: The start of the window.
286+ end_date: The end of the window.
287+ config: The config for the current feature store.
288+ registry: The registry for the current feature store.
233289
290+ Returns:
291+ A RetrievalJob that can be executed to get the feature service logs.
234292 """
235- ...
293+ pass
236294
237295 def get_feature_server_endpoint (self ) -> Optional [str ]:
238296 """Returns endpoint for the feature server, if it exists."""
0 commit comments