Skip to content

Commit a31545b

Browse files
woop and oavdeev authored
Add historical retrieval for BigQuery and Parquet (feast-dev#1389)
* Add ParquetSource Signed-off-by: Willem Pienaar <git@willem.co>
* Add Parquet source, TTL validation, and fix BQ source conditional Signed-off-by: Willem Pienaar <git@willem.co>
* Add partial Parquet Historical retrieval Signed-off-by: Willem Pienaar <git@willem.co>
* Reformat and lint Signed-off-by: Willem Pienaar <git@willem.co>
* Remove feature_views_to_query from get_historical_features Signed-off-by: Willem Pienaar <git@willem.co>
* Clean up get_hist_features test Signed-off-by: Willem Pienaar <git@willem.co>
* Added point-in-time query for BigQueryOfflineStore Signed-off-by: Willem Pienaar <git@willem.co>
* Fix ParquetSource tests Signed-off-by: Willem Pienaar <git@willem.co>
* Clean up and refactor offline_store.py functions Signed-off-by: Willem Pienaar <git@willem.co>
* Relax Python dependencies and fix broken TTL Signed-off-by: Willem Pienaar <git@willem.co>
* Fix time zones in point-in-time join Signed-off-by: Willem Pienaar <git@willem.co>
* Reformat using black Signed-off-by: Willem Pienaar <git@willem.co>
* Move get_historical_features into feature_store and get mypy to pass Signed-off-by: Willem Pienaar <git@willem.co>
* Replace ParquetSource with FileSource Signed-off-by: Willem Pienaar <git@willem.co>
* Fix feature request building Signed-off-by: Willem Pienaar <git@willem.co>
* Clean up context building code Signed-off-by: Willem Pienaar <git@willem.co>
* Remove RegistryState Class Signed-off-by: Willem Pienaar <git@willem.co>
* Fix tests writing to disk Signed-off-by: Willem Pienaar <git@willem.co>
* Skip BigQuery test during unit tests Signed-off-by: Willem Pienaar <git@willem.co>
* Revert documentation changes Signed-off-by: Willem Pienaar <git@willem.co>
* Add documentation for historical retrieval and apply Signed-off-by: Willem Pienaar <git@willem.co>
* Add support for loading entity dataframes Signed-off-by: Willem Pienaar <git@willem.co>
* Fix typo in query context Signed-off-by: Willem Pienaar <git@willem.co>
* Remove hardcoding of created timestamp Signed-off-by: Willem Pienaar <git@willem.co>
* Set created timestamp column in BigQuery test Signed-off-by: Willem Pienaar <git@willem.co>
* Revert broken documentation and formatting Signed-off-by: Willem Pienaar <git@willem.co>
* Set context query to be immutable Co-authored-by: Oleg Avdeev <oleg.v.avdeev@gmail.com> Signed-off-by: Willem Pienaar <git@willem.co>
* Add comments based on PR review Signed-off-by: Willem Pienaar <git@willem.co>
* Clarify TODO comment for entity dataframe time ranges Co-authored-by: Oleg Avdeev <oleg.v.avdeev@gmail.com> Signed-off-by: Willem Pienaar <git@willem.co>

Co-authored-by: Oleg Avdeev <oleg.v.avdeev@gmail.com>
1 parent 0da112b commit a31545b

9 files changed

Lines changed: 1195 additions & 33 deletions

File tree

sdk/python/feast/data_format.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ def from_proto(cls, proto):
4343
fmt = proto.WhichOneof("format")
4444
if fmt == "parquet_format":
4545
return ParquetFormat()
46+
if fmt is None:
47+
return None
4648
raise NotImplementedError(f"FileFormat is unsupported: {fmt}")
4749

4850
def __str__(self):

sdk/python/feast/data_source.py

Lines changed: 78 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ class FileOptions:
3838
"""
3939

4040
def __init__(
41-
self, file_format: FileFormat, file_url: str,
41+
self, file_format: Optional[FileFormat], file_url: Optional[str],
4242
):
4343
self._file_format = file_format
4444
self._file_url = file_url
@@ -97,7 +97,10 @@ def to_proto(self) -> DataSourceProto.FileOptions:
9797
"""
9898

9999
file_options_proto = DataSourceProto.FileOptions(
100-
file_format=self.file_format.to_proto(), file_url=self.file_url,
100+
file_format=(
101+
None if self.file_format is None else self.file_format.to_proto()
102+
),
103+
file_url=self.file_url,
101104
)
102105

103106
return file_options_proto
@@ -108,10 +111,23 @@ class BigQueryOptions:
108111
DataSource BigQuery options used to source features from BigQuery query
109112
"""
110113

111-
def __init__(
112-
self, table_ref: str,
113-
):
114+
def __init__(self, table_ref: Optional[str], query: Optional[str]):
114115
self._table_ref = table_ref
116+
self._query = query
117+
118+
@property
119+
def query(self):
120+
"""
121+
Returns the BigQuery SQL query referenced by this source
122+
"""
123+
return self._query
124+
125+
@query.setter
126+
def query(self, query):
127+
"""
128+
Sets the BigQuery SQL query referenced by this source
129+
"""
130+
self._query = query
115131

116132
@property
117133
def table_ref(self):
@@ -139,7 +155,10 @@ def from_proto(cls, bigquery_options_proto: DataSourceProto.BigQueryOptions):
139155
Returns a BigQueryOptions object based on the bigquery_options protobuf
140156
"""
141157

142-
bigquery_options = cls(table_ref=bigquery_options_proto.table_ref,)
158+
bigquery_options = cls(
159+
table_ref=bigquery_options_proto.table_ref,
160+
query=bigquery_options_proto.query,
161+
)
143162

144163
return bigquery_options
145164

@@ -498,18 +517,48 @@ class FileSource(DataSource):
498517
def __init__(
499518
self,
500519
event_timestamp_column: str,
501-
file_format: FileFormat,
502-
file_url: str,
520+
file_url: Optional[str] = None,
521+
path: Optional[str] = None,
522+
file_format: FileFormat = None,
503523
created_timestamp_column: Optional[str] = "",
504524
field_mapping: Optional[Dict[str, str]] = None,
505525
date_partition_column: Optional[str] = "",
506526
):
527+
"""Create a FileSource from a file containing feature data. Only Parquet format supported.
528+
529+
Args:
530+
531+
path: File path to file containing feature data. Must contain an event_timestamp column, entity columns and
532+
feature columns.
533+
event_timestamp_column: Event timestamp column used for point in time joins of feature values.
534+
created_timestamp_column (optional): Timestamp column when row was created, used for deduplicating rows.
535+
file_url: [Deprecated] Please see path
536+
file_format (optional): Explicitly set the file format. Allows Feast to bypass inferring the file format.
537+
field_mapping: A dictionary mapping of column names in this data source to feature names in a feature table
538+
or view. Only used for feature columns, not entities or timestamp columns.
539+
540+
Examples:
541+
>>> FileSource(path="/data/my_features.parquet", event_timestamp_column="datetime")
542+
"""
507543
super().__init__(
508544
event_timestamp_column,
509545
created_timestamp_column,
510546
field_mapping,
511547
date_partition_column,
512548
)
549+
if path is None and file_url is None:
550+
raise ValueError(
551+
'No "path" argument provided. Please set "path" to the location of your file source.'
552+
)
553+
554+
if file_url is not None:
555+
from warnings import warn
556+
557+
warn(
558+
'Argument "file_url" is being deprecated. Please use the "path" argument.'
559+
)
560+
else:
561+
file_url = path
513562
self._file_options = FileOptions(file_format=file_format, file_url=file_url)
514563

515564
def __eq__(self, other):
@@ -537,6 +586,13 @@ def file_options(self, file_options):
537586
"""
538587
self._file_options = file_options
539588

589+
@property
590+
def path(self):
591+
"""
592+
Returns the file path of this feature data source
593+
"""
594+
return self._file_options.file_url
595+
540596
def to_proto(self) -> DataSourceProto:
541597
data_source_proto = DataSourceProto(
542598
type=DataSourceProto.BATCH_FILE,
@@ -555,18 +611,19 @@ class BigQuerySource(DataSource):
555611
def __init__(
556612
self,
557613
event_timestamp_column: str,
558-
table_ref: str,
614+
table_ref: Optional[str] = None,
559615
created_timestamp_column: Optional[str] = "",
560616
field_mapping: Optional[Dict[str, str]] = None,
561617
date_partition_column: Optional[str] = "",
618+
query: Optional[str] = None,
562619
):
563620
super().__init__(
564621
event_timestamp_column,
565622
created_timestamp_column,
566623
field_mapping,
567624
date_partition_column,
568625
)
569-
self._bigquery_options = BigQueryOptions(table_ref=table_ref,)
626+
self._bigquery_options = BigQueryOptions(table_ref=table_ref, query=query)
570627

571628
def __eq__(self, other):
572629
if not isinstance(other, BigQuerySource):
@@ -583,6 +640,10 @@ def __eq__(self, other):
583640
def table_ref(self):
584641
return self._bigquery_options.table_ref
585642

643+
@property
644+
def query(self):
645+
return self._bigquery_options.query
646+
586647
@property
587648
def bigquery_options(self):
588649
"""
@@ -610,6 +671,13 @@ def to_proto(self) -> DataSourceProto:
610671

611672
return data_source_proto
612673

674+
def get_table_query_string(self) -> str:
675+
"""Returns a string that can directly be used to reference this table in SQL"""
676+
if self.table_ref is not None:
677+
return f"`{self.table_ref}`"
678+
else:
679+
return f"({self.query})"
680+
613681

614682
class KafkaSource(DataSource):
615683
def __init__(

sdk/python/feast/feature_store.py

Lines changed: 122 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,14 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414
from pathlib import Path
15-
from typing import Optional
15+
from typing import List, Optional, Union
1616

17+
import pandas as pd
18+
19+
from feast.entity import Entity
20+
from feast.feature_view import FeatureView
1721
from feast.infra.provider import Provider, get_provider
22+
from feast.offline_store import RetrievalJob, get_offline_store_for_retrieval
1823
from feast.registry import Registry
1924
from feast.repo_config import (
2025
LocalOnlineStoreConfig,
@@ -55,3 +60,119 @@ def _get_provider(self) -> Provider:
5560

5661
def _get_registry(self) -> Registry:
5762
return Registry(self.config.metadata_store)
63+
64+
def apply(self, objects: List[Union[FeatureView, Entity]]):
65+
"""Register objects to metadata store and update related infrastructure.
66+
67+
The apply method registers one or more definitions (e.g., Entity, FeatureView) and registers or updates these
68+
objects in the Feast registry. Once the registry has been updated, the apply method will update related
69+
infrastructure (e.g., create tables in an online store) in order to reflect these new definitions. All
70+
operations are idempotent, meaning they can safely be rerun.
71+
72+
Args: objects (List[Union[FeatureView, Entity]]): A list of FeatureView or Entity objects that should be
73+
registered
74+
75+
Examples:
76+
Register a single Entity and FeatureView.
77+
>>> from feast.feature_store import FeatureStore
78+
>>> from feast import Entity, FeatureView, Feature, ValueType, FileSource
79+
>>> from datetime import timedelta
80+
>>>
81+
>>> fs = FeatureStore()
82+
>>> customer_entity = Entity(name="customer", value_type=ValueType.INT64, description="customer entity")
83+
>>> customer_feature_view = FeatureView(
84+
>>> name="customer_fv",
85+
>>> entities=["customer"],
86+
>>> features=[Feature(name="age", dtype=ValueType.INT64)],
87+
>>> input=FileSource(path="file.parquet", event_timestamp_column="timestamp"),
88+
>>> ttl=timedelta(days=1)
89+
>>> )
90+
>>> fs.apply([customer_entity, customer_feature_view])
91+
"""
92+
93+
# TODO: Add locking
94+
# TODO: Optimize by only making a single call (read/write)
95+
# TODO: Add infra update operation (currently we are just writing to registry)
96+
registry = self._get_registry()
97+
for ob in objects:
98+
if isinstance(ob, FeatureView):
99+
registry.apply_feature_view(ob, project=self.config.project)
100+
elif isinstance(ob, Entity):
101+
registry.apply_entity(ob, project=self.config.project)
102+
else:
103+
raise ValueError(
104+
f"Unknown object type ({type(ob)}) provided as part of apply() call"
105+
)
106+
107+
    def get_historical_features(
        self, entity_df: Union[pd.DataFrame, str], feature_refs: List[str],
    ) -> RetrievalJob:
        """Enrich an entity dataframe with historical feature values for either training or batch scoring.

        This method joins historical feature data from one or more feature views to an entity dataframe by using a time
        travel join.

        Each feature view is joined to the entity dataframe using all entities configured for the respective feature
        view. All configured entities must be available in the entity dataframe. Therefore, the entity dataframe must
        contain all entities found in all feature views, but the individual feature views can have different entities.

        Time travel is based on the configured TTL for each feature view. A shorter TTL will limit the
        amount of scanning that will be done in order to find feature data for a specific entity key. Setting a short
        TTL may result in null values being returned.

        Args:
            entity_df (Union[pd.DataFrame, str]): An entity dataframe is a collection of rows containing all entity
                columns (e.g., customer_id, driver_id) on which features need to be joined, as well as a event_timestamp
                column used to ensure point-in-time correctness. Either a Pandas DataFrame can be provided or a string
                SQL query. The query must be of a format supported by the configured offline store (e.g., BigQuery)
            feature_refs: A list of features that should be retrieved from the offline store. Feature references are of
                the format "feature_view:feature", e.g., "customer_fv:daily_transactions".

        Returns:
            RetrievalJob which can be used to materialize the results.

        Raises:
            ValueError: If a feature reference names a feature view that is not registered.

        Examples:
            Retrieve historical features using a BigQuery SQL entity dataframe
            >>> from feast.feature_store import FeatureStore
            >>>
            >>> fs = FeatureStore(config=RepoConfig(provider="gcp"))
            >>> retrieval_job = fs.get_historical_features(
            >>>     entity_df="SELECT event_timestamp, order_id, customer_id from gcp_project.my_ds.customer_orders",
            >>>     feature_refs=["customer:age", "customer:avg_orders_1d", "customer:avg_orders_7d"]
            >>> )
            >>> feature_data = retrieval_job.to_df()
            >>> model.fit(feature_data) # insert your modeling framework here.
        """

        # Resolve the requested feature references against the registered
        # feature views for this project (raises ValueError on unknown views).
        registry = self._get_registry()
        all_feature_views = registry.list_feature_views(project=self.config.project)
        feature_views = _get_requested_feature_views(feature_refs, all_feature_views)
        # Select an offline store implementation compatible with the sources of
        # the requested feature views, then delegate the point-in-time join to it.
        offline_store = get_offline_store_for_retrieval(feature_views)
        job = offline_store.get_historical_features(
            self.config, feature_views, feature_refs, entity_df
        )
        return job
155+
156+
157+
def _get_requested_feature_views(
    feature_refs: List[str], all_feature_views: List[FeatureView]
) -> List[FeatureView]:
    """Get list of feature views based on feature references.

    Args:
        feature_refs: Feature references of the form "feature_view:feature".
        all_feature_views: All registered feature views to resolve against.

    Returns:
        De-duplicated list of the feature views named in ``feature_refs``,
        in first-referenced order.

    Raises:
        ValueError: If a reference names a feature view that is not registered.
    """
    # Keyed by view name so repeated references to the same view collapse to a
    # single entry while preserving first-seen order (dicts preserve insertion
    # order).
    feature_views_dict: Dict[str, FeatureView] = {}
    for ref in feature_refs:
        view_name = ref.split(":")[0]
        for feature_view in all_feature_views:
            if feature_view.name == view_name:
                feature_views_dict[view_name] = feature_view
                break
        else:
            # for/else: no matching view found for this reference.
            raise ValueError(f"Could not find feature view from reference {ref}")

    return list(feature_views_dict.values())

sdk/python/feast/feature_view.py

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,23 +20,23 @@
2020
from feast.core.FeatureView_pb2 import FeatureView as FeatureViewProto
2121
from feast.core.FeatureView_pb2 import FeatureViewMeta as FeatureViewMetaProto
2222
from feast.core.FeatureView_pb2 import FeatureViewSpec as FeatureViewSpecProto
23-
from feast.data_source import BigQuerySource, DataSource
23+
from feast.data_source import BigQuerySource, DataSource, FileSource
2424
from feast.feature import Feature
2525
from feast.value_type import ValueType
2626

2727

2828
class FeatureView:
2929
"""
30-
A FeatureView defines a logical grouping of servable features.
30+
A FeatureView defines a logical grouping of serveable features.
3131
"""
3232

3333
name: str
3434
entities: List[str]
3535
features: List[Feature]
36-
tags: Dict[str, str]
37-
ttl: Optional[Duration]
36+
tags: Optional[Dict[str, str]]
37+
ttl: Optional[timedelta]
3838
online: bool
39-
input: BigQuerySource
39+
input: Union[BigQuerySource, FileSource]
4040

4141
created_timestamp: Optional[Timestamp] = None
4242
last_updated_timestamp: Optional[Timestamp] = None
@@ -46,10 +46,10 @@ def __init__(
4646
name: str,
4747
entities: List[str],
4848
features: List[Feature],
49-
tags: Dict[str, str],
5049
ttl: Optional[Union[Duration, timedelta]],
51-
online: bool,
52-
input: BigQuerySource,
50+
input: Union[BigQuerySource, FileSource],
51+
tags: Optional[Dict[str, str]] = None,
52+
online: bool = True,
5353
):
5454
cols = [entity for entity in entities] + [feat.name for feat in features]
5555
for col in cols:
@@ -62,10 +62,9 @@ def __init__(
6262
self.entities = entities
6363
self.features = features
6464
self.tags = tags
65-
if isinstance(ttl, timedelta):
66-
proto_ttl = Duration()
67-
proto_ttl.FromTimedelta(ttl)
68-
self.ttl = proto_ttl
65+
66+
if isinstance(ttl, Duration):
67+
self.ttl = timedelta(seconds=int(ttl.seconds))
6968
else:
7069
self.ttl = ttl
7170

@@ -97,12 +96,16 @@ def to_proto(self) -> FeatureViewProto:
9796
last_updated_timestamp=self.last_updated_timestamp,
9897
)
9998

99+
if self.ttl is not None:
100+
ttl_duration = Duration()
101+
ttl_duration.FromTimedelta(self.ttl)
102+
100103
spec = FeatureViewSpecProto(
101104
name=self.name,
102105
entities=self.entities,
103106
features=[feature.to_proto() for feature in self.features],
104107
tags=self.tags,
105-
ttl=self.ttl,
108+
ttl=(ttl_duration if ttl_duration is not None else None),
106109
online=self.online,
107110
input=self.input.to_proto(),
108111
)

0 commit comments

Comments (0)