Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
e1e210d
Broken state
kevjumba Aug 5, 2022
011d1e0
working state
kevjumba Aug 10, 2022
a6a2fce
Fix the lint issues
kevjumba Aug 10, 2022
57b63bb
Semi working state
kevjumba Aug 10, 2022
ae7ed8a
Fix
kevjumba Aug 10, 2022
421645b
Remove print
kevjumba Aug 10, 2022
07fece5
Fix lint
kevjumba Aug 11, 2022
4062031
Run build-sphinx
kevjumba Aug 11, 2022
cb39329
Add tutorials
kevjumba Aug 11, 2022
554ca1a
Fix
kevjumba Aug 11, 2022
4a969e7
Fix?
kevjumba Aug 11, 2022
116320a
Fix lint
kevjumba Aug 11, 2022
c0b16ef
Fix
kevjumba Aug 11, 2022
44d09d0
Fix lint
kevjumba Aug 12, 2022
b6f0a79
Begin configuring tests
adchia Aug 15, 2022
2b2ff40
Fix
kevjumba Aug 15, 2022
4616366
Working version
kevjumba Aug 16, 2022
c7d9852
Fix
kevjumba Aug 17, 2022
d2e290b
Fix
kevjumba Aug 17, 2022
a726a9a
Fix
kevjumba Aug 17, 2022
32992e3
Fix lint
kevjumba Aug 17, 2022
ebb934b
Fix lint
kevjumba Aug 17, 2022
e456acb
Fix
kevjumba Aug 17, 2022
45f479f
Fix lint
kevjumba Aug 17, 2022
4b8c4a2
Fix
kevjumba Aug 17, 2022
b1bf602
Fix
kevjumba Aug 17, 2022
4586f00
Fix azure
kevjumba Aug 17, 2022
3b88c0b
Fix
kevjumba Aug 17, 2022
9ae8ee3
Fix
kevjumba Aug 17, 2022
1b12e4a
Fix lint and address issues
kevjumba Aug 18, 2022
0ca5048
Fix integration tests
kevjumba Aug 18, 2022
883f314
Fix
kevjumba Aug 18, 2022
ccf8716
Fix lint and address issues
kevjumba Aug 18, 2022
f05288e
Fix
kevjumba Aug 18, 2022
ee30e73
Fix
kevjumba Aug 18, 2022
ab17db9
Fix
kevjumba Aug 18, 2022
be162f5
Revert
kevjumba Aug 18, 2022
f5aa476
Fix
kevjumba Aug 18, 2022
4423dfa
Fix
kevjumba Aug 18, 2022
5806507
Fix
kevjumba Aug 18, 2022
7a4d055
Fix lint
kevjumba Aug 19, 2022
78b74b1
Fix
kevjumba Aug 19, 2022
a9e8119
Fix lint
kevjumba Aug 19, 2022
1341e3e
Fix pyarrow
kevjumba Aug 19, 2022
3d42093
Fix lint
kevjumba Aug 19, 2022
1c591f0
add requirements files
adchia Aug 19, 2022
b4da607
fix name of docs
adchia Aug 19, 2022
c3a0423
fix offline store readme
adchia Aug 19, 2022
576b57e
fix offline store readme
adchia Aug 19, 2022
69940ac
fix
adchia Aug 19, 2022
516ff76
fix
adchia Aug 19, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Working version
Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
  • Loading branch information
kevjumba committed Aug 19, 2022
commit 4616366bcef7bc074d8368afeafbeb737f954899
2 changes: 1 addition & 1 deletion sdk/python/feast/infra/contrib/azure_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
class AzureProvider(Provider):
def __init__(self, config: RepoConfig):
warnings.warn(
"The azure provider is an experimental feature in alpha development. "
"The azure provider is an experimental feature in alpha development. "
"Some functionality may still be unstable so functionality can change in the future.",
RuntimeWarning,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from sqlalchemy import create_engine
from sqlalchemy.engine import Engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.dialects.mssql import DATETIME2

from feast import FileSource, errors
from feast.data_source import DataSource
Expand Down Expand Up @@ -185,6 +186,7 @@ def get_historical_features(
entity_df_event_timestamp_col = (
offline_utils.infer_event_timestamp_from_entity_df(table_schema)
)

_assert_expected_columns_in_sqlserver(
expected_join_keys,
entity_df_event_timestamp_col,
Expand Down Expand Up @@ -407,7 +409,7 @@ def _upload_entity_df_into_sqlserver_and_get_entity_schema(
raise ValueError(
f"The entity dataframe you have provided must be a SQL Server SQL query,"
f" or a Pandas dataframe. But we found: {type(entity_df)} "
)
)

return entity_schema

Expand Down Expand Up @@ -601,21 +603,16 @@ def _get_entity_df_event_timestamp_range(
The entity_dataframe dataset being our source of truth here.
*/

SELECT entity_dataframe.*
{% for featureview in featureviews %}
{% for feature in featureview.features %}
,{% if full_feature_names %}{{ featureview.name }}__{{feature}}{% else %}{{ feature }}{% endif %}
{% endfor %}
{% endfor %}
SELECT {{ final_output_feature_names | join(', ')}}
FROM entity_dataframe
{% for featureview in featureviews %}
LEFT JOIN (
SELECT
{{featureview.name}}__entity_row_unique_id
{% for feature in featureview.features %}
,{% if full_feature_names %}{{ featureview.name }}__{{feature}}{% else %}{{ feature }}{% endif %}
,{% if full_feature_names %}{{ featureview.name }}__{{featureview.field_mapping.get(feature, feature)}}{% else %}{{ featureview.field_mapping.get(feature, feature) }}{% endif %}
{% endfor %}
FROM {{ featureview.name }}__cleaned
FROM "{{ featureview.name }}__cleaned"
) {{ featureview.name }}__cleaned
ON
{{ featureview.name }}__cleaned.{{ featureview.name }}__entity_row_unique_id = entity_dataframe.{{ featureview.name }}__entity_row_unique_id
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pytest
from sqlalchemy import create_engine
from testcontainers.core.container import DockerContainer
from testcontainers.mssql import SqlServerContainer
from testcontainers.core.waiting_utils import wait_for_logs

from feast.data_source import DataSource
Expand All @@ -24,6 +25,7 @@
MSSQL_PASSWORD = "yourStrong(!)Password"


# This is the SQL container to use if your machine doesn't support the official mssql docker container.
@pytest.fixture(scope="session")
Comment thread
adchia marked this conversation as resolved.
def mssql_container():
container = (
Expand All @@ -43,6 +45,7 @@ def mssql_container():

def _df_to_create_table_sql(df: pd.DataFrame, table_name: str) -> str:
pa_table = pa.Table.from_pandas(df)

columns = [f""""{f.name}" {pa_to_mssql_type(f.type)}""" for f in pa_table.schema]
return f"""
CREATE TABLE "{table_name}" (
Expand All @@ -51,6 +54,8 @@ def _df_to_create_table_sql(df: pd.DataFrame, table_name: str) -> str:
"""




class MsSqlDataSourceCreator(DataSourceCreator):
tables: List[str] = []

Expand All @@ -59,8 +64,10 @@ def __init__(
):
super().__init__(project_name)
self.tables_created: List[str] = []
self.container = fixture_request.getfixturevalue("mssql_container")
self.exposed_port = self.container.get_exposed_port("1433")
self.container = SqlServerContainer(user=MSSQL_USER, password=MSSQL_PASSWORD)
#self.container = fixture_request.getfixturevalue("mssql_container")
self.container.start()
#self.exposed_port = self.container.get_exposed_port("1433")
if not self.container:
raise RuntimeError(
"In order to use this data source "
Expand All @@ -70,10 +77,11 @@ def __init__(

def create_offline_store_config(self) -> MsSqlServerOfflineStoreConfig:
return MsSqlServerOfflineStoreConfig(
connection_string=(
f"mssql+pyodbc://{MSSQL_USER}:{MSSQL_PASSWORD}@0.0.0.0:1433/master?"
"driver=ODBC+Driver+17+for+SQL+Server"
)
connection_string=self.container.get_connection_url(),
# connection_string=(
# f"mssql+pyodbc://{MSSQL_USER}:{MSSQL_PASSWORD}@0.0.0.0:1433/master?"
# "driver=ODBC+Driver+17+for+SQL+Server"
# )
)

def create_data_source(
Expand All @@ -85,33 +93,30 @@ def create_data_source(
field_mapping: Dict[str, str] = None,
**kwargs,
) -> DataSource:
if timestamp_field in df:
df[timestamp_field] = pd.to_datetime(df[timestamp_field], utc=True)
# Make sure the field mapping is correct and convert the datetime datasources.

if field_mapping:
timestamp_mapping = {value: key for key, value in field_mapping.items()}
if (
timestamp_field in timestamp_mapping
and timestamp_mapping[timestamp_field] in df
):
col = timestamp_mapping[timestamp_field]
df[col] = pd.to_datetime(df[col], utc=True)
# if timestamp_field in df:
# df[timestamp_field] = pd.to_datetime(df[timestamp_field], utc=True).fillna(pd.Timestamp.now()) #.dt.tz_localize(None)
# # Make sure the field mapping is correct and convert the datetime datasources.
# if created_timestamp_column in df:
# df[created_timestamp_column] = pd.to_datetime(df[created_timestamp_column], utc=True).fillna(pd.Timestamp.now()) #.dt.tz_localize(None)

connection_string = self.create_offline_store_config().connection_string
engine = create_engine(connection_string)
# Create table

destination_name = self.get_prefixed_table_name(destination_name)
#_df_to_create_table_sql(df, destination_name)
engine.execute(_df_to_create_table_sql(df, destination_name))
# Upload dataframe to azure table
# TODO
df.to_sql(destination_name, engine, index=False, if_exists='append')
#, dtype={timestamp_field: DATETIME2(), created_timestamp_column: DATETIME2()}
self.tables.append(destination_name)
return MsSqlServerSource(
name="ci_mssql_source",
connection_str=connection_string,
table_ref=destination_name,
event_timestamp_column=timestamp_field,
created_timestamp_column=created_timestamp_column,
Comment thread
adchia marked this conversation as resolved.
Outdated
field_mapping=field_mapping,
field_mapping=field_mapping or {"ts_1": "ts"},
)

def create_saved_dataset_destination(self) -> SavedDatasetStorage:
Expand Down