Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
e1e210d
Broken state
kevjumba Aug 5, 2022
011d1e0
working state
kevjumba Aug 10, 2022
a6a2fce
Fix the lint issues
kevjumba Aug 10, 2022
57b63bb
Semi working state
kevjumba Aug 10, 2022
ae7ed8a
Fix
kevjumba Aug 10, 2022
421645b
Fremove print
kevjumba Aug 10, 2022
07fece5
Fix lint
kevjumba Aug 11, 2022
4062031
Run build-sphinx
kevjumba Aug 11, 2022
cb39329
Add tutorials
kevjumba Aug 11, 2022
554ca1a
Fix
kevjumba Aug 11, 2022
4a969e7
Fix?
kevjumba Aug 11, 2022
116320a
Fix lint
kevjumba Aug 11, 2022
c0b16ef
Fix
kevjumba Aug 11, 2022
44d09d0
Fix lint
kevjumba Aug 12, 2022
b6f0a79
Begin configuring tests
adchia Aug 15, 2022
2b2ff40
Fix
kevjumba Aug 15, 2022
4616366
Working version
kevjumba Aug 16, 2022
c7d9852
Fix
kevjumba Aug 17, 2022
d2e290b
Fix
kevjumba Aug 17, 2022
a726a9a
Fix
kevjumba Aug 17, 2022
32992e3
Fix lint
kevjumba Aug 17, 2022
ebb934b
Fix lint
kevjumba Aug 17, 2022
e456acb
Fix
kevjumba Aug 17, 2022
45f479f
Fix lint
kevjumba Aug 17, 2022
4b8c4a2
Fix
kevjumba Aug 17, 2022
b1bf602
Fix
kevjumba Aug 17, 2022
4586f00
Fix azure
kevjumba Aug 17, 2022
3b88c0b
Fix
kevjumba Aug 17, 2022
9ae8ee3
Fix
kevjumba Aug 17, 2022
1b12e4a
Fix lint and address issues
kevjumba Aug 18, 2022
0ca5048
Fix integration tests
kevjumba Aug 18, 2022
883f314
Fix
kevjumba Aug 18, 2022
ccf8716
Fix lint and address issues
kevjumba Aug 18, 2022
f05288e
Fix
kevjumba Aug 18, 2022
ee30e73
Fix
kevjumba Aug 18, 2022
ab17db9
Fix
kevjumba Aug 18, 2022
be162f5
Revert
kevjumba Aug 18, 2022
f5aa476
Fix
kevjumba Aug 18, 2022
4423dfa
Fix
kevjumba Aug 18, 2022
5806507
Fix
kevjumba Aug 18, 2022
7a4d055
Fix lint
kevjumba Aug 19, 2022
78b74b1
Fix
kevjumba Aug 19, 2022
a9e8119
Fix lint
kevjumba Aug 19, 2022
1341e3e
Fix pyarrow
kevjumba Aug 19, 2022
3d42093
Fix lint
kevjumba Aug 19, 2022
1c591f0
add requirements files
adchia Aug 19, 2022
b4da607
fix name of docs
adchia Aug 19, 2022
c3a0423
fix offline store readme
adchia Aug 19, 2022
576b57e
fix offline store readme
adchia Aug 19, 2022
69940ac
fix
adchia Aug 19, 2022
516ff76
fix
adchia Aug 19, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Fix lint
Signed-off-by: Kevin Zhang <kzhang@tecton.ai>
  • Loading branch information
kevjumba committed Aug 19, 2022
commit 32992e364fe96665e178d0c86a69fb51d0da9bbd
6 changes: 5 additions & 1 deletion sdk/python/feast/infra/contrib/azure_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,18 @@
from feast.registry import BaseRegistry
from feast.repo_config import RepoConfig
from feast.saved_dataset import SavedDataset
from feast.infra.passthrough_provider import PassthroughProvider

from feast.utils import (
_convert_arrow_to_proto,
_get_column_names,
_run_pyarrow_field_mapping,
make_tzaware,
)

# TODO: Refactor the provider code.
DEFAULT_BATCH_SIZE = 10_000


class AzureProvider(PassthroughProvider):
def materialize_single_feature_view(
Comment thread
adchia marked this conversation as resolved.
Outdated
self,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@

import numpy as np
import pandas
import pyarrow as pa
import pyarrow
import pyarrow as pa
import sqlalchemy
from pydantic.types import StrictStr
from pydantic.typing import Literal
Expand All @@ -29,14 +29,14 @@
build_point_in_time_query,
get_feature_view_query_context,
)
from feast.type_map import pa_to_mssql_type

from feast.infra.provider import RetrievalJob
from feast.on_demand_feature_view import OnDemandFeatureView
from feast.registry import BaseRegistry
from feast.repo_config import FeastBaseModel, RepoConfig
from feast.saved_dataset import SavedDatasetStorage
from feast.type_map import pa_to_mssql_type
from feast.usage import log_exceptions_and_usage

# Make sure warning doesn't raise more than once.
warnings.simplefilter("once", RuntimeWarning)

Expand Down Expand Up @@ -176,7 +176,9 @@ def get_historical_features(
expected_join_keys = _get_join_keys(project, feature_views, registry)
assert isinstance(config.offline_store, MsSqlServerOfflineStoreConfig)
engine = make_engine(config.offline_store)
entity_df["event_timestamp"] = pandas.to_datetime(entity_df["event_timestamp"], utc=True).fillna(pandas.Timestamp.now())
entity_df["event_timestamp"] = pandas.to_datetime(
entity_df["event_timestamp"], utc=True
).fillna(pandas.Timestamp.now())

(
table_schema,
Expand Down Expand Up @@ -337,7 +339,7 @@ def full_feature_names(self) -> bool:
@property
def on_demand_feature_views(self) -> List[OnDemandFeatureView]:
return self._on_demand_feature_views

def _to_df_internal(self) -> pandas.DataFrame:
return pandas.read_sql(self.query, con=self.engine).fillna(value=np.nan)

Expand Down Expand Up @@ -405,17 +407,18 @@ def _upload_entity_df_into_sqlserver_and_get_entity_schema(
elif isinstance(entity_df, pandas.DataFrame):
# Drop the index so that we don't have unnecessary columns
engine.execute(_df_to_create_table_sql(entity_df, table_id))
entity_df.to_sql(name=table_id, con=engine, index=False, if_exists='append')
entity_df.to_sql(name=table_id, con=engine, index=False, if_exists="append")
entity_schema = dict(zip(entity_df.columns, entity_df.dtypes)), table_id

else:
raise ValueError(
f"The entity dataframe you have provided must be a SQL Server SQL query,"
f" or a Pandas dataframe. But we found: {type(entity_df)} "
)
)

return entity_schema


def _df_to_create_table_sql(df: pandas.DataFrame, table_name: str) -> str:
pa_table = pa.Table.from_pandas(df)

Expand All @@ -427,6 +430,7 @@ def _df_to_create_table_sql(df: pandas.DataFrame, table_name: str) -> str:
);
"""


def _get_entity_df_event_timestamp_range(
entity_df: Union[pandas.DataFrame, str],
entity_df_event_timestamp_col: str,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
import pytest
from sqlalchemy import create_engine
from testcontainers.core.container import DockerContainer
from testcontainers.mssql import SqlServerContainer
from testcontainers.core.waiting_utils import wait_for_logs
from testcontainers.mssql import SqlServerContainer

from feast.data_source import DataSource
from feast.infra.offline_stores.contrib.mssql_offline_store.mssql import (
Expand Down Expand Up @@ -54,8 +54,6 @@ def _df_to_create_table_sql(df: pd.DataFrame, table_name: str) -> str:
"""




class MsSqlDataSourceCreator(DataSourceCreator):
tables: List[str] = []

Expand All @@ -65,9 +63,9 @@ def __init__(
super().__init__(project_name)
self.tables_created: List[str] = []
self.container = SqlServerContainer(user=MSSQL_USER, password=MSSQL_PASSWORD)
#self.container = fixture_request.getfixturevalue("mssql_container")
# self.container = fixture_request.getfixturevalue("mssql_container")
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove all the commented line here?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed

self.container.start()
#self.exposed_port = self.container.get_exposed_port("1433")
# self.exposed_port = self.container.get_exposed_port("1433")
if not self.container:
raise RuntimeError(
"In order to use this data source "
Expand All @@ -94,20 +92,24 @@ def create_data_source(
**kwargs,
) -> DataSource:
if timestamp_field in df:
df[timestamp_field] = pd.to_datetime(df[timestamp_field], utc=True).fillna(pd.Timestamp.now())
df[timestamp_field] = pd.to_datetime(df[timestamp_field], utc=True).fillna(
pd.Timestamp.now()
)
# Make sure the field mapping is correct and convert the datetime datasources.
if created_timestamp_column in df:
df[created_timestamp_column] = pd.to_datetime(df[created_timestamp_column], utc=True).fillna(pd.Timestamp.now())

df[created_timestamp_column] = pd.to_datetime(
df[created_timestamp_column], utc=True
).fillna(pd.Timestamp.now())

connection_string = self.create_offline_store_config().connection_string
engine = create_engine(connection_string)
# Create table

destination_name = self.get_prefixed_table_name(destination_name)
#_df_to_create_table_sql(df, destination_name)
# _df_to_create_table_sql(df, destination_name)
engine.execute(_df_to_create_table_sql(df, destination_name))
# Upload dataframe to azure table
df.to_sql(destination_name, engine, index=False, if_exists='append')
df.to_sql(destination_name, engine, index=False, if_exists="append")
self.tables.append(destination_name)
return MsSqlServerSource(
name="ci_mssql_source",
Expand Down
2 changes: 1 addition & 1 deletion sdk/python/tests/utils/feature_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ def validate_dataframes(expected_df, actual_df, keys, event_timestamp=None):
.drop_duplicates()
.reset_index(drop=True)
)

new_df = expected_df.drop("event_timestamp", axis=1)
new_actual_df = actual_df.drop("event_timestamp", axis=1)
keys = keys.remove("event_timestamp")
Expand Down