Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix spark_kafka_processor typing errors
Signed-off-by: Chester Ong <chester.ong.ch@gmail.com>
  • Loading branch information
bushwhackr committed Feb 16, 2024
commit 04de64ce7033108d33844442c2561400c2b45f06
11 changes: 3 additions & 8 deletions sdk/python/feast/infra/contrib/spark_kafka_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from pyspark.sql import DataFrame, SparkSession
from pyspark.sql.avro.functions import from_avro
from pyspark.sql.functions import col, from_json
from pyspark.sql.streaming import StreamingQuery

from feast.data_format import AvroFormat, JsonFormat
from feast.data_source import KafkaSource, PushMode
Expand Down Expand Up @@ -68,13 +67,10 @@ def __init__(
# data_source type has been checked to be an instance of KafkaSource.
self.data_source: KafkaSource = self.data_source # type: ignore

def ingest_stream_feature_view(
self, to: PushMode = PushMode.ONLINE
) -> StreamingQuery:
def ingest_stream_feature_view(self, to: PushMode = PushMode.ONLINE) -> None:
Comment thread
sudohainguyen marked this conversation as resolved.
Outdated
ingested_stream_df = self._ingest_stream_data()
transformed_df = self._construct_transformation_plan(ingested_stream_df)
online_store_query = self._write_stream_data(transformed_df, to)
return online_store_query
self._write_stream_data(transformed_df, to)

# In the line 64 of __init__(), the "data_source" is assigned a stream_source (and has to be KafkaSource as in line 40).
@no_type_check
Expand Down Expand Up @@ -131,7 +127,7 @@ def _ingest_stream_data(self) -> StreamTable:
def _construct_transformation_plan(self, df: StreamTable) -> StreamTable:
return self.sfv.udf.__call__(df) if self.sfv.udf else df

def _write_stream_data(self, df: StreamTable, to: PushMode) -> StreamingQuery:
def _write_stream_data(self, df: StreamTable, to: PushMode) -> None:
# Validation occurs at the fs.write_to_online_store() phase against the stream feature view schema.
def batch_write(row: DataFrame, batch_id: int):
rows: pd.DataFrame = row.toPandas()
Expand Down Expand Up @@ -170,4 +166,3 @@ def batch_write(row: DataFrame, batch_id: int):
)

query.awaitTermination(timeout=self.query_timeout)
return query