Skip to content

Commit fc34981

Browse files
jbvaningen, achals, kevjumba
authored
test: Fix several integration tests for SparkOfflineStore plugin (#2516)
* Moved PR changes to another branch Signed-off-by: Joost van Ingen <joostingen@gmail.com> * fix lint Signed-off-by: Achal Shah <achals@gmail.com> * undo unintended changes Signed-off-by: Achal Shah <achals@gmail.com> * Fix Signed-off-by: Kevin Zhang <kzhang@tecton.ai> Co-authored-by: Achal Shah <achals@gmail.com> Co-authored-by: Kevin Zhang <kzhang@tecton.ai>
1 parent 47c792c commit fc34981

File tree

10 files changed

+359
-257
lines changed

10 files changed

+359
-257
lines changed

protos/feast/core/DataSource.proto

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,14 @@ import "feast/core/DataFormat.proto";
2626
import "feast/types/Value.proto";
2727

2828
// Defines a Data Source that can be used to source Feature data
29-
// Next available id: 27
29+
// Next available id: 28
3030
message DataSource {
3131
// Field indexes should *not* be reused. Not sure if fields 6-10 were used previously or not,
3232
// but they are going to be reserved for backwards compatibility.
3333
reserved 6 to 10;
3434

3535
// Type of Data Source.
36-
// Next available id: 10
36+
// Next available id: 12
3737
enum SourceType {
3838
INVALID = 0;
3939
BATCH_FILE = 1;
@@ -46,6 +46,7 @@ message DataSource {
4646
REQUEST_SOURCE = 7;
4747
PUSH_SOURCE = 9;
4848
BATCH_TRINO = 10;
49+
BATCH_SPARK = 11;
4950
}
5051

5152
// Unique name of data source within the project
@@ -185,6 +186,21 @@ message DataSource {
185186
string warehouse = 5;
186187
}
187188

189+
// Defines options for DataSource that sources features from a spark table/query
190+
message SparkOptions {
191+
// Table name
192+
string table = 1;
193+
194+
// Spark SQL query that returns the table; this is an alternative to `table`
195+
string query = 2;
196+
197+
// Path from which spark can read the table, this is an alternative to `table`
198+
string path = 3;
199+
200+
// Format of files at `path` (e.g. parquet, avro, etc)
201+
string file_format = 4;
202+
}
203+
188204
// Defines configuration for custom third-party data sources.
189205
message CustomSourceOptions {
190206
// Serialized configuration information for the data source. The implementer of the custom data source is
@@ -218,6 +234,7 @@ message DataSource {
218234
CustomSourceOptions custom_options = 16;
219235
SnowflakeOptions snowflake_options = 19;
220236
PushOptions push_options = 22;
237+
SparkOptions spark_options = 27;
221238
TrinoOptions trino_options = 30;
222239
}
223240
}

protos/feast/core/SavedDataset.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ message SavedDatasetStorage {
5757
DataSource.RedshiftOptions redshift_storage = 6;
5858
DataSource.SnowflakeOptions snowflake_storage = 7;
5959
DataSource.TrinoOptions trino_storage = 8;
60+
DataSource.SparkOptions spark_storage = 9;
6061
}
6162
}
6263

sdk/python/feast/data_source.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ def to_proto(self) -> DataSourceProto.KinesisOptions:
143143
DataSourceProto.SourceType.BATCH_REDSHIFT: "feast.infra.offline_stores.redshift_source.RedshiftSource",
144144
DataSourceProto.SourceType.BATCH_SNOWFLAKE: "feast.infra.offline_stores.snowflake_source.SnowflakeSource",
145145
DataSourceProto.SourceType.BATCH_TRINO: "feast.infra.offline_stores.contrib.trino_offline_store.trino_source.TrinoSource",
146+
DataSourceProto.SourceType.BATCH_SPARK: "feast.infra.offline_stores.contrib.spark_offline_store.spark_source.SparkSource",
146147
DataSourceProto.SourceType.STREAM_KAFKA: "feast.data_source.KafkaSource",
147148
DataSourceProto.SourceType.STREAM_KINESIS: "feast.data_source.KinesisSource",
148149
DataSourceProto.SourceType.REQUEST_SOURCE: "feast.data_source.RequestSource",

0 commit comments

Comments
 (0)