Skip to content

Commit a7f88c5

Browse files
authored
Add custom data sources (#1713)
* Refactor Batch datasources into relevant offline store files Signed-off-by: Achal Shah <achals@gmail.com> * Move batch data sources to corresponding offline stores Signed-off-by: Achal Shah <achals@gmail.com> * change default values to None Signed-off-by: Achal Shah <achals@gmail.com> * implement from_proto for File and Bigquery, and move logic out of data_source.py Signed-off-by: Achal Shah <achals@gmail.com> * Fix lint Signed-off-by: Achal Shah <achals@gmail.com> * Fix lint after rebase Signed-off-by: Achal Shah <achals@gmail.com> * more refactoring Signed-off-by: Achal Shah <achals@gmail.com> * remove prints from test Signed-off-by: Achal Shah <achals@gmail.com> * make format Signed-off-by: Achal Shah <achals@gmail.com> * Remove bad assert Signed-off-by: Achal Shah <achals@gmail.com> * move redshift over as well Signed-off-by: Achal Shah <achals@gmail.com> * Remove print Signed-off-by: Achal Shah <achals@gmail.com> * Remove incorrect logic from the get_data_source_class_from_type Signed-off-by: Achal Shah <achals@gmail.com> * fix import Signed-off-by: Achal Shah <achals@gmail.com> * make format Signed-off-by: Achal Shah <achals@gmail.com>
1 parent c5300cb commit a7f88c5

27 files changed

+759
-692
lines changed

protos/feast/core/DataSource.proto

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ import "feast/core/DataFormat.proto";
2626

2727
// Defines a Data Source that can be used to source Feature data
2828
message DataSource {
29+
// Field numbers should *not* be reused. It is unclear whether fields 6-10 were previously used,
30+
// but they are going to be reserved for backwards compatibility.
31+
reserved 6 to 10;
32+
2933
// Type of Data Source.
3034
enum SourceType {
3135
INVALID = 0;
@@ -34,6 +38,7 @@ message DataSource {
3438
STREAM_KAFKA = 3;
3539
STREAM_KINESIS = 4;
3640
BATCH_REDSHIFT = 5;
41+
CUSTOM_SOURCE = 6;
3742
}
3843
SourceType type = 1;
3944

@@ -51,6 +56,10 @@ message DataSource {
5156
// Must specify creation timestamp column name
5257
string created_timestamp_column = 5;
5358

59+
// This is an internal field that records the Python class of the data source object that this proto object represents.
60+
// This should be set by feast, and not by users.
61+
string data_source_class_type = 17;
62+
5463
// Defines options for DataSource that sources features from a file
5564
message FileOptions {
5665
FileFormat file_format = 1;
@@ -111,12 +120,20 @@ message DataSource {
111120
string query = 2;
112121
}
113122

123+
// Defines configuration for custom third-party data sources.
124+
message CustomSourceOptions {
125+
// Serialized configuration information for the data source. The implementer of the custom data source is
126+
// responsible for serializing this data to bytes and deserializing it from bytes.
127+
bytes configuration = 1;
128+
}
129+
114130
// DataSource options.
115131
oneof options {
116132
FileOptions file_options = 11;
117133
BigQueryOptions bigquery_options = 12;
118134
KafkaOptions kafka_options = 13;
119135
KinesisOptions kinesis_options = 14;
120136
RedshiftOptions redshift_options = 15;
137+
CustomSourceOptions custom_options = 16;
121138
}
122139
}

sdk/python/feast/.DS_Store

6 KB
Binary file not shown.

sdk/python/feast/__init__.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,12 @@
22

33
from pkg_resources import DistributionNotFound, get_distribution
44

5+
from feast.infra.offline_stores.bigquery import BigQuerySource
6+
from feast.infra.offline_stores.file import FileSource
7+
from feast.infra.offline_stores.redshift import RedshiftSource
8+
59
from .client import Client
6-
from .data_source import (
7-
BigQuerySource,
8-
FileSource,
9-
KafkaSource,
10-
KinesisSource,
11-
RedshiftSource,
12-
SourceType,
13-
)
10+
from .data_source import KafkaSource, KinesisSource, SourceType
1411
from .entity import Entity
1512
from .feature import Feature
1613
from .feature_store import FeatureStore
@@ -32,10 +29,9 @@
3229
pass
3330

3431
__all__ = [
32+
"BigQuerySource",
3533
"Client",
3634
"Entity",
37-
"BigQuerySource",
38-
"FileSource",
3935
"KafkaSource",
4036
"KinesisSource",
4137
"RedshiftSource",
@@ -46,4 +42,5 @@
4642
"RepoConfig",
4743
"SourceType",
4844
"ValueType",
45+
"FileSource",
4946
]

sdk/python/feast/client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,10 @@
2020
import grpc
2121
import pandas as pd
2222

23+
from feast import BigQuerySource, FileSource
2324
from feast.config import Config
2425
from feast.constants import ConfigOptions as opt
2526
from feast.data_format import ParquetFormat
26-
from feast.data_source import BigQuerySource, FileSource
2727
from feast.entity import Entity
2828
from feast.feature import Feature, FeatureRef, _build_feature_references
2929
from feast.feature_table import FeatureTable

0 commit comments

Comments
 (0)