//
// Copyright 2020 The Feast Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";
package feast.core;

option go_package = "github.com/feast-dev/feast/go/protos/feast/core";
option java_outer_classname = "DataSourceProto";
option java_package = "feast.proto.core";

import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";
import "feast/core/DataFormat.proto";
import "feast/types/Value.proto";
import "feast/core/Feature.proto";

// Defines a Data Source that can be used to source Feature data.
// Next available id: 28
message DataSource {
  // Field indexes should *not* be reused. Not sure if fields 6-10 were used previously or not,
  // but they are going to be reserved for backwards compatibility.
  reserved 6 to 10;

  // Type of Data Source.
  // Next available id: 13
  enum SourceType {
    INVALID = 0;
    BATCH_FILE = 1;
    BATCH_SNOWFLAKE = 8;
    BATCH_BIGQUERY = 2;
    BATCH_REDSHIFT = 5;
    STREAM_KAFKA = 3;
    STREAM_KINESIS = 4;
    CUSTOM_SOURCE = 6;
    REQUEST_SOURCE = 7;
    PUSH_SOURCE = 9;
    BATCH_TRINO = 10;
    BATCH_SPARK = 11;
    BATCH_ATHENA = 12;
  }

  // Unique name of data source within the project
  string name = 20;

  // Name of Feast project that this data source belongs to.
  string project = 21;

  string description = 23;

  map<string, string> tags = 24;

  string owner = 25;

  SourceType type = 1;

  // Defines mapping between fields in the sourced data
  // and fields in parent FeatureTable.
  map<string, string> field_mapping = 2;

  // Must specify event timestamp column name
  string timestamp_field = 3;

  // (Optional) Specify partition column
  // useful for file sources
  string date_partition_column = 4;

  // Must specify creation timestamp column name
  string created_timestamp_column = 5;

  // This is an internal field that represents the Python class for the data source object that a proto object represents.
  // This should be set by Feast, and not by users.
  // The field is used primarily by custom data sources and is mandatory for them to set. Feast may set it for
  // first party sources as well.
  string data_source_class_type = 17;

  // Optional batch source for streaming sources for historical features and materialization.
  DataSource batch_source = 26;

  SourceMeta meta = 50;

  message SourceMeta {
    google.protobuf.Timestamp earliestEventTimestamp = 1;
    google.protobuf.Timestamp latestEventTimestamp = 2;
    google.protobuf.Timestamp created_timestamp = 3;
    google.protobuf.Timestamp last_updated_timestamp = 4;
  }
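
  // Illustrative sketch (not part of the upstream definition; names and values are
  // hypothetical): a file-backed DataSource might be expressed in protobuf text
  // format roughly as follows, assuming the referenced file and columns exist:
  //
  //   name: "driver_hourly_stats"
  //   project: "driver_ranking"
  //   type: BATCH_FILE
  //   timestamp_field: "event_timestamp"
  //   created_timestamp_column: "created"
  //   file_options {
  //     uri: "file:///data/driver_stats.parquet"
  //   }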

  // Defines options for DataSource that sources features from a file
  message FileOptions {
    FileFormat file_format = 1;

    // Target URL of file to retrieve and source features from.
    // s3://path/to/file for AWS S3 storage
    // gs://path/to/file for GCP GCS storage
    // file:///path/to/file for local storage
    string uri = 2;

    // override AWS S3 storage endpoint with custom S3 endpoint
    string s3_endpoint_override = 3;
  }

  // Defines options for DataSource that sources features from a BigQuery Query
  message BigQueryOptions {
    // Full table reference in the form of [project:dataset.table]
    string table = 1;

    // SQL query that returns a table containing feature data. Must contain an event_timestamp column, and respective
    // entity columns
    string query = 2;
  }

  // Defines options for DataSource that sources features from a Trino Query
  message TrinoOptions {
    // Full table reference in the form of [project:dataset.table]
    string table = 1;

    // SQL query that returns a table containing feature data. Must contain an event_timestamp column, and respective
    // entity columns
    string query = 2;
  }

  // Defines options for DataSource that sources features from Kafka messages.
  // Each message should be a Protobuf that can be decoded with the generated
  // Java Protobuf class at the given class path
  message KafkaOptions {
    // Comma separated list of Kafka bootstrap servers (host[:port]). Used for feature tables without a defined source.
    string kafka_bootstrap_servers = 1;

    // Kafka topic to collect feature data from.
    string topic = 2;

    // Defines the stream data format encoding feature/entity data in Kafka messages.
    StreamFormat message_format = 3;

    // Watermark delay threshold for stream data
    google.protobuf.Duration watermark_delay_threshold = 4;
  }

  // Defines options for DataSource that sources features from Kinesis records.
  // Each record should be a Protobuf that can be decoded with the generated
  // Java Protobuf class at the given class path
  message KinesisOptions {
    // AWS region of the Kinesis stream
    string region = 1;

    // Name of the Kinesis stream to obtain feature data from.
    string stream_name = 2;

    // Defines the data format encoding the feature/entity data in Kinesis records.
    // Kinesis Data Sources support Avro and Proto as data formats.
    StreamFormat record_format = 3;
  }

  // Defines options for DataSource that sources features from a Redshift Query
  message RedshiftOptions {
    // Redshift table name
    string table = 1;

    // SQL query that returns a table containing feature data. Must contain an event_timestamp column, and respective
    // entity columns
    string query = 2;

    // Redshift schema name
    string schema = 3;

    // Redshift database name
    string database = 4;
  }

  // Defines options for DataSource that sources features from an Athena Query
  message AthenaOptions {
    // Athena table name
    string table = 1;

    // SQL query that returns a table containing feature data. Must contain an event_timestamp column, and respective
    // entity columns
    string query = 2;

    // Athena database name
    string database = 3;

    // Athena schema name
    string data_source = 4;
  }
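
  // Illustrative sketch (not part of the upstream definition; broker addresses and
  // topic name are hypothetical): a Kafka-backed streaming source could populate
  // kafka_options in protobuf text format roughly as:
  //
  //   kafka_options {
  //     kafka_bootstrap_servers: "broker1:9092,broker2:9092"
  //     topic: "driver_stats_stream"
  //     watermark_delay_threshold { seconds: 300 }
  //   }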

  // Defines options for DataSource that sources features from a Snowflake Query
  message SnowflakeOptions {
    reserved 5; // Snowflake warehouse name

    // Snowflake table name
    string table = 1;

    // SQL query that returns a table containing feature data. Must contain an event_timestamp column, and respective
    // entity columns
    string query = 2;

    // Snowflake schema name
    string schema = 3;

    // Snowflake database name
    string database = 4;
  }

  // Defines options for DataSource that sources features from a Spark table/query
  message SparkOptions {
    // Table name
    string table = 1;

    // Spark SQL query that returns the table, this is an alternative to `table`
    string query = 2;

    // Path from which Spark can read the table, this is an alternative to `table`
    string path = 3;

    // Format of files at `path` (e.g. parquet, avro, etc)
    string file_format = 4;

    // Date format of the date partition column (e.g. %Y-%m-%d)
    string date_partition_column_format = 5;

    // Table format (e.g. iceberg, delta, hudi)
    TableFormat table_format = 6;
  }

  // Defines configuration for custom third-party data sources.
  message CustomSourceOptions {
    // Serialized configuration information for the data source. The implementer of the custom data source is
    // responsible for serializing and deserializing data from bytes
    bytes configuration = 1;
  }

  // Defines options for DataSource that sources features from request data
  message RequestDataOptions {
    reserved 1;

    // Mapping of feature name to type
    map<string, feast.types.ValueType.Enum> deprecated_schema = 2;

    repeated FeatureSpecV2 schema = 3;
  }

  // Defines options for DataSource that supports pushing data to it. This allows data to be pushed to
  // the online store on-demand, such as by stream consumers.
  message PushOptions {
    reserved 1;
  }

  // DataSource options.
  oneof options {
    FileOptions file_options = 11;
    BigQueryOptions bigquery_options = 12;
    KafkaOptions kafka_options = 13;
    KinesisOptions kinesis_options = 14;
    RedshiftOptions redshift_options = 15;
    RequestDataOptions request_data_options = 18;
    CustomSourceOptions custom_options = 16;
    SnowflakeOptions snowflake_options = 19;
    PushOptions push_options = 22;
    SparkOptions spark_options = 27;
    TrinoOptions trino_options = 30;
    AthenaOptions athena_options = 35;
  }
}

message DataSourceList {
  repeated DataSource datasources = 1;
}
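
// Illustrative sketch (not part of the upstream definition; project, dataset, and
// column names are hypothetical): a DataSourceList wrapping a single BigQuery-backed
// source could look roughly like the following protobuf text format:
//
//   datasources {
//     name: "orders"
//     type: BATCH_BIGQUERY
//     timestamp_field: "event_timestamp"
//     bigquery_options { table: "my-project:orders_dataset.orders" }
//   }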