Skip to content

Commit e19467d

Browse files
author
hao-xu5
committed
add tableformat proto
Signed-off-by: hao-xu5 <hxu44@apple.com>
1 parent 429958a commit e19467d

File tree

8 files changed

+365
-56
lines changed

8 files changed

+365
-56
lines changed

protos/feast/core/DataFormat.proto

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,65 @@ option go_package = "github.com/feast-dev/feast/go/protos/feast/core";
2222
option java_outer_classname = "DataFormatProto";
2323
option java_package = "feast.proto.core";
2424

25+
2526
// Defines the file format encoding the features/entity data in files
2627
message FileFormat {
2728
// Defines options for the Parquet data format
2829
message ParquetFormat {}
2930

30-
// Defines options for delta data format
31-
message DeltaFormat {}
32-
3331
oneof format {
3432
ParquetFormat parquet_format = 1;
33+
// Deprecated: Delta Lake is a table format, not a file format.
34+
// Use TableFormat.DeltaFormat instead for Delta Lake support.
35+
TableFormat.DeltaFormat delta_format = 2 [deprecated = true];
36+
}
37+
}
38+
39+
message TableFormat {
40+
// Defines options for Apache Iceberg table format
41+
message IcebergFormat {
42+
// Optional catalog name for the Iceberg table
43+
string catalog = 1;
44+
45+
// Optional namespace (schema/database) within the catalog
46+
string namespace = 2;
47+
48+
// Additional properties for Iceberg configuration
49+
// Examples: warehouse location, snapshot-id, as-of-timestamp, etc.
50+
map<string, string> properties = 3;
51+
}
52+
53+
// Defines options for Delta Lake table format
54+
message DeltaFormat {
55+
// Optional checkpoint location for Delta transaction logs
56+
string checkpoint_location = 1;
57+
58+
// Additional properties for Delta configuration
59+
// Examples: auto-optimize settings, vacuum settings, etc.
60+
map<string, string> properties = 2;
61+
}
62+
63+
// Defines options for Apache Hudi table format
64+
message HudiFormat {
65+
// Type of Hudi table (COPY_ON_WRITE or MERGE_ON_READ)
66+
string table_type = 1;
67+
68+
// Field(s) that uniquely identify a record
69+
string record_key = 2;
70+
71+
// Field used to determine the latest version of a record
72+
string precombine_field = 3;
73+
74+
// Additional properties for Hudi configuration
75+
// Examples: compaction strategy, indexing options, etc.
76+
map<string, string> properties = 4;
77+
}
78+
79+
// Specifies the table format and format-specific options
80+
oneof format {
81+
IcebergFormat iceberg_format = 1;
3582
DeltaFormat delta_format = 2;
83+
HudiFormat hudi_format = 3;
3684
}
3785
}
3886

protos/feast/core/DataSource.proto

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -232,8 +232,8 @@ message DataSource {
232232
// Date Format of date partition column (e.g. %Y-%m-%d)
233233
string date_partition_column_format = 5;
234234

235-
// Table Format (e.g. iceberg, delta, etc)
236-
string table_format = 6;
235+
// Table Format (e.g. iceberg, delta, hudi)
236+
TableFormat table_format = 6;
237237
}
238238

239239
// Defines configuration for custom third-party data sources.

sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark_source.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
)
1616
from feast.repo_config import RepoConfig
1717
from feast.saved_dataset import SavedDatasetStorage
18-
from feast.table_format import TableFormat, table_format_from_dict
18+
from feast.table_format import TableFormat, table_format_from_proto
1919
from feast.type_map import spark_to_feast_value_type
2020
from feast.value_type import ValueType
2121

@@ -372,10 +372,8 @@ def from_proto(cls, spark_options_proto: DataSourceProto.SparkOptions):
372372
"""
373373
# Parse table_format if present
374374
table_format = None
375-
if spark_options_proto.table_format:
376-
table_format = table_format_from_dict(
377-
json.loads(spark_options_proto.table_format)
378-
)
375+
if spark_options_proto.HasField("table_format"):
376+
table_format = table_format_from_proto(spark_options_proto.table_format)
379377

380378
spark_options = cls(
381379
table=spark_options_proto.table,
@@ -394,17 +392,21 @@ def to_proto(self) -> DataSourceProto.SparkOptions:
394392
Returns:
395393
SparkOptionsProto protobuf
396394
"""
395+
table_format_proto = None
396+
if self.table_format:
397+
table_format_proto = self.table_format.to_proto()
398+
397399
spark_options_proto = DataSourceProto.SparkOptions(
398400
table=self.table,
399401
query=self.query,
400402
path=self.path,
401403
file_format=self.file_format,
402404
date_partition_column_format=self.date_partition_column_format,
403-
table_format=json.dumps(self.table_format.to_dict())
404-
if self.table_format
405-
else "",
406405
)
407406

407+
if table_format_proto:
408+
spark_options_proto.table_format.CopyFrom(table_format_proto)
409+
408410
return spark_options_proto
409411

410412

sdk/python/feast/protos/feast/core/DataFormat_pb2.py

Lines changed: 34 additions & 14 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)