From 83711841f91ab0b81f5b66f9a34542318770f360 Mon Sep 17 00:00:00 2001
From: Achal Shah <achals@gmail.com>
Date: Wed, 17 Nov 2021 23:24:04 -0800
Subject: [PATCH] Remove refs to tensorflow_metadata

Signed-off-by: Achal Shah <achals@gmail.com>
---
 Makefile                                      |   2 -
 .../tensorflow_metadata/proto/v0/path.proto   |  44 --
 .../tensorflow_metadata/proto/v0/schema.proto | 673 ------------------
 .../proto/v0/statistics.proto                 | 427 -----------
 sdk/python/setup.py                           |   1 -
 5 files changed, 1147 deletions(-)
 delete mode 100644 protos/tensorflow_metadata/proto/v0/path.proto
 delete mode 100644 protos/tensorflow_metadata/proto/v0/schema.proto
 delete mode 100644 protos/tensorflow_metadata/proto/v0/statistics.proto

diff --git a/Makefile b/Makefile
index 2daad95ccb..fa51919619 100644
--- a/Makefile
+++ b/Makefile
@@ -48,7 +48,6 @@ package-protos:
 compile-protos-python:
 	@$(foreach dir,$(PROTO_TYPE_SUBDIRS),cd ${ROOT_DIR}/protos; python -m grpc_tools.protoc -I. --grpc_python_out=../sdk/python/feast/protos/ --python_out=../sdk/python/feast/protos/ --mypy_out=../sdk/python/feast/protos/ feast/$(dir)/*.proto;)
 	@$(foreach dir,$(PROTO_TYPE_SUBDIRS),grep -rli 'from feast.$(dir)' sdk/python/feast/protos | xargs -I@ sed -i.bak 's/from feast.$(dir)/from feast.protos.feast.$(dir)/g' @;)
-	cd ${ROOT_DIR}/protos; python -m grpc_tools.protoc -I. --python_out=../sdk/python/ --mypy_out=../sdk/python/ tensorflow_metadata/proto/v0/*.proto
 
 install-python:
 	python -m pip install -e sdk/python -U --use-deprecated=legacy-resolver
@@ -114,7 +113,6 @@ install-go-ci-dependencies:
 	go get -u golang.org/x/lint/golint
 
 compile-protos-go:
-	cd ${ROOT_DIR}/protos; protoc -I/usr/local/include -I. --go_out=plugins=grpc,paths=source_relative:../sdk/go/protos/ tensorflow_metadata/proto/v0/*.proto
 	$(foreach dir,types serving core storage,cd ${ROOT_DIR}/protos; protoc -I/usr/local/include -I. --go_out=plugins=grpc,paths=source_relative:../sdk/go/protos feast/$(dir)/*.proto;)
 
 test-go:
diff --git a/protos/tensorflow_metadata/proto/v0/path.proto b/protos/tensorflow_metadata/proto/v0/path.proto
deleted file mode 100644
index 3a4e41bad9..0000000000
--- a/protos/tensorflow_metadata/proto/v0/path.proto
+++ /dev/null
@@ -1,44 +0,0 @@
-// Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-// =============================================================================
-
-syntax = "proto2";
-option cc_enable_arenas = true;
-
-package tensorflow.metadata.v0;
-
-option java_package = "org.tensorflow.metadata.v0";
-option java_multiple_files = true;
-option go_package = "github.com/feast-dev/feast/sdk/go/protos/tensorflow_metadata/proto/v0";
-
-// A path is a more general substitute for the name of a field or feature that
-// can be used for flat examples as well as structured data. For example, if
-// we had data in a protocol buffer:
-// message Person {
-//   int age = 1;
-//   optional string gender = 2;
-//   repeated Person parent = 3;
-// }
-// Thus, here the path {step:["parent", "age"]} in statistics would refer to the
-// age of a parent, and {step:["parent", "parent", "age"]} would refer to the
-// age of a grandparent. This allows us to distinguish between the statistics
-// of parents' ages and grandparents' ages. In general, repeated messages are
-// to be preferred to linked lists of arbitrary length.
-// For SequenceExample, if we have a feature list "foo", this is represented
-// by {step:["##SEQUENCE##", "foo"]}.
-message Path {
-  // Any string is a valid step.
-  // However, whenever possible have a step be [A-Za-z0-9_]+.
-  repeated string step = 1;
-}
diff --git a/protos/tensorflow_metadata/proto/v0/schema.proto b/protos/tensorflow_metadata/proto/v0/schema.proto
deleted file mode 100644
index 00005ee913..0000000000
--- a/protos/tensorflow_metadata/proto/v0/schema.proto
+++ /dev/null
@@ -1,673 +0,0 @@
-// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-// =============================================================================
-
-syntax = "proto2";
-
-package tensorflow.metadata.v0;
-
-import "google/protobuf/any.proto";
-import "tensorflow_metadata/proto/v0/path.proto";
-
-option cc_enable_arenas = true;
-option java_package = "org.tensorflow.metadata.v0";
-option java_multiple_files = true;
-option go_package = "github.com/feast-dev/feast/sdk/go/protos/tensorflow_metadata/proto/v0";
-
-// LifecycleStage. Only UNKNOWN_STAGE, BETA, and PRODUCTION features are
-// actually validated.
-// PLANNED, ALPHA, and DEBUG are treated as DEPRECATED.
-enum LifecycleStage {
-  UNKNOWN_STAGE = 0;  // Unknown stage.
-  PLANNED = 1;        // Planned feature, may not be created yet.
-  ALPHA = 2;          // Prototype feature, not used in experiments yet.
-  BETA = 3;           // Used in user-facing experiments.
-  PRODUCTION = 4;     // Used in a significant fraction of user traffic.
-  DEPRECATED = 5;     // No longer supported: do not use in new models.
-  DEBUG_ONLY = 6;     // Only exists for debugging purposes.
-}
-
-//
-// Message to represent schema information.
-// NextID: 14
-message Schema {
-  // Features described in this schema.
-  repeated Feature feature = 1;
-
-  // Sparse features described in this schema.
-  repeated SparseFeature sparse_feature = 6;
-
-  // Weighted features described in this schema.
-  repeated WeightedFeature weighted_feature = 12;
-
-  // Use StructDomain instead.
-  // Sequences described in this schema. A sequence may be described in terms of
-  // several features. Any features appearing within a sequence must *not* be
-  // declared as top-level features in <feature>.
-// GOOGLE-LEGACY   repeated Sequence sequence = 2;  
-
-  // declared as top-level features in <feature>.
-  // String domains referenced in the features.
-  repeated StringDomain string_domain = 4;
-
-  // top level float domains that can be reused by features
-  repeated FloatDomain float_domain = 9;
-
-  // top level int domains that can be reused by features
-  repeated IntDomain int_domain = 10;
-
-  // Default environments for each feature.
-  // An environment represents both a type of location (e.g. a server or phone)
-  // and a time (e.g. right before model X is run). In the standard scenario,
-  // 99% of the features should be in the default environments TRAINING,
-  // SERVING, and the LABEL (or labels) AND WEIGHT is only available at TRAINING
-  // (not at serving).
-  // Other possible variations:
-  // 1. There may be TRAINING_MOBILE, SERVING_MOBILE, TRAINING_SERVICE,
-  //    and SERVING_SERVICE.
-  // 2. If one is ensembling three models, where the predictions of the first
-  //    three models are available for the ensemble model, there may be
-  //    TRAINING, SERVING_INITIAL, SERVING_ENSEMBLE.
-  // See FeatureProto::not_in_environment and FeatureProto::in_environment.
-  repeated string default_environment = 5;
-
-  /* BEGIN GOOGLE-LEGACY
-  // TODO(b/73109633): Change default to false, before removing this field.
-  optional bool generate_legacy_feature_spec = 7 [default = true];
-  END GOOGLE-LEGACY */
-
-  // Additional information about the schema as a whole. Features may also
-  // be annotated individually.
-  optional Annotation annotation = 8;
-
-  // Dataset-level constraints. This is currently used for specifying
-  // information about changes in num_examples.
-  optional DatasetConstraints dataset_constraints = 11;
-
-  // TensorRepresentation groups. The keys are the names of the groups.
-  // Key "" (empty string) denotes the "default" group, which is what should
-  // be used when a group name is not provided.
-  // See the documentation at TensorRepresentationGroup for more info.
-  // Under development. DO NOT USE.
-  map<string, TensorRepresentationGroup> tensor_representation_group = 13;
-}
-
-// Describes schema-level information about a specific feature.
-// NextID: 31
-message Feature {
-  // The name of the feature.
-  optional string name = 1;  // required
-
-  // This field is no longer supported. Instead, use:
-  // lifecycle_stage: DEPRECATED
-  // TODO(b/111450258): remove this.
-  optional bool deprecated = 2 [deprecated = true];
-
-  // Comment field for a human readable description of the field.
-  // TODO(b/123518108): remove this.
-// GOOGLE-LEGACY   optional string comment = 3 [deprecated = true];  
-
-  oneof presence_constraints {
-    // Constraints on the presence of this feature in the examples.
-    FeaturePresence presence = 14;
-    // Only used in the context of a "group" context, e.g., inside a sequence.
-    FeaturePresenceWithinGroup group_presence = 17;
-  }
-
-  // The shape of the feature which governs the number of values that appear in
-  // each example.
-  oneof shape_type {
-    // The feature has a fixed shape corresponding to a multi-dimensional
-    // tensor.
-    FixedShape shape = 23;
-    // The feature doesn't have a well defined shape. All we know are limits on
-    // the minimum and maximum number of values.
-    ValueCount value_count = 5;
-  }
-
-  // Physical type of the feature's values.
-  // Note that you can have:
-  // type: BYTES
-  // int_domain: {
-  //   min: 0
-  //   max: 3
-  // }
-  // This would be a field that is syntactically BYTES (i.e. strings), but
-  // semantically an int, i.e. it would be "0", "1", "2", or "3".
-  optional FeatureType type = 6;
-
-  // Domain for the values of the feature.
-  oneof domain_info {
-    // Reference to a domain defined at the schema level.
-    string domain = 7;
-    // Inline definitions of domains.
-    IntDomain int_domain = 9;
-    FloatDomain float_domain = 10;
-    StringDomain string_domain = 11;
-    BoolDomain bool_domain = 13;
-    StructDomain struct_domain = 29;
-    // Supported semantic domains.
-    NaturalLanguageDomain natural_language_domain = 24;
-    ImageDomain image_domain = 25;
-    MIDDomain mid_domain = 26;
-    URLDomain url_domain = 27;
-    TimeDomain time_domain = 28;
-    TimeOfDayDomain time_of_day_domain = 30;
-  }
-
-  // Constraints on the distribution of the feature values.
-  // Currently only supported for StringDomains.
-  // TODO(b/69473628): Extend functionality to other domain types.
-  optional DistributionConstraints distribution_constraints = 15;
-
-  // Additional information about the feature for documentation purpose.
-  optional Annotation annotation = 16;
-
-  // Tests comparing the distribution to the associated serving data.
-  optional FeatureComparator skew_comparator = 18;
-
-  // Tests comparing the distribution between two consecutive spans (e.g. days).
-  optional FeatureComparator drift_comparator = 21;
-
-  // List of environments this feature is present in.
-  // Should be disjoint from not_in_environment.
-  // This feature is in environment "foo" if:
-  // ("foo" is in in_environment or default_environments) AND
-  // "foo" is not in not_in_environment.
-  // See Schema::default_environments.
-  repeated string in_environment = 20;
-
-  // List of environments this feature is not present in.
-  // Should be disjoint from of in_environment.
-  // See Schema::default_environments and in_environment.
-  repeated string not_in_environment = 19;
-
-  // The lifecycle stage of a feature. It can also apply to its descendants.
-  // i.e., if a struct is DEPRECATED, its children are implicitly deprecated.
-  optional LifecycleStage lifecycle_stage = 22;
-}
-
-// Additional information about the schema or about a feature.
-message Annotation {
-  // Tags can be used to mark features. For example, tag on user_age feature can
-  // be `user_feature`, tag on user_country feature can be `location_feature`,
-  // `user_feature`.
-  repeated string tag = 1;
-  // Free-text comments. This can be used as a description of the feature,
-  // developer notes etc.
-  repeated string comment = 2;
-  // Application-specific metadata may be attached here.
-  repeated .google.protobuf.Any extra_metadata = 3;
-}
-
-// Checks that the ratio of the current value to the previous value is not below
-// the min_fraction_threshold or above the max_fraction_threshold. That is,
-// previous value * min_fraction_threshold <= current value <=
-// previous value * max_fraction_threshold.
-// To specify that the value cannot change, set both min_fraction_threshold and
-// max_fraction_threshold to 1.0.
-message NumericValueComparator {
-  optional double min_fraction_threshold = 1;
-  optional double max_fraction_threshold = 2;
-}
-
-// Constraints on the entire dataset.
-message DatasetConstraints {
-  // Tests differences in number of examples between the current data and the
-  // previous span.
-  optional NumericValueComparator num_examples_drift_comparator = 1;
-  // Tests comparisions in number of examples between the current data and the
-  // previous version of that data.
-  optional NumericValueComparator num_examples_version_comparator = 2;
-  // Minimum number of examples in the dataset.
-  optional int64 min_examples_count = 3;
-}
-
-// Specifies a fixed shape for the feature's values. The immediate implication
-// is that each feature has a fixed number of values. Moreover, these values
-// can be parsed in a multi-dimensional tensor using the specified axis sizes.
-// The FixedShape defines a lexicographical ordering of the data. For instance,
-// if there is a FixedShape {
-//   dim {size:3} dim {size:2}
-// }
-// then tensor[0][0]=field[0]
-// then tensor[0][1]=field[1]
-// then tensor[1][0]=field[2]
-// then tensor[1][1]=field[3]
-// then tensor[2][0]=field[4]
-// then tensor[2][1]=field[5]
-//
-// The FixedShape message is identical with the TensorFlow TensorShape proto
-// message.
-message FixedShape {
-  // The dimensions that define the shape. The total number of values in each
-  // example is the product of sizes of each dimension.
-  repeated Dim dim = 2;
-
-  // An axis in a multi-dimensional feature representation.
-  message Dim {
-    optional int64 size = 1;
-
-    // Optional name of the tensor dimension.
-    optional string name = 2;
-  }
-}
-
-// Limits on maximum and minimum number of values in a
-// single example (when the feature is present). Use this when the minimum
-// value count can be different than the maximum value count. Otherwise prefer
-// FixedShape.
-message ValueCount {
-  optional int64 min = 1;
-  optional int64 max = 2;
-}
-
-/* BEGIN GOOGLE-LEGACY
-// Constraint on the number of elements in a sequence.
-message LengthConstraint {
-  optional int64 min = 1;
-  optional int64 max = 2;
-}
-
-// A sequence is a logical feature that comprises several "raw" features that
-// encode values at different "steps" within the sequence.
-// TODO(b/110490010): Delete this. This is a special case of StructDomain.
-message Sequence {
-  // An optional name for this sequence. Used mostly for debugging and
-  // presentation.
-  optional string name = 1;
-
-  // Features that comprise the sequence. These features are "zipped" together
-  // to form the values for the sequence at different steps.
-  // - Use group_presence within each feature to encode presence constraints
-  //   within the sequence.
-  // - If all features have the same value-count constraints then
-  //   declare this once using the shape_constraint below.
-  repeated Feature feature = 2;
-
-  // Constraints on the presence of the sequence across all examples in the
-  // dataset. The sequence is assumed to be present if at least one of its
-  // features is present.
-  optional FeaturePresence presence = 3;
-
-  // Shape constraints that apply on all the features that comprise the
-  // sequence. If this is set then the value_count in 'feature' is
-  // ignored.
-  // TODO(martinz): delete: there is no reason to believe the shape of the
-  // fields in a sequence will be the same. Use the fields in Feature instead.
-  oneof shape_constraint {
-    ValueCount value_count = 4;
-    FixedShape fixed_shape = 5;
-  }
-
-  // Constraint on the number of elements in a sequence.
-  optional LengthConstraint length_constraint = 6;
-}
-END GOOGLE-LEGACY */
-
-// Represents a weighted feature that is encoded as a combination of raw base
-// features. The `weight_feature` should be a float feature with identical
-// shape as the `feature`. This is useful for representing weights associated
-// with categorical tokens (e.g. a TFIDF weight associated with each token).
-// TODO(b/142122960): Handle WeightedCategorical end to end in TFX (validation,
-// TFX Unit Testing, etc)
-message WeightedFeature {
-  // Name for the weighted feature. This should not clash with other features in
-  // the same schema.
-  optional string name = 1;  // required
-  // Path of a base feature to be weighted. Required.
-  optional Path feature = 2;
-  // Path of weight feature to associate with the base feature. Must be same
-  // shape as feature. Required.
-  optional Path weight_feature = 3;
-  // The lifecycle_stage determines where a feature is expected to be used,
-  // and therefore how important issues with it are.
-  optional LifecycleStage lifecycle_stage = 4;
-}
-
-// A sparse feature represents a sparse tensor that is encoded with a
-// combination of raw features, namely index features and a value feature. Each
-// index feature defines a list of indices in a different dimension.
-message SparseFeature {
-  reserved 11;
-  // Name for the sparse feature. This should not clash with other features in
-  // the same schema.
-  optional string name = 1;  // required
-
-  // This field is no longer supported. Instead, use:
-  // lifecycle_stage: DEPRECATED
-  // TODO(b/111450258): remove this.
-  optional bool deprecated = 2 [deprecated = true];
-
-  // The lifecycle_stage determines where a feature is expected to be used,
-  // and therefore how important issues with it are.
-  optional LifecycleStage lifecycle_stage = 7;
-
-  // Comment field for a human readable description of the field.
-  // TODO(martinz): delete, convert to annotation.
-// GOOGLE-LEGACY   optional string comment = 3 [deprecated = true];  
-
-  // Constraints on the presence of this feature in examples.
-  // Deprecated, this is inferred by the referred features.
-  optional FeaturePresence presence = 4 [deprecated = true];
-
-  // Shape of the sparse tensor that this SparseFeature represents.
-  // Currently not supported.
-  // TODO(b/109669962): Consider deriving this from the referred features.
-  optional FixedShape dense_shape = 5;
-
-  // Features that represent indexes. Should be integers >= 0.
-  repeated IndexFeature index_feature = 6;  // at least one
-  message IndexFeature {
-    // Name of the index-feature. This should be a reference to an existing
-    // feature in the schema.
-    optional string name = 1;
-  }
-
-  // If true then the index values are already sorted lexicographically.
-  optional bool is_sorted = 8;
-
-  optional ValueFeature value_feature = 9;  // required
-  message ValueFeature {
-    // Name of the value-feature. This should be a reference to an existing
-    // feature in the schema.
-    optional string name = 1;
-  }
-
-  // Type of value feature.
-  // Deprecated, this is inferred by the referred features.
-  optional FeatureType type = 10 [deprecated = true];
-}
-
-// Models constraints on the distribution of a feature's values.
-// TODO(martinz): replace min_domain_mass with max_off_domain (but slowly).
-message DistributionConstraints {
-  // The minimum fraction (in [0,1]) of values across all examples that
-  // should come from the feature's domain, e.g.:
-  //   1.0  => All values must come from the domain.
-  //    .9  => At least 90% of the values must come from the domain.
-  optional double min_domain_mass = 1 [default = 1.0];
-}
-
-// Encodes information for domains of integer values.
-// Note that FeatureType could be either INT or BYTES.
-message IntDomain {
-  // Id of the domain. Required if the domain is defined at the schema level. If
-  // so, then the name must be unique within the schema.
-  optional string name = 1;
-
-  // Min and max values for the domain.
-  optional int64 min = 3;
-  optional int64 max = 4;
-
-  // If true then the domain encodes categorical values (i.e., ids) rather than
-  // ordinal values.
-  optional bool is_categorical = 5;
-}
-
-// Encodes information for domains of float values.
-// Note that FeatureType could be either INT or BYTES.
-message FloatDomain {
-  // Id of the domain. Required if the domain is defined at the schema level. If
-  // so, then the name must be unique within the schema.
-  optional string name = 1;
-
-  // Min and max values of the domain.
-  optional float min = 3;
-  optional float max = 4;
-}
-
-// Domain for a recursive struct.
-// NOTE: If a feature with a StructDomain is deprecated, then all the
-// child features (features and sparse_features of the StructDomain) are also
-// considered to be deprecated.  Similarly child features can only be in
-// environments of the parent feature.
-message StructDomain {
-  repeated Feature feature = 1;
-
-  repeated SparseFeature sparse_feature = 2;
-}
-
-// Encodes information for domains of string values.
-message StringDomain {
-  // Id of the domain. Required if the domain is defined at the schema level. If
-  // so, then the name must be unique within the schema.
-  optional string name = 1;
-
-  // The values appearing in the domain.
-  repeated string value = 2;
-}
-
-// Encodes information about the domain of a boolean attribute that encodes its
-// TRUE/FALSE values as strings, or 0=false, 1=true.
-// Note that FeatureType could be either INT or BYTES.
-message BoolDomain {
-  // Id of the domain. Required if the domain is defined at the schema level. If
-  // so, then the name must be unique within the schema.
-  optional string name = 1;
-
-  // Strings values for TRUE/FALSE.
-  optional string true_value = 2;
-  optional string false_value = 3;
-}
-
-// BEGIN SEMANTIC-TYPES-PROTOS
-// Semantic domains are specialized feature domains. For example a string
-// Feature might represent a Time of a specific format.
-// Semantic domains are defined as protocol buffers to allow further sub-types /
-// specialization, e.g: NaturalLanguageDomain can provide information on the
-// language of the text.
-
-// Natural language text.
-message NaturalLanguageDomain {}
-
-// Image data.
-message ImageDomain {}
-
-// Knowledge graph ID, see: https://www.wikidata.org/wiki/Property:P646
-message MIDDomain {}
-
-// A URL, see: https://en.wikipedia.org/wiki/URL
-message URLDomain {}
-
-// Time or date representation.
-message TimeDomain {
-  enum IntegerTimeFormat {
-    FORMAT_UNKNOWN = 0;
-    UNIX_DAYS = 5;  // Number of days since 1970-01-01.
-    UNIX_SECONDS = 1;
-    UNIX_MILLISECONDS = 2;
-    UNIX_MICROSECONDS = 3;
-    UNIX_NANOSECONDS = 4;
-  }
-
-  oneof format {
-    // Expected format that contains a combination of regular characters and
-    // special format specifiers. Format specifiers are a subset of the
-    // strptime standard.
-    string string_format = 1;
-
-    // Expected format of integer times.
-    IntegerTimeFormat integer_format = 2;
-  }
-}
-
-// Time of day, without a particular date.
-message TimeOfDayDomain {
-  enum IntegerTimeOfDayFormat {
-    FORMAT_UNKNOWN = 0;
-    // Time values, containing hour/minute/second/nanos, encoded into 8-byte
-    // bit fields following the ZetaSQL convention:
-    //        6         5         4         3         2         1
-    // MSB 3210987654321098765432109876543210987654321098765432109876543210 LSB
-    //                      | H ||  M ||  S ||---------- nanos -----------|
-    PACKED_64_NANOS = 1;
-  }
-
-  oneof format {
-    // Expected format that contains a combination of regular characters and
-    // special format specifiers. Format specifiers are a subset of the
-    // strptime standard.
-    string string_format = 1;
-
-    // Expected format of integer times.
-    IntegerTimeOfDayFormat integer_format = 2;
-  }
-}
-// END SEMANTIC-TYPES-PROTOS
-
-// Describes the physical representation of a feature.
-// It may be different than the logical representation, which
-// is represented as a Domain.
-enum FeatureType {
-  TYPE_UNKNOWN = 0;
-  BYTES = 1;
-  INT = 2;
-  FLOAT = 3;
-  STRUCT = 4;
-}
-
-// Describes constraints on the presence of the feature in the data.
-message FeaturePresence {
-  // Minimum fraction of examples that have this feature.
-  optional double min_fraction = 1;
-  // Minimum number of examples that have this feature.
-  optional int64 min_count = 2;
-}
-
-// Records constraints on the presence of a feature inside a "group" context
-// (e.g., .presence inside a group of features that define a sequence).
-message FeaturePresenceWithinGroup {
-  optional bool required = 1;
-}
-
-// Checks that the L-infinity norm is below a certain threshold between the
-// two discrete distributions. Since this is applied to a FeatureNameStatistics,
-// it only considers the top k.
-// L_infty(p,q) = max_i |p_i-q_i|
-message InfinityNorm {
-  // The InfinityNorm is in the interval [0.0, 1.0] so sensible bounds should
-  // be in the interval [0.0, 1.0).
-  optional double threshold = 1;
-}
-
-message FeatureComparator {
-  optional InfinityNorm infinity_norm = 1;
-}
-
-// A TensorRepresentation captures the intent for converting columns in a
-// dataset to TensorFlow Tensors (or more generally, tf.CompositeTensors).
-// Note that one tf.CompositeTensor may consist of data from multiple columns,
-// for example, a N-dimensional tf.SparseTensor may need N + 1 columns to
-// provide the sparse indices and values.
-// Note that the "column name" that a TensorRepresentation needs is a
-// string, not a Path -- it means that the column name identifies a top-level
-// Feature in the schema (i.e. you cannot specify a Feature nested in a STRUCT
-// Feature).
-message TensorRepresentation {
-  message DefaultValue {
-    oneof kind {
-      double float_value = 1;
-      // Note that the data column might be of a shorter integral type. It's the
-      // user's responsitiblity to make sure the default value fits that type.
-      int64 int_value = 2;
-      bytes bytes_value = 3;
-      // uint_value should only be used if the default value can't fit in a
-      // int64 (`int_value`).
-      uint64 uint_value = 4;
-    }
-  }
-
-  // A tf.Tensor
-  message DenseTensor {
-    // Identifies the column in the dataset that provides the values of this
-    // Tensor.
-    optional string column_name = 1;
-    // The shape of each row of the data (i.e. does not include the batch
-    // dimension)
-    optional FixedShape shape = 2;
-    // If this column is missing values in a row, the default_value will be
-    // used to fill that row.
-    optional DefaultValue default_value = 3;
-  }
-
-  // A ragged tf.SparseTensor that models nested lists.
-  message VarLenSparseTensor {
-    // Identifies the column in the dataset that should be converted to the
-    // VarLenSparseTensor.
-    optional string column_name = 1;
-  }
-
-  // A tf.SparseTensor whose indices and values come from separate data columns.
-  // This will replace Schema.sparse_feature eventually.
-  // The index columns must be of INT type, and all the columns must co-occur
-  // and have the same valency at the same row.
-  message SparseTensor {
-    // The dense shape of the resulting SparseTensor (does not include the batch
-    // dimension).
-    optional FixedShape dense_shape = 1;
-    // The columns constitute the coordinates of the values.
-    // indices_column[i][j] contains the coordinate of the i-th dimension of the
-    // j-th value.
-    repeated string index_column_names = 2;
-    // The column that contains the values.
-    optional string value_column_name = 3;
-  }
-
-  oneof kind {
-    DenseTensor dense_tensor = 1;
-    VarLenSparseTensor varlen_sparse_tensor = 2;
-    SparseTensor sparse_tensor = 3;
-  }
-}
-
-// A TensorRepresentationGroup is a collection of TensorRepresentations with
-// names. These names may serve as identifiers when converting the dataset
-// to a collection of Tensors or tf.CompositeTensors.
-// For example, given the following group:
-// {
-//   key: "dense_tensor"
-//   tensor_representation {
-//     dense_tensor {
-//       column_name: "univalent_feature"
-//       shape {
-//         dim {
-//           size: 1
-//         }
-//       }
-//       default_value {
-//         float_value: 0
-//       }
-//     }
-//   }
-// }
-// {
-//   key: "varlen_sparse_tensor"
-//   tensor_representation {
-//     varlen_sparse_tensor {
-//       column_name: "multivalent_feature"
-//     }
-//   }
-// }
-//
-// Then the schema is expected to have feature "univalent_feature" and
-// "multivalent_feature", and when a batch of data is converted to Tensors using
-// this TensorRepresentationGroup, the result may be the following dict:
-// {
-//   "dense_tensor": tf.Tensor(...),
-//   "varlen_sparse_tensor": tf.SparseTensor(...),
-// }
-message TensorRepresentationGroup {
-  map<string, TensorRepresentation> tensor_representation = 1;
-}
diff --git a/protos/tensorflow_metadata/proto/v0/statistics.proto b/protos/tensorflow_metadata/proto/v0/statistics.proto
deleted file mode 100644
index 3123dad874..0000000000
--- a/protos/tensorflow_metadata/proto/v0/statistics.proto
+++ /dev/null
@@ -1,427 +0,0 @@
-// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-// =============================================================================
-
-// Definitions for aggregated feature statistics for datasets.
-// TODO(b/80075690): make a Javascript build rule for this.
-// TODO(b/80075691): migrate Facets to use this.
-syntax = "proto3";
-option cc_enable_arenas = true;
-
-package tensorflow.metadata.v0;
-
-option java_package = "org.tensorflow.metadata.v0";
-option java_multiple_files = true;
-option go_package = "github.com/feast-dev/feast/sdk/go/protos/tensorflow_metadata/proto/v0";
-
-import "tensorflow_metadata/proto/v0/path.proto";
-
-// Copied from Facets feature_statistics.proto
-// Must be kept binary-compatible with the original, until all usages
-// are updated to use this version, or we write a proto-to-proto converter.
-
-// A list of features statistics for different datasets. If you wish to compare
-// different datasets using this list, then the DatasetFeatureStatistics
-// entries should all contain the same list of features.
-message DatasetFeatureStatisticsList {
-  repeated DatasetFeatureStatistics datasets = 1;
-}
-
-// The feature statistics for a single dataset.
-message DatasetFeatureStatistics {
-  // The name of the dataset.
-  string name = 1;
-  // The number of examples in the dataset.
-  uint64 num_examples = 2;
-
-  // Only valid if the weight feature was specified.
-  // Treats a missing weighted feature as zero.
-  double weighted_num_examples = 4;
-  // The feature statistics for the dataset.
-  repeated FeatureNameStatistics features = 3;
-
-  // Cross feature statistics for the dataset.
-  repeated CrossFeatureStatistics cross_features = 5;
-}
-
-message CrossFeatureStatistics {
-  // The path of feature x.
-  Path path_x = 1;
-  // The path of feature y.
-  Path path_y = 2;
-
-  // Number of occurrences of this feature cross in the data. If any of
-  // the features in the cross is missing, the example is ignored.
-  uint64 count = 3;
-
-  oneof cross_stats {
-    NumericCrossStatistics num_cross_stats = 4;
-    CategoricalCrossStatistics categorical_cross_stats = 5;
-  }
-}
-
-message NumericCrossStatistics {
-  // Pearson product-moment correlation coefficient.
-  float correlation = 1;
-  // Standard covariance. E[(X-E[X])*(Y-E[Y])]
-  float covariance = 2;
-}
-
-message CategoricalCrossStatistics {
-   LiftStatistics lift = 1;
-}
-
-message LiftStatistics {
-  // Lift information for each value of path_y. Lift is defined for each pair of
-  // values (x,y) as P(path_y=y|path_x=x)/P(path_y=y).
-  repeated LiftSeries lift_series = 1;
-  // Weighted lift information for each value of path_y. Weighted lift is
-  // defined for each pair of values (x,y) as P(path_y=y|path_x=x)/P(path_y=y)
-  // where probabilities are computed over weighted example space.
-  repeated LiftSeries weighted_lift_series = 2;
-}
-
-// Container for lift information for a specific y-value.
-message LiftSeries {
-  // A bucket for referring to binned numeric features.
-  message Bucket {
-    // The low value of the bucket, inclusive.
-    double low_value = 1;
-    // The high value of the bucket, exclusive (unless the high_value is
-    // positive infinity).
-    double high_value = 2;
-  }
-
-  // The particular value of path_y corresponding to this LiftSeries. Each
-  // element in lift_values corresponds to the lift a different x_value and
-  // this specific y_value.
-  oneof y_value {
-    int32 y_int = 1;
-    string y_string = 2;
-    Bucket y_bucket = 3;
-  }
-
-  // The number of examples in which y_value appears.
-  oneof y_count_value {
-    uint64 y_count = 4;
-    double weighted_y_count = 5;
-  }
-
-  // A container for lift information about a specific value of path_x.
-  message LiftValue {
-    oneof x_value {
-      int32 x_int = 1;
-      string x_string = 2;
-    }
-    // P(path_y=y|path_x=x) / P(path_y=y) for x_value and the enclosing y_value.
-    // In terms of concrete fields, this number represents:
-    // (x_and_y_count / x_count) / (y_count / num_examples)
-    double lift = 3;
-    // The number of examples in which x_value appears.
-    oneof x_count_value {
-      uint64 x_count = 4;
-      double weighted_x_count = 5;
-    }
-    // The number of examples in which x_value appears and y_value appears.
-    oneof x_and_y_count_value {
-      uint64 x_and_y_count = 6;
-      double weighted_x_and_y_count = 7;
-    }
-  }
-
-  // The lifts for a each path_x value and this y_value.
-  repeated LiftValue lift_values = 6;
-}
-
-// The complete set of statistics for a given feature name for a dataset.
-message FeatureNameStatistics {
-  // The types supported by the feature statistics. When aggregating
-  // tf.Examples, if the bytelist contains a string, it is recommended to encode
-  // it here as STRING instead of BYTES in order to calculate string-specific
-  // statistical measures.
-  enum Type {
-    INT = 0;
-    FLOAT = 1;
-    STRING = 2;
-    BYTES = 3;
-    STRUCT = 4;
-  }
-
-  // One can identify a field either by the name (for simple fields), or by
-  // a path (for structured fields). Note that:
-  // name: "foo"
-  // is equivalent to:
-  // path: {step:"foo"}
-  // Note: this oneof must be consistently either name or path across all
-  // FeatureNameStatistics in one DatasetFeatureStatistics.
-  oneof field_id {
-    // The feature name
-    string name = 1;
-
-    // The path of the feature.
-    Path path = 8;
-  }
-
-  // The data type of the feature
-  Type type = 2;
-
-  // The statistics of the values of the feature.
-  oneof stats {
-    NumericStatistics num_stats = 3;
-    StringStatistics string_stats = 4;
-    BytesStatistics bytes_stats = 5;
-    StructStatistics struct_stats = 7;
-  }
-
-  // Any custom statistics can be stored in this list.
-  repeated CustomStatistic custom_stats = 6;
-}
-
-// Common weighted statistics for all feature types. Statistics counting number
-// of values (i.e., avg_num_values and tot_num_values) include NaNs.
-// If the weighted column is missing, then this counts as a weight of 1
-// for that example.
-message WeightedCommonStatistics {
-  // Weighted number of examples not missing.
-  double num_non_missing = 1;
-  // Weighted number of examples missing.
-  // Note that if the weighted column is zero, this does not count
-  // as missing.
-  double num_missing = 2;
-  // average number of values, weighted by the number of examples.
-  double avg_num_values = 3;
-  // tot_num_values = avg_num_values * num_non_missing.
-  // This is calculated directly, so should have less numerical error.
-  double tot_num_values = 4;
-}
-
-// Stores the name and value of any custom statistic. The value can be a string,
-// double, or histogram.
-message CustomStatistic {
-  string name = 1;
-  oneof val {
-    double num = 2;
-    string str = 3;
-    Histogram histogram = 4;
-    RankHistogram rank_histogram = 5;
-  }
-}
-
-// Statistics for a numeric feature in a dataset.
-message NumericStatistics {
-  CommonStatistics common_stats = 1;
-  // The mean of the values
-  double mean = 2;
-  // The standard deviation of the values
-  double std_dev = 3;
-  // The number of values that equal 0
-  uint64 num_zeros = 4;
-  // The minimum value
-  double min = 5;
-  // The median value
-  double median = 6;
-  // The maximum value
-  double max = 7;
-  // The histogram(s) of the feature values.
-  repeated Histogram histograms = 8;
-
-  // Weighted statistics for the feature, if the values have weights.
-  WeightedNumericStatistics weighted_numeric_stats = 9;
-}
-
-// Statistics for a string feature in a dataset.
-message StringStatistics {
-  CommonStatistics common_stats = 1;
-  // The number of unique values
-  uint64 unique = 2;
-
-  message FreqAndValue {
-    string value = 2;
-
-    // The number of times the value occurs. Stored as a double to be able to
-    // handle weighted features.
-    double frequency = 3;
-
-    // Deleted fields.
-    reserved 1;
-  }
-  // A sorted list of the most-frequent values and their frequencies, with
-  // the most-frequent being first.
-  repeated FreqAndValue top_values = 3;
-
-  // The average length of the values
-  float avg_length = 4;
-
-  // The rank histogram for the values of the feature.
-  // The rank is used to measure of how commonly the value is found in the
-  // dataset. The most common value would have a rank of 1, with the second-most
-  // common value having a rank of 2, and so on.
-  RankHistogram rank_histogram = 5;
-
-  // Weighted statistics for the feature, if the values have weights.
-  WeightedStringStatistics weighted_string_stats = 6;
-
-  // A vocabulary file, used for vocabularies too large to store in the proto
-  // itself.  Note that the file may be relative to some context-dependent
-  // directory.  E.g. in TFX the feature statistics will live in a PPP and
-  // vocabulary file names will be relative to this PPP.
-  string vocabulary_file = 7;
-}
-
-// Statistics for a weighted numeric feature in a dataset.
-message WeightedNumericStatistics {
-  // The weighted mean of the values
-  double mean = 1;
-  // The weighted standard deviation of the values
-  double std_dev = 2;
-  // The weighted median of the values
-  double median = 3;
-
-  // The histogram(s) of the weighted feature values.
-  repeated Histogram histograms = 4;
-}
-
-// Statistics for a weighted string feature in a dataset.
-message WeightedStringStatistics {
-  // A sorted list of the most-frequent values and their weighted frequencies,
-  // with the most-frequent being first.
-  repeated StringStatistics.FreqAndValue top_values = 1;
-
-  // The rank histogram for the weighted values of the feature.
-  RankHistogram rank_histogram = 2;
-}
-
-// Statistics for a bytes feature in a dataset.
-message BytesStatistics {
-  CommonStatistics common_stats = 1;
-  // The number of unique values
-  uint64 unique = 2;
-
-  // The average number of bytes in a value
-  float avg_num_bytes = 3;
-  // The minimum number of bytes in a value
-  float min_num_bytes = 4;
-  // The maximum number of bytes in a value
-  float max_num_bytes = 5;
-}
-
-message StructStatistics {
-  CommonStatistics common_stats = 1;
-}
-
-// Common statistics for all feature types. Statistics counting number of values
-// (i.e., min_num_values, max_num_values, avg_num_values, and tot_num_values)
-// include NaNs.
-message CommonStatistics {
-  // The number of examples with at least one value for this feature.
-  uint64 num_non_missing = 1;
-  // The number of examples with no values for this feature.
-  uint64 num_missing = 2;
-  // The minimum number of values in a single example for this feature.
-  uint64 min_num_values = 3;
-  // The maximum number of values in a single example for this feature.
-  uint64 max_num_values = 4;
-  // The average number of values in a single example for this feature.
-  float avg_num_values = 5;
-  // tot_num_values = avg_num_values * num_non_missing.
-  // This is calculated directly, so should have less numerical error.
-  uint64 tot_num_values = 8;
-  // The quantiles histogram for the number of values in this feature.
-  Histogram num_values_histogram = 6;
-  WeightedCommonStatistics weighted_common_stats = 7;
-  // The histogram for the number of features in the feature list (only set if
-  // this feature is a non-context feature from a tf.SequenceExample).
-  // This is different from num_values_histogram, as num_values_histogram tracks
-  // the count of all values for a feature in an example, whereas this tracks
-  // the length of the feature list for this feature in an example (where each
-  // feature list can contain multiple values).
-  Histogram feature_list_length_histogram = 9;
-}
-
-// The data used to create a histogram of a numeric feature for a dataset.
-message Histogram {
-  // Each bucket defines its low and high values along with its count. The
-  // low and high values must be a real number or positive or negative
-  // infinity. They cannot be NaN or undefined. Counts of those special values
-  // can be found in the numNaN and numUndefined fields.
-  message Bucket {
-    // The low value of the bucket, inclusive.
-    double low_value = 1;
-    // The high value of the bucket, exclusive (unless the highValue is
-    // positive infinity).
-    double high_value = 2;
-
-    // The number of items in the bucket. Stored as a double to be able to
-    // handle weighted histograms.
-    double sample_count = 4;
-
-    // Deleted fields.
-    reserved 3;
-  }
-
-  // The number of NaN values in the dataset.
-  uint64 num_nan = 1;
-  // The number of undefined values in the dataset.
-  uint64 num_undefined = 2;
-
-  // A list of buckets in the histogram, sorted from lowest bucket to highest
-  // bucket.
-  repeated Bucket buckets = 3;
-
-  // The type of the histogram. A standard histogram has equal-width buckets.
-  // The quantiles type is used for when the histogram message is used to store
-  // quantile information (by using equal-count buckets with variable widths).
-  enum HistogramType {
-    STANDARD = 0;
-    QUANTILES = 1;
-  }
-
-  // The type of the histogram.
-  HistogramType type = 4;
-
-  // An optional descriptive name of the histogram, to be used for labeling.
-  string name = 5;
-}
-
-// The data used to create a rank histogram of a non-numeric feature of a
-// dataset. The rank of a value in a feature can be used as a measure of how
-// commonly the value is found in the entire dataset. With bucket sizes of one,
-// this becomes a distribution function of all feature values.
-message RankHistogram {
-  // Each bucket defines its start and end ranks along with its count.
-  message Bucket {
-    // The low rank of the bucket, inclusive.
-    uint64 low_rank = 1;
-    // The high rank of the bucket, exclusive.
-    uint64 high_rank = 2;
-
-    // The label for the bucket. Can be used to list or summarize the values in
-    // this rank bucket.
-    string label = 4;
-
-    // The number of items in the bucket. Stored as a double to be able to
-    // handle weighted histograms.
-    double sample_count = 5;
-
-    // Deleted fields.
-    reserved 3;
-  }
-
-  // A list of buckets in the histogram, sorted from lowest-ranked bucket to
-  // highest-ranked bucket.
-  repeated Bucket buckets = 1;
-
-  // An optional descriptive name of the histogram, to be used for labeling.
-  string name = 2;
-}
\ No newline at end of file
diff --git a/sdk/python/setup.py b/sdk/python/setup.py
index 29a018ca51..a9fdfa8293 100644
--- a/sdk/python/setup.py
+++ b/sdk/python/setup.py
@@ -236,7 +236,6 @@ def run(self):
         "": [
             "protos/feast/**/*.proto",
             "protos/feast/third_party/grpc/health/v1/*.proto",
-            "protos/tensorflow_metadata/proto/v0/*.proto",
             "feast/protos/feast/**/*.py",
         ],
     },