From 264400e7b2bce75089cb427828cbae84ec152cba Mon Sep 17 00:00:00 2001 From: Terence Lim Date: Wed, 5 May 2021 14:12:00 +0800 Subject: [PATCH 1/5] Add name length restrictions for retrieval Signed-off-by: Terence Lim --- .../retriever/BigTableOnlineRetriever.java | 14 ++++++++++++++ .../retriever/CassandraOnlineRetriever.java | 14 ++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/storage/connectors/bigtable/src/main/java/feast/storage/connectors/bigtable/retriever/BigTableOnlineRetriever.java b/storage/connectors/bigtable/src/main/java/feast/storage/connectors/bigtable/retriever/BigTableOnlineRetriever.java index 6e67782..2525b15 100644 --- a/storage/connectors/bigtable/src/main/java/feast/storage/connectors/bigtable/retriever/BigTableOnlineRetriever.java +++ b/storage/connectors/bigtable/src/main/java/feast/storage/connectors/bigtable/retriever/BigTableOnlineRetriever.java @@ -42,6 +42,7 @@ public class BigTableOnlineRetriever implements SSTableOnlineRetriever enti .getBytes()); } + /** + * Generate BigTable table name, with limit of 50 characters. + * + * @param project Name of Feast project + * @param entityNames List of entities used in retrieval call + * @return BigTable table name for retrieval + */ + @Override + public String getSSTable(String project, List entityNames) { + String tableName = String.format("%s__%s", project, String.join("__", entityNames)); + return tableName.substring(0, Math.min(tableName.length(), MAX_TABLE_NAME_LENGTH)); + } + /** * Converts rowCell feature value into @NativeFeature type. * diff --git a/storage/connectors/cassandra/src/main/java/feast/storage/connectors/cassandra/retriever/CassandraOnlineRetriever.java b/storage/connectors/cassandra/src/main/java/feast/storage/connectors/cassandra/retriever/CassandraOnlineRetriever.java index f97a9e0..fc5a6d9 100644 --- a/storage/connectors/cassandra/src/main/java/feast/storage/connectors/cassandra/retriever/CassandraOnlineRetriever.java +++ b/storage/connectors/cassandra/src/main/java/feast/storage/connectors/cassandra/retriever/CassandraOnlineRetriever.java @@ -51,6 +51,7 @@ public class CassandraOnlineRetriever implements SSTableOnlineRetriever enti .getBytes()); } + /** + * Generate Cassandra table name, with limit of 48 characters. + * + * @param project Name of Feast project + * @param entityNames List of entities used in retrieval call + * @return Cassandra table name for retrieval + */ + @Override + public String getSSTable(String project, List entityNames) { + String tableName = String.format("%s__%s", project, String.join("__", entityNames)); + return tableName.substring(0, Math.min(tableName.length(), MAX_TABLE_NAME_LENGTH)); + } + /** * Converts Cassandra rows into @NativeFeature type. * From f34b74f91501c1c6bc275c063396e9adabd2cc87 Mon Sep 17 00:00:00 2001 From: Terence Lim Date: Thu, 6 May 2021 13:46:31 +0800 Subject: [PATCH 2/5] Add hash suffix logic Signed-off-by: Terence Lim --- .../retriever/BigTableOnlineRetriever.java | 14 ----------- .../retriever/CassandraOnlineRetriever.java | 2 +- .../retriever/SSTableOnlineRetriever.java | 24 ++++++++++++++++++- 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/storage/connectors/bigtable/src/main/java/feast/storage/connectors/bigtable/retriever/BigTableOnlineRetriever.java b/storage/connectors/bigtable/src/main/java/feast/storage/connectors/bigtable/retriever/BigTableOnlineRetriever.java index 2525b15..6e67782 100644 --- a/storage/connectors/bigtable/src/main/java/feast/storage/connectors/bigtable/retriever/BigTableOnlineRetriever.java +++ b/storage/connectors/bigtable/src/main/java/feast/storage/connectors/bigtable/retriever/BigTableOnlineRetriever.java @@ -42,7 +42,6 @@ public class BigTableOnlineRetriever implements SSTableOnlineRetriever enti .getBytes()); } - /** - * Generate BigTable table name, with limit of 50 characters. - * - * @param project Name of Feast project - * @param entityNames List of entities used in retrieval call - * @return BigTable table name for retrieval - */ - @Override - public String getSSTable(String project, List entityNames) { - String tableName = String.format("%s__%s", project, String.join("__", entityNames)); - return tableName.substring(0, Math.min(tableName.length(), MAX_TABLE_NAME_LENGTH)); - } - /** * Converts rowCell feature value into @NativeFeature type. * diff --git a/storage/connectors/cassandra/src/main/java/feast/storage/connectors/cassandra/retriever/CassandraOnlineRetriever.java b/storage/connectors/cassandra/src/main/java/feast/storage/connectors/cassandra/retriever/CassandraOnlineRetriever.java index fc5a6d9..9b9de7b 100644 --- a/storage/connectors/cassandra/src/main/java/feast/storage/connectors/cassandra/retriever/CassandraOnlineRetriever.java +++ b/storage/connectors/cassandra/src/main/java/feast/storage/connectors/cassandra/retriever/CassandraOnlineRetriever.java @@ -85,7 +85,7 @@ public ByteBuffer convertEntityValueToKey(EntityRow entityRow, List enti @Override public String getSSTable(String project, List entityNames) { String tableName = String.format("%s__%s", project, String.join("__", entityNames)); - return tableName.substring(0, Math.min(tableName.length(), MAX_TABLE_NAME_LENGTH)); + return trimAndHash(tableName, MAX_TABLE_NAME_LENGTH); } /** diff --git a/storage/connectors/sstable/src/main/java/feast/storage/connectors/sstable/retriever/SSTableOnlineRetriever.java b/storage/connectors/sstable/src/main/java/feast/storage/connectors/sstable/retriever/SSTableOnlineRetriever.java index 957f0d3..e19ddcc 100644 --- a/storage/connectors/sstable/src/main/java/feast/storage/connectors/sstable/retriever/SSTableOnlineRetriever.java +++ b/storage/connectors/sstable/src/main/java/feast/storage/connectors/sstable/retriever/SSTableOnlineRetriever.java @@ -16,6 +16,7 @@ */ package feast.storage.connectors.sstable.retriever; +import com.google.common.hash.Hashing; import feast.proto.serving.ServingAPIProto.FeatureReferenceV2; import feast.proto.serving.ServingAPIProto.GetOnlineFeaturesRequestV2.EntityRow; import feast.proto.types.ValueProto; @@ -31,6 +32,8 @@ */ public interface SSTableOnlineRetriever extends OnlineRetrieverV2 { + int MAX_TABLE_NAME_LENGTH = 50; + @Override default List> getOnlineFeatures( String project, @@ -93,7 +96,8 @@ List> convertRowToFeature( * @return Name of Cassandra table */ default String getSSTable(String project, List entityNames) { - return String.format("%s__%s", project, String.join("__", entityNames)); + return trimAndHash( + String.format("%s__%s", project, String.join("__", entityNames)), MAX_TABLE_NAME_LENGTH); } /** @@ -137,4 +141,22 @@ default List getSSTableColumns(List featureReference .distinct() .collect(Collectors.toList()); } + + /** + * Trims long SSTable table names and appends hash suffix for uniqueness. + * + * @param expr Original SSTable table name + * @param maxLength Maximum length allowed for SSTable + * @return Hashed suffix SSTable table name + */ + default String trimAndHash(String expr, int maxLength) { + int maxPrefixLength = 40; + String finalName = expr; + if (expr.length() > maxLength) { + String hashSuffix = + Hashing.murmur3_32().hashBytes(expr.substring(maxPrefixLength).getBytes()).toString(); + finalName = expr.substring(0, Math.min(expr.length(), maxLength)).concat(hashSuffix); + } + return finalName; + } } From 1fb6a8bb418a5d8ec755ec4efc629ab775e11360 Mon Sep 17 00:00:00 2001 From: Terence Lim Date: Thu, 6 May 2021 13:47:22 +0800 Subject: [PATCH 3/5] Add superlong hash suffix IT Signed-off-by: Terence Lim --- .../serving/it/ServingServiceBigTableIT.java | 137 +++++++++++++++++- 1 file changed, 136 insertions(+), 1 deletion(-) diff --git a/serving/src/test/java/feast/serving/it/ServingServiceBigTableIT.java b/serving/src/test/java/feast/serving/it/ServingServiceBigTableIT.java index 0844cbc..f62142f 100644 --- a/serving/src/test/java/feast/serving/it/ServingServiceBigTableIT.java +++ b/serving/src/test/java/feast/serving/it/ServingServiceBigTableIT.java @@ -158,9 +158,21 @@ static void globalSetup() throws IOException { .build(); TestUtils.applyEntity(coreClient, projectName, driverEntitySpec); + // Apply Entity (this_is_a_long_long_long_long_long_long_entity_id) + String superLongEntityName = "this_is_a_long_long_long_long_long_long_entity_id"; + String superLongEntityDescription = "My super long entity id"; + ValueProto.ValueType.Enum superLongEntityType = ValueProto.ValueType.Enum.INT64; + EntityProto.EntitySpecV2 superLongEntitySpec = + EntityProto.EntitySpecV2.newBuilder() + .setName(superLongEntityName) + .setDescription(superLongEntityDescription) + .setValueType(superLongEntityType) + .build(); + TestUtils.applyEntity(coreClient, projectName, superLongEntitySpec); + // Apply Entity (merchant_id) String merchantEntityName = "merchant_id"; - String merchantEntityDescription = "My driver id"; + String merchantEntityDescription = "My merchant id"; ValueProto.ValueType.Enum merchantEntityType = ValueProto.ValueType.Enum.INT64; EntityProto.EntitySpecV2 merchantEntitySpec = EntityProto.EntitySpecV2.newBuilder() @@ -186,6 +198,27 @@ static void globalSetup() throws IOException { TestUtils.applyFeatureTable( coreClient, projectName, ridesFeatureTableName, ridesEntities, ridesFeatures, 7200); + // Apply FeatureTable (superLong) + String superLongFeatureTableName = "superlong"; + ImmutableList superLongEntities = ImmutableList.of(superLongEntityName); + ImmutableMap superLongFeatures = + ImmutableMap.of( + "trip_cost", + ValueProto.ValueType.Enum.INT64, + "trip_distance", + ValueProto.ValueType.Enum.DOUBLE, + "trip_empty", + ValueProto.ValueType.Enum.DOUBLE, + "trip_wrong_type", + ValueProto.ValueType.Enum.STRING); + TestUtils.applyFeatureTable( + coreClient, + projectName, + superLongFeatureTableName, + superLongEntities, + superLongFeatures, + 7200); + // Apply FeatureTable (rides_merchant) String rideMerchantFeatureTableName = "rides_merchant"; ImmutableList ridesMerchantEntities = @@ -199,6 +232,8 @@ static void globalSetup() throws IOException { 7200); // BigTable Table names + String superLongBtTableName = String.format("%s__%s", projectName, superLongEntityName); + superLongBtTableName = trimAndHash(superLongBtTableName, 50); String btTableName = String.format("%s__%s", projectName, driverEntityName); String compoundBtTableName = String.format( @@ -237,6 +272,39 @@ static void globalSetup() throws IOException { ingestData( featureTableName, btTableName, entityFeatureKey, entityFeatureValue, schemaKey, ftSchema); + /** SuperLong Entity Ingestion Workflow */ + Schema superLongFtSchema = + SchemaBuilder.record("SuperLongData") + .namespace(superLongFeatureTableName) + .fields() + .requiredLong(feature1Reference.getName()) + .requiredDouble(feature2Reference.getName()) + .nullableString(feature3Reference.getName(), "null") + .requiredString(feature4Reference.getName()) + .endRecord(); + byte[] superLongSchemaReference = + Hashing.murmur3_32().hashBytes(superLongFtSchema.toString().getBytes()).asBytes(); + + GenericRecord superLongRecord = + new GenericRecordBuilder(superLongFtSchema) + .set("trip_cost", 5L) + .set("trip_distance", 3.5) + .set("trip_empty", null) + .set("trip_wrong_type", "test") + .build(); + byte[] superLongEntityFeatureKey = + String.valueOf(DataGenerator.createInt64Value(1).getInt64Val()).getBytes(); + byte[] superLongEntityFeatureValue = + createEntityValue(superLongFtSchema, superLongSchemaReference, superLongRecord); + byte[] superLongSchemaKey = createSchemaKey(superLongSchemaReference); + ingestData( + superLongFeatureTableName, + superLongBtTableName, + superLongEntityFeatureKey, + superLongEntityFeatureValue, + superLongSchemaKey, + superLongFtSchema); + /** Compound Entity Ingestion Workflow */ Schema compoundFtSchema = SchemaBuilder.record("DriverMerchantData") @@ -399,6 +467,17 @@ private static byte[] recordToAvro(GenericRecord datum, Schema schema) throws IO return output.toByteArray(); } + private static String trimAndHash(String expr, int maxLength) { + int maxPrefixLength = 40; + String finalName = expr; + if (expr.length() > maxLength) { + String hashSuffix = + Hashing.murmur3_32().hashBytes(expr.substring(maxPrefixLength).getBytes()).toString(); + finalName = expr.substring(0, Math.min(expr.length(), maxLength)).concat(hashSuffix); + } + return finalName; + } + @Test public void shouldRegisterSingleEntityAndGetOnlineFeatures() { // getOnlineFeatures Information @@ -726,6 +805,62 @@ public void shouldSupportAllFeastTypes() throws IOException { .allMatch(status -> status.equals(GetOnlineFeaturesResponse.FieldStatus.PRESENT)); } + @Test + public void shouldRegisterSuperLongEntityAndGetOnlineFeatures() { + // getOnlineFeatures Information + String projectName = "default"; + String entityName = "this_is_a_long_long_long_long_long_long_entity_id"; + ValueProto.Value entityValue = ValueProto.Value.newBuilder().setInt64Val(1).build(); + + // Instantiate EntityRows + GetOnlineFeaturesRequestV2.EntityRow entityRow1 = + DataGenerator.createEntityRow(entityName, DataGenerator.createInt64Value(1), 100); + ImmutableList entityRows = ImmutableList.of(entityRow1); + + // Instantiate FeatureReferences + FeatureReferenceV2 featureReference = + DataGenerator.createFeatureReference("superlong", "trip_cost"); + FeatureReferenceV2 notFoundFeatureReference = + DataGenerator.createFeatureReference("superlong", "trip_transaction"); + + ImmutableList featureReferences = + ImmutableList.of(featureReference, notFoundFeatureReference); + + // Build GetOnlineFeaturesRequestV2 + GetOnlineFeaturesRequestV2 onlineFeatureRequest = + TestUtils.createOnlineFeatureRequest(projectName, featureReferences, entityRows); + GetOnlineFeaturesResponse featureResponse = + servingStub.getOnlineFeaturesV2(onlineFeatureRequest); + + ImmutableMap expectedValueMap = + ImmutableMap.of( + entityName, + entityValue, + FeatureV2.getFeatureStringRef(featureReference), + DataGenerator.createInt64Value(5), + FeatureV2.getFeatureStringRef(notFoundFeatureReference), + DataGenerator.createEmptyValue()); + + ImmutableMap expectedStatusMap = + ImmutableMap.of( + entityName, + GetOnlineFeaturesResponse.FieldStatus.PRESENT, + FeatureV2.getFeatureStringRef(featureReference), + GetOnlineFeaturesResponse.FieldStatus.PRESENT, + FeatureV2.getFeatureStringRef(notFoundFeatureReference), + GetOnlineFeaturesResponse.FieldStatus.NOT_FOUND); + + GetOnlineFeaturesResponse.FieldValues expectedFieldValues = + GetOnlineFeaturesResponse.FieldValues.newBuilder() + .putAllFields(expectedValueMap) + .putAllStatuses(expectedStatusMap) + .build(); + ImmutableList expectedFieldValuesList = + ImmutableList.of(expectedFieldValues); + + assertEquals(expectedFieldValuesList, featureResponse.getFieldValuesList()); + } + @TestConfiguration public static class TestConfig { @Bean From 3961ac6ff9a06e71adaa0a42267933b7cf14380c Mon Sep 17 00:00:00 2001 From: Terence Lim Date: Fri, 7 May 2021 11:23:45 +0800 Subject: [PATCH 4/5] Address comments Signed-off-by: Terence Lim --- .../test/java/feast/serving/it/ServingServiceBigTableIT.java | 4 ++-- .../connectors/sstable/retriever/SSTableOnlineRetriever.java | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/serving/src/test/java/feast/serving/it/ServingServiceBigTableIT.java b/serving/src/test/java/feast/serving/it/ServingServiceBigTableIT.java index f62142f..d942d5a 100644 --- a/serving/src/test/java/feast/serving/it/ServingServiceBigTableIT.java +++ b/serving/src/test/java/feast/serving/it/ServingServiceBigTableIT.java @@ -468,12 +468,12 @@ private static byte[] recordToAvro(GenericRecord datum, Schema schema) throws IO } private static String trimAndHash(String expr, int maxLength) { - int maxPrefixLength = 40; + int maxPrefixLength = maxLength - 8; String finalName = expr; if (expr.length() > maxLength) { String hashSuffix = Hashing.murmur3_32().hashBytes(expr.substring(maxPrefixLength).getBytes()).toString(); - finalName = expr.substring(0, Math.min(expr.length(), maxLength)).concat(hashSuffix); + finalName = expr.substring(0, Math.min(expr.length(), maxPrefixLength)).concat(hashSuffix); } return finalName; } diff --git a/storage/connectors/sstable/src/main/java/feast/storage/connectors/sstable/retriever/SSTableOnlineRetriever.java b/storage/connectors/sstable/src/main/java/feast/storage/connectors/sstable/retriever/SSTableOnlineRetriever.java index e19ddcc..c86923a 100644 --- a/storage/connectors/sstable/src/main/java/feast/storage/connectors/sstable/retriever/SSTableOnlineRetriever.java +++ b/storage/connectors/sstable/src/main/java/feast/storage/connectors/sstable/retriever/SSTableOnlineRetriever.java @@ -150,12 +150,13 @@ default List getSSTableColumns(List featureReference * @return Hashed suffix SSTable table name */ default String trimAndHash(String expr, int maxLength) { - int maxPrefixLength = 40; + // Length 8 as derived from murmurhash_32 implementation + int maxPrefixLength = maxLength - 8; String finalName = expr; if (expr.length() > maxLength) { String hashSuffix = Hashing.murmur3_32().hashBytes(expr.substring(maxPrefixLength).getBytes()).toString(); - finalName = expr.substring(0, Math.min(expr.length(), maxLength)).concat(hashSuffix); + finalName = expr.substring(0, Math.min(expr.length(), maxPrefixLength)).concat(hashSuffix); } return finalName; } From 1a2d88e7bfab667ff9ca88d0e168bd2221b84658 Mon Sep 17 00:00:00 2001 From: Terence Lim Date: Fri, 7 May 2021 11:29:42 +0800 Subject: [PATCH 5/5] Update IT Signed-off-by: Terence Lim --- .../serving/it/ServingServiceBigTableIT.java | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/serving/src/test/java/feast/serving/it/ServingServiceBigTableIT.java b/serving/src/test/java/feast/serving/it/ServingServiceBigTableIT.java index d942d5a..423e7c8 100644 --- a/serving/src/test/java/feast/serving/it/ServingServiceBigTableIT.java +++ b/serving/src/test/java/feast/serving/it/ServingServiceBigTableIT.java @@ -233,7 +233,12 @@ static void globalSetup() throws IOException { // BigTable Table names String superLongBtTableName = String.format("%s__%s", projectName, superLongEntityName); - superLongBtTableName = trimAndHash(superLongBtTableName, 50); + String hashSuffix = + Hashing.murmur3_32().hashBytes(superLongBtTableName.substring(42).getBytes()).toString(); + superLongBtTableName = + superLongBtTableName + .substring(0, Math.min(superLongBtTableName.length(), 42)) + .concat(hashSuffix); String btTableName = String.format("%s__%s", projectName, driverEntityName); String compoundBtTableName = String.format( @@ -467,17 +472,6 @@ private static byte[] recordToAvro(GenericRecord datum, Schema schema) throws IO return output.toByteArray(); } - private static String trimAndHash(String expr, int maxLength) { - int maxPrefixLength = maxLength - 8; - String finalName = expr; - if (expr.length() > maxLength) { - String hashSuffix = - Hashing.murmur3_32().hashBytes(expr.substring(maxPrefixLength).getBytes()).toString(); - finalName = expr.substring(0, Math.min(expr.length(), maxPrefixLength)).concat(hashSuffix); - } - return finalName; - } - @Test public void shouldRegisterSingleEntityAndGetOnlineFeatures() { // getOnlineFeatures Information