From 4d8c02a91c2347afaaafbd3a29139d29ada78928 Mon Sep 17 00:00:00 2001 From: zhilingc Date: Tue, 29 Oct 2019 15:54:33 +0800 Subject: [PATCH 01/12] Map jobs source::sink instead of fs::sink. Allow users to specify topics. Provision single topic by default. --- .../feast/core/dao/JobInfoRepository.java | 2 +- .../java/feast/core/grpc/CoreServiceImpl.java | 18 ++- .../main/java/feast/core/model/JobInfo.java | 14 ++- .../main/java/feast/core/model/Source.java | 106 ++++++++++------- .../core/service/JobCoordinatorService.java | 45 +++++--- .../core/stream/kafka/KafkaFeatureStream.java | 15 +-- .../kafka/KafkaFeatureStreamConfig.java | 6 +- core/src/main/resources/application.yml | 1 + .../core/job/ScheduledJobMonitorTest.java | 4 +- .../direct/DirectRunnerJobManagerTest.java | 1 + .../service/JobCoordinatorServiceTest.java | 29 +++-- .../feast/core/service/SpecServiceTest.java | 6 +- .../src/main/java/feast/options/Options.java | 27 ----- .../java/feast/options/OptionsParser.java | 108 ------------------ .../main/java/feast/options/Validation.java | 75 ------------ 15 files changed, 153 insertions(+), 304 deletions(-) delete mode 100644 ingestion/src/main/java/feast/options/Options.java delete mode 100644 ingestion/src/main/java/feast/options/OptionsParser.java delete mode 100644 ingestion/src/main/java/feast/options/Validation.java diff --git a/core/src/main/java/feast/core/dao/JobInfoRepository.java b/core/src/main/java/feast/core/dao/JobInfoRepository.java index ed457ef9f0b..06381aa5577 100644 --- a/core/src/main/java/feast/core/dao/JobInfoRepository.java +++ b/core/src/main/java/feast/core/dao/JobInfoRepository.java @@ -28,5 +28,5 @@ @Repository public interface JobInfoRepository extends JpaRepository { List findByStatusNotIn(Collection statuses); - List findByFeatureSetsNameAndStoreName(String featureSetsName, String storeName); + List findBySourceIdAndStoreName(String sourceId, String storeName); } \ No newline at end of file diff --git a/core/src/main/java/feast/core/grpc/CoreServiceImpl.java b/core/src/main/java/feast/core/grpc/CoreServiceImpl.java index 7ac1657a627..be4baa4278d 100644 --- a/core/src/main/java/feast/core/grpc/CoreServiceImpl.java +++ b/core/src/main/java/feast/core/grpc/CoreServiceImpl.java @@ -17,6 +17,7 @@ package feast.core.grpc; +import com.google.common.collect.Lists; import com.google.protobuf.InvalidProtocolBufferException; import feast.core.CoreServiceGrpc.CoreServiceImplBase; import feast.core.CoreServiceProto.ApplyFeatureSetRequest; @@ -32,9 +33,11 @@ import feast.core.CoreServiceProto.UpdateStoreResponse; import feast.core.CoreServiceProto.UpdateStoreResponse.Status; import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.SourceProto; import feast.core.StoreProto.Store; import feast.core.StoreProto.Store.Subscription; import feast.core.exception.RetrievalException; +import feast.core.model.Source; import feast.core.service.JobCoordinatorService; import feast.core.service.SpecService; import io.grpc.stub.StreamObserver; @@ -113,7 +116,7 @@ public void applyFeatureSet( return p.matcher(featureSetName).matches(); }) .collect(Collectors.toList()); - List featureSetSpecs = new ArrayList<>(); + Set featureSetSpecs = new HashSet<>(); for (Subscription subscription : relevantSubscriptions) { featureSetSpecs.addAll( specService @@ -124,8 +127,12 @@ public void applyFeatureSet( .build()) .getFeatureSetsList()); } - if (!featureSetSpecs.isEmpty()) { - jobCoordinatorService.startOrUpdateJob(featureSetSpecs, store); + if (!featureSetSpecs.isEmpty() && featureSetSpecs.contains(response.getFeatureSet())) { + // We use the request featureSet source because it contains the information + // about whether to default to the default feature stream or not + SourceProto.Source source = request.getFeatureSet().getSource(); + jobCoordinatorService + .startOrUpdateJob(Lists.newArrayList(featureSetSpecs), source, store); } } responseObserver.onNext(response); @@ -159,10 +166,11 @@ public void updateStore(UpdateStoreRequest request, ); } featureSetSpecs.stream() - .collect(Collectors.groupingBy(FeatureSetSpec::getName)) + .collect(Collectors.groupingBy(FeatureSetSpec::getSource)) .entrySet() .stream() - .forEach(kv -> jobCoordinatorService.startOrUpdateJob(kv.getValue(), store)); + .forEach( + kv -> jobCoordinatorService.startOrUpdateJob(kv.getValue(), kv.getKey(), store)); } } catch (Exception e) { log.error("Exception has occurred in UpdateStore method: ", e); diff --git a/core/src/main/java/feast/core/model/JobInfo.java b/core/src/main/java/feast/core/model/JobInfo.java index fd523b4e4c7..4a64aefbc5c 100644 --- a/core/src/main/java/feast/core/model/JobInfo.java +++ b/core/src/main/java/feast/core/model/JobInfo.java @@ -55,19 +55,21 @@ public class JobInfo extends AbstractTimestampEntity { @Column(name = "ext_id") private String extId; - // Import job source type - @Column(name = "type") - private String type; - // Runner type @Column(name = "runner") private String runner; + // Source id + @ManyToOne + @JoinColumn(name = "source_id") + private Source source; + // Sink id @ManyToOne @JoinColumn(name = "store_name") private Store store; + // FeatureSets populated by the job @ManyToMany @JoinTable( @@ -87,11 +89,11 @@ public JobInfo() { super(); } - public JobInfo(String id, String extId, SourceType type, String runner, Store sink, + public JobInfo(String id, String extId, String runner, Source source, Store sink, List featureSets, JobStatus jobStatus) { this.id = id; this.extId = extId; - this.type = type.toString(); + this.source = source; this.runner = runner; this.store = sink; this.featureSets = featureSets; diff --git a/core/src/main/java/feast/core/model/Source.java b/core/src/main/java/feast/core/model/Source.java index f1997801597..c63bc413876 100644 --- a/core/src/main/java/feast/core/model/Source.java +++ b/core/src/main/java/feast/core/model/Source.java @@ -1,7 +1,6 @@ package feast.core.model; import com.google.common.collect.Sets; -import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.Message; import feast.core.SourceProto; import feast.core.SourceProto.KafkaSourceConfig; @@ -10,10 +9,7 @@ import java.util.Set; import javax.persistence.Column; import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; import javax.persistence.Id; -import javax.persistence.Lob; import javax.persistence.Table; import lombok.Setter; @@ -25,18 +21,20 @@ public class Source { private static final Set KAFKA_OPTIONS = Sets.newHashSet("bootstrapServers"); @Id - @GeneratedValue(strategy = GenerationType.AUTO) @Column(name = "id", updatable = false, nullable = false) - private Long id; + private String id; // Type of the source. Should map to feast.types.Source.SourceType @Column(name = "type", nullable = false) private String type; - // Options for this source - @Column(name = "options") - @Lob - private byte[] options; + // Bootstrap servers, comma delimited. Used by kafka sources. + @Column(name = "bootstrap_servers") + private String bootstrapServers; + + // Topics to listen to, comma delimited. Used by kafka sources. + @Column(name = "topics") + private String topics; @Column(name = "use_default") private boolean useDefault; @@ -45,37 +43,50 @@ public Source() { super(); } - public Source(SourceType type, byte[] options) { + public Source(SourceType type, KafkaSourceConfig config, boolean isUseDefault) { this.type = type.toString(); - this.options = options; + this.bootstrapServers = config.getBootstrapServers(); + this.topics = config.getTopic(); + this.useDefault = isUseDefault; + this.id = generateId(); } + /** + * Construct a source facade object from a given proto object. + * + * @param sourceProto SourceProto.Source object + * @return Source facade object + */ public static Source fromProto(SourceProto.Source sourceProto) { if (sourceProto.equals(SourceProto.Source.getDefaultInstance())) { Source source = new Source(SourceType.UNRECOGNIZED, - KafkaSourceConfig.getDefaultInstance().toByteArray()); + KafkaSourceConfig.getDefaultInstance(), true); source.setUseDefault(true); return source; } - byte[] options; switch (sourceProto.getType()) { case KAFKA: - options = sourceProto.getKafkaSourceConfig().toByteArray(); - break; + return new Source(sourceProto.getType(), sourceProto.getKafkaSourceConfig(), false); case UNRECOGNIZED: default: throw new IllegalArgumentException("Unsupported source type. Only [KAFKA] is supported."); } - return new Source(sourceProto.getType(), options); } - public SourceProto.Source toProto() throws InvalidProtocolBufferException { + /** + * Convert this object to its equivalent proto object. + * + * @return SourceProto.Source + */ + public SourceProto.Source toProto() { Builder builder = SourceProto.Source.newBuilder() .setType(SourceType.valueOf(type)); switch (SourceType.valueOf(type)) { case KAFKA: - KafkaSourceConfig config = KafkaSourceConfig.parseFrom(options); + KafkaSourceConfig config = KafkaSourceConfig.newBuilder() + .setBootstrapServers(bootstrapServers) + .setTopic(topics).build(); return builder.setKafkaSourceConfig(config).build(); case UNRECOGNIZED: default: @@ -83,17 +94,28 @@ public SourceProto.Source toProto() throws InvalidProtocolBufferException { } } + /** + * Get the id for this feature source + * + * @return feature source id in the format TYPE/options + */ + public String getId() { + return id; + } + /** * Get the options for this feature source * * @return feature source options */ - public Message getOptions() - throws InvalidProtocolBufferException { + public Message getOptions() { switch (SourceType.valueOf(type)) { case KAFKA: - KafkaSourceConfig config = KafkaSourceConfig.parseFrom(options); - return config; + return KafkaSourceConfig + .newBuilder() + .setBootstrapServers(bootstrapServers) + .setTopic(topics) + .build(); case UNRECOGNIZED: default: throw new RuntimeException("Unable to convert source to proto"); @@ -119,20 +141,14 @@ public boolean isUseDefault() { } /** - * Set the topic to the source stream. + * Override equality for sources. + * useDefault is always compared first; if both sources are using the default feature source, + * they will be equal. If not they will be compared based on their type-specific options. + * + * @param other other Source + * @return boolean equal */ - public void setTopic(String topic) throws InvalidProtocolBufferException { - switch (SourceType.valueOf(type)) { - case KAFKA: - KafkaSourceConfig kafkacfg = KafkaSourceConfig.parseFrom(options); - this.options = kafkacfg.toBuilder().setTopic(topic).build().toByteArray(); - case UNRECOGNIZED: - default: - throw new RuntimeException("Unable to convert source to proto"); - } - } - - public boolean equalTo(Source other) throws InvalidProtocolBufferException { + public boolean equalTo(Source other) { if (other.useDefault && useDefault) { return true; } @@ -143,14 +159,26 @@ public boolean equalTo(Source other) throws InvalidProtocolBufferException { switch (SourceType.valueOf(type)) { case KAFKA: - KafkaSourceConfig kafkaCfg = KafkaSourceConfig.parseFrom(options); - KafkaSourceConfig otherKafkaCfg = KafkaSourceConfig.parseFrom(other.options); - return kafkaCfg.getBootstrapServers().equals(otherKafkaCfg.getBootstrapServers()); + return bootstrapServers.equals(other.bootstrapServers) && + topics.equals(other.topics); case UNRECOGNIZED: default: return false; } } + + private String generateId() { + if (useDefault) { + return "DEFAULT"; + } + switch (SourceType.valueOf(type)) { + case KAFKA: + return String.format("KAFKA/%s/%s", bootstrapServers, topics); + default: + // should not occur + return ""; + } + } } diff --git a/core/src/main/java/feast/core/service/JobCoordinatorService.java b/core/src/main/java/feast/core/service/JobCoordinatorService.java index 61e600c9804..116f75eea4e 100644 --- a/core/src/main/java/feast/core/service/JobCoordinatorService.java +++ b/core/src/main/java/feast/core/service/JobCoordinatorService.java @@ -1,7 +1,9 @@ package feast.core.service; import com.google.common.base.Strings; +import feast.core.FeatureSetProto; import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.SourceProto; import feast.core.SourceProto.SourceType; import feast.core.StoreProto; import feast.core.dao.JobInfoRepository; @@ -14,6 +16,7 @@ import feast.core.model.FeatureSet; import feast.core.model.JobInfo; import feast.core.model.JobStatus; +import feast.core.model.Source; import feast.core.model.Store; import java.time.Instant; import java.util.ArrayList; @@ -44,9 +47,11 @@ public JobCoordinatorService( * there has been no change in the featureSet, and there is a running job for the featureSet, this * method will do nothing. */ - public JobInfo startOrUpdateJob(List featureSetSpecs, StoreProto.Store store) { - String featureSetName = featureSetSpecs.get(0).getName(); - Optional job = getJob(featureSetName, store.getName()); + public JobInfo startOrUpdateJob(List featureSetSpecs, + SourceProto.Source sourceSpec, + StoreProto.Store store) { + Source source = Source.fromProto(sourceSpec); + Optional job = getJob(source.getId(), store.getName()); if (job.isPresent()) { Set existingFeatureSetsPopulatedByJob = job.get().getFeatureSets().stream().map(FeatureSet::getId).collect(Collectors.toSet()); @@ -61,14 +66,17 @@ public JobInfo startOrUpdateJob(List featureSetSpecs, StoreProto return updateJob(job.get(), featureSetSpecs, store); } } else { - return startJob(createJobId(featureSetName, store.getName()), featureSetSpecs, store); + return startJob(createJobId(source.getType().toString().toLowerCase(), store.getName()), + featureSetSpecs, sourceSpec, store); } } - /** Get the non-terminal job associated with the given featureSet name and store name, if any. */ - private Optional getJob(String featureSetName, String storeName) { + /** + * Get the non-terminal job associated with the given featureSet name and store name, if any. + */ + private Optional getJob(String sourceId, String storeName) { List jobs = - jobInfoRepository.findByFeatureSetsNameAndStoreName(featureSetName, storeName); + jobInfoRepository.findBySourceIdAndStoreName(sourceId, storeName); if (jobs.isEmpty()) { return Optional.empty(); } @@ -77,9 +85,12 @@ private Optional getJob(String featureSetName, String storeName) { .findFirst(); } - /** Start or update the job to ingest data to the sink. */ + /** + * Start or update the job to ingest data to the sink. + */ private JobInfo startJob( - String jobId, List featureSetSpecs, StoreProto.Store sinkSpec) { + String jobId, List featureSetSpecs, SourceProto.Source source, + StoreProto.Store sinkSpec) { try { AuditLogger.log( Resource.JOB, @@ -102,7 +113,6 @@ private JobInfo startJob( jobManager.getRunnerType().getName(), extId); - SourceType sourceType = featureSetSpecs.get(0).getSource().getType(); List featureSets = new ArrayList<>(); for (FeatureSetSpec featureSetSpec : featureSetSpecs) { @@ -115,8 +125,8 @@ private JobInfo startJob( new JobInfo( jobId, extId, - sourceType, jobManager.getRunnerType().getName(), + Source.fromProto(source), Store.fromProto(sinkSpec), featureSets, JobStatus.RUNNING); @@ -134,7 +144,9 @@ private JobInfo startJob( } } - /** Update the given job */ + /** + * Update the given job + */ private JobInfo updateJob( JobInfo jobInfo, List featureSetSpecs, StoreProto.Store store) { jobInfo.setFeatureSets( @@ -171,7 +183,9 @@ public void abortJob(String id) { jobInfoRepository.saveAndFlush(job); } - /** Update a given job's status */ + /** + * Update a given job's status + */ public void updateJobStatus(String jobId, JobStatus status) { Optional jobRecordOptional = jobInfoRepository.findById(jobId); if (jobRecordOptional.isPresent()) { @@ -181,9 +195,10 @@ public void updateJobStatus(String jobId, JobStatus status) { } } - public String createJobId(String featureSetName, String storeName) { + public String createJobId(String sourceId, String storeName) { String dateSuffix = String.valueOf(Instant.now().toEpochMilli()); - String jobId = String.format("%s-to-%s", featureSetName, storeName) + dateSuffix; + String sourceIdTrunc = sourceId.split("/")[0].toLowerCase(); + String jobId = String.format("%s-to-%s", sourceIdTrunc, storeName) + dateSuffix; return jobId.replaceAll("_", "-"); } } diff --git a/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStream.java b/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStream.java index 43ab3b4393d..487e15e7507 100644 --- a/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStream.java +++ b/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStream.java @@ -38,11 +38,13 @@ public Source provision(FeatureSet featureSet) throws RuntimeException { Source source = featureSet.getSource(); KafkaSourceConfig config = KafkaSourceConfig.getDefaultInstance(); String bootstrapServers = defaultConfig.getBootstrapServers(); + String topicName = defaultConfig.getTopic(); if (!source.isUseDefault()) { try { config = (KafkaSourceConfig) source.getOptions(); + topicName = config.getTopic(); bootstrapServers = config.getBootstrapServers(); - } catch (InvalidProtocolBufferException | NullPointerException e) { + } catch (NullPointerException e) { throw new RuntimeException(String .format("Unable to retrieve bootstrap servers for featureSet %s", featureSet.getName()), e); @@ -54,7 +56,6 @@ public Source provision(FeatureSet featureSet) throws RuntimeException { map.put(AdminClientConfig.REQUEST_TIMEOUT_MS_CONFIG, "1000"); AdminClient client = AdminClient.create(map); - String topicName = generateTopicName(featureSet.getName()); NewTopic newTopic = new NewTopic(topicName, defaultConfig.getTopicNumPartitions(), defaultConfig.getTopicReplicationFactor()); @@ -71,14 +72,10 @@ public Source provision(FeatureSet featureSet) throws RuntimeException { throw new RuntimeException(e.getMessage(), e); } } + assert config != null; - source.setOptions( - config.toBuilder().setTopic(topicName).setBootstrapServers(bootstrapServers).build() - .toByteArray()); + source.setTopics(topicName); + source.setBootstrapServers(bootstrapServers); return source; } - - public String generateTopicName(String featureSetName) { - return Strings.lenientFormat("%s-%s-features", defaultConfig.getTopicPrefix(), featureSetName); - } } diff --git a/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStreamConfig.java b/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStreamConfig.java index aa8248cf9d4..eb9177bbec9 100644 --- a/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStreamConfig.java +++ b/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStreamConfig.java @@ -17,9 +17,9 @@ public class KafkaFeatureStreamConfig { String bootstrapServers; /** - * Feast stream topic prefix, to be prepended to topic name + * Feast stream topic name */ - String topicPrefix; + String topic; /** * Number of partitions per topic @@ -45,7 +45,7 @@ public class KafkaFeatureStreamConfig { */ public static KafkaFeatureStreamConfig fromMap(Map optionMap) { String bootstrapServers = optionMap.getOrDefault("bootstrapServers", "KAFKA:9092"); - String topicPrefix = optionMap.getOrDefault("topicPrefix", "feast"); + String topicPrefix = optionMap.getOrDefault("topic", "feast-features"); int numPartitions = Integer.parseInt(optionMap.getOrDefault("partitions", NUM_PARTITIONS_DEFAULT)); short replicationFactor = Short.parseShort(optionMap.getOrDefault("replicationFactor", REPLICATION_FACTOR_DEFAULT)); diff --git a/core/src/main/resources/application.yml b/core/src/main/resources/application.yml index 6867b05ac85..be4f8c6b72a 100644 --- a/core/src/main/resources/application.yml +++ b/core/src/main/resources/application.yml @@ -47,6 +47,7 @@ feast: type: kafka # Feature stream options. options: + topic: feast-features bootstrapServers: kafka:9092 replicationFactor: 1 partitions: 1 diff --git a/core/src/test/java/feast/core/job/ScheduledJobMonitorTest.java b/core/src/test/java/feast/core/job/ScheduledJobMonitorTest.java index 3d7a082f711..1f4ffcf9828 100644 --- a/core/src/test/java/feast/core/job/ScheduledJobMonitorTest.java +++ b/core/src/test/java/feast/core/job/ScheduledJobMonitorTest.java @@ -25,11 +25,13 @@ import static org.mockito.Mockito.when; import com.google.common.collect.Lists; +import feast.core.SourceProto; import feast.core.dao.JobInfoRepository; import feast.core.dao.MetricsRepository; import feast.core.model.JobInfo; import feast.core.model.JobStatus; import feast.core.model.Metrics; +import feast.core.model.Source; import feast.core.model.Store; import java.util.ArrayList; import java.util.Arrays; @@ -62,8 +64,8 @@ public void getJobStatus_shouldUpdateJobInfoForRunningJob() { new JobInfo( "jobId", "extId1", - "Streaming", "DataflowRunner", + Source.fromProto(SourceProto.Source.getDefaultInstance()), new Store(), Collections.emptyList(), Collections.emptyList(), diff --git a/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java b/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java index 93f2519c383..88a674f86ec 100644 --- a/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java +++ b/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java @@ -74,6 +74,7 @@ public void shouldStartDirectJobAndRegisterPipelineResult() throws IOException { expectedPipelineOptions.setRunner(DirectRunner.class); expectedPipelineOptions.setBlockOnRun(false); expectedPipelineOptions.setStoreJson(Lists.newArrayList(printer.print(store))); + expectedPipelineOptions.setProject(""); expectedPipelineOptions .setFeatureSetSpecJson(Lists.newArrayList(printer.print(featureSetSpec))); diff --git a/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java b/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java index efffbcc2981..528b2f7d6d4 100644 --- a/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java +++ b/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java @@ -8,8 +8,7 @@ import com.google.common.collect.Lists; import feast.core.FeatureSetProto.FeatureSetSpec; -import feast.core.SourceProto.Source; -import feast.core.SourceProto.SourceType; +import feast.core.SourceProto; import feast.core.StoreProto; import feast.core.StoreProto.Store.RedisConfig; import feast.core.StoreProto.Store.StoreType; @@ -19,6 +18,7 @@ import feast.core.model.FeatureSet; import feast.core.model.JobInfo; import feast.core.model.JobStatus; +import feast.core.model.Source; import feast.core.model.Store; import org.junit.Before; import org.junit.Rule; @@ -38,6 +38,7 @@ public class JobCoordinatorServiceTest { private JobCoordinatorService jobCoordinatorService; private JobInfo existingJob; + @Before public void setUp() { initMocks(this); @@ -51,10 +52,11 @@ public void setUp() { featureSet1.setId("featureSet1:1"); FeatureSet featureSet2 = new FeatureSet(); featureSet2.setId("featureSet2:1"); - existingJob = new JobInfo("extid", "name", "KAFKA", "DirectRunner", store, + Source source = Source.fromProto(SourceProto.Source.getDefaultInstance()); + existingJob = new JobInfo("extid", "name", "DirectRunner", source, store, Lists.newArrayList(featureSet1, featureSet2), Lists.newArrayList(), JobStatus.RUNNING); - when(jobInfoRepository.findByFeatureSetsNameAndStoreName("featureSet1", "SERVING")) + when(jobInfoRepository.findBySourceIdAndStoreName("DEFAULT", "SERVING")) .thenReturn(Lists.newArrayList(existingJob)); jobCoordinatorService = new JobCoordinatorService(jobInfoRepository, jobManager); @@ -77,7 +79,8 @@ public void shouldNotStartOrUpdateJobIfNoChanges() { .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379)) .build(); JobInfo jobInfo = jobCoordinatorService - .startOrUpdateJob(Lists.newArrayList(featureSet1, featureSet2), store); + .startOrUpdateJob(Lists.newArrayList(featureSet1, featureSet2), + SourceProto.Source.getDefaultInstance(), store); assertThat(jobInfo, equalTo(existingJob)); } @@ -86,7 +89,6 @@ public void shouldStartJobIfNotExists() { FeatureSetSpec featureSet = FeatureSetSpec.newBuilder() .setName("featureSet") .setVersion(1) - .setSource(Source.newBuilder().setType(SourceType.KAFKA)) .build(); StoreProto.Store store = StoreProto.Store.newBuilder() .setName("SERVING") @@ -102,11 +104,13 @@ public void shouldStartJobIfNotExists() { when(jobManager.getRunnerType()).thenReturn(Runner.DIRECT); FeatureSet expectedFeatureSet = new FeatureSet(); expectedFeatureSet.setId("featureSet:1"); - JobInfo expectedJobInfo = new JobInfo(jobId, extJobId, SourceType.KAFKA, "DirectRunner", - Store.fromProto(store), Lists.newArrayList(expectedFeatureSet), JobStatus.RUNNING); + Source source = Source.fromProto(SourceProto.Source.getDefaultInstance()); + JobInfo expectedJobInfo = new JobInfo(jobId, extJobId, "DirectRunner", + source, Store.fromProto(store), Lists.newArrayList(expectedFeatureSet), JobStatus.RUNNING); when(jobInfoRepository.save(expectedJobInfo)).thenReturn(expectedJobInfo); JobInfo jobInfo = jobCoordinatorService - .startOrUpdateJob(Lists.newArrayList(featureSet), store); + .startOrUpdateJob(Lists.newArrayList(featureSet), SourceProto.Source.getDefaultInstance(), + store); assertThat(jobInfo, equalTo(expectedJobInfo)); } @@ -115,7 +119,6 @@ public void shouldUpdateJobIfAlreadyExistsButThereIsAChange() { FeatureSetSpec featureSet = FeatureSetSpec.newBuilder() .setName("featureSet1") .setVersion(1) - .setSource(Source.newBuilder().setType(SourceType.KAFKA)) .build(); StoreProto.Store store = StoreProto.Store.newBuilder() .setName("SERVING") @@ -123,15 +126,17 @@ public void shouldUpdateJobIfAlreadyExistsButThereIsAChange() { .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379)) .build(); String extId = "extId123"; + Source source = Source.fromProto(SourceProto.Source.getDefaultInstance()); JobInfo modifiedJob = new JobInfo(existingJob.getId(), existingJob.getExtId(), - SourceType.valueOf(existingJob.getType()), existingJob.getRunner(), Store.fromProto(store), + existingJob.getRunner(), source, Store.fromProto(store), Lists.newArrayList(FeatureSet.fromProto(featureSet)), JobStatus.RUNNING); when(jobManager.updateJob(modifiedJob)).thenReturn(extId); JobInfo expectedJobInfo = modifiedJob; expectedJobInfo.setExtId(extId); when(jobInfoRepository.save(expectedJobInfo)).thenReturn(expectedJobInfo); JobInfo jobInfo = jobCoordinatorService - .startOrUpdateJob(Lists.newArrayList(featureSet), store); + .startOrUpdateJob(Lists.newArrayList(featureSet), SourceProto.Source.getDefaultInstance(), + store); assertThat(jobInfo, equalTo(expectedJobInfo)); } diff --git a/core/src/test/java/feast/core/service/SpecServiceTest.java b/core/src/test/java/feast/core/service/SpecServiceTest.java index 73650de1bfa..4987ac158ae 100644 --- a/core/src/test/java/feast/core/service/SpecServiceTest.java +++ b/core/src/test/java/feast/core/service/SpecServiceTest.java @@ -282,7 +282,7 @@ public void applyFeatureSetShouldApplyFeatureSetWithInitVersionIfNotExists() when(featureSetRepository.findByName("f2")).thenReturn(Lists.newArrayList()); Source updatedSource = new Source(SourceType.KAFKA, KafkaSourceConfig.newBuilder().setBootstrapServers("kafka:9092") - .setTopic("feast-f2-features").build().toByteArray()); + .setTopic("feast-f2-features").build(), false); when(featureStreamService.setUpSource(ArgumentMatchers.any(FeatureSet.class))) .thenReturn(updatedSource); FeatureSetSpec incomingFeatureSet = newDummyFeatureSet("f2", 1) @@ -306,7 +306,7 @@ public void applyFeatureSetShouldIncrementFeatureSetVersionIfAlreadyExists() throws InvalidProtocolBufferException { Source updatedSource = new Source(SourceType.KAFKA, KafkaSourceConfig.newBuilder().setBootstrapServers("kafka:9092") - .setTopic("feast-f1-features").build().toByteArray()); + .setTopic("feast-f1-features").build(), false); when(featureStreamService.setUpSource(ArgumentMatchers.any(FeatureSet.class))) .thenReturn(updatedSource); FeatureSetSpec incomingFeatureSet = featureSets.get(2).toProto().toBuilder() @@ -367,7 +367,7 @@ private FeatureSet newDummyFeatureSet(String name, int version) { Field entity = new Field(name, "entity", Enum.STRING); return new FeatureSet(name, version, 100L, Arrays.asList(entity), Arrays.asList(feature), new Source( - SourceType.KAFKA, kafkaFeatureSourceOptions.toByteArray())); + SourceType.KAFKA, kafkaFeatureSourceOptions, false)); } private Store newDummyStore(String name) { diff --git a/ingestion/src/main/java/feast/options/Options.java b/ingestion/src/main/java/feast/options/Options.java deleted file mode 100644 index 47077810c30..00000000000 --- a/ingestion/src/main/java/feast/options/Options.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright 2018 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -package feast.options; - -import java.io.Serializable; - -/** - * interface for identifying classes that can use the OptionsParser for extra type safety - */ -public interface Options extends Serializable { - -} diff --git a/ingestion/src/main/java/feast/options/OptionsParser.java b/ingestion/src/main/java/feast/options/OptionsParser.java deleted file mode 100644 index 8400f5d423d..00000000000 --- a/ingestion/src/main/java/feast/options/OptionsParser.java +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright 2018 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -package feast.options; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.module.jsonSchema.JsonSchema; -import com.fasterxml.jackson.module.jsonSchema.JsonSchemaGenerator; -import com.google.common.collect.Lists; -import java.io.IOException; -import java.util.List; -import java.util.Map; -import java.util.Set; -import javax.validation.ConstraintViolation; -import javax.validation.Validation; -import javax.validation.Validator; -import javax.validation.ValidatorFactory; - -public class OptionsParser { - - private static final ObjectMapper strictMapper = new ObjectMapper(); - private static final ObjectMapper lenientMapper = new ObjectMapper() - .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - private static final Validator validator; - - static { - try (ValidatorFactory validatorFactory = Validation.buildDefaultValidatorFactory()) { - validator = validatorFactory.getValidator(); - } - } - - /** - * Return a json schema string representing an options class for error messages - */ - static String getJsonSchema(Class optionsClass) { - JsonSchemaGenerator schemaGen = new JsonSchemaGenerator(strictMapper); - JsonSchema schema = null; - try { - schema = schemaGen.generateSchema(optionsClass); - schema.setId(null); // clear the ID as it's visual noise - return strictMapper.writer().forType(JsonSchema.class).writeValueAsString(schema); - } catch (IOException e) { - return ""; - } - } - - - private static T parse(Map optionsMap, Class clazz, - boolean lenient) { - ObjectMapper mapper = lenient ? lenientMapper : strictMapper; - List messages = Lists.newArrayList(); - T options; - try { - options = mapper.convertValue(optionsMap, clazz); - } catch (IllegalArgumentException e) { - messages.add("Expecting options convertible to schema: " + getJsonSchema(clazz)); - try { - messages.add("Got " + mapper.writer().writeValueAsString(optionsMap)); - } catch (JsonProcessingException ee) { - // - } - throw new IllegalArgumentException(String.join(", ", messages), e); - } - Set> violations = validator.validate(options); - if (violations.size() > 0) { - messages.add("Expecting options convertible to schema: " + getJsonSchema(clazz)); - for (ConstraintViolation violation : violations) { - messages.add( - String.format( - "property \"%s\" %s", violation.getPropertyPath(), violation.getMessage())); - } - throw new IllegalArgumentException(String.join(", ", messages)); - } - return options; - } - - /** - * Construct a class from string options and validate with any javax validation annotations, - * unknown options are ignored - */ - public static T lenientParse(Map optionsMap, Class clazz) { - return parse(optionsMap, clazz, true); - } - - - /** - * Construct a class from string options and validate with any javax validation annotations - */ - public static T parse(Map optionsMap, Class clazz) { - return parse(optionsMap, clazz, false); - } -} diff --git a/ingestion/src/main/java/feast/options/Validation.java b/ingestion/src/main/java/feast/options/Validation.java deleted file mode 100644 index 892c9f36fa6..00000000000 --- a/ingestion/src/main/java/feast/options/Validation.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright 2018 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -package feast.options; - -import static java.lang.annotation.ElementType.ANNOTATION_TYPE; -import static java.lang.annotation.ElementType.CONSTRUCTOR; -import static java.lang.annotation.ElementType.FIELD; -import static java.lang.annotation.ElementType.METHOD; -import static java.lang.annotation.ElementType.PARAMETER; -import static java.lang.annotation.ElementType.TYPE_USE; -import static java.lang.annotation.RetentionPolicy.RUNTIME; - -import java.lang.annotation.Documented; -import java.lang.annotation.Retention; -import java.lang.annotation.Target; -import javax.validation.Constraint; -import javax.validation.ConstraintValidator; -import javax.validation.ConstraintValidatorContext; -import javax.validation.Payload; -import javax.validation.ReportAsSingleViolation; -import org.joda.time.format.ISOPeriodFormat; - -public @interface Validation { - @Documented - @ReportAsSingleViolation - @Target({METHOD, FIELD, ANNOTATION_TYPE, CONSTRUCTOR, PARAMETER, TYPE_USE}) - @Constraint(validatedBy = ISO8601Duration.ISO8601DurationValidator.class) - @Retention(RUNTIME) - @interface ISO8601Duration { - - String message() default "must match duration format for ISO 8601 standard"; - - Class[] groups() default {}; - - Class[] payload() default {}; - - @Target({METHOD, FIELD, ANNOTATION_TYPE, CONSTRUCTOR, PARAMETER, TYPE_USE}) - @Retention(RUNTIME) - @Documented - @interface List { - ISO8601Duration[] value(); - } - - class ISO8601DurationValidator implements ConstraintValidator { - - @Override - public boolean isValid(String value, ConstraintValidatorContext context) { - if (value == null) { - return true; - } - try { - ISOPeriodFormat.standard().parsePeriod(value); - return true; - } catch (Throwable e) { - return false; - } - } - } - } -} From fff15bc697678a2aaef83e1ed5c3b6f1f44f117f Mon Sep 17 00:00:00 2001 From: zhilingc Date: Tue, 29 Oct 2019 18:26:51 +0800 Subject: [PATCH 02/12] Add validation for kafka source --- .../main/java/feast/core/model/Source.java | 31 +++++++++++++------ .../feast/core/service/SpecServiceTest.java | 6 ++-- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/core/src/main/java/feast/core/model/Source.java b/core/src/main/java/feast/core/model/Source.java index c63bc413876..0d559078503 100644 --- a/core/src/main/java/feast/core/model/Source.java +++ b/core/src/main/java/feast/core/model/Source.java @@ -6,6 +6,7 @@ import feast.core.SourceProto.KafkaSourceConfig; import feast.core.SourceProto.Source.Builder; import feast.core.SourceProto.SourceType; +import io.grpc.Status; import java.util.Set; import javax.persistence.Column; import javax.persistence.Entity; @@ -43,11 +44,22 @@ public Source() { super(); } - public Source(SourceType type, KafkaSourceConfig config, boolean isUseDefault) { + public Source(SourceType type, KafkaSourceConfig config) { + if (config.getBootstrapServers().isEmpty() || config.getTopic().isEmpty()) { + throw Status.INVALID_ARGUMENT.withDescription( + "Unsupported source options. Kafka source requires bootstrap servers and topic to be specified.") + .asRuntimeException(); + } this.type = type.toString(); this.bootstrapServers = config.getBootstrapServers(); this.topics = config.getTopic(); - this.useDefault = isUseDefault; + this.useDefault = false; + this.id = generateId(); + } + + public Source(SourceType type) { + this.type = type.toString(); + this.useDefault = true; this.id = generateId(); } @@ -59,18 +71,19 @@ public Source(SourceType type, KafkaSourceConfig config, boolean isUseDefault) { */ public static Source fromProto(SourceProto.Source sourceProto) { if (sourceProto.equals(SourceProto.Source.getDefaultInstance())) { - Source source = new Source(SourceType.UNRECOGNIZED, - KafkaSourceConfig.getDefaultInstance(), true); + Source source = new Source(SourceType.UNRECOGNIZED); source.setUseDefault(true); return source; } switch (sourceProto.getType()) { case KAFKA: - return new Source(sourceProto.getType(), sourceProto.getKafkaSourceConfig(), false); + return new Source(sourceProto.getType(), sourceProto.getKafkaSourceConfig()); case UNRECOGNIZED: default: - throw new IllegalArgumentException("Unsupported source type. Only [KAFKA] is supported."); + throw Status.INVALID_ARGUMENT + .withDescription("Unsupported source type. Only [KAFKA] is supported.") + .asRuntimeException(); } } @@ -141,9 +154,9 @@ public boolean isUseDefault() { } /** - * Override equality for sources. - * useDefault is always compared first; if both sources are using the default feature source, - * they will be equal. If not they will be compared based on their type-specific options. + * Override equality for sources. useDefault is always compared first; if both sources are using + * the default feature source, they will be equal. If not they will be compared based on their + * type-specific options. * * @param other other Source * @return boolean equal diff --git a/core/src/test/java/feast/core/service/SpecServiceTest.java b/core/src/test/java/feast/core/service/SpecServiceTest.java index 4987ac158ae..fde90f2d422 100644 --- a/core/src/test/java/feast/core/service/SpecServiceTest.java +++ b/core/src/test/java/feast/core/service/SpecServiceTest.java @@ -282,7 +282,7 @@ public void applyFeatureSetShouldApplyFeatureSetWithInitVersionIfNotExists() when(featureSetRepository.findByName("f2")).thenReturn(Lists.newArrayList()); Source updatedSource = new Source(SourceType.KAFKA, KafkaSourceConfig.newBuilder().setBootstrapServers("kafka:9092") - .setTopic("feast-f2-features").build(), false); + .setTopic("feast-f2-features").build()); when(featureStreamService.setUpSource(ArgumentMatchers.any(FeatureSet.class))) .thenReturn(updatedSource); FeatureSetSpec incomingFeatureSet = newDummyFeatureSet("f2", 1) @@ -306,7 +306,7 @@ public void applyFeatureSetShouldIncrementFeatureSetVersionIfAlreadyExists() throws InvalidProtocolBufferException { Source updatedSource = new Source(SourceType.KAFKA, KafkaSourceConfig.newBuilder().setBootstrapServers("kafka:9092") - .setTopic("feast-f1-features").build(), false); + .setTopic("feast-f1-features").build()); when(featureStreamService.setUpSource(ArgumentMatchers.any(FeatureSet.class))) .thenReturn(updatedSource); FeatureSetSpec incomingFeatureSet = featureSets.get(2).toProto().toBuilder() @@ -367,7 +367,7 @@ private FeatureSet newDummyFeatureSet(String name, int version) { Field entity = new Field(name, "entity", Enum.STRING); return new FeatureSet(name, version, 100L, Arrays.asList(entity), Arrays.asList(feature), new Source( - SourceType.KAFKA, kafkaFeatureSourceOptions, false)); + SourceType.KAFKA, kafkaFeatureSourceOptions)); } private Store newDummyStore(String name) { From b95f93a2f3ad2346d31cd653c8c1a223f1d47d26 Mon Sep 17 00:00:00 2001 From: zhilingc Date: Wed, 30 Oct 2019 15:16:34 +0800 Subject: [PATCH 03/12] Remove feature stream service and instantiate it at initialisation --- .../java/feast/core/grpc/CoreServiceImpl.java | 7 +- .../job/direct/DirectRunnerJobManager.java | 25 +++++- .../main/java/feast/core/model/Source.java | 29 ++---- .../core/service/FeatureStreamService.java | 53 ----------- .../core/service/JobCoordinatorService.java | 2 +- .../java/feast/core/service/SpecService.java | 16 ++-- .../java/feast/core/stream/FeatureStream.java | 21 ----- .../core/stream/kafka/KafkaFeatureStream.java | 81 ----------------- .../kafka/KafkaFeatureStreamConfig.java | 54 ----------- .../core/job/ScheduledJobMonitorTest.java | 13 ++- .../service/FeatureStreamServiceTest.java | 90 ------------------- .../service/JobCoordinatorServiceTest.java | 31 ++++--- .../feast/core/service/SpecServiceTest.java | 40 +++------ .../stream/kafka/KafkaFeatureStreamTest.java | 61 ------------- 14 files changed, 87 insertions(+), 436 deletions(-) delete mode 100644 core/src/main/java/feast/core/service/FeatureStreamService.java delete mode 100644 core/src/main/java/feast/core/stream/FeatureStream.java delete mode 100644 core/src/main/java/feast/core/stream/kafka/KafkaFeatureStream.java delete mode 100644 core/src/main/java/feast/core/stream/kafka/KafkaFeatureStreamConfig.java delete mode 100644 core/src/test/java/feast/core/service/FeatureStreamServiceTest.java delete mode 100644 core/src/test/java/feast/core/stream/kafka/KafkaFeatureStreamTest.java diff --git a/core/src/main/java/feast/core/grpc/CoreServiceImpl.java b/core/src/main/java/feast/core/grpc/CoreServiceImpl.java index be4baa4278d..18482b5e6d1 100644 --- a/core/src/main/java/feast/core/grpc/CoreServiceImpl.java +++ b/core/src/main/java/feast/core/grpc/CoreServiceImpl.java @@ -122,7 +122,7 @@ public void applyFeatureSet( specService .getFeatureSets( GetFeatureSetsRequest.Filter.newBuilder() - .setFeatureSetName(featureSetName) + .setFeatureSetName(subscription.getName()) .setFeatureSetVersion(subscription.getVersion()) .build()) .getFeatureSetsList()); @@ -130,7 +130,7 @@ public void applyFeatureSet( if (!featureSetSpecs.isEmpty() && featureSetSpecs.contains(response.getFeatureSet())) { // We use the request featureSet source because it contains the information // about whether to default to the default feature stream or not - SourceProto.Source source = request.getFeatureSet().getSource(); + SourceProto.Source source = response.getFeatureSet().getSource(); jobCoordinatorService .startOrUpdateJob(Lists.newArrayList(featureSetSpecs), source, store); } @@ -165,6 +165,9 @@ public void updateStore(UpdateStoreRequest request, .getFeatureSetsList() ); } + if (featureSetSpecs.size() == 0) { + return; + } featureSetSpecs.stream() .collect(Collectors.groupingBy(FeatureSetSpec::getSource)) .entrySet() diff --git a/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java b/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java index 621874041f4..e7d4b70a90b 100644 --- a/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java +++ b/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java @@ -27,6 +27,7 @@ import feast.core.exception.JobExecutionException; import feast.core.job.JobManager; import feast.core.job.Runner; +import feast.core.model.FeatureSet; import feast.core.model.JobInfo; import feast.core.util.TypeConversion; import feast.ingestion.ImportJob; @@ -36,6 +37,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; import org.apache.beam.runners.direct.DirectRunner; import org.apache.beam.sdk.PipelineResult; @@ -111,12 +113,29 @@ private ImportOptions getPipelineOptions(List featureSetSpecs, } /** - * Unsupported. + * Stops an existing job and restarts a new job in its place as a proxy for job updates. + * Note that since we do not maintain a consumer group across the two jobs and the old job + * is not drained, some data may be lost. + * + * As a rule of thumb, direct jobs in feast should only be used for testing. + * + * @param jobInfo jobInfo of target job to change + * @return jobId of the job */ @Override public String updateJob(JobInfo jobInfo) { - throw new UnsupportedOperationException( - "DirectRunner does not support job updates. To make changes to the worker, stop the existing job and rerun ingestion."); + String jobId = jobInfo.getExtId(); + abortJob(jobId); + try { + List featureSetSpecs = new ArrayList<>(); + for (FeatureSet featureSet : jobInfo.getFeatureSets()) { + featureSetSpecs.add(featureSet.toProto()); + } + startJob(jobId, featureSetSpecs, jobInfo.getStore().toProto()); + } catch (JobExecutionException | InvalidProtocolBufferException e) { + throw new JobExecutionException(String.format("Error running ingestion job: %s", e), e); + } + return jobId; } /** diff --git a/core/src/main/java/feast/core/model/Source.java b/core/src/main/java/feast/core/model/Source.java index 0d559078503..0f545e8cf20 100644 --- a/core/src/main/java/feast/core/model/Source.java +++ b/core/src/main/java/feast/core/model/Source.java @@ -38,13 +38,13 @@ public class Source { private String topics; @Column(name = "use_default") - private boolean useDefault; + private boolean isDefault; public Source() { super(); } - public Source(SourceType type, KafkaSourceConfig config) { + public Source(SourceType type, KafkaSourceConfig config, boolean isDefault) { if (config.getBootstrapServers().isEmpty() || config.getTopic().isEmpty()) { throw Status.INVALID_ARGUMENT.withDescription( "Unsupported source options. Kafka source requires bootstrap servers and topic to be specified.") @@ -53,13 +53,7 @@ public Source(SourceType type, KafkaSourceConfig config) { this.type = type.toString(); this.bootstrapServers = config.getBootstrapServers(); this.topics = config.getTopic(); - this.useDefault = false; - this.id = generateId(); - } - - public Source(SourceType type) { - this.type = type.toString(); - this.useDefault = true; + this.isDefault = isDefault; this.id = generateId(); } @@ -71,14 +65,12 @@ public Source(SourceType type) { */ public static Source fromProto(SourceProto.Source sourceProto) { if (sourceProto.equals(SourceProto.Source.getDefaultInstance())) { - Source source = new Source(SourceType.UNRECOGNIZED); - source.setUseDefault(true); - return source; + return new Source(); } switch (sourceProto.getType()) { case KAFKA: - return new Source(sourceProto.getType(), sourceProto.getKafkaSourceConfig()); + return new Source(sourceProto.getType(), sourceProto.getKafkaSourceConfig(), false); case UNRECOGNIZED: default: throw Status.INVALID_ARGUMENT @@ -149,12 +141,12 @@ public SourceType getType() { * * @return boolean indicating whether this feature set source uses defaults. */ - public boolean isUseDefault() { - return useDefault; + public boolean isDefault() { + return isDefault; } /** - * Override equality for sources. useDefault is always compared first; if both sources are using + * Override equality for sources. isDefault is always compared first; if both sources are using * the default feature source, they will be equal. If not they will be compared based on their * type-specific options. * @@ -162,7 +154,7 @@ public boolean isUseDefault() { * @return boolean equal */ public boolean equalTo(Source other) { - if (other.useDefault && useDefault) { + if (other.isDefault && isDefault) { return true; } @@ -181,9 +173,6 @@ public boolean equalTo(Source other) { } private String generateId() { - if (useDefault) { - return "DEFAULT"; - } switch (SourceType.valueOf(type)) { case KAFKA: return String.format("KAFKA/%s/%s", bootstrapServers, topics); diff --git a/core/src/main/java/feast/core/service/FeatureStreamService.java b/core/src/main/java/feast/core/service/FeatureStreamService.java deleted file mode 100644 index 2532de13e3a..00000000000 --- a/core/src/main/java/feast/core/service/FeatureStreamService.java +++ /dev/null @@ -1,53 +0,0 @@ -package feast.core.service; - -import feast.core.SourceProto.SourceType; -import feast.core.config.FeastProperties; -import feast.core.model.FeatureSet; -import feast.core.model.Source; -import feast.core.stream.FeatureStream; -import feast.core.stream.kafka.KafkaFeatureStream; -import feast.core.stream.kafka.KafkaFeatureStreamConfig; -import java.util.Map; -import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.stereotype.Service; - -/** - * Facilitates management of the feature stream. - */ -@Slf4j -@Service -public class FeatureStreamService { - private final SourceType defaultStreamType; - private final Map defaultOptions; - - @Autowired - public FeatureStreamService(FeastProperties feastProperties) { - this.defaultOptions = feastProperties.getStream().getOptions(); - this.defaultStreamType = SourceType.valueOf(feastProperties.getStream().getType().toUpperCase()); - } - - /** - * Provisions a topic given the featureSet. If the topic already exists, and was not created by - * feast, an error will be thrown. - * - * @param featureSet featureSet to create the topic for - * @return Source updated with provisioned feature source - */ - public Source setUpSource(FeatureSet featureSet) { - if (featureSet.getSource().isUseDefault()){ - featureSet.getSource().setType(defaultStreamType.toString()); - } - - switch (featureSet.getSource().getType()) { - case KAFKA: - FeatureStream featureStream = new KafkaFeatureStream(KafkaFeatureStreamConfig.fromMap(defaultOptions)); - return featureStream.provision(featureSet); - case UNRECOGNIZED: - default: - throw new IllegalArgumentException( - String.format("Invalid source %s provided, only source of type [KAFKA] allowed", - featureSet.getSource().getType())); - } - } -} diff --git a/core/src/main/java/feast/core/service/JobCoordinatorService.java b/core/src/main/java/feast/core/service/JobCoordinatorService.java index 116f75eea4e..2cd5cd6778a 100644 --- a/core/src/main/java/feast/core/service/JobCoordinatorService.java +++ b/core/src/main/java/feast/core/service/JobCoordinatorService.java @@ -66,7 +66,7 @@ public JobInfo startOrUpdateJob(List featureSetSpecs, return updateJob(job.get(), featureSetSpecs, store); } } else { - return startJob(createJobId(source.getType().toString().toLowerCase(), store.getName()), + return startJob(createJobId(source.getId(), store.getName()), featureSetSpecs, sourceSpec, store); } } diff --git a/core/src/main/java/feast/core/service/SpecService.java b/core/src/main/java/feast/core/service/SpecService.java index ea83eee262b..b7862c2dd91 100644 --- a/core/src/main/java/feast/core/service/SpecService.java +++ b/core/src/main/java/feast/core/service/SpecService.java @@ -29,6 +29,7 @@ import feast.core.CoreServiceProto.UpdateStoreRequest; import feast.core.CoreServiceProto.UpdateStoreResponse; import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.SourceProto; import feast.core.StoreProto; import feast.core.dao.FeatureSetRepository; import feast.core.dao.StoreRepository; @@ -56,8 +57,7 @@ public class SpecService { private final FeatureSetRepository featureSetRepository; private final StoreRepository storeRepository; - private final FeatureStreamService featureStreamService; - private final JobCoordinatorService jobCoordinatorService; + private final Source defaultSource; private final Pattern versionPattern = Pattern .compile("^(?[\\>\\<\\=]{0,2})(?\\d*)$"); @@ -66,12 +66,10 @@ public class SpecService { public SpecService( FeatureSetRepository featureSetRepository, StoreRepository storeRepository, - FeatureStreamService featureStreamService, - JobCoordinatorService jobCoordinatorService) { + Source defaultSource) { this.featureSetRepository = featureSetRepository; this.storeRepository = storeRepository; - this.featureStreamService = featureStreamService; - this.jobCoordinatorService = jobCoordinatorService; + this.defaultSource = defaultSource; } /** @@ -181,9 +179,9 @@ public ApplyFeatureSetResponse applyFeatureSet(FeatureSetSpec newFeatureSetSpec) .build(); } FeatureSet featureSet = FeatureSet.fromProto(newFeatureSetSpec); - Source updatedSource = featureStreamService.setUpSource(featureSet); - featureSet.setSource(updatedSource); - + if (newFeatureSetSpec.getSource() == SourceProto.Source.getDefaultInstance()) { + featureSet.setSource(defaultSource); + } featureSetRepository.saveAndFlush(featureSet); return ApplyFeatureSetResponse.newBuilder() diff --git a/core/src/main/java/feast/core/stream/FeatureStream.java b/core/src/main/java/feast/core/stream/FeatureStream.java deleted file mode 100644 index f6881998d9f..00000000000 --- a/core/src/main/java/feast/core/stream/FeatureStream.java +++ /dev/null @@ -1,21 +0,0 @@ -package feast.core.stream; - -import feast.core.SourceProto.SourceType; -import feast.core.model.FeatureSet; -import feast.core.model.Source; - -public interface FeatureStream { - - /** - * Gets the type of feature stream - * @return type of feature stream - */ - SourceType getType(); - - /** - * Provisions a sink for the feature producer to write to. For the given topic name. - * - * - */ - Source provision(FeatureSet featureSet) throws RuntimeException; -} diff --git a/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStream.java b/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStream.java deleted file mode 100644 index 487e15e7507..00000000000 --- a/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStream.java +++ /dev/null @@ -1,81 +0,0 @@ -package feast.core.stream.kafka; - -import com.google.common.base.Strings; -import com.google.protobuf.InvalidProtocolBufferException; -import feast.core.SourceProto.KafkaSourceConfig; -import feast.core.SourceProto.SourceType; -import feast.core.model.FeatureSet; -import feast.core.model.Source; -import feast.core.stream.FeatureStream; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.ExecutionException; -import lombok.AllArgsConstructor; -import lombok.extern.slf4j.Slf4j; -import org.apache.kafka.clients.admin.AdminClient; -import org.apache.kafka.clients.admin.AdminClientConfig; -import org.apache.kafka.clients.admin.CreateTopicsResult; -import org.apache.kafka.clients.admin.NewTopic; -import org.apache.kafka.common.errors.TopicExistsException; - -@Slf4j -@AllArgsConstructor -public class KafkaFeatureStream implements FeatureStream { - - private static SourceType FEATURE_STREAM_TYPE = SourceType.KAFKA; - - private KafkaFeatureStreamConfig defaultConfig; - - @Override - public SourceType getType() { - return FEATURE_STREAM_TYPE; - } - - @Override - public Source provision(FeatureSet featureSet) throws RuntimeException { - - Source source = featureSet.getSource(); - KafkaSourceConfig config = KafkaSourceConfig.getDefaultInstance(); - String bootstrapServers = defaultConfig.getBootstrapServers(); - String topicName = defaultConfig.getTopic(); - if (!source.isUseDefault()) { - try { - config = (KafkaSourceConfig) source.getOptions(); - topicName = config.getTopic(); - bootstrapServers = config.getBootstrapServers(); - } catch (NullPointerException e) { - throw new RuntimeException(String - .format("Unable to retrieve bootstrap servers for featureSet %s", featureSet.getName()), - e); - } - } - - Map map = new HashMap<>(); - map.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); - map.put(AdminClientConfig.REQUEST_TIMEOUT_MS_CONFIG, "1000"); - AdminClient client = AdminClient.create(map); - - NewTopic newTopic = new NewTopic(topicName, - defaultConfig.getTopicNumPartitions(), - defaultConfig.getTopicReplicationFactor()); - CreateTopicsResult createTopicsResult = client.createTopics(Collections.singleton(newTopic)); - try { - createTopicsResult.values().get(topicName).get(); - } catch (InterruptedException | ExecutionException e) { - if (e.getCause().getClass().equals(TopicExistsException.class)) { - log.warn(Strings - .lenientFormat( - "Unable to create topic %s in the feature stream, topic already exists, using existing topic.", - topicName)); - } else { - throw new RuntimeException(e.getMessage(), e); - } - } - - assert config != null; - source.setTopics(topicName); - source.setBootstrapServers(bootstrapServers); - return source; - } -} diff --git a/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStreamConfig.java b/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStreamConfig.java deleted file mode 100644 index eb9177bbec9..00000000000 --- a/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStreamConfig.java +++ /dev/null @@ -1,54 +0,0 @@ -package feast.core.stream.kafka; - -import feast.core.util.TypeConversion; -import java.util.Map; -import javax.naming.ConfigurationException; -import lombok.Value; - -@Value -public class KafkaFeatureStreamConfig { - - private static String NUM_PARTITIONS_DEFAULT = "1"; - private static String REPLICATION_FACTOR_DEFAULT = "1"; - - /** - * Feast stream kafka bootstrap servers - */ - String bootstrapServers; - - /** - * Feast stream topic name - */ - String topic; - - /** - * Number of partitions per topic - */ - int topicNumPartitions; - - /** - * Replication factor of each topic created - */ - short topicReplicationFactor; - - /** - * Constructor from a map - * - * @param optionMap map containing the kafka feature stream configuration in key:value - * format. The options should contain: - * 1. bootstrapServers: optional, default kafka bootstrap servers, defaults to "KAFKA:9092"
- * 2. topicPrefix: optional, topic prefix, defaults to "feast"
- * 3. partitions: optional, number of partitions per topic created, defaults to 10
- * 4. replicationFactor: optional, replication factor of topics created, defaults to 2
- * - * @return KafkaFeatureStreamConfig object - */ - public static KafkaFeatureStreamConfig fromMap(Map optionMap) { - String bootstrapServers = optionMap.getOrDefault("bootstrapServers", "KAFKA:9092"); - String topicPrefix = optionMap.getOrDefault("topic", "feast-features"); - int numPartitions = Integer.parseInt(optionMap.getOrDefault("partitions", NUM_PARTITIONS_DEFAULT)); - short replicationFactor = Short.parseShort(optionMap.getOrDefault("replicationFactor", REPLICATION_FACTOR_DEFAULT)); - - return new KafkaFeatureStreamConfig(bootstrapServers, topicPrefix, numPartitions, replicationFactor); - } -} diff --git a/core/src/test/java/feast/core/job/ScheduledJobMonitorTest.java b/core/src/test/java/feast/core/job/ScheduledJobMonitorTest.java index 1f4ffcf9828..631e5a257a9 100644 --- a/core/src/test/java/feast/core/job/ScheduledJobMonitorTest.java +++ b/core/src/test/java/feast/core/job/ScheduledJobMonitorTest.java @@ -26,6 +26,8 @@ import com.google.common.collect.Lists; import feast.core.SourceProto; +import feast.core.SourceProto.KafkaSourceConfig; +import feast.core.SourceProto.SourceType; import feast.core.dao.JobInfoRepository; import feast.core.dao.MetricsRepository; import feast.core.model.JobInfo; @@ -48,9 +50,11 @@ public class ScheduledJobMonitorTest { ScheduledJobMonitor scheduledJobMonitor; - @Mock JobMonitor jobMonitor; + @Mock + JobMonitor jobMonitor; - @Mock JobInfoRepository jobInfoRepository; + @Mock + JobInfoRepository jobInfoRepository; @Before public void setUp() { @@ -60,12 +64,15 @@ public void setUp() { @Test public void getJobStatus_shouldUpdateJobInfoForRunningJob() { + Source source = new Source(SourceType.KAFKA, + KafkaSourceConfig.newBuilder().setBootstrapServers("kafka:9092") + .setTopic("feast-topic").build(), true); JobInfo job = new JobInfo( "jobId", "extId1", "DataflowRunner", - Source.fromProto(SourceProto.Source.getDefaultInstance()), + source, new Store(), Collections.emptyList(), Collections.emptyList(), diff --git a/core/src/test/java/feast/core/service/FeatureStreamServiceTest.java b/core/src/test/java/feast/core/service/FeatureStreamServiceTest.java deleted file mode 100644 index 2603000fff6..00000000000 --- a/core/src/test/java/feast/core/service/FeatureStreamServiceTest.java +++ /dev/null @@ -1,90 +0,0 @@ -package feast.core.service; - -import static org.mockito.MockitoAnnotations.initMocks; - -import feast.core.stream.FeatureStream; -import org.junit.Before; -import org.junit.Rule; -import org.junit.rules.ExpectedException; -import org.mockito.Mock; - -public class FeatureStreamServiceTest { - - @Mock - private FeatureStream featureStream; - - @Rule - public final ExpectedException expectedException = ExpectedException.none(); - - @Before - public void setUp() { - initMocks(this); - } - -// @Test -// public void shouldProvisionTopicGivenFeatureSet() { -// String topicName = "feast-featureSet-topic"; -// FeatureSet featureSet = new FeatureSet(); -// featureSet.setName("featureSet"); -// when(featureStream.generateTopicName("featureSet")).thenReturn(topicName); -// -// FeatureStreamService featureStreamService = new FeatureStreamService( -// featureStreamTopicRepository, featureStream); -// FeatureStreamTopic actual = featureStreamService.provisionTopic(featureSet); -// -// FeatureStreamTopic expectedTopic = new FeatureStreamTopic(topicName, -// Lists.newArrayList(featureSet)); -// verify(featureStream, times(1)).provisionTopic("feast-featureSet-topic"); -// verify(featureStreamTopicRepository, times(1)).saveAndFlush(expectedTopic); -// assertThat(actual, equalTo(expectedTopic)); -// } -// -// @Test -// public void shouldUpdateRecordIfSelfCreatedTopicExistsGivenFeatureSet() { -// String topicName = "feast-featureSet-topic"; -// FeatureSet oldFeatureSet = new FeatureSet(); -// oldFeatureSet.setName("featureSet"); -// oldFeatureSet.setVersion(1); -// -// FeatureSet newFeatureSet = new FeatureSet(); -// newFeatureSet.setName("featureSet"); -// oldFeatureSet.setVersion(2); -// -// FeatureStreamTopic originalTopic = new FeatureStreamTopic(topicName, -// Lists.newArrayList(oldFeatureSet)); -// -// when(featureStream.generateTopicName("featureSet")).thenReturn(topicName); -// doThrow(new TopicExistsException()).when(featureStream).provisionTopic(topicName); -// when(featureStreamTopicRepository.findById(topicName)).thenReturn(Optional.of(originalTopic)); -// FeatureStreamService featureStreamService = new FeatureStreamService( -// featureStreamTopicRepository, featureStream); -// -// FeatureStreamTopic expectedTopic = new FeatureStreamTopic(topicName, -// Lists.newArrayList(oldFeatureSet, newFeatureSet)); -// -// FeatureStreamTopic actual = featureStreamService.provisionTopic(newFeatureSet); -// verify(featureStreamTopicRepository, times(1)).saveAndFlush(expectedTopic); -// -// assertThat(actual, equalTo(expectedTopic)); -// } -// -// @Test -// public void shouldThrowErrorIfTopicExistsGivenFeatureSet() { -// String topicName = "feast-featureSet-topic"; -// -// FeatureSet featureSet = new FeatureSet(); -// featureSet.setName("featureSet"); -// -// when(featureStream.generateTopicName("featureSet")).thenReturn(topicName); -// doThrow(new TopicExistsException()).when(featureStream).provisionTopic(topicName); -// when(featureStreamTopicRepository.findById(topicName)).thenReturn(Optional.empty()); -// -// FeatureStreamService featureStreamService = new FeatureStreamService( -// featureStreamTopicRepository, featureStream); -// -// expectedException.expect(TopicExistsException.class); -// featureStreamService.provisionTopic(featureSet); -// } - - -} \ No newline at end of file diff --git a/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java b/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java index 528b2f7d6d4..47d3e8486da 100644 --- a/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java +++ b/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java @@ -9,6 +9,8 @@ import com.google.common.collect.Lists; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.SourceProto; +import feast.core.SourceProto.KafkaSourceConfig; +import feast.core.SourceProto.SourceType; import feast.core.StoreProto; import feast.core.StoreProto.Store.RedisConfig; import feast.core.StoreProto.Store.StoreType; @@ -37,7 +39,7 @@ public class JobCoordinatorServiceTest { private JobCoordinatorService jobCoordinatorService; private JobInfo existingJob; - + private Source defaultSource; @Before public void setUp() { @@ -48,15 +50,19 @@ public void setUp() { .setType(StoreType.REDIS) .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379)) .build()); + defaultSource = new Source(SourceType.KAFKA, + KafkaSourceConfig.newBuilder().setBootstrapServers("kafka:9092").setTopic("feast-topic") + .build(), true); FeatureSet featureSet1 = new FeatureSet(); featureSet1.setId("featureSet1:1"); + featureSet1.setSource(defaultSource); FeatureSet featureSet2 = new FeatureSet(); featureSet2.setId("featureSet2:1"); - Source source = Source.fromProto(SourceProto.Source.getDefaultInstance()); - existingJob = new JobInfo("extid", "name", "DirectRunner", source, store, + featureSet2.setSource(defaultSource); + existingJob = new JobInfo("extid", "name", "DirectRunner", defaultSource, store, Lists.newArrayList(featureSet1, featureSet2), Lists.newArrayList(), JobStatus.RUNNING); - when(jobInfoRepository.findBySourceIdAndStoreName("DEFAULT", "SERVING")) + when(jobInfoRepository.findBySourceIdAndStoreName(defaultSource.getId(), "SERVING")) .thenReturn(Lists.newArrayList(existingJob)); jobCoordinatorService = new JobCoordinatorService(jobInfoRepository, jobManager); @@ -68,10 +74,12 @@ public void shouldNotStartOrUpdateJobIfNoChanges() { FeatureSetSpec featureSet1 = FeatureSetSpec.newBuilder() .setName("featureSet1") .setVersion(1) + .setSource(defaultSource.toProto()) .build(); FeatureSetSpec featureSet2 = FeatureSetSpec.newBuilder() .setName("featureSet2") .setVersion(1) + .setSource(defaultSource.toProto()) .build(); StoreProto.Store store = StoreProto.Store.newBuilder() .setName("SERVING") @@ -80,7 +88,7 @@ public void shouldNotStartOrUpdateJobIfNoChanges() { .build(); JobInfo jobInfo = jobCoordinatorService .startOrUpdateJob(Lists.newArrayList(featureSet1, featureSet2), - SourceProto.Source.getDefaultInstance(), store); + defaultSource.toProto(), store); assertThat(jobInfo, equalTo(existingJob)); } @@ -89,6 +97,7 @@ public void shouldStartJobIfNotExists() { FeatureSetSpec featureSet = FeatureSetSpec.newBuilder() .setName("featureSet") .setVersion(1) + .setSource(defaultSource.toProto()) .build(); StoreProto.Store store = StoreProto.Store.newBuilder() .setName("SERVING") @@ -104,12 +113,12 @@ public void shouldStartJobIfNotExists() { when(jobManager.getRunnerType()).thenReturn(Runner.DIRECT); FeatureSet expectedFeatureSet = new FeatureSet(); expectedFeatureSet.setId("featureSet:1"); - Source source = Source.fromProto(SourceProto.Source.getDefaultInstance()); JobInfo expectedJobInfo = new JobInfo(jobId, extJobId, "DirectRunner", - source, Store.fromProto(store), Lists.newArrayList(expectedFeatureSet), JobStatus.RUNNING); + defaultSource, Store.fromProto(store), Lists.newArrayList(expectedFeatureSet), + JobStatus.RUNNING); when(jobInfoRepository.save(expectedJobInfo)).thenReturn(expectedJobInfo); JobInfo jobInfo = jobCoordinatorService - .startOrUpdateJob(Lists.newArrayList(featureSet), SourceProto.Source.getDefaultInstance(), + .startOrUpdateJob(Lists.newArrayList(featureSet), defaultSource.toProto(), store); assertThat(jobInfo, equalTo(expectedJobInfo)); } @@ -119,6 +128,7 @@ public void shouldUpdateJobIfAlreadyExistsButThereIsAChange() { FeatureSetSpec featureSet = FeatureSetSpec.newBuilder() .setName("featureSet1") .setVersion(1) + .setSource(defaultSource.toProto()) .build(); StoreProto.Store store = StoreProto.Store.newBuilder() .setName("SERVING") @@ -126,16 +136,15 @@ public void shouldUpdateJobIfAlreadyExistsButThereIsAChange() { .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379)) .build(); String extId = "extId123"; - Source source = Source.fromProto(SourceProto.Source.getDefaultInstance()); JobInfo modifiedJob = new JobInfo(existingJob.getId(), existingJob.getExtId(), - existingJob.getRunner(), source, Store.fromProto(store), + existingJob.getRunner(), defaultSource, Store.fromProto(store), Lists.newArrayList(FeatureSet.fromProto(featureSet)), JobStatus.RUNNING); when(jobManager.updateJob(modifiedJob)).thenReturn(extId); JobInfo expectedJobInfo = modifiedJob; expectedJobInfo.setExtId(extId); when(jobInfoRepository.save(expectedJobInfo)).thenReturn(expectedJobInfo); JobInfo jobInfo = jobCoordinatorService - .startOrUpdateJob(Lists.newArrayList(featureSet), SourceProto.Source.getDefaultInstance(), + .startOrUpdateJob(Lists.newArrayList(featureSet), defaultSource.toProto(), store); assertThat(jobInfo, equalTo(expectedJobInfo)); } diff --git a/core/src/test/java/feast/core/service/SpecServiceTest.java b/core/src/test/java/feast/core/service/SpecServiceTest.java index fde90f2d422..63f15c8bd28 100644 --- a/core/src/test/java/feast/core/service/SpecServiceTest.java +++ b/core/src/test/java/feast/core/service/SpecServiceTest.java @@ -72,26 +72,26 @@ public class SpecServiceTest { @Mock private StoreRepository storeRepository; - @Mock - private FeatureStreamService featureStreamService; - - @Mock - private JobCoordinatorService jobCoordinatorService; - @Rule public final ExpectedException expectedException = ExpectedException.none(); private SpecService specService; private List featureSets; private List stores; + private Source defaultSource; @Before public void setUp() { initMocks(this); + defaultSource = new Source(SourceType.KAFKA, + KafkaSourceConfig.newBuilder().setBootstrapServers("kafka:9092").setTopic("my-topic") + .build(), true); + FeatureSet featureSet1v1 = newDummyFeatureSet("f1", 1); FeatureSet featureSet1v2 = newDummyFeatureSet("f1", 2); FeatureSet featureSet1v3 = newDummyFeatureSet("f1", 3); FeatureSet featureSet2v1 = newDummyFeatureSet("f2", 1); + featureSets = Arrays.asList(featureSet1v1, featureSet1v2, featureSet1v3, featureSet2v1); when(featureSetRepository.findAll()) .thenReturn(featureSets); @@ -111,8 +111,9 @@ public void setUp() { when(storeRepository.findById("SERVING")).thenReturn(Optional.of(store1)); when(storeRepository.findById("NOTFOUND")).thenReturn(Optional.empty()); - specService = new SpecService(featureSetRepository, storeRepository, featureStreamService, - jobCoordinatorService); + + + specService = new SpecService(featureSetRepository, storeRepository, defaultSource); } @Test @@ -205,7 +206,7 @@ public void shouldGetLatestFeatureSetGivenLatestVersionFilter() GetFeatureSetsResponse actual = specService .getFeatureSets( Filter.newBuilder().setFeatureSetName("f1").setFeatureSetVersion("latest").build()); - List expectedFeatureSets = featureSets.subList(2,3); + List expectedFeatureSets = featureSets.subList(2, 3); List list = new ArrayList<>(); for (FeatureSet expectedFeatureSet : expectedFeatureSets) { FeatureSetSpec toProto = expectedFeatureSet.toProto(); @@ -280,11 +281,6 @@ public void applyFeatureSetShouldReturnFeatureSetWithLatestVersionIfFeatureSetHa public void applyFeatureSetShouldApplyFeatureSetWithInitVersionIfNotExists() throws InvalidProtocolBufferException { when(featureSetRepository.findByName("f2")).thenReturn(Lists.newArrayList()); - Source updatedSource = new Source(SourceType.KAFKA, - KafkaSourceConfig.newBuilder().setBootstrapServers("kafka:9092") - .setTopic("feast-f2-features").build()); - when(featureStreamService.setUpSource(ArgumentMatchers.any(FeatureSet.class))) - .thenReturn(updatedSource); FeatureSetSpec incomingFeatureSet = newDummyFeatureSet("f2", 1) .toProto() .toBuilder() @@ -295,7 +291,7 @@ public void applyFeatureSetShouldApplyFeatureSetWithInitVersionIfNotExists() verify(featureSetRepository).saveAndFlush(ArgumentMatchers.any(FeatureSet.class)); FeatureSetSpec expected = incomingFeatureSet.toBuilder() .setVersion(1) - .setSource(updatedSource.toProto()) + .setSource(defaultSource.toProto()) .build(); assertThat(applyFeatureSetResponse.getStatus(), equalTo(Status.CREATED)); assertThat(applyFeatureSetResponse.getFeatureSet(), equalTo(expected)); @@ -304,18 +300,13 @@ public void applyFeatureSetShouldApplyFeatureSetWithInitVersionIfNotExists() @Test public void applyFeatureSetShouldIncrementFeatureSetVersionIfAlreadyExists() throws InvalidProtocolBufferException { - Source updatedSource = new Source(SourceType.KAFKA, - KafkaSourceConfig.newBuilder().setBootstrapServers("kafka:9092") - .setTopic("feast-f1-features").build()); - when(featureStreamService.setUpSource(ArgumentMatchers.any(FeatureSet.class))) - .thenReturn(updatedSource); FeatureSetSpec incomingFeatureSet = featureSets.get(2).toProto().toBuilder() .clearVersion() .addFeatures(FeatureSpec.newBuilder().setName("feature2").setValueType(Enum.STRING)) .build(); FeatureSetSpec expected = incomingFeatureSet.toBuilder() .setVersion(4) - .setSource(updatedSource.toProto()) + .setSource(defaultSource.toProto()) .build(); ApplyFeatureSetResponse applyFeatureSetResponse = specService .applyFeatureSet(incomingFeatureSet); @@ -359,15 +350,10 @@ public void shouldDoNothingIfNoChange() throws InvalidProtocolBufferException { } private FeatureSet newDummyFeatureSet(String name, int version) { - KafkaSourceConfig kafkaFeatureSourceOptions = - KafkaSourceConfig.newBuilder() - .setBootstrapServers("kafka:9092") - .build(); Field feature = new Field(name, "feature", Enum.INT64); Field entity = new Field(name, "entity", Enum.STRING); return new FeatureSet(name, version, 100L, Arrays.asList(entity), Arrays.asList(feature), - new Source( - SourceType.KAFKA, kafkaFeatureSourceOptions)); + defaultSource); } private Store newDummyStore(String name) { diff --git a/core/src/test/java/feast/core/stream/kafka/KafkaFeatureStreamTest.java b/core/src/test/java/feast/core/stream/kafka/KafkaFeatureStreamTest.java deleted file mode 100644 index a75f3848878..00000000000 --- a/core/src/test/java/feast/core/stream/kafka/KafkaFeatureStreamTest.java +++ /dev/null @@ -1,61 +0,0 @@ -package feast.core.stream.kafka; - -import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.core.IsEqual.equalTo; -import static org.mockito.Mockito.doReturn; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import feast.core.exception.TopicExistsException; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.ExecutionException; -import org.apache.kafka.clients.admin.AdminClient; -import org.apache.kafka.clients.admin.CreateTopicsResult; -import org.apache.kafka.common.KafkaFuture; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; -import org.mockito.ArgumentMatchers; -import org.mockito.Mock; -import org.mockito.MockitoAnnotations; - -public class KafkaFeatureStreamTest { - -// @Rule -// public ExpectedException expectedException = ExpectedException.none(); -// -// @Mock -// private AdminClient kafkaAdminClient; -// @Mock -// private CreateTopicsResult createTopicsResult; -// -// private KafkaFeatureStream kafkaFeatureStream; -// -// @Before -// public void setUp() { -// MockitoAnnotations.initMocks(this); -// KafkaFeatureStreamConfig config = new KafkaFeatureStreamConfig("localhost:8121", "feast", 1, -// (short) 2); -// kafkaFeatureStream = new KafkaFeatureStream(kafkaAdminClient, config); -// } -// -// @Test -// public void shouldThrowTopicExistsExceptionIfTopicExists() -// throws ExecutionException, InterruptedException { -// -// KafkaFuture result = mock(KafkaFuture.class); -// -// when(result.get()).thenThrow(new org.apache.kafka.common.errors.TopicExistsException("")); -// Map> resultMap = new HashMap<>(); -// resultMap.put("my-topic", result); -// when(createTopicsResult.values()).thenReturn(resultMap); -// doReturn(createTopicsResult).when(kafkaAdminClient) -// .createTopics(ArgumentMatchers.anyCollection()); -// -// expectedException.expect(TopicExistsException.class); -// kafkaFeatureStream.provisionTopic("my-topic"); -// } -} - From ad8cf65495c21c789a66088a53e0a982e7784669 Mon Sep 17 00:00:00 2001 From: zhilingc Date: Wed, 30 Oct 2019 15:33:08 +0800 Subject: [PATCH 04/12] Update end-to-end test config --- .prow/scripts/test-end-to-end.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.prow/scripts/test-end-to-end.sh b/.prow/scripts/test-end-to-end.sh index 89ddd826e33..2a4cb9e710c 100755 --- a/.prow/scripts/test-end-to-end.sh +++ b/.prow/scripts/test-end-to-end.sh @@ -97,6 +97,7 @@ feast: stream: type: kafka options: + topic: feast-features bootstrapServers: localhost:9092 replicationFactor: 1 partitions: 1 From 06754a4b506d27093f11a12f65c60b62c8e00525 Mon Sep 17 00:00:00 2001 From: zhilingc Date: Wed, 30 Oct 2019 15:41:15 +0800 Subject: [PATCH 05/12] Add DefaultSource bean --- .../core/config/FeatureStreamConfig.java | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 core/src/main/java/feast/core/config/FeatureStreamConfig.java diff --git a/core/src/main/java/feast/core/config/FeatureStreamConfig.java b/core/src/main/java/feast/core/config/FeatureStreamConfig.java new file mode 100644 index 00000000000..40344681722 --- /dev/null +++ b/core/src/main/java/feast/core/config/FeatureStreamConfig.java @@ -0,0 +1,65 @@ +package feast.core.config; + +import com.google.common.base.Strings; +import feast.core.SourceProto.KafkaSourceConfig; +import feast.core.SourceProto.SourceType; +import feast.core.config.FeastProperties.StreamProperties; +import feast.core.model.Source; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.ExecutionException; +import lombok.extern.slf4j.Slf4j; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.admin.AdminClientConfig; +import org.apache.kafka.clients.admin.CreateTopicsResult; +import org.apache.kafka.clients.admin.NewTopic; +import org.apache.kafka.common.errors.TopicExistsException; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Slf4j +@Configuration +public class FeatureStreamConfig { + + @Autowired + @Bean + public Source getDefaultSource(FeastProperties feastProperties) { + StreamProperties streamProperties = feastProperties.getStream(); + SourceType featureStreamType = SourceType + .valueOf(streamProperties.getType().toUpperCase()); + switch (featureStreamType) { + case KAFKA: + String bootstrapServers = streamProperties.getOptions().get("bootstrapServers"); + String topicName = streamProperties.getOptions().get("topic"); + Map map = new HashMap<>(); + map.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); + map.put(AdminClientConfig.REQUEST_TIMEOUT_MS_CONFIG, "1000"); + AdminClient client = AdminClient.create(map); + + NewTopic newTopic = new NewTopic(topicName, + Integer.valueOf(streamProperties.getOptions().getOrDefault("numPartitions", "1")), + Short.valueOf(streamProperties.getOptions().getOrDefault("replicationFactor", "1"))); + CreateTopicsResult createTopicsResult = client + .createTopics(Collections.singleton(newTopic)); + try { + createTopicsResult.values().get(topicName).get(); + } catch (InterruptedException | ExecutionException e) { + if (e.getCause().getClass().equals(TopicExistsException.class)) { + log.warn(Strings + .lenientFormat( + "Unable to create topic %s in the feature stream, topic already exists, using existing topic.", + topicName)); + } else { + throw new RuntimeException(e.getMessage(), e); + } + } + KafkaSourceConfig sourceConfig = KafkaSourceConfig.newBuilder() + .setBootstrapServers(bootstrapServers).setTopic(topicName).build(); + return new Source(featureStreamType, sourceConfig, true); + default: + throw new RuntimeException("Unsupported source stream, only [KAFKA] is supported"); + } + } +} From 4393231a6f687e6ffd90acd089440a8e7e65eb35 Mon Sep 17 00:00:00 2001 From: zhilingc Date: Tue, 29 Oct 2019 15:54:33 +0800 Subject: [PATCH 06/12] Map jobs source::sink instead of fs::sink. Allow users to specify topics. Provision single topic by default. --- .../feast/core/dao/JobInfoRepository.java | 2 +- .../java/feast/core/grpc/CoreServiceImpl.java | 18 ++- .../main/java/feast/core/model/JobInfo.java | 14 ++- .../main/java/feast/core/model/Source.java | 106 ++++++++++------- .../core/service/JobCoordinatorService.java | 45 +++++--- .../core/stream/kafka/KafkaFeatureStream.java | 15 +-- .../kafka/KafkaFeatureStreamConfig.java | 6 +- core/src/main/resources/application.yml | 1 + .../core/job/ScheduledJobMonitorTest.java | 4 +- .../direct/DirectRunnerJobManagerTest.java | 1 + .../service/JobCoordinatorServiceTest.java | 29 +++-- .../feast/core/service/SpecServiceTest.java | 6 +- .../src/main/java/feast/options/Options.java | 27 ----- .../java/feast/options/OptionsParser.java | 108 ------------------ .../main/java/feast/options/Validation.java | 75 ------------ 15 files changed, 153 insertions(+), 304 deletions(-) delete mode 100644 ingestion/src/main/java/feast/options/Options.java delete mode 100644 ingestion/src/main/java/feast/options/OptionsParser.java delete mode 100644 ingestion/src/main/java/feast/options/Validation.java diff --git a/core/src/main/java/feast/core/dao/JobInfoRepository.java b/core/src/main/java/feast/core/dao/JobInfoRepository.java index ed457ef9f0b..06381aa5577 100644 --- a/core/src/main/java/feast/core/dao/JobInfoRepository.java +++ b/core/src/main/java/feast/core/dao/JobInfoRepository.java @@ -28,5 +28,5 @@ @Repository public interface JobInfoRepository extends JpaRepository { List findByStatusNotIn(Collection statuses); - List findByFeatureSetsNameAndStoreName(String featureSetsName, String storeName); + List findBySourceIdAndStoreName(String sourceId, String storeName); } \ No newline at end of file diff --git a/core/src/main/java/feast/core/grpc/CoreServiceImpl.java b/core/src/main/java/feast/core/grpc/CoreServiceImpl.java index 7ac1657a627..be4baa4278d 100644 --- a/core/src/main/java/feast/core/grpc/CoreServiceImpl.java +++ b/core/src/main/java/feast/core/grpc/CoreServiceImpl.java @@ -17,6 +17,7 @@ package feast.core.grpc; +import com.google.common.collect.Lists; import com.google.protobuf.InvalidProtocolBufferException; import feast.core.CoreServiceGrpc.CoreServiceImplBase; import feast.core.CoreServiceProto.ApplyFeatureSetRequest; @@ -32,9 +33,11 @@ import feast.core.CoreServiceProto.UpdateStoreResponse; import feast.core.CoreServiceProto.UpdateStoreResponse.Status; import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.SourceProto; import feast.core.StoreProto.Store; import feast.core.StoreProto.Store.Subscription; import feast.core.exception.RetrievalException; +import feast.core.model.Source; import feast.core.service.JobCoordinatorService; import feast.core.service.SpecService; import io.grpc.stub.StreamObserver; @@ -113,7 +116,7 @@ public void applyFeatureSet( return p.matcher(featureSetName).matches(); }) .collect(Collectors.toList()); - List featureSetSpecs = new ArrayList<>(); + Set featureSetSpecs = new HashSet<>(); for (Subscription subscription : relevantSubscriptions) { featureSetSpecs.addAll( specService @@ -124,8 +127,12 @@ public void applyFeatureSet( .build()) .getFeatureSetsList()); } - if (!featureSetSpecs.isEmpty()) { - jobCoordinatorService.startOrUpdateJob(featureSetSpecs, store); + if (!featureSetSpecs.isEmpty() && featureSetSpecs.contains(response.getFeatureSet())) { + // We use the request featureSet source because it contains the information + // about whether to default to the default feature stream or not + SourceProto.Source source = request.getFeatureSet().getSource(); + jobCoordinatorService + .startOrUpdateJob(Lists.newArrayList(featureSetSpecs), source, store); } } responseObserver.onNext(response); @@ -159,10 +166,11 @@ public void updateStore(UpdateStoreRequest request, ); } featureSetSpecs.stream() - .collect(Collectors.groupingBy(FeatureSetSpec::getName)) + .collect(Collectors.groupingBy(FeatureSetSpec::getSource)) .entrySet() .stream() - .forEach(kv -> jobCoordinatorService.startOrUpdateJob(kv.getValue(), store)); + .forEach( + kv -> jobCoordinatorService.startOrUpdateJob(kv.getValue(), kv.getKey(), store)); } } catch (Exception e) { log.error("Exception has occurred in UpdateStore method: ", e); diff --git a/core/src/main/java/feast/core/model/JobInfo.java b/core/src/main/java/feast/core/model/JobInfo.java index fd523b4e4c7..4a64aefbc5c 100644 --- a/core/src/main/java/feast/core/model/JobInfo.java +++ b/core/src/main/java/feast/core/model/JobInfo.java @@ -55,19 +55,21 @@ public class JobInfo extends AbstractTimestampEntity { @Column(name = "ext_id") private String extId; - // Import job source type - @Column(name = "type") - private String type; - // Runner type @Column(name = "runner") private String runner; + // Source id + @ManyToOne + @JoinColumn(name = "source_id") + private Source source; + // Sink id @ManyToOne @JoinColumn(name = "store_name") private Store store; + // FeatureSets populated by the job @ManyToMany @JoinTable( @@ -87,11 +89,11 @@ public JobInfo() { super(); } - public JobInfo(String id, String extId, SourceType type, String runner, Store sink, + public JobInfo(String id, String extId, String runner, Source source, Store sink, List featureSets, JobStatus jobStatus) { this.id = id; this.extId = extId; - this.type = type.toString(); + this.source = source; this.runner = runner; this.store = sink; this.featureSets = featureSets; diff --git a/core/src/main/java/feast/core/model/Source.java b/core/src/main/java/feast/core/model/Source.java index f1997801597..c63bc413876 100644 --- a/core/src/main/java/feast/core/model/Source.java +++ b/core/src/main/java/feast/core/model/Source.java @@ -1,7 +1,6 @@ package feast.core.model; import com.google.common.collect.Sets; -import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.Message; import feast.core.SourceProto; import feast.core.SourceProto.KafkaSourceConfig; @@ -10,10 +9,7 @@ import java.util.Set; import javax.persistence.Column; import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; import javax.persistence.Id; -import javax.persistence.Lob; import javax.persistence.Table; import lombok.Setter; @@ -25,18 +21,20 @@ public class Source { private static final Set KAFKA_OPTIONS = Sets.newHashSet("bootstrapServers"); @Id - @GeneratedValue(strategy = GenerationType.AUTO) @Column(name = "id", updatable = false, nullable = false) - private Long id; + private String id; // Type of the source. Should map to feast.types.Source.SourceType @Column(name = "type", nullable = false) private String type; - // Options for this source - @Column(name = "options") - @Lob - private byte[] options; + // Bootstrap servers, comma delimited. Used by kafka sources. + @Column(name = "bootstrap_servers") + private String bootstrapServers; + + // Topics to listen to, comma delimited. Used by kafka sources. + @Column(name = "topics") + private String topics; @Column(name = "use_default") private boolean useDefault; @@ -45,37 +43,50 @@ public Source() { super(); } - public Source(SourceType type, byte[] options) { + public Source(SourceType type, KafkaSourceConfig config, boolean isUseDefault) { this.type = type.toString(); - this.options = options; + this.bootstrapServers = config.getBootstrapServers(); + this.topics = config.getTopic(); + this.useDefault = isUseDefault; + this.id = generateId(); } + /** + * Construct a source facade object from a given proto object. + * + * @param sourceProto SourceProto.Source object + * @return Source facade object + */ public static Source fromProto(SourceProto.Source sourceProto) { if (sourceProto.equals(SourceProto.Source.getDefaultInstance())) { Source source = new Source(SourceType.UNRECOGNIZED, - KafkaSourceConfig.getDefaultInstance().toByteArray()); + KafkaSourceConfig.getDefaultInstance(), true); source.setUseDefault(true); return source; } - byte[] options; switch (sourceProto.getType()) { case KAFKA: - options = sourceProto.getKafkaSourceConfig().toByteArray(); - break; + return new Source(sourceProto.getType(), sourceProto.getKafkaSourceConfig(), false); case UNRECOGNIZED: default: throw new IllegalArgumentException("Unsupported source type. Only [KAFKA] is supported."); } - return new Source(sourceProto.getType(), options); } - public SourceProto.Source toProto() throws InvalidProtocolBufferException { + /** + * Convert this object to its equivalent proto object. + * + * @return SourceProto.Source + */ + public SourceProto.Source toProto() { Builder builder = SourceProto.Source.newBuilder() .setType(SourceType.valueOf(type)); switch (SourceType.valueOf(type)) { case KAFKA: - KafkaSourceConfig config = KafkaSourceConfig.parseFrom(options); + KafkaSourceConfig config = KafkaSourceConfig.newBuilder() + .setBootstrapServers(bootstrapServers) + .setTopic(topics).build(); return builder.setKafkaSourceConfig(config).build(); case UNRECOGNIZED: default: @@ -83,17 +94,28 @@ public SourceProto.Source toProto() throws InvalidProtocolBufferException { } } + /** + * Get the id for this feature source + * + * @return feature source id in the format TYPE/options + */ + public String getId() { + return id; + } + /** * Get the options for this feature source * * @return feature source options */ - public Message getOptions() - throws InvalidProtocolBufferException { + public Message getOptions() { switch (SourceType.valueOf(type)) { case KAFKA: - KafkaSourceConfig config = KafkaSourceConfig.parseFrom(options); - return config; + return KafkaSourceConfig + .newBuilder() + .setBootstrapServers(bootstrapServers) + .setTopic(topics) + .build(); case UNRECOGNIZED: default: throw new RuntimeException("Unable to convert source to proto"); @@ -119,20 +141,14 @@ public boolean isUseDefault() { } /** - * Set the topic to the source stream. + * Override equality for sources. + * useDefault is always compared first; if both sources are using the default feature source, + * they will be equal. If not they will be compared based on their type-specific options. + * + * @param other other Source + * @return boolean equal */ - public void setTopic(String topic) throws InvalidProtocolBufferException { - switch (SourceType.valueOf(type)) { - case KAFKA: - KafkaSourceConfig kafkacfg = KafkaSourceConfig.parseFrom(options); - this.options = kafkacfg.toBuilder().setTopic(topic).build().toByteArray(); - case UNRECOGNIZED: - default: - throw new RuntimeException("Unable to convert source to proto"); - } - } - - public boolean equalTo(Source other) throws InvalidProtocolBufferException { + public boolean equalTo(Source other) { if (other.useDefault && useDefault) { return true; } @@ -143,14 +159,26 @@ public boolean equalTo(Source other) throws InvalidProtocolBufferException { switch (SourceType.valueOf(type)) { case KAFKA: - KafkaSourceConfig kafkaCfg = KafkaSourceConfig.parseFrom(options); - KafkaSourceConfig otherKafkaCfg = KafkaSourceConfig.parseFrom(other.options); - return kafkaCfg.getBootstrapServers().equals(otherKafkaCfg.getBootstrapServers()); + return bootstrapServers.equals(other.bootstrapServers) && + topics.equals(other.topics); case UNRECOGNIZED: default: return false; } } + + private String generateId() { + if (useDefault) { + return "DEFAULT"; + } + switch (SourceType.valueOf(type)) { + case KAFKA: + return String.format("KAFKA/%s/%s", bootstrapServers, topics); + default: + // should not occur + return ""; + } + } } diff --git a/core/src/main/java/feast/core/service/JobCoordinatorService.java b/core/src/main/java/feast/core/service/JobCoordinatorService.java index 61e600c9804..116f75eea4e 100644 --- a/core/src/main/java/feast/core/service/JobCoordinatorService.java +++ b/core/src/main/java/feast/core/service/JobCoordinatorService.java @@ -1,7 +1,9 @@ package feast.core.service; import com.google.common.base.Strings; +import feast.core.FeatureSetProto; import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.SourceProto; import feast.core.SourceProto.SourceType; import feast.core.StoreProto; import feast.core.dao.JobInfoRepository; @@ -14,6 +16,7 @@ import feast.core.model.FeatureSet; import feast.core.model.JobInfo; import feast.core.model.JobStatus; +import feast.core.model.Source; import feast.core.model.Store; import java.time.Instant; import java.util.ArrayList; @@ -44,9 +47,11 @@ public JobCoordinatorService( * there has been no change in the featureSet, and there is a running job for the featureSet, this * method will do nothing. */ - public JobInfo startOrUpdateJob(List featureSetSpecs, StoreProto.Store store) { - String featureSetName = featureSetSpecs.get(0).getName(); - Optional job = getJob(featureSetName, store.getName()); + public JobInfo startOrUpdateJob(List featureSetSpecs, + SourceProto.Source sourceSpec, + StoreProto.Store store) { + Source source = Source.fromProto(sourceSpec); + Optional job = getJob(source.getId(), store.getName()); if (job.isPresent()) { Set existingFeatureSetsPopulatedByJob = job.get().getFeatureSets().stream().map(FeatureSet::getId).collect(Collectors.toSet()); @@ -61,14 +66,17 @@ public JobInfo startOrUpdateJob(List featureSetSpecs, StoreProto return updateJob(job.get(), featureSetSpecs, store); } } else { - return startJob(createJobId(featureSetName, store.getName()), featureSetSpecs, store); + return startJob(createJobId(source.getType().toString().toLowerCase(), store.getName()), + featureSetSpecs, sourceSpec, store); } } - /** Get the non-terminal job associated with the given featureSet name and store name, if any. */ - private Optional getJob(String featureSetName, String storeName) { + /** + * Get the non-terminal job associated with the given featureSet name and store name, if any. + */ + private Optional getJob(String sourceId, String storeName) { List jobs = - jobInfoRepository.findByFeatureSetsNameAndStoreName(featureSetName, storeName); + jobInfoRepository.findBySourceIdAndStoreName(sourceId, storeName); if (jobs.isEmpty()) { return Optional.empty(); } @@ -77,9 +85,12 @@ private Optional getJob(String featureSetName, String storeName) { .findFirst(); } - /** Start or update the job to ingest data to the sink. */ + /** + * Start or update the job to ingest data to the sink. + */ private JobInfo startJob( - String jobId, List featureSetSpecs, StoreProto.Store sinkSpec) { + String jobId, List featureSetSpecs, SourceProto.Source source, + StoreProto.Store sinkSpec) { try { AuditLogger.log( Resource.JOB, @@ -102,7 +113,6 @@ private JobInfo startJob( jobManager.getRunnerType().getName(), extId); - SourceType sourceType = featureSetSpecs.get(0).getSource().getType(); List featureSets = new ArrayList<>(); for (FeatureSetSpec featureSetSpec : featureSetSpecs) { @@ -115,8 +125,8 @@ private JobInfo startJob( new JobInfo( jobId, extId, - sourceType, jobManager.getRunnerType().getName(), + Source.fromProto(source), Store.fromProto(sinkSpec), featureSets, JobStatus.RUNNING); @@ -134,7 +144,9 @@ private JobInfo startJob( } } - /** Update the given job */ + /** + * Update the given job + */ private JobInfo updateJob( JobInfo jobInfo, List featureSetSpecs, StoreProto.Store store) { jobInfo.setFeatureSets( @@ -171,7 +183,9 @@ public void abortJob(String id) { jobInfoRepository.saveAndFlush(job); } - /** Update a given job's status */ + /** + * Update a given job's status + */ public void updateJobStatus(String jobId, JobStatus status) { Optional jobRecordOptional = jobInfoRepository.findById(jobId); if (jobRecordOptional.isPresent()) { @@ -181,9 +195,10 @@ public void updateJobStatus(String jobId, JobStatus status) { } } - public String createJobId(String featureSetName, String storeName) { + public String createJobId(String sourceId, String storeName) { String dateSuffix = String.valueOf(Instant.now().toEpochMilli()); - String jobId = String.format("%s-to-%s", featureSetName, storeName) + dateSuffix; + String sourceIdTrunc = sourceId.split("/")[0].toLowerCase(); + String jobId = String.format("%s-to-%s", sourceIdTrunc, storeName) + dateSuffix; return jobId.replaceAll("_", "-"); } } diff --git a/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStream.java b/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStream.java index 43ab3b4393d..487e15e7507 100644 --- a/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStream.java +++ b/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStream.java @@ -38,11 +38,13 @@ public Source provision(FeatureSet featureSet) throws RuntimeException { Source source = featureSet.getSource(); KafkaSourceConfig config = KafkaSourceConfig.getDefaultInstance(); String bootstrapServers = defaultConfig.getBootstrapServers(); + String topicName = defaultConfig.getTopic(); if (!source.isUseDefault()) { try { config = (KafkaSourceConfig) source.getOptions(); + topicName = config.getTopic(); bootstrapServers = config.getBootstrapServers(); - } catch (InvalidProtocolBufferException | NullPointerException e) { + } catch (NullPointerException e) { throw new RuntimeException(String .format("Unable to retrieve bootstrap servers for featureSet %s", featureSet.getName()), e); @@ -54,7 +56,6 @@ public Source provision(FeatureSet featureSet) throws RuntimeException { map.put(AdminClientConfig.REQUEST_TIMEOUT_MS_CONFIG, "1000"); AdminClient client = AdminClient.create(map); - String topicName = generateTopicName(featureSet.getName()); NewTopic newTopic = new NewTopic(topicName, defaultConfig.getTopicNumPartitions(), defaultConfig.getTopicReplicationFactor()); @@ -71,14 +72,10 @@ public Source provision(FeatureSet featureSet) throws RuntimeException { throw new RuntimeException(e.getMessage(), e); } } + assert config != null; - source.setOptions( - config.toBuilder().setTopic(topicName).setBootstrapServers(bootstrapServers).build() - .toByteArray()); + source.setTopics(topicName); + source.setBootstrapServers(bootstrapServers); return source; } - - public String generateTopicName(String featureSetName) { - return Strings.lenientFormat("%s-%s-features", defaultConfig.getTopicPrefix(), featureSetName); - } } diff --git a/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStreamConfig.java b/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStreamConfig.java index aa8248cf9d4..eb9177bbec9 100644 --- a/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStreamConfig.java +++ b/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStreamConfig.java @@ -17,9 +17,9 @@ public class KafkaFeatureStreamConfig { String bootstrapServers; /** - * Feast stream topic prefix, to be prepended to topic name + * Feast stream topic name */ - String topicPrefix; + String topic; /** * Number of partitions per topic @@ -45,7 +45,7 @@ public class KafkaFeatureStreamConfig { */ public static KafkaFeatureStreamConfig fromMap(Map optionMap) { String bootstrapServers = optionMap.getOrDefault("bootstrapServers", "KAFKA:9092"); - String topicPrefix = optionMap.getOrDefault("topicPrefix", "feast"); + String topicPrefix = optionMap.getOrDefault("topic", "feast-features"); int numPartitions = Integer.parseInt(optionMap.getOrDefault("partitions", NUM_PARTITIONS_DEFAULT)); short replicationFactor = Short.parseShort(optionMap.getOrDefault("replicationFactor", REPLICATION_FACTOR_DEFAULT)); diff --git a/core/src/main/resources/application.yml b/core/src/main/resources/application.yml index 6867b05ac85..be4f8c6b72a 100644 --- a/core/src/main/resources/application.yml +++ b/core/src/main/resources/application.yml @@ -47,6 +47,7 @@ feast: type: kafka # Feature stream options. options: + topic: feast-features bootstrapServers: kafka:9092 replicationFactor: 1 partitions: 1 diff --git a/core/src/test/java/feast/core/job/ScheduledJobMonitorTest.java b/core/src/test/java/feast/core/job/ScheduledJobMonitorTest.java index 3d7a082f711..1f4ffcf9828 100644 --- a/core/src/test/java/feast/core/job/ScheduledJobMonitorTest.java +++ b/core/src/test/java/feast/core/job/ScheduledJobMonitorTest.java @@ -25,11 +25,13 @@ import static org.mockito.Mockito.when; import com.google.common.collect.Lists; +import feast.core.SourceProto; import feast.core.dao.JobInfoRepository; import feast.core.dao.MetricsRepository; import feast.core.model.JobInfo; import feast.core.model.JobStatus; import feast.core.model.Metrics; +import feast.core.model.Source; import feast.core.model.Store; import java.util.ArrayList; import java.util.Arrays; @@ -62,8 +64,8 @@ public void getJobStatus_shouldUpdateJobInfoForRunningJob() { new JobInfo( "jobId", "extId1", - "Streaming", "DataflowRunner", + Source.fromProto(SourceProto.Source.getDefaultInstance()), new Store(), Collections.emptyList(), Collections.emptyList(), diff --git a/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java b/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java index 93f2519c383..88a674f86ec 100644 --- a/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java +++ b/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java @@ -74,6 +74,7 @@ public void shouldStartDirectJobAndRegisterPipelineResult() throws IOException { expectedPipelineOptions.setRunner(DirectRunner.class); expectedPipelineOptions.setBlockOnRun(false); expectedPipelineOptions.setStoreJson(Lists.newArrayList(printer.print(store))); + expectedPipelineOptions.setProject(""); expectedPipelineOptions .setFeatureSetSpecJson(Lists.newArrayList(printer.print(featureSetSpec))); diff --git a/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java b/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java index efffbcc2981..528b2f7d6d4 100644 --- a/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java +++ b/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java @@ -8,8 +8,7 @@ import com.google.common.collect.Lists; import feast.core.FeatureSetProto.FeatureSetSpec; -import feast.core.SourceProto.Source; -import feast.core.SourceProto.SourceType; +import feast.core.SourceProto; import feast.core.StoreProto; import feast.core.StoreProto.Store.RedisConfig; import feast.core.StoreProto.Store.StoreType; @@ -19,6 +18,7 @@ import feast.core.model.FeatureSet; import feast.core.model.JobInfo; import feast.core.model.JobStatus; +import feast.core.model.Source; import feast.core.model.Store; import org.junit.Before; import org.junit.Rule; @@ -38,6 +38,7 @@ public class JobCoordinatorServiceTest { private JobCoordinatorService jobCoordinatorService; private JobInfo existingJob; + @Before public void setUp() { initMocks(this); @@ -51,10 +52,11 @@ public void setUp() { featureSet1.setId("featureSet1:1"); FeatureSet featureSet2 = new FeatureSet(); featureSet2.setId("featureSet2:1"); - existingJob = new JobInfo("extid", "name", "KAFKA", "DirectRunner", store, + Source source = Source.fromProto(SourceProto.Source.getDefaultInstance()); + existingJob = new JobInfo("extid", "name", "DirectRunner", source, store, Lists.newArrayList(featureSet1, featureSet2), Lists.newArrayList(), JobStatus.RUNNING); - when(jobInfoRepository.findByFeatureSetsNameAndStoreName("featureSet1", "SERVING")) + when(jobInfoRepository.findBySourceIdAndStoreName("DEFAULT", "SERVING")) .thenReturn(Lists.newArrayList(existingJob)); jobCoordinatorService = new JobCoordinatorService(jobInfoRepository, jobManager); @@ -77,7 +79,8 @@ public void shouldNotStartOrUpdateJobIfNoChanges() { .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379)) .build(); JobInfo jobInfo = jobCoordinatorService - .startOrUpdateJob(Lists.newArrayList(featureSet1, featureSet2), store); + .startOrUpdateJob(Lists.newArrayList(featureSet1, featureSet2), + SourceProto.Source.getDefaultInstance(), store); assertThat(jobInfo, equalTo(existingJob)); } @@ -86,7 +89,6 @@ public void shouldStartJobIfNotExists() { FeatureSetSpec featureSet = FeatureSetSpec.newBuilder() .setName("featureSet") .setVersion(1) - .setSource(Source.newBuilder().setType(SourceType.KAFKA)) .build(); StoreProto.Store store = StoreProto.Store.newBuilder() .setName("SERVING") @@ -102,11 +104,13 @@ public void shouldStartJobIfNotExists() { when(jobManager.getRunnerType()).thenReturn(Runner.DIRECT); FeatureSet expectedFeatureSet = new FeatureSet(); expectedFeatureSet.setId("featureSet:1"); - JobInfo expectedJobInfo = new JobInfo(jobId, extJobId, SourceType.KAFKA, "DirectRunner", - Store.fromProto(store), Lists.newArrayList(expectedFeatureSet), JobStatus.RUNNING); + Source source = Source.fromProto(SourceProto.Source.getDefaultInstance()); + JobInfo expectedJobInfo = new JobInfo(jobId, extJobId, "DirectRunner", + source, Store.fromProto(store), Lists.newArrayList(expectedFeatureSet), JobStatus.RUNNING); when(jobInfoRepository.save(expectedJobInfo)).thenReturn(expectedJobInfo); JobInfo jobInfo = jobCoordinatorService - .startOrUpdateJob(Lists.newArrayList(featureSet), store); + .startOrUpdateJob(Lists.newArrayList(featureSet), SourceProto.Source.getDefaultInstance(), + store); assertThat(jobInfo, equalTo(expectedJobInfo)); } @@ -115,7 +119,6 @@ public void shouldUpdateJobIfAlreadyExistsButThereIsAChange() { FeatureSetSpec featureSet = FeatureSetSpec.newBuilder() .setName("featureSet1") .setVersion(1) - .setSource(Source.newBuilder().setType(SourceType.KAFKA)) .build(); StoreProto.Store store = StoreProto.Store.newBuilder() .setName("SERVING") @@ -123,15 +126,17 @@ public void shouldUpdateJobIfAlreadyExistsButThereIsAChange() { .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379)) .build(); String extId = "extId123"; + Source source = Source.fromProto(SourceProto.Source.getDefaultInstance()); JobInfo modifiedJob = new JobInfo(existingJob.getId(), existingJob.getExtId(), - SourceType.valueOf(existingJob.getType()), existingJob.getRunner(), Store.fromProto(store), + existingJob.getRunner(), source, Store.fromProto(store), Lists.newArrayList(FeatureSet.fromProto(featureSet)), JobStatus.RUNNING); when(jobManager.updateJob(modifiedJob)).thenReturn(extId); JobInfo expectedJobInfo = modifiedJob; expectedJobInfo.setExtId(extId); when(jobInfoRepository.save(expectedJobInfo)).thenReturn(expectedJobInfo); JobInfo jobInfo = jobCoordinatorService - .startOrUpdateJob(Lists.newArrayList(featureSet), store); + .startOrUpdateJob(Lists.newArrayList(featureSet), SourceProto.Source.getDefaultInstance(), + store); assertThat(jobInfo, equalTo(expectedJobInfo)); } diff --git a/core/src/test/java/feast/core/service/SpecServiceTest.java b/core/src/test/java/feast/core/service/SpecServiceTest.java index 73650de1bfa..4987ac158ae 100644 --- a/core/src/test/java/feast/core/service/SpecServiceTest.java +++ b/core/src/test/java/feast/core/service/SpecServiceTest.java @@ -282,7 +282,7 @@ public void applyFeatureSetShouldApplyFeatureSetWithInitVersionIfNotExists() when(featureSetRepository.findByName("f2")).thenReturn(Lists.newArrayList()); Source updatedSource = new Source(SourceType.KAFKA, KafkaSourceConfig.newBuilder().setBootstrapServers("kafka:9092") - .setTopic("feast-f2-features").build().toByteArray()); + .setTopic("feast-f2-features").build(), false); when(featureStreamService.setUpSource(ArgumentMatchers.any(FeatureSet.class))) .thenReturn(updatedSource); FeatureSetSpec incomingFeatureSet = newDummyFeatureSet("f2", 1) @@ -306,7 +306,7 @@ public void applyFeatureSetShouldIncrementFeatureSetVersionIfAlreadyExists() throws InvalidProtocolBufferException { Source updatedSource = new Source(SourceType.KAFKA, KafkaSourceConfig.newBuilder().setBootstrapServers("kafka:9092") - .setTopic("feast-f1-features").build().toByteArray()); + .setTopic("feast-f1-features").build(), false); when(featureStreamService.setUpSource(ArgumentMatchers.any(FeatureSet.class))) .thenReturn(updatedSource); FeatureSetSpec incomingFeatureSet = featureSets.get(2).toProto().toBuilder() @@ -367,7 +367,7 @@ private FeatureSet newDummyFeatureSet(String name, int version) { Field entity = new Field(name, "entity", Enum.STRING); return new FeatureSet(name, version, 100L, Arrays.asList(entity), Arrays.asList(feature), new Source( - SourceType.KAFKA, kafkaFeatureSourceOptions.toByteArray())); + SourceType.KAFKA, kafkaFeatureSourceOptions, false)); } private Store newDummyStore(String name) { diff --git a/ingestion/src/main/java/feast/options/Options.java b/ingestion/src/main/java/feast/options/Options.java deleted file mode 100644 index 47077810c30..00000000000 --- a/ingestion/src/main/java/feast/options/Options.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright 2018 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -package feast.options; - -import java.io.Serializable; - -/** - * interface for identifying classes that can use the OptionsParser for extra type safety - */ -public interface Options extends Serializable { - -} diff --git a/ingestion/src/main/java/feast/options/OptionsParser.java b/ingestion/src/main/java/feast/options/OptionsParser.java deleted file mode 100644 index 8400f5d423d..00000000000 --- a/ingestion/src/main/java/feast/options/OptionsParser.java +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright 2018 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -package feast.options; - -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.module.jsonSchema.JsonSchema; -import com.fasterxml.jackson.module.jsonSchema.JsonSchemaGenerator; -import com.google.common.collect.Lists; -import java.io.IOException; -import java.util.List; -import java.util.Map; -import java.util.Set; -import javax.validation.ConstraintViolation; -import javax.validation.Validation; -import javax.validation.Validator; -import javax.validation.ValidatorFactory; - -public class OptionsParser { - - private static final ObjectMapper strictMapper = new ObjectMapper(); - private static final ObjectMapper lenientMapper = new ObjectMapper() - .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - private static final Validator validator; - - static { - try (ValidatorFactory validatorFactory = Validation.buildDefaultValidatorFactory()) { - validator = validatorFactory.getValidator(); - } - } - - /** - * Return a json schema string representing an options class for error messages - */ - static String getJsonSchema(Class optionsClass) { - JsonSchemaGenerator schemaGen = new JsonSchemaGenerator(strictMapper); - JsonSchema schema = null; - try { - schema = schemaGen.generateSchema(optionsClass); - schema.setId(null); // clear the ID as it's visual noise - return strictMapper.writer().forType(JsonSchema.class).writeValueAsString(schema); - } catch (IOException e) { - return ""; - } - } - - - private static T parse(Map optionsMap, Class clazz, - boolean lenient) { - ObjectMapper mapper = lenient ? lenientMapper : strictMapper; - List messages = Lists.newArrayList(); - T options; - try { - options = mapper.convertValue(optionsMap, clazz); - } catch (IllegalArgumentException e) { - messages.add("Expecting options convertible to schema: " + getJsonSchema(clazz)); - try { - messages.add("Got " + mapper.writer().writeValueAsString(optionsMap)); - } catch (JsonProcessingException ee) { - // - } - throw new IllegalArgumentException(String.join(", ", messages), e); - } - Set> violations = validator.validate(options); - if (violations.size() > 0) { - messages.add("Expecting options convertible to schema: " + getJsonSchema(clazz)); - for (ConstraintViolation violation : violations) { - messages.add( - String.format( - "property \"%s\" %s", violation.getPropertyPath(), violation.getMessage())); - } - throw new IllegalArgumentException(String.join(", ", messages)); - } - return options; - } - - /** - * Construct a class from string options and validate with any javax validation annotations, - * unknown options are ignored - */ - public static T lenientParse(Map optionsMap, Class clazz) { - return parse(optionsMap, clazz, true); - } - - - /** - * Construct a class from string options and validate with any javax validation annotations - */ - public static T parse(Map optionsMap, Class clazz) { - return parse(optionsMap, clazz, false); - } -} diff --git a/ingestion/src/main/java/feast/options/Validation.java b/ingestion/src/main/java/feast/options/Validation.java deleted file mode 100644 index 892c9f36fa6..00000000000 --- a/ingestion/src/main/java/feast/options/Validation.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright 2018 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -package feast.options; - -import static java.lang.annotation.ElementType.ANNOTATION_TYPE; -import static java.lang.annotation.ElementType.CONSTRUCTOR; -import static java.lang.annotation.ElementType.FIELD; -import static java.lang.annotation.ElementType.METHOD; -import static java.lang.annotation.ElementType.PARAMETER; -import static java.lang.annotation.ElementType.TYPE_USE; -import static java.lang.annotation.RetentionPolicy.RUNTIME; - -import java.lang.annotation.Documented; -import java.lang.annotation.Retention; -import java.lang.annotation.Target; -import javax.validation.Constraint; -import javax.validation.ConstraintValidator; -import javax.validation.ConstraintValidatorContext; -import javax.validation.Payload; -import javax.validation.ReportAsSingleViolation; -import org.joda.time.format.ISOPeriodFormat; - -public @interface Validation { - @Documented - @ReportAsSingleViolation - @Target({METHOD, FIELD, ANNOTATION_TYPE, CONSTRUCTOR, PARAMETER, TYPE_USE}) - @Constraint(validatedBy = ISO8601Duration.ISO8601DurationValidator.class) - @Retention(RUNTIME) - @interface ISO8601Duration { - - String message() default "must match duration format for ISO 8601 standard"; - - Class[] groups() default {}; - - Class[] payload() default {}; - - @Target({METHOD, FIELD, ANNOTATION_TYPE, CONSTRUCTOR, PARAMETER, TYPE_USE}) - @Retention(RUNTIME) - @Documented - @interface List { - ISO8601Duration[] value(); - } - - class ISO8601DurationValidator implements ConstraintValidator { - - @Override - public boolean isValid(String value, ConstraintValidatorContext context) { - if (value == null) { - return true; - } - try { - ISOPeriodFormat.standard().parsePeriod(value); - return true; - } catch (Throwable e) { - return false; - } - } - } - } -} From b9676d9429062df407a1b9aed397c23bd7ad39ee Mon Sep 17 00:00:00 2001 From: David Heryanto Date: Mon, 28 Oct 2019 10:44:34 +0800 Subject: [PATCH 07/12] Feast 0.3 Continuous Integration (CI) Update (#271) * Update prow config and test script so it works for Feast 0.3 * Update CI test script for golang to use correct path * Set pipefail option so test exit code is correct For those commands that pipe the test output * Update group id and version grpc-spring-boot-starter dependency - Newer version is hosted on Maven central which is more maintained than other repo * Add JVM heap settings for Maven surefire Otherwise it fails to run some tests due to insufficient memory * Fix potential setup/teardown error when running ImportJobTest - Set project to empty (DataflowOptions in Beam require project to be not null) - Ignore error when shutting down Kafka server during tear down. This should not affect test result. * Update remote URI to download cached Maven packages - This tar archive contains packages used by Feast 0.3 rather than previous version of Feast * Fix path to logs artifacts * Update maven enforcer rule for Maven and JDK versions - Allow more ranges of versions as long as they are not breaking changes * Use batch mode when initializing Maven for cleaner build log * Skip maven enforcer when running test for specific project * Fix incorrect order for java-sdk test script * Install database and kafka dependencies for Feast end to end test * Update Python SDK to not set source in FeatureSet when using default source Update in progress script for integration test: TODO.sh * Fix Python SDK type mapping - float64 should map to ValueType.DOUBLE instead of ValueType.FLOAT - Ensure Python SDK installation use protobuf v3.10 from pypi. Earlier version of protobuf do not support accessing Enum value in Proto object directly. - Ensure version is passed as INT in e2e test. Set default allow_dirty to true so it's easier to debug error when running e2e test locally. * Refactor prow scripts such that every test is defined in a separate script - Since there are only 6 tests, there is not much benefit of combining tests into a single script Having each test defined on diff script file makes debugging easier * Remove reference to actual project id * Log current stage in end to end test script - Update test case DirectRunnerJobManagerTest.shouldStartDirectJobAndRegisterPipelineResult to set project option to empty string for DirectRunner - Update /usr/sbin/policy-rc.d used in Maven Docker image, so Redis installation will succeed * Fix missing option to specify miniconda download target - Also use cached maven packages when running e2e test * Fix incorrect path to installing Python SDK * Add small wait after ingestion, before validating the saved features, in feast e2e test * Increase wait time before checking, after applying feature set * Make log cleaner --- .prow/config.yaml | 112 ++------- .prow/scripts/cleanup_feast_installation.sh | 6 - ...maven_cache.sh => download-maven-cache.sh} | 6 + .../scripts/install_feast_and_run_e2e_test.sh | 60 ----- .prow/scripts/install_feast_sdk.sh | 9 - .prow/scripts/install_google_cloud_sdk.sh | 4 +- .prow/scripts/install_test_tools.sh | 24 -- .prow/scripts/prepare_integration_test.sh | 65 ------ .prow/scripts/run_unit_test.sh | 63 ------ .prow/scripts/test-core-ingestion.sh | 20 ++ .prow/scripts/test-end-to-end.sh | 212 ++++++++++++++++++ .prow/scripts/test-golang-sdk.sh | 15 ++ .prow/scripts/test-java-sdk.sh | 13 ++ .prow/scripts/test-python-sdk.sh | 11 + .prow/scripts/test-serving.sh | 17 ++ core/pom.xml | 4 +- .../direct/DirectRunnerJobManagerTest.java | 1 + infra/charts/feast/values.yaml | 5 +- ingestion/pom.xml | 12 - .../java/feast/ingestion/ImportJobTest.java | 1 + .../src/test/java/feast/test/TestUtil.java | 6 +- pom.xml | 10 +- protos/feast/core/FeatureSet.proto | 3 +- sdk/python/feast/feature_set.py | 4 +- sdk/python/feast/type_map.py | 4 +- sdk/python/setup.py | 2 +- serving/pom.xml | 4 +- tests/e2e/conftest.py | 4 +- tests/e2e/test_e2e.py | 49 +++- 29 files changed, 386 insertions(+), 360 deletions(-) delete mode 100755 .prow/scripts/cleanup_feast_installation.sh rename .prow/scripts/{prepare_maven_cache.sh => download-maven-cache.sh} (79%) delete mode 100755 .prow/scripts/install_feast_and_run_e2e_test.sh delete mode 100755 .prow/scripts/install_feast_sdk.sh delete mode 100755 .prow/scripts/install_test_tools.sh delete mode 100755 .prow/scripts/prepare_integration_test.sh delete mode 100755 .prow/scripts/run_unit_test.sh create mode 100755 .prow/scripts/test-core-ingestion.sh create mode 100755 .prow/scripts/test-end-to-end.sh create mode 100755 .prow/scripts/test-golang-sdk.sh create mode 100755 .prow/scripts/test-java-sdk.sh create mode 100755 .prow/scripts/test-python-sdk.sh create mode 100755 .prow/scripts/test-serving.sh diff --git a/.prow/config.yaml b/.prow/config.yaml index 225e89ee5b1..c1305bf6ec2 100644 --- a/.prow/config.yaml +++ b/.prow/config.yaml @@ -21,7 +21,7 @@ plank: deck: tide_update_period: 1s spyglass: - size_limit: 100e+6 # 100MB + size_limit: 50e+6 # 50MB viewers: "started.json|finished.json": ["metadata"] "build-log.txt": ["buildlog"] @@ -50,132 +50,56 @@ tide: # presubmits list Prow jobs that run on pull requests presubmits: gojek/feast: - - name: unit-test-core + - name: test-core-and-ingestion decorate: true always_run: true spec: - volumes: - - name: service-account - secret: - secretName: prow-service-account containers: - image: maven:3.6-jdk-8 - volumeMounts: - - name: service-account - mountPath: /etc/service-account - readOnly: true - env: - - name: GOOGLE_APPLICATION_CREDENTIALS - value: /etc/service-account/service-account.json - command: [".prow/scripts/run_unit_test.sh", "--component", "core"] + command: [".prow/scripts/test-core-ingestion.sh"] - - name: unit-test-ingestion + - name: test-serving decorate: true always_run: true spec: - volumes: - - name: service-account - secret: - secretName: prow-service-account containers: - image: maven:3.6-jdk-8 - volumeMounts: - - name: service-account - mountPath: /etc/service-account - readOnly: true - env: - - name: GOOGLE_APPLICATION_CREDENTIALS - value: /etc/service-account/service-account.json - command: [".prow/scripts/run_unit_test.sh", "--component", "ingestion"] + command: [".prow/scripts/test-serving.sh"] - - name: unit-test-serving + - name: test-java-sdk decorate: true always_run: true spec: containers: - image: maven:3.6-jdk-8 - command: [".prow/scripts/run_unit_test.sh", "--component", "serving"] + command: [".prow/scripts/test-java-sdk.sh"] - - name: unit-test-cli + - name: test-python-sdk decorate: true always_run: true spec: containers: - - image: golang:1.12 - env: - - name: GO111MODULE - value: "on" - command: [".prow/scripts/run_unit_test.sh", "--component", "cli"] + - image: python:3.7 + command: [".prow/scripts/test-python-sdk.sh"] - - name: unit-test-python-sdk + - name: test-golang-sdk decorate: true always_run: true spec: - volumes: - - name: service-account - secret: - secretName: prow-service-account containers: - - image: python:3.6 - volumeMounts: - - name: service-account - mountPath: /etc/service-account - readOnly: true - env: - - name: GOOGLE_APPLICATION_CREDENTIALS - value: /etc/service-account/service-account.json - command: [".prow/scripts/run_unit_test.sh", "--component", "python-sdk"] + - image: golang:1.13 + command: [".prow/scripts/test-golang-sdk.sh"] - - name: integration-test + - name: test-end-to-end decorate: true always_run: true spec: - volumes: - - name: docker-socket-volume - hostPath: - path: /var/run/docker.sock - type: File - - name: service-account - secret: - secretName: prow-service-account - nodeSelector: - os: ubuntu containers: - - image: google/cloud-sdk - # securityContext and docker socket volume mounts are needed because we are building - # Docker images in this job - securityContext: - privileged: true - volumeMounts: - - name: docker-socket-volume - mountPath: /var/run/docker.sock - - name: service-account - mountPath: /etc/service-account - readOnly: true - command: - - bash - - -c - - | - export FEAST_HOME=${PWD} - export FEAST_IMAGE_REGISTRY=us.gcr.io - export FEAST_IMAGE_TAG=${PULL_PULL_SHA} - export FEAST_WAREHOUSE_DATASET=feast_build_${BUILD_ID} - export FEAST_CORE_URL=build-${BUILD_ID:0:5}.drone.feast.ai:80 - export FEAST_SERVING_URL=build-${BUILD_ID:0:5}.drone.feast.ai:80 - export FEAST_RELEASE_NAME=feast-${BUILD_ID:0:5} - export BATCH_IMPORT_DATA_GCS_PATH=gs://feast-templocation-kf-feast/build_${BUILD_ID:0:5}/integration-tests/testdata/feature_values/ingestion_1.csv - export KAFKA_BROKERS=10.128.0.201:9092 - export KAFKA_TOPICS=feast_build_${BUILD_ID:0:5} - - . .prow/scripts/prepare_integration_test.sh - .prow/scripts/install_feast_and_run_e2e_test.sh - TEST_EXIT_CODE=$? - .prow/scripts/cleanup_feast_installation.sh - - exit ${TEST_EXIT_CODE} + - image: maven:3.6-jdk-8 + command: [".prow/scripts/test-end-to-end.sh"] # TODO: do a release when a git tag is pushed -# postsubmits list Prow jobs that run on every push # +# postsubmits list Prow jobs that run on every push # postsubmits: -# gojek/feast: +# gojek/feast: \ No newline at end of file diff --git a/.prow/scripts/cleanup_feast_installation.sh b/.prow/scripts/cleanup_feast_installation.sh deleted file mode 100755 index 78b7d83a327..00000000000 --- a/.prow/scripts/cleanup_feast_installation.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env bash -set -e - -bq -q rm -rf --dataset ${FEAST_WAREHOUSE_DATASET} -gsutil -q rm ${BATCH_IMPORT_DATA_GCS_PATH} -helm delete --purge $FEAST_RELEASE_NAME diff --git a/.prow/scripts/prepare_maven_cache.sh b/.prow/scripts/download-maven-cache.sh similarity index 79% rename from .prow/scripts/prepare_maven_cache.sh rename to .prow/scripts/download-maven-cache.sh index e5113389426..c9704878e0a 100755 --- a/.prow/scripts/prepare_maven_cache.sh +++ b/.prow/scripts/download-maven-cache.sh @@ -23,5 +23,11 @@ done if [[ ! ${ARCHIVE_URI} ]]; then usage; exit 1; fi if [[ ! ${OUTPUT_DIR} ]]; then usage; exit 1; fi +# Install Google Cloud SDK if gsutil command not exists +if [[ ! $(command -v gsutil) ]]; then + CURRENT_DIR=$(dirname "$BASH_SOURCE") + . "${CURRENT_DIR}"/install_google_cloud_sdk.sh +fi + gsutil -q cp ${ARCHIVE_URI} /tmp/.m2.tar tar xf /tmp/.m2.tar -C ${OUTPUT_DIR} diff --git a/.prow/scripts/install_feast_and_run_e2e_test.sh b/.prow/scripts/install_feast_and_run_e2e_test.sh deleted file mode 100755 index 029f2e3e31a..00000000000 --- a/.prow/scripts/install_feast_and_run_e2e_test.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env bash - -set -e - -echo "============================================================" -echo "Installing Feast Release" -echo "============================================================" - -helm install --name ${FEAST_RELEASE_NAME} --wait --timeout 210 ${FEAST_HOME}/charts/feast -f integration-tests/feast-helm-values.yaml - -echo "============================================================" -echo "Testing Batch Import" -echo "============================================================" - -cd ${FEAST_HOME}/integration-tests/testdata - -feast apply entity entity_specs/entity_1.yaml -feast apply feature feature_specs/entity_1*.yaml -feast jobs run import_specs/batch_from_gcs.yaml --wait - -cd $FEAST_HOME/integration-tests - -python -m testutils.validate_feature_values \ - --entity_spec_file=testdata/entity_specs/entity_1.yaml \ - --feature_spec_files=testdata/feature_specs/entity_1*.yaml \ - --expected-warehouse-values-file=testdata/feature_values/ingestion_1.csv \ - --expected-serving-values-file=testdata/feature_values/serving_1.csv \ - --bigquery-dataset-for-warehouse=${FEAST_WAREHOUSE_DATASET} \ - --feast-serving-url=${FEAST_SERVING_URL} - -echo "============================================================" -echo "Testing Streaming Import" -echo "============================================================" - -cd $FEAST_HOME/integration-tests/testdata - -feast apply entity entity_specs/entity_2.yaml -feast apply feature feature_specs/entity_2*.yaml -feast jobs run import_specs/stream_from_kafka.yaml & - -IMPORT_JOB_PID=$! -sleep 20 - -cd $FEAST_HOME/integration-tests - -python -m testutils.kafka_producer \ - --bootstrap_servers=$KAFKA_BROKERS \ - --topic=$KAFKA_TOPICS \ - --entity_spec_file=testdata/entity_specs/entity_2.yaml \ - --feature_spec_files=testdata/feature_specs/entity_2*.yaml \ - --feature_values_file=testdata/feature_values/ingestion_2.csv -sleep 20 - -python -m testutils.validate_feature_values \ - --entity_spec_file=testdata/entity_specs/entity_2.yaml \ - --feature_spec_files=testdata/feature_specs/entity_2*.yaml \ - --expected-serving-values-file=testdata/feature_values/serving_2.csv \ - --feast-serving-url=$FEAST_SERVING_URL - -kill -9 ${IMPORT_JOB_PID} diff --git a/.prow/scripts/install_feast_sdk.sh b/.prow/scripts/install_feast_sdk.sh deleted file mode 100755 index 723199b9e51..00000000000 --- a/.prow/scripts/install_feast_sdk.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/env bash -set -e - -# This script ensures latest Feast Python SDK and Feast CLI are installed - -pip install -qe ${FEAST_HOME}/sdk/python -pip install -qr ${FEAST_HOME}/integration-tests/testutils/requirements.txt -go build -o /usr/local/bin/feast ./cli/feast &> /dev/null -feast config set coreURI ${FEAST_CORE_URL} diff --git a/.prow/scripts/install_google_cloud_sdk.sh b/.prow/scripts/install_google_cloud_sdk.sh index 0865f6085c3..c6356557cec 100755 --- a/.prow/scripts/install_google_cloud_sdk.sh +++ b/.prow/scripts/install_google_cloud_sdk.sh @@ -23,14 +23,14 @@ while [ "$1" != "" ]; do shift done -GOOGLE_CLOUD_SDK_ARCHIVE_URL=https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-244.0.0-linux-x86_64.tar.gz +GOOGLE_CLOUD_SDK_ARCHIVE_URL=https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-sdk-266.0.0-linux-x86_64.tar.gz GOOGLE_PROJECT_ID=kf-feast KUBE_CLUSTER_NAME=primary-test-cluster KUBE_CLUSTER_ZONE=us-central1-a curl -s ${GOOGLE_CLOUD_SDK_ARCHIVE_URL} | tar xz -C / export PATH=/google-cloud-sdk/bin:${PATH} -gcloud -q components install kubectl +gcloud -q components install kubectl &> /var/log/kubectl.install.log if [[ ${KEY_FILE} ]]; then gcloud -q auth activate-service-account --key-file=${KEY_FILE} diff --git a/.prow/scripts/install_test_tools.sh b/.prow/scripts/install_test_tools.sh deleted file mode 100755 index a2e30bbc3a2..00000000000 --- a/.prow/scripts/install_test_tools.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash -set -e - -# This script installs the following Feast test utilities: -# ============================================================ -# - gettext package so we can use envsubst command to provide values to helm template file -# - Python 3.6 because Feast requires Python version 3.6 and above -# - Golang if we need to build Feast CLI from source -# - Helm if we want to install Feast release - -apt-get -qq update -apt-get -y install curl wget gettext &> /dev/null - -curl -s https://repo.continuum.io/miniconda/Miniconda3-4.5.12-Linux-x86_64.sh -o /tmp/miniconda.sh -bash /tmp/miniconda.sh -b -p /miniconda &> /dev/null -export PATH=/miniconda/bin:$PATH - -wget -qO- https://dl.google.com/go/go1.12.5.linux-amd64.tar.gz | tar xzf - -mv go /usr/local/ -export PATH=/usr/local/go/bin:$PATH -export GO111MODULE=on - -wget -qO- https://storage.googleapis.com/kubernetes-helm/helm-v2.13.1-linux-amd64.tar.gz | tar xz -mv linux-amd64/helm /usr/local/bin/helm diff --git a/.prow/scripts/prepare_integration_test.sh b/.prow/scripts/prepare_integration_test.sh deleted file mode 100755 index 1a08f26475b..00000000000 --- a/.prow/scripts/prepare_integration_test.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env bash -set -e - -usage() -{ - echo "usage: prepare_integration_test.sh [--skip-build true]" -} - -while [ "$1" != "" ]; do - case "$1" in - --skip-build ) SKIP_BUILD=true; shift;; - * ) usage; exit 1 - esac - shift -done - -# Authenticate to Google Cloud and GKE -# ============================================================ -GOOGLE_PROJECT_ID=kf-feast -KUBE_CLUSTER_NAME=primary-test-cluster -KUBE_CLUSTER_ZONE=us-central1-a -KEY_FILE=/etc/service-account/service-account.json - -gcloud -q auth activate-service-account --key-file=${KEY_FILE} -gcloud -q auth configure-docker -gcloud -q config set project ${GOOGLE_PROJECT_ID} -gcloud -q container clusters get-credentials ${KUBE_CLUSTER_NAME} --zone ${KUBE_CLUSTER_ZONE} --project ${GOOGLE_PROJECT_ID} -export GOOGLE_APPLICATION_CREDENTIALS=${KEY_FILE} - -# Install Python 3.6, Golang 1.12, Helm and Feast SDK -# ============================================================ -. .prow/scripts/install_test_tools.sh -. .prow/scripts/install_feast_sdk.sh -.prow/scripts/prepare_maven_cache.sh --archive-uri gs://feast-templocation-kf-feast/.m2.tar --output-dir ${FEAST_HOME} - -# Prepare Feast test data and config -# ============================================================ - -bq -q mk --dataset ${FEAST_WAREHOUSE_DATASET} -gsutil -q cp ${FEAST_HOME}/integration-tests/testdata/feature_values/ingestion_1.csv ${BATCH_IMPORT_DATA_GCS_PATH} - -BUILD_ID=${BUILD_ID:0:5} -envsubst < integration-tests/feast-helm-values.yaml.template > integration-tests/feast-helm-values.yaml -cd ${FEAST_HOME}/integration-tests/testdata/import_specs -envsubst < batch_from_gcs.yaml.template > batch_from_gcs.yaml -envsubst < stream_from_kafka.yaml.template > stream_from_kafka.yaml - -if [[ ! ${SKIP_BUILD} ]]; then - -echo "============================================================" -echo "Building Feast for Testing" -echo "============================================================" -cd ${FEAST_HOME} -docker build -t us.gcr.io/kf-feast/feast-core:${FEAST_IMAGE_TAG} -f Dockerfiles/core/Dockerfile . & -docker build -t us.gcr.io/kf-feast/feast-serving:${FEAST_IMAGE_TAG} -f Dockerfiles/serving/Dockerfile . & -wait -docker push us.gcr.io/kf-feast/feast-core:${FEAST_IMAGE_TAG} & -docker push us.gcr.io/kf-feast/feast-serving:${FEAST_IMAGE_TAG} & -wait - -fi - -# Switch back context to original directory -set +ex -cd ${FEAST_HOME} \ No newline at end of file diff --git a/.prow/scripts/run_unit_test.sh b/.prow/scripts/run_unit_test.sh deleted file mode 100755 index d2f6ea3255c..00000000000 --- a/.prow/scripts/run_unit_test.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env bash - -# This script will run unit test for a specific Feast component: -# - core, ingestion, serving or cli -# -# This script includes the pre and post test scripts, such as -# - downloading maven cache repository -# - saving the test output report so it can be viewed with Spyglass in Prow - -# Bucket in GCS used for running unit tests, when the unit tests need an -# actual running GCS (e.g. because there is no existing mock implementation of the function to test) -TEST_BUCKET=feast-templocation-kf-feast - -usage() -{ - echo "usage: run_unit_test.sh - --component {core, ingestion, serving, cli}" -} - -while [ "$1" != "" ]; do - case "$1" in - --component ) COMPONENT="$2"; shift;; - * ) usage; exit 1 - esac - shift -done - -if [[ ! ${COMPONENT} ]]; then - usage; exit 1; -fi - -. .prow/scripts/install_google_cloud_sdk.sh - -if [[ ${COMPONENT} == "core" ]] || [[ ${COMPONENT} == "ingestion" ]] || [[ ${COMPONENT} == "serving" ]]; then - - .prow/scripts/prepare_maven_cache.sh --archive-uri gs://feast-templocation-kf-feast/.m2.tar --output-dir /root/ - mvn --projects ${COMPONENT} -Dtestbucket=feast-templocation-kf-feast test - TEST_EXIT_CODE=$? - cp -r ${COMPONENT}/target/surefire-reports /logs/artifacts/surefire-reports - -elif [[ ${COMPONENT} == "cli" ]]; then - - # https://stackoverflow.com/questions/6871859/piping-command-output-to-tee-but-also-save-exit-code-of-command - set -o pipefail - - go get -u github.com/jstemmer/go-junit-report - go test -v ./cli/feast/... 2>&1 | tee test_output - TEST_EXIT_CODE=$? - cat test_output | ${GOPATH}/bin/go-junit-report > ${ARTIFACTS}/unittest-cli-report.xml - -elif [[ ${COMPONENT} == "python-sdk" ]]; then - - cd sdk/python - pip install -r requirements-test.txt - pip install . - pytest ./tests --junitxml=${ARTIFACTS}/unittest-pythonsdk-report.xml - TEST_EXIT_CODE=$? - -else - usage; exit 1 -fi - -exit ${TEST_EXIT_CODE} diff --git a/.prow/scripts/test-core-ingestion.sh b/.prow/scripts/test-core-ingestion.sh new file mode 100755 index 00000000000..98a47ca68c9 --- /dev/null +++ b/.prow/scripts/test-core-ingestion.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +.prow/scripts/download-maven-cache.sh \ + --archive-uri gs://feast-templocation-kf-feast/.m2.2019-10-24.tar \ + --output-dir /root/ + +# Core depends on Ingestion so they are tested together +# Skip Maven enforcer: https://stackoverflow.com/questions/50647223/maven-enforcer-issue-when-running-from-reactor-level +mvn --projects core,ingestion --batch-mode --define skipTests=true \ + --define enforcer.skip=true clean install +mvn --projects core,ingestion --define enforcer.skip=true test +TEST_EXIT_CODE=$? + +# Default artifact location setting in Prow jobs +LOGS_ARTIFACT_PATH=/logs/artifacts +mkdir -p ${LOGS_ARTIFACT_PATH}/surefire-reports +cp core/target/surefire-reports/* ${LOGS_ARTIFACT_PATH}/surefire-reports/ +cp ingestion/target/surefire-reports/* ${LOGS_ARTIFACT_PATH}/surefire-reports/ + +exit ${TEST_EXIT_CODE} \ No newline at end of file diff --git a/.prow/scripts/test-end-to-end.sh b/.prow/scripts/test-end-to-end.sh new file mode 100755 index 00000000000..89ddd826e33 --- /dev/null +++ b/.prow/scripts/test-end-to-end.sh @@ -0,0 +1,212 @@ +#!/usr/bin/env bash + +set -e +set -o pipefail + +if ! cat /etc/*release | grep -q stretch; then + echo ${BASH_SOURCE} only supports Debian stretch. + echo Please change your operating system to use this script. + exit 1 +fi + +echo " +This script will run end-to-end tests for Feast Core and Online Serving. + +1. Install Redis as the store for Feast Online Serving. +2. Install Postgres for persisting Feast metadata. +3. Install Kafka and Zookeeper as the Source in Feast. +4. Install Python 3.7.4, Feast Python SDK and run end-to-end tests from + tests/e2e via pytest. +" + +echo " +============================================================ +Installing Redis at localhost:6379 +============================================================ +" +apt-get -qq update +# Allow starting serving in this Maven Docker image. Default set to not allowed. +echo "exit 0" > /usr/sbin/policy-rc.d +apt-get -y install redis-server wget > /var/log/redis.install.log +redis-server --daemonize yes +redis-cli ping + +echo " +============================================================ +Installing Postgres at localhost:5432 +============================================================ +" +apt-get -y install postgresql > /var/log/postgresql.install.log +service postgresql start +# Initialize with database: 'postgres', user: 'postgres', password: 'password' +cat < /tmp/update-postgres-role.sh +psql -c "ALTER USER postgres PASSWORD 'password';" +EOF +chmod +x /tmp/update-postgres-role.sh +su -s /bin/bash -c /tmp/update-postgres-role.sh postgres +export PGPASSWORD=password +pg_isready + +echo " +============================================================ +Installing Zookeeper at localhost:2181 +Installing Kafka at localhost:9092 +============================================================ +" +wget -qO- https://www-eu.apache.org/dist/kafka/2.3.0/kafka_2.12-2.3.0.tgz | tar xz +mv kafka_2.12-2.3.0/ /tmp/kafka +nohup /tmp/kafka/bin/zookeeper-server-start.sh /tmp/kafka/config/zookeeper.properties &> /var/log/zookeeper.log 2>&1 & +sleep 5 +tail -n10 /var/log/zookeeper.log +nohup /tmp/kafka/bin/kafka-server-start.sh /tmp/kafka/config/server.properties &> /var/log/kafka.log 2>&1 & +sleep 5 +tail -n10 /var/log/kafka.log + +echo " +============================================================ +Building jars for Feast +============================================================ +" + +.prow/scripts/download-maven-cache.sh \ + --archive-uri gs://feast-templocation-kf-feast/.m2.2019-10-24.tar \ + --output-dir /root/ + +# Build jars for Feast +mvn --quiet --batch-mode --define skipTests=true clean package + +echo " +============================================================ +Starting Feast Core +============================================================ +" +# Start Feast Core in background +cat < /tmp/core.application.yml +grpc: + port: 6565 + enable-reflection: true + +feast: + version: 0.3 + jobs: + runner: DirectRunner + options: {} + metrics: + enabled: false + + stream: + type: kafka + options: + bootstrapServers: localhost:9092 + replicationFactor: 1 + partitions: 1 + +spring: + jpa: + properties.hibernate.format_sql: true + hibernate.naming.physical-strategy=org.hibernate.boot.model.naming: PhysicalNamingStrategyStandardImpl + hibernate.ddl-auto: update + datasource: + url: jdbc:postgresql://localhost:5432/postgres + username: postgres + password: password + +management: + metrics: + export: + simple: + enabled: false + statsd: + enabled: false +EOF + +nohup java -jar core/target/feast-core-0.3.0-SNAPSHOT.jar \ + --spring.config.location=file:///tmp/core.application.yml \ + &> /var/log/feast-core.log & +sleep 20 +tail -n10 /var/log/feast-core.log + +echo " +============================================================ +Starting Feast Online Serving +============================================================ +" +# Start Feast Online Serving in background +cat < /tmp/serving.store.redis.yml +name: serving +type: REDIS +redis_config: + host: localhost + port: 6379 +subscriptions: + - name: .* + version: ">0" +EOF + +cat < /tmp/serving.online.application.yml +feast: + version: 0.3 + core-host: localhost + core-grpc-port: 6565 + + tracing: + enabled: false + + store: + config-path: /tmp/serving.store.redis.yml + redis-pool-max-size: 128 + redis-pool-max-idle: 16 + + jobs: + staging-location: gs://feast-templocation-kf-feast/staging-location + store-type: + store-options: {} + +grpc: + port: 6566 + enable-reflection: true + +spring: + main: + web-environment: false +EOF + +nohup java -jar serving/target/feast-serving-0.3.0-SNAPSHOT.jar \ + --spring.config.location=file:///tmp/serving.online.application.yml \ + &> /var/log/feast-serving-online.log & +sleep 15 +tail -n10 /var/log/feast-serving-online.log + +echo " +============================================================ +Installing Python 3.7 with Miniconda and Feast SDK +============================================================ +" +# Install Python 3.7 with Miniconda +wget -q https://repo.continuum.io/miniconda/Miniconda3-4.7.12-Linux-x86_64.sh \ + -O /tmp/miniconda.sh +bash /tmp/miniconda.sh -b -p /root/miniconda -f +/root/miniconda/bin/conda init +source ~/.bashrc + +# Install Feast Python SDK and test requirements +pip install -q sdk/python +pip install -qr tests/e2e/requirements.txt + +echo " +============================================================ +Running end-to-end tests with pytest at 'tests/e2e' +============================================================ +" +# Default artifact location setting in Prow jobs +LOGS_ARTIFACT_PATH=/logs/artifacts + +ORIGINAL_DIR=$(pwd) +cd tests/e2e + +set +e +pytest --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml +TEST_EXIT_CODE=$? + +cd ${ORIGINAL_DIR} +exit ${TEST_EXIT_CODE} diff --git a/.prow/scripts/test-golang-sdk.sh b/.prow/scripts/test-golang-sdk.sh new file mode 100755 index 00000000000..b586927a512 --- /dev/null +++ b/.prow/scripts/test-golang-sdk.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +set -o pipefail + +cd sdk/go +go test -v 2>&1 | tee /tmp/test_output +TEST_EXIT_CODE=$? + +# Default artifact location setting in Prow jobs +LOGS_ARTIFACT_PATH=/logs/artifacts + +go get -u github.com/jstemmer/go-junit-report +cat /tmp/test_output | ${GOPATH}/bin/go-junit-report > ${LOGS_ARTIFACT_PATH}/golang-sdk-test-report.xml + +exit ${TEST_EXIT_CODE} \ No newline at end of file diff --git a/.prow/scripts/test-java-sdk.sh b/.prow/scripts/test-java-sdk.sh new file mode 100755 index 00000000000..0731b77976f --- /dev/null +++ b/.prow/scripts/test-java-sdk.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +# Skip Maven enforcer: https://stackoverflow.com/questions/50647223/maven-enforcer-issue-when-running-from-reactor-level +mvn --projects sdk/java --batch-mode --define skipTests=true \ + --define enforcer.skip=true clean install +mvn --projects sdk/java --define enforcer.skip=true test +TEST_EXIT_CODE=$? + +# Default artifact location setting in Prow jobs +LOGS_ARTIFACT_PATH=/logs/artifacts +cp -r sdk/java/target/surefire-reports ${LOGS_ARTIFACT_PATH}/surefire-reports + +exit ${TEST_EXIT_CODE} \ No newline at end of file diff --git a/.prow/scripts/test-python-sdk.sh b/.prow/scripts/test-python-sdk.sh new file mode 100755 index 00000000000..eb40f921c7b --- /dev/null +++ b/.prow/scripts/test-python-sdk.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +set -e + +# Default artifact location setting in Prow jobs +LOGS_ARTIFACT_PATH=/logs/artifacts + +cd sdk/python +pip install -r requirements-test.txt +pip install . +pytest --junitxml=${LOGS_ARTIFACT_PATH}/python-sdk-test-report.xml diff --git a/.prow/scripts/test-serving.sh b/.prow/scripts/test-serving.sh new file mode 100755 index 00000000000..d105f733827 --- /dev/null +++ b/.prow/scripts/test-serving.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +.prow/scripts/download-maven-cache.sh \ + --archive-uri gs://feast-templocation-kf-feast/.m2.2019-10-24.tar \ + --output-dir /root/ + +# Skip Maven enforcer: https://stackoverflow.com/questions/50647223/maven-enforcer-issue-when-running-from-reactor-level +mvn --projects serving --batch-mode --define skipTests=true \ + --define enforcer.skip=true clean install +mvn --projects serving --define enforcer.skip=true test +TEST_EXIT_CODE=$? + +# Default artifact location setting in Prow jobs +LOGS_ARTIFACT_PATH=/logs/artifacts +cp -r serving/target/surefire-reports ${LOGS_ARTIFACT_PATH}/surefire-reports + +exit ${TEST_EXIT_CODE} \ No newline at end of file diff --git a/core/pom.xml b/core/pom.xml index db92236c830..0dbbeb82b62 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -102,9 +102,9 @@ org.springframework.boot spring-boot-starter-log4j2 - + - org.lognet + io.github.lognet grpc-spring-boot-starter diff --git a/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java b/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java index 88a674f86ec..2dbd53105e0 100644 --- a/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java +++ b/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java @@ -73,6 +73,7 @@ public void shouldStartDirectJobAndRegisterPipelineResult() throws IOException { expectedPipelineOptions.setAppName("DirectRunnerJobManager"); expectedPipelineOptions.setRunner(DirectRunner.class); expectedPipelineOptions.setBlockOnRun(false); + expectedPipelineOptions.setProject(""); expectedPipelineOptions.setStoreJson(Lists.newArrayList(printer.print(store))); expectedPipelineOptions.setProject(""); expectedPipelineOptions diff --git a/infra/charts/feast/values.yaml b/infra/charts/feast/values.yaml index c879363611c..be3b4138290 100644 --- a/infra/charts/feast/values.yaml +++ b/infra/charts/feast/values.yaml @@ -205,8 +205,9 @@ warehouse-serving: name: warehouse type: BIGQUERY bigquery_config: - projectId: the-big-data-staging-007 - datasetId: feast + # Replace with your Google Cloud project configuration + projectId: google-project-id + datasetId: bigquery-dataset subscriptions: - name: .* version: ">0" diff --git a/ingestion/pom.xml b/ingestion/pom.xml index b7e8033c23a..8811613ede3 100644 --- a/ingestion/pom.xml +++ b/ingestion/pom.xml @@ -86,18 +86,6 @@ - - org.apache.maven.plugins - maven-javadoc-plugin - - - attach-javadocs - - jar - - - - diff --git a/ingestion/src/test/java/feast/ingestion/ImportJobTest.java b/ingestion/src/test/java/feast/ingestion/ImportJobTest.java index 1e9f0bf0129..d08e569518c 100644 --- a/ingestion/src/test/java/feast/ingestion/ImportJobTest.java +++ b/ingestion/src/test/java/feast/ingestion/ImportJobTest.java @@ -116,6 +116,7 @@ public void runPipeline_ShouldWriteToRedisCorrectlyGivenValidSpecAndFeatureRow() options.setStoreJson( Collections.singletonList( JsonFormat.printer().omittingInsignificantWhitespace().print(redis))); + options.setProject(""); options.setBlockOnRun(false); int inputSize = 4096; diff --git a/ingestion/src/test/java/feast/test/TestUtil.java b/ingestion/src/test/java/feast/test/TestUtil.java index 8e30edb9aef..8c03d6da172 100644 --- a/ingestion/src/test/java/feast/test/TestUtil.java +++ b/ingestion/src/test/java/feast/test/TestUtil.java @@ -92,7 +92,11 @@ public static void start(String kafkaHost, int kafkaPort, short kafkaReplication public static void stop() { if (server != null) { - server.shutdown(); + try { + server.shutdown(); + } catch (Exception e) { + e.printStackTrace(); + } } } } diff --git a/pom.xml b/pom.xml index 4d24212cb91..c88606df6f5 100644 --- a/pom.xml +++ b/pom.xml @@ -250,9 +250,9 @@ - org.lognet + io.github.lognet grpc-spring-boot-starter - 2.4.1 + 3.0.2 @@ -392,10 +392,10 @@ - 3.0.5 + [3.5,4.0) - 1.8.0 + [1.8,1.9) @@ -432,7 +432,7 @@ maven-surefire-plugin 2.22.1 - -Djdk.net.URLClassPath.disableClassPathURLCheck=true + -Xms2048m -Xmx2048m -Djdk.net.URLClassPath.disableClassPathURLCheck=true IntegrationTest diff --git a/protos/feast/core/FeatureSet.proto b/protos/feast/core/FeatureSet.proto index 24939055fc2..a80ae36f088 100644 --- a/protos/feast/core/FeatureSet.proto +++ b/protos/feast/core/FeatureSet.proto @@ -47,7 +47,8 @@ message FeatureSetSpec { // as nulls and indicated to end user google.protobuf.Duration max_age = 5; - // Source on which feature rows can be found + // Optional. Source on which feature rows can be found. + // If not set, source will be set to the default value configured in Feast Core. Source source = 6; } diff --git a/sdk/python/feast/feature_set.py b/sdk/python/feast/feature_set.py index 87b95684d03..3760300056a 100644 --- a/sdk/python/feast/feature_set.py +++ b/sdk/python/feast/feature_set.py @@ -69,7 +69,7 @@ def __init__( if entities is not None: self.entities = entities if source is None: - self._source = KafkaSource() + self._source = None else: self._source = source self._max_age = max_age @@ -504,7 +504,7 @@ def to_proto(self) -> FeatureSetSpecProto: name=self.name, version=self.version, max_age=self.max_age, - source=self.source.to_proto(), + source=self.source.to_proto() if self.source is not None else None, features=[ field.to_proto() for field in self._fields.values() diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 30a97169e4d..d48acdf3842 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -106,7 +106,7 @@ def dtype_to_value_type(dtype): # TODO: to pass test_importer def pandas_dtype_to_feast_value_type(dtype: pd.DataFrame.dtypes) -> ValueType: type_map = { - "float64": ValueType.FLOAT, + "float64": ValueType.DOUBLE, "float32": ValueType.FLOAT, "int64": ValueType.INT64, "uint64": ValueType.INT64, @@ -247,7 +247,7 @@ def pd_value_to_proto_value(feast_value_type, value) -> ProtoValue: return ProtoValue(float_val=float(value)) elif feast_value_type == ValueType.DOUBLE: assert type(value) is float - return ProtoValue(float_val=value) + return ProtoValue(double_val=value) elif feast_value_type == ValueType.STRING: return ProtoValue(string_val=str(value)) elif feast_value_type == ValueType.BYTES: diff --git a/sdk/python/setup.py b/sdk/python/setup.py index b78cb8a6c86..24f9c3f2402 100644 --- a/sdk/python/setup.py +++ b/sdk/python/setup.py @@ -36,7 +36,7 @@ "grpcio==1.*", "pandas==0.*", "pandavro==1.5.1", - "protobuf==3.*", + "protobuf==3.10.*", "PyYAML==5.1.2", "fastavro==0.*", "kafka-python==1.4.*", diff --git a/serving/pom.xml b/serving/pom.xml index 2bec449afb0..248f453d36c 100644 --- a/serving/pom.xml +++ b/serving/pom.xml @@ -100,9 +100,9 @@ true - + - org.lognet + io.github.lognet grpc-spring-boot-starter diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py index 6d862ec32a5..60c0076bce4 100644 --- a/tests/e2e/conftest.py +++ b/tests/e2e/conftest.py @@ -1,4 +1,4 @@ def pytest_addoption(parser): parser.addoption("--core_url", action="store", default="localhost:6565") - parser.addoption("--serving_url", action="store", default="localhost:6565") - parser.addoption("--allow_dirty", action="store", default="false") + parser.addoption("--serving_url", action="store", default="localhost:6566") + parser.addoption("--allow_dirty", action="store", default="true") diff --git a/tests/e2e/test_e2e.py b/tests/e2e/test_e2e.py index e602ef89369..a529dd1a24b 100644 --- a/tests/e2e/test_e2e.py +++ b/tests/e2e/test_e2e.py @@ -71,8 +71,19 @@ def test_basic(client): # Register feature set client.apply(cust_trans_fs) + # Feast Core needs some time to fully commit the FeatureSet applied + # when there is no existing job yet for the Featureset + time.sleep(15) cust_trans_fs = client.get_feature_set(name="customer_transactions", version=1) + if cust_trans_fs is None: + raise Exception( + "Client cannot retrieve 'customer_transactions' FeatureSet " + "after registration. Either Feast Core does not save the " + "FeatureSet correctly or the client needs to wait longer for FeatureSet " + "to be committed." + ) + offset = random.randint(1000, 100000) # ensure a unique key space is used customer_data = pd.DataFrame( { @@ -88,6 +99,8 @@ def test_basic(client): # Poll serving for feature values until the correct values are returned while True: + time.sleep(1) + response = client.get_online_features( entity_rows=[ GetOnlineFeaturesRequest.EntityRow( @@ -103,8 +116,8 @@ def test_basic(client): "customer_transactions:1:total_transactions", ], ) # type: GetOnlineFeaturesResponse + if response is None: - time.sleep(1) continue returned_daily_transactions = float( @@ -124,7 +137,7 @@ def test_basic(client): @pytest.mark.timeout(300) def test_all_types(client): - all_types_fs = client.get_feature_set(name="all_types", version="1") + all_types_fs = client.get_feature_set(name="all_types", version=1) if all_types_fs is None: # Register new feature set if it doesnt exist @@ -152,7 +165,19 @@ def test_all_types(client): # Register feature set client.apply(all_types_fs) - all_types_fs = client.get_feature_set(name="all_types", version="1") + + # Feast Core needs some time to fully commit the FeatureSet applied + # when there is no existing job yet for the Featureset + time.sleep(10) + all_types_fs = client.get_feature_set(name="all_types", version=1) + + if all_types_fs is None: + raise Exception( + "Client cannot retrieve 'all_types_fs' FeatureSet " + "after registration. Either Feast Core does not save the " + "FeatureSet correctly or the client needs to wait longer for FeatureSet " + "to be committed." + ) all_types_df = pd.DataFrame( { @@ -205,9 +230,12 @@ def test_all_types(client): # Ingest user embedding data all_types_fs.ingest(dataframe=all_types_df) + time.sleep(3) # Poll serving for feature values until the correct values are returned while True: + time.sleep(1) + response = client.get_online_features( entity_rows=[ GetOnlineFeaturesRequest.EntityRow( @@ -233,7 +261,6 @@ def test_all_types(client): ) # type: GetOnlineFeaturesResponse if response is None: - time.sleep(1) continue returned_float_list = ( @@ -268,10 +295,21 @@ def test_large_volume(client): # Register feature set client.apply(cust_trans_fs) + # Feast Core needs some time to fully commit the FeatureSet applied + # when there is no existing job yet for the Featureset + time.sleep(10) cust_trans_fs = client.get_feature_set( name="customer_transactions_large", version=1 ) + if cust_trans_fs is None: + raise Exception( + "Client cannot retrieve 'customer_transactions' FeatureSet " + "after registration. Either Feast Core does not save the " + "FeatureSet correctly or the client needs to wait longer for FeatureSet " + "to be committed." + ) + offset = random.randint(1000000, 10000000) # ensure a unique key space customer_data = pd.DataFrame( { @@ -289,6 +327,8 @@ def test_large_volume(client): # Poll serving for feature values until the correct values are returned while True: + time.sleep(1) + response = client.get_online_features( entity_rows=[ GetOnlineFeaturesRequest.EntityRow( @@ -306,7 +346,6 @@ def test_large_volume(client): ) # type: GetOnlineFeaturesResponse if response is None: - time.sleep(1) continue returned_daily_transactions = float( From 2d3483f349dc0de68dd26e09f381b8cdd144c91e Mon Sep 17 00:00:00 2001 From: zhilingc Date: Tue, 29 Oct 2019 18:26:51 +0800 Subject: [PATCH 08/12] Add validation for kafka source --- .../main/java/feast/core/model/Source.java | 31 +++++++++++++------ .../feast/core/service/SpecServiceTest.java | 6 ++-- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/core/src/main/java/feast/core/model/Source.java b/core/src/main/java/feast/core/model/Source.java index c63bc413876..0d559078503 100644 --- a/core/src/main/java/feast/core/model/Source.java +++ b/core/src/main/java/feast/core/model/Source.java @@ -6,6 +6,7 @@ import feast.core.SourceProto.KafkaSourceConfig; import feast.core.SourceProto.Source.Builder; import feast.core.SourceProto.SourceType; +import io.grpc.Status; import java.util.Set; import javax.persistence.Column; import javax.persistence.Entity; @@ -43,11 +44,22 @@ public Source() { super(); } - public Source(SourceType type, KafkaSourceConfig config, boolean isUseDefault) { + public Source(SourceType type, KafkaSourceConfig config) { + if (config.getBootstrapServers().isEmpty() || config.getTopic().isEmpty()) { + throw Status.INVALID_ARGUMENT.withDescription( + "Unsupported source options. Kafka source requires bootstrap servers and topic to be specified.") + .asRuntimeException(); + } this.type = type.toString(); this.bootstrapServers = config.getBootstrapServers(); this.topics = config.getTopic(); - this.useDefault = isUseDefault; + this.useDefault = false; + this.id = generateId(); + } + + public Source(SourceType type) { + this.type = type.toString(); + this.useDefault = true; this.id = generateId(); } @@ -59,18 +71,19 @@ public Source(SourceType type, KafkaSourceConfig config, boolean isUseDefault) { */ public static Source fromProto(SourceProto.Source sourceProto) { if (sourceProto.equals(SourceProto.Source.getDefaultInstance())) { - Source source = new Source(SourceType.UNRECOGNIZED, - KafkaSourceConfig.getDefaultInstance(), true); + Source source = new Source(SourceType.UNRECOGNIZED); source.setUseDefault(true); return source; } switch (sourceProto.getType()) { case KAFKA: - return new Source(sourceProto.getType(), sourceProto.getKafkaSourceConfig(), false); + return new Source(sourceProto.getType(), sourceProto.getKafkaSourceConfig()); case UNRECOGNIZED: default: - throw new IllegalArgumentException("Unsupported source type. Only [KAFKA] is supported."); + throw Status.INVALID_ARGUMENT + .withDescription("Unsupported source type. Only [KAFKA] is supported.") + .asRuntimeException(); } } @@ -141,9 +154,9 @@ public boolean isUseDefault() { } /** - * Override equality for sources. - * useDefault is always compared first; if both sources are using the default feature source, - * they will be equal. If not they will be compared based on their type-specific options. + * Override equality for sources. useDefault is always compared first; if both sources are using + * the default feature source, they will be equal. If not they will be compared based on their + * type-specific options. * * @param other other Source * @return boolean equal diff --git a/core/src/test/java/feast/core/service/SpecServiceTest.java b/core/src/test/java/feast/core/service/SpecServiceTest.java index 4987ac158ae..fde90f2d422 100644 --- a/core/src/test/java/feast/core/service/SpecServiceTest.java +++ b/core/src/test/java/feast/core/service/SpecServiceTest.java @@ -282,7 +282,7 @@ public void applyFeatureSetShouldApplyFeatureSetWithInitVersionIfNotExists() when(featureSetRepository.findByName("f2")).thenReturn(Lists.newArrayList()); Source updatedSource = new Source(SourceType.KAFKA, KafkaSourceConfig.newBuilder().setBootstrapServers("kafka:9092") - .setTopic("feast-f2-features").build(), false); + .setTopic("feast-f2-features").build()); when(featureStreamService.setUpSource(ArgumentMatchers.any(FeatureSet.class))) .thenReturn(updatedSource); FeatureSetSpec incomingFeatureSet = newDummyFeatureSet("f2", 1) @@ -306,7 +306,7 @@ public void applyFeatureSetShouldIncrementFeatureSetVersionIfAlreadyExists() throws InvalidProtocolBufferException { Source updatedSource = new Source(SourceType.KAFKA, KafkaSourceConfig.newBuilder().setBootstrapServers("kafka:9092") - .setTopic("feast-f1-features").build(), false); + .setTopic("feast-f1-features").build()); when(featureStreamService.setUpSource(ArgumentMatchers.any(FeatureSet.class))) .thenReturn(updatedSource); FeatureSetSpec incomingFeatureSet = featureSets.get(2).toProto().toBuilder() @@ -367,7 +367,7 @@ private FeatureSet newDummyFeatureSet(String name, int version) { Field entity = new Field(name, "entity", Enum.STRING); return new FeatureSet(name, version, 100L, Arrays.asList(entity), Arrays.asList(feature), new Source( - SourceType.KAFKA, kafkaFeatureSourceOptions, false)); + SourceType.KAFKA, kafkaFeatureSourceOptions)); } private Store newDummyStore(String name) { From 6c8a07a7289e61379d6fe28ae476e4dd731bbb16 Mon Sep 17 00:00:00 2001 From: zhilingc Date: Wed, 30 Oct 2019 15:16:34 +0800 Subject: [PATCH 09/12] Remove feature stream service and instantiate it at initialisation --- .../java/feast/core/grpc/CoreServiceImpl.java | 7 +- .../job/direct/DirectRunnerJobManager.java | 25 +++++- .../main/java/feast/core/model/Source.java | 29 ++---- .../core/service/FeatureStreamService.java | 53 ----------- .../core/service/JobCoordinatorService.java | 2 +- .../java/feast/core/service/SpecService.java | 16 ++-- .../java/feast/core/stream/FeatureStream.java | 21 ----- .../core/stream/kafka/KafkaFeatureStream.java | 81 ----------------- .../kafka/KafkaFeatureStreamConfig.java | 54 ----------- .../core/job/ScheduledJobMonitorTest.java | 13 ++- .../service/FeatureStreamServiceTest.java | 90 ------------------- .../service/JobCoordinatorServiceTest.java | 31 ++++--- .../feast/core/service/SpecServiceTest.java | 40 +++------ .../stream/kafka/KafkaFeatureStreamTest.java | 61 ------------- 14 files changed, 87 insertions(+), 436 deletions(-) delete mode 100644 core/src/main/java/feast/core/service/FeatureStreamService.java delete mode 100644 core/src/main/java/feast/core/stream/FeatureStream.java delete mode 100644 core/src/main/java/feast/core/stream/kafka/KafkaFeatureStream.java delete mode 100644 core/src/main/java/feast/core/stream/kafka/KafkaFeatureStreamConfig.java delete mode 100644 core/src/test/java/feast/core/service/FeatureStreamServiceTest.java delete mode 100644 core/src/test/java/feast/core/stream/kafka/KafkaFeatureStreamTest.java diff --git a/core/src/main/java/feast/core/grpc/CoreServiceImpl.java b/core/src/main/java/feast/core/grpc/CoreServiceImpl.java index be4baa4278d..18482b5e6d1 100644 --- a/core/src/main/java/feast/core/grpc/CoreServiceImpl.java +++ b/core/src/main/java/feast/core/grpc/CoreServiceImpl.java @@ -122,7 +122,7 @@ public void applyFeatureSet( specService .getFeatureSets( GetFeatureSetsRequest.Filter.newBuilder() - .setFeatureSetName(featureSetName) + .setFeatureSetName(subscription.getName()) .setFeatureSetVersion(subscription.getVersion()) .build()) .getFeatureSetsList()); @@ -130,7 +130,7 @@ public void applyFeatureSet( if (!featureSetSpecs.isEmpty() && featureSetSpecs.contains(response.getFeatureSet())) { // We use the request featureSet source because it contains the information // about whether to default to the default feature stream or not - SourceProto.Source source = request.getFeatureSet().getSource(); + SourceProto.Source source = response.getFeatureSet().getSource(); jobCoordinatorService .startOrUpdateJob(Lists.newArrayList(featureSetSpecs), source, store); } @@ -165,6 +165,9 @@ public void updateStore(UpdateStoreRequest request, .getFeatureSetsList() ); } + if (featureSetSpecs.size() == 0) { + return; + } featureSetSpecs.stream() .collect(Collectors.groupingBy(FeatureSetSpec::getSource)) .entrySet() diff --git a/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java b/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java index 621874041f4..e7d4b70a90b 100644 --- a/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java +++ b/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java @@ -27,6 +27,7 @@ import feast.core.exception.JobExecutionException; import feast.core.job.JobManager; import feast.core.job.Runner; +import feast.core.model.FeatureSet; import feast.core.model.JobInfo; import feast.core.util.TypeConversion; import feast.ingestion.ImportJob; @@ -36,6 +37,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; import org.apache.beam.runners.direct.DirectRunner; import org.apache.beam.sdk.PipelineResult; @@ -111,12 +113,29 @@ private ImportOptions getPipelineOptions(List featureSetSpecs, } /** - * Unsupported. + * Stops an existing job and restarts a new job in its place as a proxy for job updates. + * Note that since we do not maintain a consumer group across the two jobs and the old job + * is not drained, some data may be lost. + * + * As a rule of thumb, direct jobs in feast should only be used for testing. + * + * @param jobInfo jobInfo of target job to change + * @return jobId of the job */ @Override public String updateJob(JobInfo jobInfo) { - throw new UnsupportedOperationException( - "DirectRunner does not support job updates. To make changes to the worker, stop the existing job and rerun ingestion."); + String jobId = jobInfo.getExtId(); + abortJob(jobId); + try { + List featureSetSpecs = new ArrayList<>(); + for (FeatureSet featureSet : jobInfo.getFeatureSets()) { + featureSetSpecs.add(featureSet.toProto()); + } + startJob(jobId, featureSetSpecs, jobInfo.getStore().toProto()); + } catch (JobExecutionException | InvalidProtocolBufferException e) { + throw new JobExecutionException(String.format("Error running ingestion job: %s", e), e); + } + return jobId; } /** diff --git a/core/src/main/java/feast/core/model/Source.java b/core/src/main/java/feast/core/model/Source.java index 0d559078503..0f545e8cf20 100644 --- a/core/src/main/java/feast/core/model/Source.java +++ b/core/src/main/java/feast/core/model/Source.java @@ -38,13 +38,13 @@ public class Source { private String topics; @Column(name = "use_default") - private boolean useDefault; + private boolean isDefault; public Source() { super(); } - public Source(SourceType type, KafkaSourceConfig config) { + public Source(SourceType type, KafkaSourceConfig config, boolean isDefault) { if (config.getBootstrapServers().isEmpty() || config.getTopic().isEmpty()) { throw Status.INVALID_ARGUMENT.withDescription( "Unsupported source options. Kafka source requires bootstrap servers and topic to be specified.") @@ -53,13 +53,7 @@ public Source(SourceType type, KafkaSourceConfig config) { this.type = type.toString(); this.bootstrapServers = config.getBootstrapServers(); this.topics = config.getTopic(); - this.useDefault = false; - this.id = generateId(); - } - - public Source(SourceType type) { - this.type = type.toString(); - this.useDefault = true; + this.isDefault = isDefault; this.id = generateId(); } @@ -71,14 +65,12 @@ public Source(SourceType type) { */ public static Source fromProto(SourceProto.Source sourceProto) { if (sourceProto.equals(SourceProto.Source.getDefaultInstance())) { - Source source = new Source(SourceType.UNRECOGNIZED); - source.setUseDefault(true); - return source; + return new Source(); } switch (sourceProto.getType()) { case KAFKA: - return new Source(sourceProto.getType(), sourceProto.getKafkaSourceConfig()); + return new Source(sourceProto.getType(), sourceProto.getKafkaSourceConfig(), false); case UNRECOGNIZED: default: throw Status.INVALID_ARGUMENT @@ -149,12 +141,12 @@ public SourceType getType() { * * @return boolean indicating whether this feature set source uses defaults. */ - public boolean isUseDefault() { - return useDefault; + public boolean isDefault() { + return isDefault; } /** - * Override equality for sources. useDefault is always compared first; if both sources are using + * Override equality for sources. isDefault is always compared first; if both sources are using * the default feature source, they will be equal. If not they will be compared based on their * type-specific options. * @@ -162,7 +154,7 @@ public boolean isUseDefault() { * @return boolean equal */ public boolean equalTo(Source other) { - if (other.useDefault && useDefault) { + if (other.isDefault && isDefault) { return true; } @@ -181,9 +173,6 @@ public boolean equalTo(Source other) { } private String generateId() { - if (useDefault) { - return "DEFAULT"; - } switch (SourceType.valueOf(type)) { case KAFKA: return String.format("KAFKA/%s/%s", bootstrapServers, topics); diff --git a/core/src/main/java/feast/core/service/FeatureStreamService.java b/core/src/main/java/feast/core/service/FeatureStreamService.java deleted file mode 100644 index 2532de13e3a..00000000000 --- a/core/src/main/java/feast/core/service/FeatureStreamService.java +++ /dev/null @@ -1,53 +0,0 @@ -package feast.core.service; - -import feast.core.SourceProto.SourceType; -import feast.core.config.FeastProperties; -import feast.core.model.FeatureSet; -import feast.core.model.Source; -import feast.core.stream.FeatureStream; -import feast.core.stream.kafka.KafkaFeatureStream; -import feast.core.stream.kafka.KafkaFeatureStreamConfig; -import java.util.Map; -import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.stereotype.Service; - -/** - * Facilitates management of the feature stream. - */ -@Slf4j -@Service -public class FeatureStreamService { - private final SourceType defaultStreamType; - private final Map defaultOptions; - - @Autowired - public FeatureStreamService(FeastProperties feastProperties) { - this.defaultOptions = feastProperties.getStream().getOptions(); - this.defaultStreamType = SourceType.valueOf(feastProperties.getStream().getType().toUpperCase()); - } - - /** - * Provisions a topic given the featureSet. If the topic already exists, and was not created by - * feast, an error will be thrown. - * - * @param featureSet featureSet to create the topic for - * @return Source updated with provisioned feature source - */ - public Source setUpSource(FeatureSet featureSet) { - if (featureSet.getSource().isUseDefault()){ - featureSet.getSource().setType(defaultStreamType.toString()); - } - - switch (featureSet.getSource().getType()) { - case KAFKA: - FeatureStream featureStream = new KafkaFeatureStream(KafkaFeatureStreamConfig.fromMap(defaultOptions)); - return featureStream.provision(featureSet); - case UNRECOGNIZED: - default: - throw new IllegalArgumentException( - String.format("Invalid source %s provided, only source of type [KAFKA] allowed", - featureSet.getSource().getType())); - } - } -} diff --git a/core/src/main/java/feast/core/service/JobCoordinatorService.java b/core/src/main/java/feast/core/service/JobCoordinatorService.java index 116f75eea4e..2cd5cd6778a 100644 --- a/core/src/main/java/feast/core/service/JobCoordinatorService.java +++ b/core/src/main/java/feast/core/service/JobCoordinatorService.java @@ -66,7 +66,7 @@ public JobInfo startOrUpdateJob(List featureSetSpecs, return updateJob(job.get(), featureSetSpecs, store); } } else { - return startJob(createJobId(source.getType().toString().toLowerCase(), store.getName()), + return startJob(createJobId(source.getId(), store.getName()), featureSetSpecs, sourceSpec, store); } } diff --git a/core/src/main/java/feast/core/service/SpecService.java b/core/src/main/java/feast/core/service/SpecService.java index ea83eee262b..b7862c2dd91 100644 --- a/core/src/main/java/feast/core/service/SpecService.java +++ b/core/src/main/java/feast/core/service/SpecService.java @@ -29,6 +29,7 @@ import feast.core.CoreServiceProto.UpdateStoreRequest; import feast.core.CoreServiceProto.UpdateStoreResponse; import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.SourceProto; import feast.core.StoreProto; import feast.core.dao.FeatureSetRepository; import feast.core.dao.StoreRepository; @@ -56,8 +57,7 @@ public class SpecService { private final FeatureSetRepository featureSetRepository; private final StoreRepository storeRepository; - private final FeatureStreamService featureStreamService; - private final JobCoordinatorService jobCoordinatorService; + private final Source defaultSource; private final Pattern versionPattern = Pattern .compile("^(?[\\>\\<\\=]{0,2})(?\\d*)$"); @@ -66,12 +66,10 @@ public class SpecService { public SpecService( FeatureSetRepository featureSetRepository, StoreRepository storeRepository, - FeatureStreamService featureStreamService, - JobCoordinatorService jobCoordinatorService) { + Source defaultSource) { this.featureSetRepository = featureSetRepository; this.storeRepository = storeRepository; - this.featureStreamService = featureStreamService; - this.jobCoordinatorService = jobCoordinatorService; + this.defaultSource = defaultSource; } /** @@ -181,9 +179,9 @@ public ApplyFeatureSetResponse applyFeatureSet(FeatureSetSpec newFeatureSetSpec) .build(); } FeatureSet featureSet = FeatureSet.fromProto(newFeatureSetSpec); - Source updatedSource = featureStreamService.setUpSource(featureSet); - featureSet.setSource(updatedSource); - + if (newFeatureSetSpec.getSource() == SourceProto.Source.getDefaultInstance()) { + featureSet.setSource(defaultSource); + } featureSetRepository.saveAndFlush(featureSet); return ApplyFeatureSetResponse.newBuilder() diff --git a/core/src/main/java/feast/core/stream/FeatureStream.java b/core/src/main/java/feast/core/stream/FeatureStream.java deleted file mode 100644 index f6881998d9f..00000000000 --- a/core/src/main/java/feast/core/stream/FeatureStream.java +++ /dev/null @@ -1,21 +0,0 @@ -package feast.core.stream; - -import feast.core.SourceProto.SourceType; -import feast.core.model.FeatureSet; -import feast.core.model.Source; - -public interface FeatureStream { - - /** - * Gets the type of feature stream - * @return type of feature stream - */ - SourceType getType(); - - /** - * Provisions a sink for the feature producer to write to. For the given topic name. - * - * - */ - Source provision(FeatureSet featureSet) throws RuntimeException; -} diff --git a/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStream.java b/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStream.java deleted file mode 100644 index 487e15e7507..00000000000 --- a/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStream.java +++ /dev/null @@ -1,81 +0,0 @@ -package feast.core.stream.kafka; - -import com.google.common.base.Strings; -import com.google.protobuf.InvalidProtocolBufferException; -import feast.core.SourceProto.KafkaSourceConfig; -import feast.core.SourceProto.SourceType; -import feast.core.model.FeatureSet; -import feast.core.model.Source; -import feast.core.stream.FeatureStream; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.ExecutionException; -import lombok.AllArgsConstructor; -import lombok.extern.slf4j.Slf4j; -import org.apache.kafka.clients.admin.AdminClient; -import org.apache.kafka.clients.admin.AdminClientConfig; -import org.apache.kafka.clients.admin.CreateTopicsResult; -import org.apache.kafka.clients.admin.NewTopic; -import org.apache.kafka.common.errors.TopicExistsException; - -@Slf4j -@AllArgsConstructor -public class KafkaFeatureStream implements FeatureStream { - - private static SourceType FEATURE_STREAM_TYPE = SourceType.KAFKA; - - private KafkaFeatureStreamConfig defaultConfig; - - @Override - public SourceType getType() { - return FEATURE_STREAM_TYPE; - } - - @Override - public Source provision(FeatureSet featureSet) throws RuntimeException { - - Source source = featureSet.getSource(); - KafkaSourceConfig config = KafkaSourceConfig.getDefaultInstance(); - String bootstrapServers = defaultConfig.getBootstrapServers(); - String topicName = defaultConfig.getTopic(); - if (!source.isUseDefault()) { - try { - config = (KafkaSourceConfig) source.getOptions(); - topicName = config.getTopic(); - bootstrapServers = config.getBootstrapServers(); - } catch (NullPointerException e) { - throw new RuntimeException(String - .format("Unable to retrieve bootstrap servers for featureSet %s", featureSet.getName()), - e); - } - } - - Map map = new HashMap<>(); - map.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); - map.put(AdminClientConfig.REQUEST_TIMEOUT_MS_CONFIG, "1000"); - AdminClient client = AdminClient.create(map); - - NewTopic newTopic = new NewTopic(topicName, - defaultConfig.getTopicNumPartitions(), - defaultConfig.getTopicReplicationFactor()); - CreateTopicsResult createTopicsResult = client.createTopics(Collections.singleton(newTopic)); - try { - createTopicsResult.values().get(topicName).get(); - } catch (InterruptedException | ExecutionException e) { - if (e.getCause().getClass().equals(TopicExistsException.class)) { - log.warn(Strings - .lenientFormat( - "Unable to create topic %s in the feature stream, topic already exists, using existing topic.", - topicName)); - } else { - throw new RuntimeException(e.getMessage(), e); - } - } - - assert config != null; - source.setTopics(topicName); - source.setBootstrapServers(bootstrapServers); - return source; - } -} diff --git a/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStreamConfig.java b/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStreamConfig.java deleted file mode 100644 index eb9177bbec9..00000000000 --- a/core/src/main/java/feast/core/stream/kafka/KafkaFeatureStreamConfig.java +++ /dev/null @@ -1,54 +0,0 @@ -package feast.core.stream.kafka; - -import feast.core.util.TypeConversion; -import java.util.Map; -import javax.naming.ConfigurationException; -import lombok.Value; - -@Value -public class KafkaFeatureStreamConfig { - - private static String NUM_PARTITIONS_DEFAULT = "1"; - private static String REPLICATION_FACTOR_DEFAULT = "1"; - - /** - * Feast stream kafka bootstrap servers - */ - String bootstrapServers; - - /** - * Feast stream topic name - */ - String topic; - - /** - * Number of partitions per topic - */ - int topicNumPartitions; - - /** - * Replication factor of each topic created - */ - short topicReplicationFactor; - - /** - * Constructor from a map - * - * @param optionMap map containing the kafka feature stream configuration in key:value - * format. The options should contain: - * 1. bootstrapServers: optional, default kafka bootstrap servers, defaults to "KAFKA:9092"
- * 2. topicPrefix: optional, topic prefix, defaults to "feast"
- * 3. partitions: optional, number of partitions per topic created, defaults to 10
- * 4. replicationFactor: optional, replication factor of topics created, defaults to 2
- * - * @return KafkaFeatureStreamConfig object - */ - public static KafkaFeatureStreamConfig fromMap(Map optionMap) { - String bootstrapServers = optionMap.getOrDefault("bootstrapServers", "KAFKA:9092"); - String topicPrefix = optionMap.getOrDefault("topic", "feast-features"); - int numPartitions = Integer.parseInt(optionMap.getOrDefault("partitions", NUM_PARTITIONS_DEFAULT)); - short replicationFactor = Short.parseShort(optionMap.getOrDefault("replicationFactor", REPLICATION_FACTOR_DEFAULT)); - - return new KafkaFeatureStreamConfig(bootstrapServers, topicPrefix, numPartitions, replicationFactor); - } -} diff --git a/core/src/test/java/feast/core/job/ScheduledJobMonitorTest.java b/core/src/test/java/feast/core/job/ScheduledJobMonitorTest.java index 1f4ffcf9828..631e5a257a9 100644 --- a/core/src/test/java/feast/core/job/ScheduledJobMonitorTest.java +++ b/core/src/test/java/feast/core/job/ScheduledJobMonitorTest.java @@ -26,6 +26,8 @@ import com.google.common.collect.Lists; import feast.core.SourceProto; +import feast.core.SourceProto.KafkaSourceConfig; +import feast.core.SourceProto.SourceType; import feast.core.dao.JobInfoRepository; import feast.core.dao.MetricsRepository; import feast.core.model.JobInfo; @@ -48,9 +50,11 @@ public class ScheduledJobMonitorTest { ScheduledJobMonitor scheduledJobMonitor; - @Mock JobMonitor jobMonitor; + @Mock + JobMonitor jobMonitor; - @Mock JobInfoRepository jobInfoRepository; + @Mock + JobInfoRepository jobInfoRepository; @Before public void setUp() { @@ -60,12 +64,15 @@ public void setUp() { @Test public void getJobStatus_shouldUpdateJobInfoForRunningJob() { + Source source = new Source(SourceType.KAFKA, + KafkaSourceConfig.newBuilder().setBootstrapServers("kafka:9092") + .setTopic("feast-topic").build(), true); JobInfo job = new JobInfo( "jobId", "extId1", "DataflowRunner", - Source.fromProto(SourceProto.Source.getDefaultInstance()), + source, new Store(), Collections.emptyList(), Collections.emptyList(), diff --git a/core/src/test/java/feast/core/service/FeatureStreamServiceTest.java b/core/src/test/java/feast/core/service/FeatureStreamServiceTest.java deleted file mode 100644 index 2603000fff6..00000000000 --- a/core/src/test/java/feast/core/service/FeatureStreamServiceTest.java +++ /dev/null @@ -1,90 +0,0 @@ -package feast.core.service; - -import static org.mockito.MockitoAnnotations.initMocks; - -import feast.core.stream.FeatureStream; -import org.junit.Before; -import org.junit.Rule; -import org.junit.rules.ExpectedException; -import org.mockito.Mock; - -public class FeatureStreamServiceTest { - - @Mock - private FeatureStream featureStream; - - @Rule - public final ExpectedException expectedException = ExpectedException.none(); - - @Before - public void setUp() { - initMocks(this); - } - -// @Test -// public void shouldProvisionTopicGivenFeatureSet() { -// String topicName = "feast-featureSet-topic"; -// FeatureSet featureSet = new FeatureSet(); -// featureSet.setName("featureSet"); -// when(featureStream.generateTopicName("featureSet")).thenReturn(topicName); -// -// FeatureStreamService featureStreamService = new FeatureStreamService( -// featureStreamTopicRepository, featureStream); -// FeatureStreamTopic actual = featureStreamService.provisionTopic(featureSet); -// -// FeatureStreamTopic expectedTopic = new FeatureStreamTopic(topicName, -// Lists.newArrayList(featureSet)); -// verify(featureStream, times(1)).provisionTopic("feast-featureSet-topic"); -// verify(featureStreamTopicRepository, times(1)).saveAndFlush(expectedTopic); -// assertThat(actual, equalTo(expectedTopic)); -// } -// -// @Test -// public void shouldUpdateRecordIfSelfCreatedTopicExistsGivenFeatureSet() { -// String topicName = "feast-featureSet-topic"; -// FeatureSet oldFeatureSet = new FeatureSet(); -// oldFeatureSet.setName("featureSet"); -// oldFeatureSet.setVersion(1); -// -// FeatureSet newFeatureSet = new FeatureSet(); -// newFeatureSet.setName("featureSet"); -// oldFeatureSet.setVersion(2); -// -// FeatureStreamTopic originalTopic = new FeatureStreamTopic(topicName, -// Lists.newArrayList(oldFeatureSet)); -// -// when(featureStream.generateTopicName("featureSet")).thenReturn(topicName); -// doThrow(new TopicExistsException()).when(featureStream).provisionTopic(topicName); -// when(featureStreamTopicRepository.findById(topicName)).thenReturn(Optional.of(originalTopic)); -// FeatureStreamService featureStreamService = new FeatureStreamService( -// featureStreamTopicRepository, featureStream); -// -// FeatureStreamTopic expectedTopic = new FeatureStreamTopic(topicName, -// Lists.newArrayList(oldFeatureSet, newFeatureSet)); -// -// FeatureStreamTopic actual = featureStreamService.provisionTopic(newFeatureSet); -// verify(featureStreamTopicRepository, times(1)).saveAndFlush(expectedTopic); -// -// assertThat(actual, equalTo(expectedTopic)); -// } -// -// @Test -// public void shouldThrowErrorIfTopicExistsGivenFeatureSet() { -// String topicName = "feast-featureSet-topic"; -// -// FeatureSet featureSet = new FeatureSet(); -// featureSet.setName("featureSet"); -// -// when(featureStream.generateTopicName("featureSet")).thenReturn(topicName); -// doThrow(new TopicExistsException()).when(featureStream).provisionTopic(topicName); -// when(featureStreamTopicRepository.findById(topicName)).thenReturn(Optional.empty()); -// -// FeatureStreamService featureStreamService = new FeatureStreamService( -// featureStreamTopicRepository, featureStream); -// -// expectedException.expect(TopicExistsException.class); -// featureStreamService.provisionTopic(featureSet); -// } - - -} \ No newline at end of file diff --git a/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java b/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java index 528b2f7d6d4..47d3e8486da 100644 --- a/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java +++ b/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java @@ -9,6 +9,8 @@ import com.google.common.collect.Lists; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.SourceProto; +import feast.core.SourceProto.KafkaSourceConfig; +import feast.core.SourceProto.SourceType; import feast.core.StoreProto; import feast.core.StoreProto.Store.RedisConfig; import feast.core.StoreProto.Store.StoreType; @@ -37,7 +39,7 @@ public class JobCoordinatorServiceTest { private JobCoordinatorService jobCoordinatorService; private JobInfo existingJob; - + private Source defaultSource; @Before public void setUp() { @@ -48,15 +50,19 @@ public void setUp() { .setType(StoreType.REDIS) .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379)) .build()); + defaultSource = new Source(SourceType.KAFKA, + KafkaSourceConfig.newBuilder().setBootstrapServers("kafka:9092").setTopic("feast-topic") + .build(), true); FeatureSet featureSet1 = new FeatureSet(); featureSet1.setId("featureSet1:1"); + featureSet1.setSource(defaultSource); FeatureSet featureSet2 = new FeatureSet(); featureSet2.setId("featureSet2:1"); - Source source = Source.fromProto(SourceProto.Source.getDefaultInstance()); - existingJob = new JobInfo("extid", "name", "DirectRunner", source, store, + featureSet2.setSource(defaultSource); + existingJob = new JobInfo("extid", "name", "DirectRunner", defaultSource, store, Lists.newArrayList(featureSet1, featureSet2), Lists.newArrayList(), JobStatus.RUNNING); - when(jobInfoRepository.findBySourceIdAndStoreName("DEFAULT", "SERVING")) + when(jobInfoRepository.findBySourceIdAndStoreName(defaultSource.getId(), "SERVING")) .thenReturn(Lists.newArrayList(existingJob)); jobCoordinatorService = new JobCoordinatorService(jobInfoRepository, jobManager); @@ -68,10 +74,12 @@ public void shouldNotStartOrUpdateJobIfNoChanges() { FeatureSetSpec featureSet1 = FeatureSetSpec.newBuilder() .setName("featureSet1") .setVersion(1) + .setSource(defaultSource.toProto()) .build(); FeatureSetSpec featureSet2 = FeatureSetSpec.newBuilder() .setName("featureSet2") .setVersion(1) + .setSource(defaultSource.toProto()) .build(); StoreProto.Store store = StoreProto.Store.newBuilder() .setName("SERVING") @@ -80,7 +88,7 @@ public void shouldNotStartOrUpdateJobIfNoChanges() { .build(); JobInfo jobInfo = jobCoordinatorService .startOrUpdateJob(Lists.newArrayList(featureSet1, featureSet2), - SourceProto.Source.getDefaultInstance(), store); + defaultSource.toProto(), store); assertThat(jobInfo, equalTo(existingJob)); } @@ -89,6 +97,7 @@ public void shouldStartJobIfNotExists() { FeatureSetSpec featureSet = FeatureSetSpec.newBuilder() .setName("featureSet") .setVersion(1) + .setSource(defaultSource.toProto()) .build(); StoreProto.Store store = StoreProto.Store.newBuilder() .setName("SERVING") @@ -104,12 +113,12 @@ public void shouldStartJobIfNotExists() { when(jobManager.getRunnerType()).thenReturn(Runner.DIRECT); FeatureSet expectedFeatureSet = new FeatureSet(); expectedFeatureSet.setId("featureSet:1"); - Source source = Source.fromProto(SourceProto.Source.getDefaultInstance()); JobInfo expectedJobInfo = new JobInfo(jobId, extJobId, "DirectRunner", - source, Store.fromProto(store), Lists.newArrayList(expectedFeatureSet), JobStatus.RUNNING); + defaultSource, Store.fromProto(store), Lists.newArrayList(expectedFeatureSet), + JobStatus.RUNNING); when(jobInfoRepository.save(expectedJobInfo)).thenReturn(expectedJobInfo); JobInfo jobInfo = jobCoordinatorService - .startOrUpdateJob(Lists.newArrayList(featureSet), SourceProto.Source.getDefaultInstance(), + .startOrUpdateJob(Lists.newArrayList(featureSet), defaultSource.toProto(), store); assertThat(jobInfo, equalTo(expectedJobInfo)); } @@ -119,6 +128,7 @@ public void shouldUpdateJobIfAlreadyExistsButThereIsAChange() { FeatureSetSpec featureSet = FeatureSetSpec.newBuilder() .setName("featureSet1") .setVersion(1) + .setSource(defaultSource.toProto()) .build(); StoreProto.Store store = StoreProto.Store.newBuilder() .setName("SERVING") @@ -126,16 +136,15 @@ public void shouldUpdateJobIfAlreadyExistsButThereIsAChange() { .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379)) .build(); String extId = "extId123"; - Source source = Source.fromProto(SourceProto.Source.getDefaultInstance()); JobInfo modifiedJob = new JobInfo(existingJob.getId(), existingJob.getExtId(), - existingJob.getRunner(), source, Store.fromProto(store), + existingJob.getRunner(), defaultSource, Store.fromProto(store), Lists.newArrayList(FeatureSet.fromProto(featureSet)), JobStatus.RUNNING); when(jobManager.updateJob(modifiedJob)).thenReturn(extId); JobInfo expectedJobInfo = modifiedJob; expectedJobInfo.setExtId(extId); when(jobInfoRepository.save(expectedJobInfo)).thenReturn(expectedJobInfo); JobInfo jobInfo = jobCoordinatorService - .startOrUpdateJob(Lists.newArrayList(featureSet), SourceProto.Source.getDefaultInstance(), + .startOrUpdateJob(Lists.newArrayList(featureSet), defaultSource.toProto(), store); assertThat(jobInfo, equalTo(expectedJobInfo)); } diff --git a/core/src/test/java/feast/core/service/SpecServiceTest.java b/core/src/test/java/feast/core/service/SpecServiceTest.java index fde90f2d422..63f15c8bd28 100644 --- a/core/src/test/java/feast/core/service/SpecServiceTest.java +++ b/core/src/test/java/feast/core/service/SpecServiceTest.java @@ -72,26 +72,26 @@ public class SpecServiceTest { @Mock private StoreRepository storeRepository; - @Mock - private FeatureStreamService featureStreamService; - - @Mock - private JobCoordinatorService jobCoordinatorService; - @Rule public final ExpectedException expectedException = ExpectedException.none(); private SpecService specService; private List featureSets; private List stores; + private Source defaultSource; @Before public void setUp() { initMocks(this); + defaultSource = new Source(SourceType.KAFKA, + KafkaSourceConfig.newBuilder().setBootstrapServers("kafka:9092").setTopic("my-topic") + .build(), true); + FeatureSet featureSet1v1 = newDummyFeatureSet("f1", 1); FeatureSet featureSet1v2 = newDummyFeatureSet("f1", 2); FeatureSet featureSet1v3 = newDummyFeatureSet("f1", 3); FeatureSet featureSet2v1 = newDummyFeatureSet("f2", 1); + featureSets = Arrays.asList(featureSet1v1, featureSet1v2, featureSet1v3, featureSet2v1); when(featureSetRepository.findAll()) .thenReturn(featureSets); @@ -111,8 +111,9 @@ public void setUp() { when(storeRepository.findById("SERVING")).thenReturn(Optional.of(store1)); when(storeRepository.findById("NOTFOUND")).thenReturn(Optional.empty()); - specService = new SpecService(featureSetRepository, storeRepository, featureStreamService, - jobCoordinatorService); + + + specService = new SpecService(featureSetRepository, storeRepository, defaultSource); } @Test @@ -205,7 +206,7 @@ public void shouldGetLatestFeatureSetGivenLatestVersionFilter() GetFeatureSetsResponse actual = specService .getFeatureSets( Filter.newBuilder().setFeatureSetName("f1").setFeatureSetVersion("latest").build()); - List expectedFeatureSets = featureSets.subList(2,3); + List expectedFeatureSets = featureSets.subList(2, 3); List list = new ArrayList<>(); for (FeatureSet expectedFeatureSet : expectedFeatureSets) { FeatureSetSpec toProto = expectedFeatureSet.toProto(); @@ -280,11 +281,6 @@ public void applyFeatureSetShouldReturnFeatureSetWithLatestVersionIfFeatureSetHa public void applyFeatureSetShouldApplyFeatureSetWithInitVersionIfNotExists() throws InvalidProtocolBufferException { when(featureSetRepository.findByName("f2")).thenReturn(Lists.newArrayList()); - Source updatedSource = new Source(SourceType.KAFKA, - KafkaSourceConfig.newBuilder().setBootstrapServers("kafka:9092") - .setTopic("feast-f2-features").build()); - when(featureStreamService.setUpSource(ArgumentMatchers.any(FeatureSet.class))) - .thenReturn(updatedSource); FeatureSetSpec incomingFeatureSet = newDummyFeatureSet("f2", 1) .toProto() .toBuilder() @@ -295,7 +291,7 @@ public void applyFeatureSetShouldApplyFeatureSetWithInitVersionIfNotExists() verify(featureSetRepository).saveAndFlush(ArgumentMatchers.any(FeatureSet.class)); FeatureSetSpec expected = incomingFeatureSet.toBuilder() .setVersion(1) - .setSource(updatedSource.toProto()) + .setSource(defaultSource.toProto()) .build(); assertThat(applyFeatureSetResponse.getStatus(), equalTo(Status.CREATED)); assertThat(applyFeatureSetResponse.getFeatureSet(), equalTo(expected)); @@ -304,18 +300,13 @@ public void applyFeatureSetShouldApplyFeatureSetWithInitVersionIfNotExists() @Test public void applyFeatureSetShouldIncrementFeatureSetVersionIfAlreadyExists() throws InvalidProtocolBufferException { - Source updatedSource = new Source(SourceType.KAFKA, - KafkaSourceConfig.newBuilder().setBootstrapServers("kafka:9092") - .setTopic("feast-f1-features").build()); - when(featureStreamService.setUpSource(ArgumentMatchers.any(FeatureSet.class))) - .thenReturn(updatedSource); FeatureSetSpec incomingFeatureSet = featureSets.get(2).toProto().toBuilder() .clearVersion() .addFeatures(FeatureSpec.newBuilder().setName("feature2").setValueType(Enum.STRING)) .build(); FeatureSetSpec expected = incomingFeatureSet.toBuilder() .setVersion(4) - .setSource(updatedSource.toProto()) + .setSource(defaultSource.toProto()) .build(); ApplyFeatureSetResponse applyFeatureSetResponse = specService .applyFeatureSet(incomingFeatureSet); @@ -359,15 +350,10 @@ public void shouldDoNothingIfNoChange() throws InvalidProtocolBufferException { } private FeatureSet newDummyFeatureSet(String name, int version) { - KafkaSourceConfig kafkaFeatureSourceOptions = - KafkaSourceConfig.newBuilder() - .setBootstrapServers("kafka:9092") - .build(); Field feature = new Field(name, "feature", Enum.INT64); Field entity = new Field(name, "entity", Enum.STRING); return new FeatureSet(name, version, 100L, Arrays.asList(entity), Arrays.asList(feature), - new Source( - SourceType.KAFKA, kafkaFeatureSourceOptions)); + defaultSource); } private Store newDummyStore(String name) { diff --git a/core/src/test/java/feast/core/stream/kafka/KafkaFeatureStreamTest.java b/core/src/test/java/feast/core/stream/kafka/KafkaFeatureStreamTest.java deleted file mode 100644 index a75f3848878..00000000000 --- a/core/src/test/java/feast/core/stream/kafka/KafkaFeatureStreamTest.java +++ /dev/null @@ -1,61 +0,0 @@ -package feast.core.stream.kafka; - -import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.core.IsEqual.equalTo; -import static org.mockito.Mockito.doReturn; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import feast.core.exception.TopicExistsException; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.ExecutionException; -import org.apache.kafka.clients.admin.AdminClient; -import org.apache.kafka.clients.admin.CreateTopicsResult; -import org.apache.kafka.common.KafkaFuture; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; -import org.mockito.ArgumentMatchers; -import org.mockito.Mock; -import org.mockito.MockitoAnnotations; - -public class KafkaFeatureStreamTest { - -// @Rule -// public ExpectedException expectedException = ExpectedException.none(); -// -// @Mock -// private AdminClient kafkaAdminClient; -// @Mock -// private CreateTopicsResult createTopicsResult; -// -// private KafkaFeatureStream kafkaFeatureStream; -// -// @Before -// public void setUp() { -// MockitoAnnotations.initMocks(this); -// KafkaFeatureStreamConfig config = new KafkaFeatureStreamConfig("localhost:8121", "feast", 1, -// (short) 2); -// kafkaFeatureStream = new KafkaFeatureStream(kafkaAdminClient, config); -// } -// -// @Test -// public void shouldThrowTopicExistsExceptionIfTopicExists() -// throws ExecutionException, InterruptedException { -// -// KafkaFuture result = mock(KafkaFuture.class); -// -// when(result.get()).thenThrow(new org.apache.kafka.common.errors.TopicExistsException("")); -// Map> resultMap = new HashMap<>(); -// resultMap.put("my-topic", result); -// when(createTopicsResult.values()).thenReturn(resultMap); -// doReturn(createTopicsResult).when(kafkaAdminClient) -// .createTopics(ArgumentMatchers.anyCollection()); -// -// expectedException.expect(TopicExistsException.class); -// kafkaFeatureStream.provisionTopic("my-topic"); -// } -} - From 2fd027e6a09537b5ca4bf033dff7492571522de2 Mon Sep 17 00:00:00 2001 From: zhilingc Date: Wed, 30 Oct 2019 15:33:08 +0800 Subject: [PATCH 10/12] Update end-to-end test config --- .prow/scripts/test-end-to-end.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.prow/scripts/test-end-to-end.sh b/.prow/scripts/test-end-to-end.sh index 89ddd826e33..2a4cb9e710c 100755 --- a/.prow/scripts/test-end-to-end.sh +++ b/.prow/scripts/test-end-to-end.sh @@ -97,6 +97,7 @@ feast: stream: type: kafka options: + topic: feast-features bootstrapServers: localhost:9092 replicationFactor: 1 partitions: 1 From 3163a69627d4e2a0a136ac21e84f1b2b294ce33f Mon Sep 17 00:00:00 2001 From: zhilingc Date: Wed, 30 Oct 2019 15:41:15 +0800 Subject: [PATCH 11/12] Add DefaultSource bean --- .../core/config/FeatureStreamConfig.java | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 core/src/main/java/feast/core/config/FeatureStreamConfig.java diff --git a/core/src/main/java/feast/core/config/FeatureStreamConfig.java b/core/src/main/java/feast/core/config/FeatureStreamConfig.java new file mode 100644 index 00000000000..40344681722 --- /dev/null +++ b/core/src/main/java/feast/core/config/FeatureStreamConfig.java @@ -0,0 +1,65 @@ +package feast.core.config; + +import com.google.common.base.Strings; +import feast.core.SourceProto.KafkaSourceConfig; +import feast.core.SourceProto.SourceType; +import feast.core.config.FeastProperties.StreamProperties; +import feast.core.model.Source; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.ExecutionException; +import lombok.extern.slf4j.Slf4j; +import org.apache.kafka.clients.admin.AdminClient; +import org.apache.kafka.clients.admin.AdminClientConfig; +import org.apache.kafka.clients.admin.CreateTopicsResult; +import org.apache.kafka.clients.admin.NewTopic; +import org.apache.kafka.common.errors.TopicExistsException; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Slf4j +@Configuration +public class FeatureStreamConfig { + + @Autowired + @Bean + public Source getDefaultSource(FeastProperties feastProperties) { + StreamProperties streamProperties = feastProperties.getStream(); + SourceType featureStreamType = SourceType + .valueOf(streamProperties.getType().toUpperCase()); + switch (featureStreamType) { + case KAFKA: + String bootstrapServers = streamProperties.getOptions().get("bootstrapServers"); + String topicName = streamProperties.getOptions().get("topic"); + Map map = new HashMap<>(); + map.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); + map.put(AdminClientConfig.REQUEST_TIMEOUT_MS_CONFIG, "1000"); + AdminClient client = AdminClient.create(map); + + NewTopic newTopic = new NewTopic(topicName, + Integer.valueOf(streamProperties.getOptions().getOrDefault("numPartitions", "1")), + Short.valueOf(streamProperties.getOptions().getOrDefault("replicationFactor", "1"))); + CreateTopicsResult createTopicsResult = client + .createTopics(Collections.singleton(newTopic)); + try { + createTopicsResult.values().get(topicName).get(); + } catch (InterruptedException | ExecutionException e) { + if (e.getCause().getClass().equals(TopicExistsException.class)) { + log.warn(Strings + .lenientFormat( + "Unable to create topic %s in the feature stream, topic already exists, using existing topic.", + topicName)); + } else { + throw new RuntimeException(e.getMessage(), e); + } + } + KafkaSourceConfig sourceConfig = KafkaSourceConfig.newBuilder() + .setBootstrapServers(bootstrapServers).setTopic(topicName).build(); + return new Source(featureStreamType, sourceConfig, true); + default: + throw new RuntimeException("Unsupported source stream, only [KAFKA] is supported"); + } + } +} From cc41c5d77245ba6fbc8f83172484df9cb272cac6 Mon Sep 17 00:00:00 2001 From: zhilingc Date: Wed, 30 Oct 2019 16:27:18 +0800 Subject: [PATCH 12/12] Change column name to match param name --- core/src/main/java/feast/core/model/Source.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/feast/core/model/Source.java b/core/src/main/java/feast/core/model/Source.java index 0f545e8cf20..94515629cfe 100644 --- a/core/src/main/java/feast/core/model/Source.java +++ b/core/src/main/java/feast/core/model/Source.java @@ -37,7 +37,7 @@ public class Source { @Column(name = "topics") private String topics; - @Column(name = "use_default") + @Column(name = "is_default") private boolean isDefault; public Source() {