From f2d4db5bbdb5669ae95cb4a45fefb5de09d4c9ed Mon Sep 17 00:00:00 2001 From: Chen Zhiling Date: Tue, 17 Dec 2019 17:15:44 +0800 Subject: [PATCH 01/18] Async job management (#361) * Async job management * Add feature set status for safe ingestion * Make job update timeout configurable * Set job update timeout * Change JobInfo to Job, move Job object instantiation to JobManagers * Increase kafka wait time * Remove Info from method * Update feature set yamls to follow new format * Change toSpec to toProto, refactor job start signature, change Apply to take full FeatureSet object * Erase all traces of jobInfo * Remove status and created timestamp from constructor * Remove queue buffer limit --- .prow/scripts/test-end-to-end-batch.sh | 6 +- .prow/scripts/test-end-to-end.sh | 6 +- .../feast/core/config/FeastProperties.java | 8 + .../java/feast/core/config/JobConfig.java | 56 +-- .../feast/core/dao/FeatureSetRepository.java | 6 +- ...InfoRepository.java => JobRepository.java} | 10 +- .../feast/core/dao/MetricsRepository.java | 2 +- .../SourceRepository.java} | 15 +- .../java/feast/core/grpc/CoreServiceImpl.java | 69 +--- .../main/java/feast/core/job/JobManager.java | 28 +- .../java/feast/core/job/JobUpdateTask.java | 213 +++++++++++ .../feast/core/job/ScheduledJobMonitor.java | 87 ----- .../core/job/dataflow/DataflowJobManager.java | 106 ++++-- .../core/job/dataflow/DataflowJobMonitor.java | 77 ---- .../job/dataflow/DataflowJobStateMapper.java | 2 +- .../core/job/direct/DirectJobStateMapper.java | 2 +- .../job/direct/DirectRunnerJobManager.java | 54 ++- .../job/direct/DirectRunnerJobMonitor.java | 43 --- .../core/model/AbstractTimestampEntity.java | 7 + .../java/feast/core/model/FeatureSet.java | 70 ++-- .../core/model/{JobInfo.java => Job.java} | 8 +- .../main/java/feast/core/model/Metrics.java | 6 +- .../core/service/JobCoordinatorService.java | 290 ++++++++------- .../feast/core/service/JobStatusService.java | 80 ----- .../java/feast/core/service/SpecService.java | 27 
+- core/src/main/resources/application.yml | 3 + .../feast/core/grpc/CoreServiceImplTest.java | 153 -------- .../java/feast/core/job/JobMatcher.java} | 27 +- .../feast/core/job/JobUpdateTaskTest.java | 274 ++++++++++++++ .../core/job/ScheduledJobMonitorTest.java | 98 ----- .../job/dataflow/DataflowJobManagerTest.java | 71 +++- .../job/dataflow/DataflowJobMonitorTest.java | 123 ------- .../direct/DirectRunnerJobManagerTest.java | 45 ++- .../service/JobCoordinatorServiceTest.java | 337 +++++++++++------- .../feast/core/service/SpecServiceTest.java | 118 +++--- .../ingestion/transform/WriteToStore.java | 18 +- .../java/feast/ingestion/ImportJobTest.java | 87 +++-- .../src/test/java/feast/test/TestUtil.java | 11 +- protos/feast/core/CoreService.proto | 8 +- protos/feast/core/FeatureSet.proto | 29 ++ sdk/python/feast/client.py | 21 +- sdk/python/feast/core/CoreService_pb2.py | 72 ++-- sdk/python/feast/core/CoreService_pb2.pyi | 18 +- sdk/python/feast/core/FeatureSet_pb2.py | 145 +++++++- sdk/python/feast/core/FeatureSet_pb2.pyi | 75 ++++ sdk/python/feast/feature_set.py | 75 +++- sdk/python/feast/loaders/ingest.py | 4 +- sdk/python/tests/feast_core_server.py | 36 +- sdk/python/tests/test_client.py | 138 ++++--- sdk/python/tests/test_stores.py | 4 +- .../serving/service/CachedSpecService.java | 6 +- .../bigquery/BatchRetrievalQueryRunnable.java | 4 +- .../service/CachedSpecServiceTest.java | 11 +- .../all_types_parquet/all_types_parquet.yaml | 61 ++-- tests/e2e/basic-ingest-redis-serving.py | 10 +- tests/e2e/basic/cust_trans_fs.yaml | 21 +- tests/e2e/bq-batch-retrieval.py | 2 - .../e2e/large_volume/cust_trans_large_fs.yaml | 21 +- 58 files changed, 1931 insertions(+), 1473 deletions(-) rename core/src/main/java/feast/core/dao/{JobInfoRepository.java => JobRepository.java} (73%) rename core/src/main/java/feast/core/{job/NoopJobMonitor.java => dao/SourceRepository.java} (71%) create mode 100644 core/src/main/java/feast/core/job/JobUpdateTask.java delete mode 100644 
core/src/main/java/feast/core/job/ScheduledJobMonitor.java delete mode 100644 core/src/main/java/feast/core/job/dataflow/DataflowJobMonitor.java delete mode 100644 core/src/main/java/feast/core/job/direct/DirectRunnerJobMonitor.java rename core/src/main/java/feast/core/model/{JobInfo.java => Job.java} (94%) delete mode 100644 core/src/main/java/feast/core/service/JobStatusService.java delete mode 100644 core/src/test/java/feast/core/grpc/CoreServiceImplTest.java rename core/src/{main/java/feast/core/job/JobMonitor.java => test/java/feast/core/job/JobMatcher.java} (64%) create mode 100644 core/src/test/java/feast/core/job/JobUpdateTaskTest.java delete mode 100644 core/src/test/java/feast/core/job/ScheduledJobMonitorTest.java delete mode 100644 core/src/test/java/feast/core/job/dataflow/DataflowJobMonitorTest.java diff --git a/.prow/scripts/test-end-to-end-batch.sh b/.prow/scripts/test-end-to-end-batch.sh index b370c5b045b..f25c0720edc 100755 --- a/.prow/scripts/test-end-to-end-batch.sh +++ b/.prow/scripts/test-end-to-end-batch.sh @@ -76,7 +76,7 @@ nohup /tmp/kafka/bin/zookeeper-server-start.sh /tmp/kafka/config/zookeeper.prope sleep 5 tail -n10 /var/log/zookeeper.log nohup /tmp/kafka/bin/kafka-server-start.sh /tmp/kafka/config/server.properties &> /var/log/kafka.log 2>&1 & -sleep 10 +sleep 20 tail -n10 /var/log/kafka.log echo " @@ -108,6 +108,8 @@ feast: jobs: runner: DirectRunner options: {} + updates: + timeoutSeconds: 240 metrics: enabled: false @@ -141,7 +143,7 @@ EOF nohup java -jar core/target/feast-core-0.3.2-SNAPSHOT.jar \ --spring.config.location=file:///tmp/core.application.yml \ &> /var/log/feast-core.log & -sleep 30 +sleep 35 tail -n10 /var/log/feast-core.log echo " ============================================================ diff --git a/.prow/scripts/test-end-to-end.sh b/.prow/scripts/test-end-to-end.sh index 2c6f4a098f9..f6ebd8c6eef 100755 --- a/.prow/scripts/test-end-to-end.sh +++ b/.prow/scripts/test-end-to-end.sh @@ -59,7 +59,7 @@ nohup 
/tmp/kafka/bin/zookeeper-server-start.sh /tmp/kafka/config/zookeeper.prope sleep 5 tail -n10 /var/log/zookeeper.log nohup /tmp/kafka/bin/kafka-server-start.sh /tmp/kafka/config/server.properties &> /var/log/kafka.log 2>&1 & -sleep 10 +sleep 20 tail -n10 /var/log/kafka.log echo " @@ -91,6 +91,8 @@ feast: jobs: runner: DirectRunner options: {} + updates: + timeoutSeconds: 240 metrics: enabled: false @@ -124,7 +126,7 @@ EOF nohup java -jar core/target/feast-core-0.3.2-SNAPSHOT.jar \ --spring.config.location=file:///tmp/core.application.yml \ &> /var/log/feast-core.log & -sleep 30 +sleep 35 tail -n10 /var/log/feast-core.log echo " diff --git a/core/src/main/java/feast/core/config/FeastProperties.java b/core/src/main/java/feast/core/config/FeastProperties.java index e57a594305d..1887caf5e64 100644 --- a/core/src/main/java/feast/core/config/FeastProperties.java +++ b/core/src/main/java/feast/core/config/FeastProperties.java @@ -37,6 +37,14 @@ public static class JobProperties { private String runner; private Map options; private MetricsProperties metrics; + private JobUpdatesProperties updates; + } + + @Getter + @Setter + public static class JobUpdatesProperties { + + private long timeoutSeconds; } @Getter diff --git a/core/src/main/java/feast/core/config/JobConfig.java b/core/src/main/java/feast/core/config/JobConfig.java index c47bb784001..728fc0545bf 100644 --- a/core/src/main/java/feast/core/config/JobConfig.java +++ b/core/src/main/java/feast/core/config/JobConfig.java @@ -23,15 +23,12 @@ import com.google.api.services.dataflow.DataflowScopes; import com.google.common.base.Strings; import feast.core.config.FeastProperties.JobProperties; +import feast.core.config.FeastProperties.JobUpdatesProperties; import feast.core.job.JobManager; -import feast.core.job.JobMonitor; -import feast.core.job.NoopJobMonitor; import feast.core.job.Runner; import feast.core.job.dataflow.DataflowJobManager; -import feast.core.job.dataflow.DataflowJobMonitor; import 
feast.core.job.direct.DirectJobRegistry; import feast.core.job.direct.DirectRunnerJobManager; -import feast.core.job.direct.DirectRunnerJobMonitor; import java.io.IOException; import java.security.GeneralSecurityException; import java.util.HashMap; @@ -54,7 +51,7 @@ public class JobConfig { @Bean @Autowired public JobManager getJobManager( - FeastProperties feastProperties, DirectJobRegistry directJobRegistry) throws Exception { + FeastProperties feastProperties, DirectJobRegistry directJobRegistry) { JobProperties jobProperties = feastProperties.getJobs(); Runner runner = Runner.fromString(jobProperties.getRunner()); @@ -97,52 +94,15 @@ public JobManager getJobManager( } } - /** Get a Job Monitor given the runner type and dataflow configuration. */ - @Bean - public JobMonitor getJobMonitor( - FeastProperties feastProperties, DirectJobRegistry directJobRegistry) throws Exception { - - JobProperties jobProperties = feastProperties.getJobs(); - Runner runner = Runner.fromString(jobProperties.getRunner()); - Map jobOptions = jobProperties.getOptions(); - - switch (runner) { - case DATAFLOW: - if (Strings.isNullOrEmpty(jobOptions.getOrDefault("region", null)) - || Strings.isNullOrEmpty(jobOptions.getOrDefault("project", null))) { - log.warn( - "Project and location of the Dataflow runner is not configured, will not do job monitoring"); - return new NoopJobMonitor(); - } - try { - GoogleCredential credential = - GoogleCredential.getApplicationDefault().createScoped(DataflowScopes.all()); - Dataflow dataflow = - new Dataflow( - GoogleNetHttpTransport.newTrustedTransport(), - JacksonFactory.getDefaultInstance(), - credential); - - return new DataflowJobMonitor( - dataflow, jobOptions.get("project"), jobOptions.get("region")); - } catch (IOException e) { - log.error( - "Unable to find credential required for Dataflow monitoring API: {}", e.getMessage()); - } catch (GeneralSecurityException e) { - log.error("Security exception while "); - } catch (Exception e) { - 
log.error("Unable to initialize DataflowJobMonitor", e); - } - case DIRECT: - return new DirectRunnerJobMonitor(directJobRegistry); - default: - return new NoopJobMonitor(); - } - } - /** Get a direct job registry */ @Bean public DirectJobRegistry directJobRegistry() { return new DirectJobRegistry(); } + + /** Extracts job update options from feast core options. */ + @Bean + public JobUpdatesProperties jobUpdatesProperties(FeastProperties feastProperties) { + return feastProperties.getJobs().getUpdates(); + } } diff --git a/core/src/main/java/feast/core/dao/FeatureSetRepository.java b/core/src/main/java/feast/core/dao/FeatureSetRepository.java index ca4d6b9d1cb..fd996b331c2 100644 --- a/core/src/main/java/feast/core/dao/FeatureSetRepository.java +++ b/core/src/main/java/feast/core/dao/FeatureSetRepository.java @@ -36,11 +36,11 @@ public interface FeatureSetRepository extends JpaRepository List findByName(String name); // find all versions of featureSets with names matching the regex - @Query(nativeQuery = true, value = "SELECT * FROM feature_sets " - + "WHERE name LIKE ?1 ORDER BY name ASC, version ASC") + @Query( + nativeQuery = true, + value = "SELECT * FROM feature_sets " + "WHERE name LIKE ?1 ORDER BY name ASC, version ASC") List findByNameWithWildcardOrderByNameAscVersionAsc(String name); // find all feature sets and order by name and version List findAllByOrderByNameAscVersionAsc(); - } diff --git a/core/src/main/java/feast/core/dao/JobInfoRepository.java b/core/src/main/java/feast/core/dao/JobRepository.java similarity index 73% rename from core/src/main/java/feast/core/dao/JobInfoRepository.java rename to core/src/main/java/feast/core/dao/JobRepository.java index 6e5820eae7d..98da76912e7 100644 --- a/core/src/main/java/feast/core/dao/JobInfoRepository.java +++ b/core/src/main/java/feast/core/dao/JobRepository.java @@ -16,17 +16,17 @@ */ package feast.core.dao; -import feast.core.model.JobInfo; +import feast.core.model.Job; import feast.core.model.JobStatus; 
import java.util.Collection; import java.util.List; import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.stereotype.Repository; -/** JPA repository supplying JobInfo objects keyed by ID. */ +/** JPA repository supplying Job objects keyed by ID. */ @Repository -public interface JobInfoRepository extends JpaRepository { - List findByStatusNotIn(Collection statuses); +public interface JobRepository extends JpaRepository { + List findByStatusNotIn(Collection statuses); - List findBySourceIdAndStoreName(String sourceId, String storeName); + List findBySourceIdAndStoreNameOrderByLastUpdatedDesc(String sourceId, String storeName); } diff --git a/core/src/main/java/feast/core/dao/MetricsRepository.java b/core/src/main/java/feast/core/dao/MetricsRepository.java index c7bc4836977..7146e1e3ecb 100644 --- a/core/src/main/java/feast/core/dao/MetricsRepository.java +++ b/core/src/main/java/feast/core/dao/MetricsRepository.java @@ -23,5 +23,5 @@ @Repository public interface MetricsRepository extends JpaRepository { - List findByJobInfo_Id(String id); + List findByJob_Id(String id); } diff --git a/core/src/main/java/feast/core/job/NoopJobMonitor.java b/core/src/main/java/feast/core/dao/SourceRepository.java similarity index 71% rename from core/src/main/java/feast/core/job/NoopJobMonitor.java rename to core/src/main/java/feast/core/dao/SourceRepository.java index c71010c2423..09214a9b684 100644 --- a/core/src/main/java/feast/core/job/NoopJobMonitor.java +++ b/core/src/main/java/feast/core/dao/SourceRepository.java @@ -14,15 +14,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package feast.core.job; +package feast.core.dao; -import feast.core.model.JobInfo; -import feast.core.model.JobStatus; +import feast.core.model.Source; +import org.springframework.data.jpa.repository.JpaRepository; -public class NoopJobMonitor implements JobMonitor { - - @Override - public JobStatus getJobStatus(JobInfo job) { - return JobStatus.UNKNOWN; - } -} +/** JPA repository supplying Source objects keyed by id. */ +public interface SourceRepository extends JpaRepository {} diff --git a/core/src/main/java/feast/core/grpc/CoreServiceImpl.java b/core/src/main/java/feast/core/grpc/CoreServiceImpl.java index 6387fd806b4..1d42cfb3554 100644 --- a/core/src/main/java/feast/core/grpc/CoreServiceImpl.java +++ b/core/src/main/java/feast/core/grpc/CoreServiceImpl.java @@ -16,7 +16,6 @@ */ package feast.core.grpc; -import com.google.common.collect.Lists; import com.google.protobuf.InvalidProtocolBufferException; import feast.core.CoreServiceGrpc.CoreServiceImplBase; import feast.core.CoreServiceProto.ApplyFeatureSetRequest; @@ -28,43 +27,28 @@ import feast.core.CoreServiceProto.ListFeatureSetsRequest; import feast.core.CoreServiceProto.ListFeatureSetsResponse; import feast.core.CoreServiceProto.ListStoresRequest; -import feast.core.CoreServiceProto.ListStoresRequest.Filter; import feast.core.CoreServiceProto.ListStoresResponse; import feast.core.CoreServiceProto.UpdateStoreRequest; import feast.core.CoreServiceProto.UpdateStoreResponse; -import feast.core.CoreServiceProto.UpdateStoreResponse.Status; -import feast.core.FeatureSetProto.FeatureSetSpec; -import feast.core.SourceProto; -import feast.core.StoreProto.Store; -import feast.core.StoreProto.Store.Subscription; import feast.core.exception.RetrievalException; import feast.core.grpc.interceptors.MonitoringInterceptor; -import feast.core.service.JobCoordinatorService; import feast.core.service.SpecService; import io.grpc.StatusRuntimeException; import io.grpc.stub.StreamObserver; -import java.util.HashSet; -import 
java.util.Set; -import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; import org.lognet.springboot.grpc.GRpcService; import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.transaction.annotation.Transactional; -/** - * Implementation of the feast core GRPC service. - */ +/** Implementation of the feast core GRPC service. */ @Slf4j @GRpcService(interceptors = {MonitoringInterceptor.class}) public class CoreServiceImpl extends CoreServiceImplBase { private SpecService specService; - private JobCoordinatorService jobCoordinatorService; @Autowired - public CoreServiceImpl(SpecService specService, JobCoordinatorService jobCoordinatorService) { + public CoreServiceImpl(SpecService specService) { this.specService = specService; - this.jobCoordinatorService = jobCoordinatorService; } @Override @@ -118,31 +102,6 @@ public void applyFeatureSet( ApplyFeatureSetRequest request, StreamObserver responseObserver) { try { ApplyFeatureSetResponse response = specService.applyFeatureSet(request.getFeatureSet()); - ListStoresResponse stores = specService.listStores(Filter.newBuilder().build()); - for (Store store : stores.getStoreList()) { - Set featureSetSpecs = new HashSet<>(); - for (Subscription subscription : store.getSubscriptionsList()) { - featureSetSpecs.addAll( - specService - .listFeatureSets( - ListFeatureSetsRequest.Filter.newBuilder() - .setFeatureSetName(subscription.getName()) - .setFeatureSetVersion(subscription.getVersion()) - .build()) - .getFeatureSetsList()); - } - if (!featureSetSpecs.isEmpty() && featureSetSpecs.contains(response.getFeatureSet())) { - // We use the response featureSet source because it contains the information - // about whether to default to the default feature stream or not - SourceProto.Source source = response.getFeatureSet().getSource(); - featureSetSpecs = - featureSetSpecs.stream() - .filter(fs -> fs.getSource().equals(source)) - .collect(Collectors.toSet()); - 
jobCoordinatorService.startOrUpdateJob( - Lists.newArrayList(featureSetSpecs), source, store); - } - } responseObserver.onNext(response); responseObserver.onCompleted(); } catch (Exception e) { @@ -158,30 +117,6 @@ public void updateStore( UpdateStoreResponse response = specService.updateStore(request); responseObserver.onNext(response); responseObserver.onCompleted(); - - if (!response.getStatus().equals(Status.NO_CHANGE)) { - Set featureSetSpecs = new HashSet<>(); - Store store = response.getStore(); - for (Subscription subscription : store.getSubscriptionsList()) { - featureSetSpecs.addAll( - specService - .listFeatureSets( - ListFeatureSetsRequest.Filter.newBuilder() - .setFeatureSetName(subscription.getName()) - .setFeatureSetVersion(subscription.getVersion()) - .build()) - .getFeatureSetsList()); - } - if (featureSetSpecs.size() == 0) { - return; - } - featureSetSpecs.stream() - .collect(Collectors.groupingBy(FeatureSetSpec::getSource)) - .entrySet() - .stream() - .forEach( - kv -> jobCoordinatorService.startOrUpdateJob(kv.getValue(), kv.getKey(), store)); - } } catch (Exception e) { log.error("Exception has occurred in UpdateStore method: ", e); responseObserver.onError(e); diff --git a/core/src/main/java/feast/core/job/JobManager.java b/core/src/main/java/feast/core/job/JobManager.java index 5147671c840..99880cdb764 100644 --- a/core/src/main/java/feast/core/job/JobManager.java +++ b/core/src/main/java/feast/core/job/JobManager.java @@ -16,10 +16,8 @@ */ package feast.core.job; -import feast.core.FeatureSetProto.FeatureSetSpec; -import feast.core.StoreProto.Store; -import feast.core.model.JobInfo; -import java.util.List; +import feast.core.model.Job; +import feast.core.model.JobStatus; public interface JobManager { @@ -33,20 +31,18 @@ public interface JobManager { /** * Start an import job. 
* - * @param name of job to run - * @param featureSets list of featureSets to be populated by the job - * @param sink Store to sink features to - * @return runner specific job id + * @param job job to start + * @return Job */ - String startJob(String name, List featureSets, Store sink); + Job startJob(Job job); /** * Update already running job with new set of features to ingest. * - * @param jobInfo jobInfo of target job to change - * @return job runner specific job id + * @param job job of target job to change + * @return Job */ - String updateJob(JobInfo jobInfo); + Job updateJob(Job job); /** * Abort a job given runner-specific job ID. @@ -54,4 +50,12 @@ public interface JobManager { * @param extId runner specific job id. */ void abortJob(String extId); + + /** + * Get status of a job given runner-specific job ID. + * + * @param job job. + * @return job status. + */ + JobStatus getJobStatus(Job job); } diff --git a/core/src/main/java/feast/core/job/JobUpdateTask.java b/core/src/main/java/feast/core/job/JobUpdateTask.java new file mode 100644 index 00000000000..373a4a113d7 --- /dev/null +++ b/core/src/main/java/feast/core/job/JobUpdateTask.java @@ -0,0 +1,213 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2019 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package feast.core.job; + +import feast.core.FeatureSetProto; +import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.SourceProto; +import feast.core.StoreProto; +import feast.core.log.Action; +import feast.core.log.AuditLogger; +import feast.core.log.Resource; +import feast.core.model.FeatureSet; +import feast.core.model.Job; +import feast.core.model.JobStatus; +import feast.core.model.Source; +import feast.core.model.Store; +import java.time.Instant; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.stream.Collectors; +import lombok.Getter; +import lombok.extern.slf4j.Slf4j; + +/** + * JobUpdateTask is a callable that starts or updates a job given a set of featureSetSpecs, as well + * as their source and sink. + * + *

When complete, the JobUpdateTask returns the updated Job object to be pushed to the db. + */ +@Slf4j +@Getter +public class JobUpdateTask implements Callable { + + private final List featureSetSpecs; + private final SourceProto.Source sourceSpec; + private final StoreProto.Store store; + private final Optional currentJob; + private JobManager jobManager; + private long jobUpdateTimeoutSeconds; + + public JobUpdateTask( + List featureSetSpecs, + SourceProto.Source sourceSpec, + StoreProto.Store store, + Optional currentJob, + JobManager jobManager, + long jobUpdateTimeoutSeconds) { + + this.featureSetSpecs = featureSetSpecs; + this.sourceSpec = sourceSpec; + this.store = store; + this.currentJob = currentJob; + this.jobManager = jobManager; + this.jobUpdateTimeoutSeconds = jobUpdateTimeoutSeconds; + } + + @Override + public Job call() { + ExecutorService executorService = Executors.newSingleThreadExecutor(); + Source source = Source.fromProto(sourceSpec); + Future submittedJob; + if (currentJob.isPresent()) { + Set existingFeatureSetsPopulatedByJob = + currentJob.get().getFeatureSets().stream() + .map(FeatureSet::getId) + .collect(Collectors.toSet()); + Set newFeatureSetsPopulatedByJob = + featureSetSpecs.stream() + .map(fs -> fs.getName() + ":" + fs.getVersion()) + .collect(Collectors.toSet()); + if (existingFeatureSetsPopulatedByJob.size() == newFeatureSetsPopulatedByJob.size() + && existingFeatureSetsPopulatedByJob.containsAll(newFeatureSetsPopulatedByJob)) { + Job job = currentJob.get(); + JobStatus newJobStatus = jobManager.getJobStatus(job); + if (newJobStatus != job.getStatus()) { + AuditLogger.log( + Resource.JOB, + job.getId(), + Action.STATUS_CHANGE, + "Job status updated: changed from %s to %s", + job.getStatus(), + newJobStatus); + } + job.setStatus(newJobStatus); + return job; + } else { + submittedJob = + executorService.submit(() -> updateJob(currentJob.get(), featureSetSpecs, store)); + } + } else { + String jobId = createJobId(source.getId(), 
store.getName()); + submittedJob = + executorService.submit(() -> startJob(jobId, featureSetSpecs, sourceSpec, store)); + } + + Job job = null; + try { + job = submittedJob.get(getJobUpdateTimeoutSeconds(), TimeUnit.SECONDS); + } catch (InterruptedException | ExecutionException | TimeoutException e) { + log.warn("Unable to start job for source {} and sink {}: {}", source, store, e.getMessage()); + executorService.shutdownNow(); + } + return job; + } + + /** Start or update the job to ingest data to the sink. */ + private Job startJob( + String jobId, + List featureSetSpecs, + SourceProto.Source source, + StoreProto.Store sinkSpec) { + + List featureSets = + featureSetSpecs.stream() + .map( + spec -> + FeatureSet.fromProto( + FeatureSetProto.FeatureSet.newBuilder().setSpec(spec).build())) + .collect(Collectors.toList()); + Job job = + new Job( + jobId, + "", + jobManager.getRunnerType().toString(), + Source.fromProto(source), + Store.fromProto(sinkSpec), + featureSets, + JobStatus.PENDING); + try { + AuditLogger.log( + Resource.JOB, + jobId, + Action.SUBMIT, + "Building graph and submitting to %s", + jobManager.getRunnerType().getName()); + + job = jobManager.startJob(job); + if (job.getExtId().isEmpty()) { + throw new RuntimeException( + String.format("Could not submit job: \n%s", "unable to retrieve job external id")); + } + + AuditLogger.log( + Resource.JOB, + jobId, + Action.STATUS_CHANGE, + "Job submitted to runner %s with ext id %s.", + jobManager.getRunnerType().getName(), + job.getExtId()); + + return job; + } catch (Exception e) { + AuditLogger.log( + Resource.JOB, + jobId, + Action.STATUS_CHANGE, + "Job failed to be submitted to runner %s. 
Job status changed to ERROR.", + jobManager.getRunnerType().getName()); + + job.setStatus(JobStatus.ERROR); + return job; + } + } + + /** Update the given job */ + private Job updateJob(Job job, List featureSetSpecs, StoreProto.Store store) { + job.setFeatureSets( + featureSetSpecs.stream() + .map( + spec -> + FeatureSet.fromProto( + FeatureSetProto.FeatureSet.newBuilder().setSpec(spec).build())) + .collect(Collectors.toList())); + job.setStore(feast.core.model.Store.fromProto(store)); + AuditLogger.log( + Resource.JOB, + job.getId(), + Action.UPDATE, + "Updating job %s for runner %s", + job.getId(), + jobManager.getRunnerType().getName()); + return jobManager.updateJob(job); + } + + String createJobId(String sourceId, String storeName) { + String dateSuffix = String.valueOf(Instant.now().toEpochMilli()); + String sourceIdTrunc = sourceId.split("/")[0].toLowerCase(); + String jobId = String.format("%s-to-%s", sourceIdTrunc, storeName) + dateSuffix; + return jobId.replaceAll("_", "-"); + } +} diff --git a/core/src/main/java/feast/core/job/ScheduledJobMonitor.java b/core/src/main/java/feast/core/job/ScheduledJobMonitor.java deleted file mode 100644 index cc87d5fcf6e..00000000000 --- a/core/src/main/java/feast/core/job/ScheduledJobMonitor.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2019 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package feast.core.job; - -import com.google.common.base.Strings; -import feast.core.dao.JobInfoRepository; -import feast.core.log.Action; -import feast.core.log.AuditLogger; -import feast.core.log.Resource; -import feast.core.model.JobInfo; -import feast.core.model.JobStatus; -import java.util.Collection; -import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.scheduling.annotation.Scheduled; -import org.springframework.stereotype.Component; -import org.springframework.transaction.annotation.Transactional; - -@Slf4j -@Component -public class ScheduledJobMonitor { - - private final JobMonitor jobMonitor; - private final JobInfoRepository jobInfoRepository; - - @Autowired - public ScheduledJobMonitor(JobMonitor jobMonitor, JobInfoRepository jobInfoRepository) { - this.jobMonitor = jobMonitor; - this.jobInfoRepository = jobInfoRepository; - } - - // TODO: Keep receiving the following exception with these arguments below - // Caused by: java.lang.IllegalStateException: Encountered invalid @Scheduled method - // 'pollStatusAndMetrics': Circular placeholder reference .. in property definitions - // @Scheduled( - // fixedDelayString = "${feast.jobs.monitor.fixedDelay}", - // initialDelayString = "${feast.jobs.monitor.initialDelay}") - // - @Transactional - @Scheduled(cron = "* * * * * *") - public void pollStatusAndMetrics() { - updateJobStatus(); - } - - /** Periodically pull status of job which is not in terminal state and update the status in DB. 
*/ - /* package */ void updateJobStatus() { - if (jobMonitor instanceof NoopJobMonitor) { - return; - } - - Collection nonTerminalJobs = - jobInfoRepository.findByStatusNotIn(JobStatus.getTerminalState()); - - for (JobInfo job : nonTerminalJobs) { - String jobId = job.getExtId(); - if (Strings.isNullOrEmpty(jobId)) { - continue; - } - JobStatus jobStatus = jobMonitor.getJobStatus(job); - if (job.getStatus() != jobStatus) { - AuditLogger.log( - Resource.JOB, - jobId, - Action.STATUS_CHANGE, - "Job status updated from %s to %s", - job.getStatus(), - jobStatus); - } - job.setStatus(jobStatus); - jobInfoRepository.save(job); - } - } -} diff --git a/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java b/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java index 4e4533c4c9a..92763e7971f 100644 --- a/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java +++ b/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java @@ -19,19 +19,22 @@ import static feast.core.util.PipelineUtil.detectClassPathResourcesToStage; import com.google.api.services.dataflow.Dataflow; -import com.google.api.services.dataflow.model.Job; import com.google.common.base.Strings; import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.util.JsonFormat; import com.google.protobuf.util.JsonFormat.Printer; import feast.core.FeatureSetProto.FeatureSetSpec; -import feast.core.StoreProto.Store; +import feast.core.SourceProto; +import feast.core.StoreProto; import feast.core.config.FeastProperties.MetricsProperties; import feast.core.exception.JobExecutionException; import feast.core.job.JobManager; import feast.core.job.Runner; import feast.core.model.FeatureSet; -import feast.core.model.JobInfo; +import feast.core.model.Job; +import feast.core.model.JobStatus; +import feast.core.model.Source; +import feast.core.model.Store; import feast.core.util.TypeConversion; import feast.ingestion.ImportJob; import 
feast.ingestion.options.ImportOptions; @@ -40,6 +43,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; import org.apache.beam.runners.dataflow.DataflowPipelineJob; import org.apache.beam.runners.dataflow.DataflowRunner; @@ -72,27 +76,38 @@ public Runner getRunnerType() { } @Override - public String startJob(String name, List featureSets, Store sink) { - return submitDataflowJob(name, featureSets, sink, false); + public Job startJob(Job job) { + List featureSetSpecs = + job.getFeatureSets().stream() + .map(fs -> fs.toProto().getSpec()) + .collect(Collectors.toList()); + try { + return submitDataflowJob( + job.getId(), featureSetSpecs, job.getSource().toProto(), job.getStore().toProto(), false); + } catch (InvalidProtocolBufferException e) { + throw new RuntimeException(String.format("Unable to start job %s", job.getId()), e); + } } /** * Update an existing Dataflow job. * - * @param jobInfo jobInfo of target job to change + * @param job job of target job to change * @return Dataflow-specific job id */ @Override - public String updateJob(JobInfo jobInfo) { + public Job updateJob(Job job) { try { - List featureSetSpecs = new ArrayList<>(); - for (FeatureSet featureSet : jobInfo.getFeatureSets()) { - featureSetSpecs.add(featureSet.toProto()); - } + List featureSetSpecs = + job.getFeatureSets().stream() + .map(fs -> fs.toProto().getSpec()) + .collect(Collectors.toList()); + return submitDataflowJob( - jobInfo.getId(), featureSetSpecs, jobInfo.getStore().toProto(), true); + job.getId(), featureSetSpecs, job.getSource().toProto(), job.getStore().toProto(), true); + } catch (InvalidProtocolBufferException e) { - throw new RuntimeException(String.format("Unable to update job %s", jobInfo.getId()), e); + throw new RuntimeException(String.format("Unable to update job %s", job.getId()), e); } } @@ -104,9 +119,10 @@ public String updateJob(JobInfo jobInfo) { @Override public void 
abortJob(String dataflowJobId) { try { - Job job = + com.google.api.services.dataflow.model.Job job = dataflow.projects().locations().jobs().get(projectId, location, dataflowJobId).execute(); - Job content = new Job(); + com.google.api.services.dataflow.model.Job content = + new com.google.api.services.dataflow.model.Job(); if (job.getType().equals(DataflowJobType.JOB_TYPE_BATCH.toString())) { content.setRequestedState(DataflowJobState.JOB_STATE_CANCELLED.toString()); } else if (job.getType().equals(DataflowJobType.JOB_TYPE_STREAMING.toString())) { @@ -125,13 +141,63 @@ public void abortJob(String dataflowJobId) { } } - private String submitDataflowJob( - String jobName, List featureSets, Store sink, boolean update) { + /** + * Get status of a dataflow job with given id and try to map it into Feast's JobStatus. + * + * @param job Job containing dataflow job id + * @return status of the job, or return {@link JobStatus#UNKNOWN} if error happens. + */ + @Override + public JobStatus getJobStatus(Job job) { + if (!Runner.DATAFLOW.getName().equals(job.getRunner())) { + return job.getStatus(); + } + + try { + com.google.api.services.dataflow.model.Job dataflowJob = + dataflow + .projects() + .locations() + .jobs() + .get(projectId, location, job.getExtId()) + .execute(); + return DataflowJobStateMapper.map(dataflowJob.getCurrentState()); + } catch (Exception e) { + log.error( + "Unable to retrieve status of a dataflow job with id : {}\ncause: {}", + job.getExtId(), + e.getMessage()); + } + return JobStatus.UNKNOWN; + } + + private Job submitDataflowJob( + String jobName, + List featureSetSpecs, + SourceProto.Source source, + StoreProto.Store sink, + boolean update) { try { - ImportOptions pipelineOptions = getPipelineOptions(jobName, featureSets, sink, update); + ImportOptions pipelineOptions = getPipelineOptions(jobName, featureSetSpecs, sink, update); DataflowPipelineJob pipelineResult = runPipeline(pipelineOptions); + List featureSets = + featureSetSpecs.stream() + 
.map( + spec -> { + FeatureSet featureSet = new FeatureSet(); + featureSet.setId(spec.getName() + ":" + spec.getVersion()); + return featureSet; + }) + .collect(Collectors.toList()); String jobId = waitForJobToRun(pipelineResult); - return jobId; + return new Job( + jobName, + jobId, + getRunnerType().getName(), + Source.fromProto(source), + Store.fromProto(sink), + featureSets, + JobStatus.PENDING); } catch (Exception e) { log.error("Error submitting job", e); throw new JobExecutionException(String.format("Error running ingestion job: %s", e), e); @@ -139,7 +205,7 @@ private String submitDataflowJob( } private ImportOptions getPipelineOptions( - String jobName, List featureSets, Store sink, boolean update) + String jobName, List featureSets, StoreProto.Store sink, boolean update) throws IOException { String[] args = TypeConversion.convertMapToArgs(defaultOptions); ImportOptions pipelineOptions = PipelineOptionsFactory.fromArgs(args).as(ImportOptions.class); diff --git a/core/src/main/java/feast/core/job/dataflow/DataflowJobMonitor.java b/core/src/main/java/feast/core/job/dataflow/DataflowJobMonitor.java deleted file mode 100644 index 93948785482..00000000000 --- a/core/src/main/java/feast/core/job/dataflow/DataflowJobMonitor.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2019 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package feast.core.job.dataflow; - -import static com.google.common.base.Preconditions.checkNotNull; - -import com.google.api.services.dataflow.Dataflow; -import com.google.api.services.dataflow.model.Job; -import feast.core.job.JobMonitor; -import feast.core.job.Runner; -import feast.core.model.JobInfo; -import feast.core.model.JobStatus; -import lombok.extern.slf4j.Slf4j; - -@Slf4j -public class DataflowJobMonitor implements JobMonitor { - - private final String projectId; - private final String location; - private final Dataflow dataflow; - private final DataflowJobStateMapper jobStateMaper; - - private static final String METRICS_NAMESPACE_KEY = "namespace"; - private static final String FEAST_METRICS_NAMESPACE = "feast"; - - public DataflowJobMonitor(Dataflow dataflow, String projectId, String location) { - checkNotNull(projectId); - checkNotNull(location); - this.projectId = projectId; - this.location = location; - this.dataflow = dataflow; - this.jobStateMaper = new DataflowJobStateMapper(); - } - - /** - * Get status of a dataflow job with given id and try to map it into Feast's JobStatus. - * - * @param jobInfo dataflow job id. - * @return status of the job, or return {@link JobStatus#UNKNOWN} if error happens. 
- */ - public JobStatus getJobStatus(JobInfo jobInfo) { - if (!Runner.DATAFLOW.getName().equals(jobInfo.getRunner())) { - return jobInfo.getStatus(); - } - - try { - Job job = - dataflow - .projects() - .locations() - .jobs() - .get(projectId, location, jobInfo.getExtId()) - .execute(); - return jobStateMaper.map(job.getCurrentState()); - } catch (Exception e) { - log.error( - "Unable to retrieve status of a dataflow job with id : {}\ncause: {}", - jobInfo.getExtId(), - e.getMessage()); - } - return JobStatus.UNKNOWN; - } -} diff --git a/core/src/main/java/feast/core/job/dataflow/DataflowJobStateMapper.java b/core/src/main/java/feast/core/job/dataflow/DataflowJobStateMapper.java index c94c84ce8e6..ec5738be699 100644 --- a/core/src/main/java/feast/core/job/dataflow/DataflowJobStateMapper.java +++ b/core/src/main/java/feast/core/job/dataflow/DataflowJobStateMapper.java @@ -58,7 +58,7 @@ public class DataflowJobStateMapper { * @return JobStatus. * @throws IllegalArgumentException if jobState is invalid. 
*/ - public JobStatus map(String jobState) { + public static JobStatus map(String jobState) { DataflowJobState dfJobState = DataflowJobState.valueOf(jobState); if (DATAFLOW_TO_FEAST_JOB_STATUS.containsKey(dfJobState)) { return DATAFLOW_TO_FEAST_JOB_STATUS.get(dfJobState); diff --git a/core/src/main/java/feast/core/job/direct/DirectJobStateMapper.java b/core/src/main/java/feast/core/job/direct/DirectJobStateMapper.java index e0e521e6a45..dd1c81d83ef 100644 --- a/core/src/main/java/feast/core/job/direct/DirectJobStateMapper.java +++ b/core/src/main/java/feast/core/job/direct/DirectJobStateMapper.java @@ -42,7 +42,7 @@ public class DirectJobStateMapper { * @param jobState beam PipelineResult State * @return JobStatus */ - public JobStatus map(State jobState) { + public static JobStatus map(State jobState) { return BEAM_TO_FEAT_JOB_STATUS.get(jobState); } } diff --git a/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java b/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java index a09fd394952..89c6dc38488 100644 --- a/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java +++ b/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java @@ -27,7 +27,8 @@ import feast.core.job.JobManager; import feast.core.job.Runner; import feast.core.model.FeatureSet; -import feast.core.model.JobInfo; +import feast.core.model.Job; +import feast.core.model.JobStatus; import feast.core.util.TypeConversion; import feast.ingestion.ImportJob; import feast.ingestion.options.ImportOptions; @@ -36,6 +37,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; import org.apache.beam.runners.direct.DirectRunner; import org.apache.beam.sdk.PipelineResult; @@ -67,19 +69,22 @@ public Runner getRunnerType() { /** * Start a direct runner job. 
* - * @param name of job to run - * @param featureSetSpecs list of specs for featureSets to be populated by the job - * @param sinkSpec Store to sink features to + * @param job Job to start */ @Override - public String startJob( - String name, List featureSetSpecs, StoreProto.Store sinkSpec) { + public Job startJob(Job job) { try { - ImportOptions pipelineOptions = getPipelineOptions(featureSetSpecs, sinkSpec); + List featureSetSpecs = + job.getFeatureSets().stream() + .map(fs -> fs.toProto().getSpec()) + .collect(Collectors.toList()); + ImportOptions pipelineOptions = getPipelineOptions(featureSetSpecs, job.getStore().toProto()); PipelineResult pipelineResult = runPipeline(pipelineOptions); - DirectJob directJob = new DirectJob(name, pipelineResult); + DirectJob directJob = new DirectJob(job.getId(), pipelineResult); jobs.add(directJob); - return name; + job.setExtId(job.getId()); + job.setStatus(JobStatus.RUNNING); + return job; } catch (Exception e) { log.error("Error submitting job", e); throw new JobExecutionException(String.format("Error running ingestion job: %s", e), e); @@ -118,23 +123,22 @@ private ImportOptions getPipelineOptions( * *

As a rule of thumb, direct jobs in feast should only be used for testing. * - * @param jobInfo jobInfo of target job to change + * @param job job of target job to change * @return jobId of the job */ @Override - public String updateJob(JobInfo jobInfo) { - String jobId = jobInfo.getExtId(); + public Job updateJob(Job job) { + String jobId = job.getExtId(); abortJob(jobId); try { List featureSetSpecs = new ArrayList<>(); - for (FeatureSet featureSet : jobInfo.getFeatureSets()) { - featureSetSpecs.add(featureSet.toProto()); + for (FeatureSet featureSet : job.getFeatureSets()) { + featureSetSpecs.add(featureSet.toProto().getSpec()); } - startJob(jobId, featureSetSpecs, jobInfo.getStore().toProto()); - } catch (JobExecutionException | InvalidProtocolBufferException e) { + return startJob(job); + } catch (JobExecutionException e) { throw new JobExecutionException(String.format("Error running ingestion job: %s", e), e); } - return jobId; } /** @@ -157,4 +161,20 @@ public void abortJob(String extId) { public PipelineResult runPipeline(ImportOptions pipelineOptions) throws IOException { return ImportJob.runPipeline(pipelineOptions); } + + /** + * Gets the state of the direct runner job. Direct runner jobs only have 2 states: RUNNING and + * ABORTED. + * + * @param job Job of the desired job. + * @return JobStatus of the job. 
+ */ + @Override + public JobStatus getJobStatus(Job job) { + DirectJob directJob = jobs.get(job.getId()); + if (directJob == null) { + return JobStatus.ABORTED; + } + return DirectJobStateMapper.map(directJob.getPipelineResult().getState()); + } } diff --git a/core/src/main/java/feast/core/job/direct/DirectRunnerJobMonitor.java b/core/src/main/java/feast/core/job/direct/DirectRunnerJobMonitor.java deleted file mode 100644 index 50d02b2728b..00000000000 --- a/core/src/main/java/feast/core/job/direct/DirectRunnerJobMonitor.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2019 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package feast.core.job.direct; - -import feast.core.job.JobMonitor; -import feast.core.model.JobInfo; -import feast.core.model.JobStatus; -import lombok.extern.slf4j.Slf4j; - -@Slf4j -public class DirectRunnerJobMonitor implements JobMonitor { - - private final DirectJobRegistry jobs; - private final DirectJobStateMapper jobStateMapper; - - public DirectRunnerJobMonitor(DirectJobRegistry jobs) { - this.jobs = jobs; - jobStateMapper = new DirectJobStateMapper(); - } - - @Override - public JobStatus getJobStatus(JobInfo job) { - DirectJob directJob = jobs.get(job.getId()); - if (directJob == null) { - return JobStatus.ABORTED; - } - return jobStateMapper.map(directJob.getPipelineResult().getState()); - } -} diff --git a/core/src/main/java/feast/core/model/AbstractTimestampEntity.java b/core/src/main/java/feast/core/model/AbstractTimestampEntity.java index cacaa51adbb..7a544a9cbe3 100644 --- a/core/src/main/java/feast/core/model/AbstractTimestampEntity.java +++ b/core/src/main/java/feast/core/model/AbstractTimestampEntity.java @@ -16,6 +16,7 @@ */ package feast.core.model; +import java.time.Instant; import java.util.Date; import javax.persistence.*; import lombok.Data; @@ -44,4 +45,10 @@ protected void onCreate() { protected void onUpdate() { lastUpdated = new Date(); } + + // This constructor is used for testing. 
+ public AbstractTimestampEntity() { + this.created = Date.from(Instant.ofEpochMilli(0L)); + this.lastUpdated = Date.from(Instant.ofEpochMilli(0L)); + } } diff --git a/core/src/main/java/feast/core/model/FeatureSet.java b/core/src/main/java/feast/core/model/FeatureSet.java index 8ba7162d2f2..388b27cb04a 100644 --- a/core/src/main/java/feast/core/model/FeatureSet.java +++ b/core/src/main/java/feast/core/model/FeatureSet.java @@ -17,9 +17,12 @@ package feast.core.model; import com.google.protobuf.Duration; -import com.google.protobuf.InvalidProtocolBufferException; +import com.google.protobuf.Timestamp; +import feast.core.FeatureSetProto; import feast.core.FeatureSetProto.EntitySpec; +import feast.core.FeatureSetProto.FeatureSetMeta; import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.FeatureSetProto.FeatureSetStatus; import feast.core.FeatureSetProto.FeatureSpec; import feast.types.ValueProto.ValueType; import java.util.ArrayList; @@ -79,6 +82,10 @@ public class FeatureSet extends AbstractTimestampEntity implements Comparable entities, List features, - Source source) { + Source source, + FeatureSetStatus status) { this.id = String.format("%s:%s", name, version); this.name = name; this.version = version; @@ -97,9 +105,11 @@ public FeatureSet( this.entities = entities; this.features = features; this.source = source; + this.status = status.toString(); } - public static FeatureSet fromProto(FeatureSetSpec featureSetSpec) { + public static FeatureSet fromProto(FeatureSetProto.FeatureSet featureSetProto) { + FeatureSetSpec featureSetSpec = featureSetProto.getSpec(); Source source = Source.fromProto(featureSetSpec.getSource()); String id = String.format("%s:%d", featureSetSpec.getName(), featureSetSpec.getVersion()); List features = new ArrayList<>(); @@ -117,10 +127,11 @@ public static FeatureSet fromProto(FeatureSetSpec featureSetSpec) { featureSetSpec.getMaxAge().getSeconds(), entities, features, - source); + source, + 
featureSetProto.getMeta().getStatus()); } - public FeatureSetSpec toProto() throws InvalidProtocolBufferException { + public FeatureSetProto.FeatureSet toProto() { List entitySpecs = new ArrayList<>(); for (Field entity : entities) { entitySpecs.add( @@ -138,14 +149,22 @@ public FeatureSetSpec toProto() throws InvalidProtocolBufferException { .setValueType(ValueType.Enum.valueOf(feature.getType())) .build()); } - return FeatureSetSpec.newBuilder() - .setName(name) - .setVersion(version) - .setMaxAge(Duration.newBuilder().setSeconds(maxAgeSeconds)) - .addAllEntities(entitySpecs) - .addAllFeatures(featureSpecs) - .setSource(source.toProto()) - .build(); + FeatureSetMeta.Builder meta = + FeatureSetMeta.newBuilder() + .setCreatedTimestamp( + Timestamp.newBuilder().setSeconds(super.getCreated().getTime() / 1000L)) + .setStatus(FeatureSetStatus.valueOf(status)); + + FeatureSetSpec.Builder spec = + FeatureSetSpec.newBuilder() + .setName(name) + .setVersion(version) + .setMaxAge(Duration.newBuilder().setSeconds(maxAgeSeconds)) + .addAllEntities(entitySpecs) + .addAllFeatures(featureSpecs) + .setSource(source.toProto()); + + return FeatureSetProto.FeatureSet.newBuilder().setMeta(meta).setSpec(spec).build(); } /** @@ -155,50 +174,49 @@ public FeatureSetSpec toProto() throws InvalidProtocolBufferException { * @return boolean denoting if the source or schema have changed. 
*/ public boolean equalTo(FeatureSet other) { - if(!name.equals(other.getName())){ + if (!name.equals(other.getName())) { return false; } - if (!source.equalTo(other.getSource())){ + if (!source.equalTo(other.getSource())) { return false; } - if (maxAgeSeconds != other.maxAgeSeconds){ + if (maxAgeSeconds != other.maxAgeSeconds) { return false; } // Create a map of all fields in this feature set Map fields = new HashMap<>(); - for (Field e : entities){ + for (Field e : entities) { fields.putIfAbsent(e.getName(), e); } - for (Field f : features){ + for (Field f : features) { fields.putIfAbsent(f.getName(), f); } // Ensure map size is consistent with existing fields - if (fields.size() != other.features.size() + other.entities.size()) - { + if (fields.size() != other.features.size() + other.entities.size()) { return false; } // Ensure the other entities and fields exist in the field map - for (Field e : other.entities){ - if(!fields.containsKey(e.getName())){ + for (Field e : other.entities) { + if (!fields.containsKey(e.getName())) { return false; } - if (!e.equals(fields.get(e.getName()))){ + if (!e.equals(fields.get(e.getName()))) { return false; } } - for (Field f : features){ - if(!fields.containsKey(f.getName())){ + for (Field f : features) { + if (!fields.containsKey(f.getName())) { return false; } - if (!f.equals(fields.get(f.getName()))){ + if (!f.equals(fields.get(f.getName()))) { return false; } } diff --git a/core/src/main/java/feast/core/model/JobInfo.java b/core/src/main/java/feast/core/model/Job.java similarity index 94% rename from core/src/main/java/feast/core/model/JobInfo.java rename to core/src/main/java/feast/core/model/Job.java index 74d3402af56..851e68367b5 100644 --- a/core/src/main/java/feast/core/model/JobInfo.java +++ b/core/src/main/java/feast/core/model/Job.java @@ -39,7 +39,7 @@ @Setter @Entity @Table(name = "jobs") -public class JobInfo extends AbstractTimestampEntity { +public class Job extends AbstractTimestampEntity { // Internal job 
name. Generated by feast ingestion upon invocation. @Id private String id; @@ -71,18 +71,18 @@ public class JobInfo extends AbstractTimestampEntity { private List featureSets; // Job Metrics - @OneToMany(mappedBy = "jobInfo", cascade = CascadeType.ALL) + @OneToMany(mappedBy = "job", cascade = CascadeType.ALL) private List metrics; @Enumerated(EnumType.STRING) @Column(name = "status", length = 16) private JobStatus status; - public JobInfo() { + public Job() { super(); } - public JobInfo( + public Job( String id, String extId, String runner, diff --git a/core/src/main/java/feast/core/model/Metrics.java b/core/src/main/java/feast/core/model/Metrics.java index 1e25222baff..0b7514816fa 100644 --- a/core/src/main/java/feast/core/model/Metrics.java +++ b/core/src/main/java/feast/core/model/Metrics.java @@ -41,7 +41,7 @@ public class Metrics extends AbstractTimestampEntity { @ManyToOne(fetch = FetchType.LAZY) @JoinColumn(name = "job_id") - private JobInfo jobInfo; + private Job job; /** Metrics name */ private String name; @@ -56,8 +56,8 @@ public class Metrics extends AbstractTimestampEntity { * @param metricsName metrics name. * @param value metrics value. 
*/ - public Metrics(JobInfo job, String metricsName, double value) { - this.jobInfo = job; + public Metrics(Job job, String metricsName, double value) { + this.job = job; this.name = metricsName; this.value = value; } diff --git a/core/src/main/java/feast/core/service/JobCoordinatorService.java b/core/src/main/java/feast/core/service/JobCoordinatorService.java index c56531a9da7..76a1cc27dd8 100644 --- a/core/src/main/java/feast/core/service/JobCoordinatorService.java +++ b/core/src/main/java/feast/core/service/JobCoordinatorService.java @@ -16,30 +16,38 @@ */ package feast.core.service; -import com.google.common.base.Strings; +import com.google.protobuf.InvalidProtocolBufferException; +import feast.core.CoreServiceProto.ListFeatureSetsRequest; +import feast.core.CoreServiceProto.ListStoresRequest.Filter; +import feast.core.CoreServiceProto.ListStoresResponse; +import feast.core.FeatureSetProto; import feast.core.FeatureSetProto.FeatureSetSpec; -import feast.core.SourceProto; +import feast.core.FeatureSetProto.FeatureSetStatus; import feast.core.StoreProto; -import feast.core.dao.JobInfoRepository; -import feast.core.exception.JobExecutionException; -import feast.core.exception.RetrievalException; +import feast.core.StoreProto.Store.Subscription; +import feast.core.config.FeastProperties.JobUpdatesProperties; +import feast.core.dao.FeatureSetRepository; +import feast.core.dao.JobRepository; import feast.core.job.JobManager; -import feast.core.log.Action; -import feast.core.log.AuditLogger; -import feast.core.log.Resource; +import feast.core.job.JobUpdateTask; import feast.core.model.FeatureSet; -import feast.core.model.JobInfo; +import feast.core.model.Job; import feast.core.model.JobStatus; import feast.core.model.Source; import feast.core.model.Store; -import java.time.Instant; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.Optional; import java.util.Set; +import java.util.concurrent.ExecutionException; +import 
java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.scheduling.annotation.Scheduled; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; @@ -47,165 +55,153 @@ @Service public class JobCoordinatorService { - private JobInfoRepository jobInfoRepository; + private final long POLLING_INTERVAL_MILLISECONDS = 60000; // 1 min + private JobRepository jobRepository; + private FeatureSetRepository featureSetRepository; + private SpecService specService; private JobManager jobManager; + private JobUpdatesProperties jobUpdatesProperties; @Autowired - public JobCoordinatorService(JobInfoRepository jobInfoRepository, JobManager jobManager) { - this.jobInfoRepository = jobInfoRepository; + public JobCoordinatorService( + JobRepository jobRepository, + FeatureSetRepository featureSetRepository, + SpecService specService, + JobManager jobManager, + JobUpdatesProperties jobUpdatesProperties) { + this.jobRepository = jobRepository; + this.featureSetRepository = featureSetRepository; + this.specService = specService; this.jobManager = jobManager; + this.jobUpdatesProperties = jobUpdatesProperties; } /** - * Start or update a job given the list of FeatureSets to populate and the store to sink to. If - * there has been no change in the featureSet, and there is a running job for the featureSet, this - * method will do nothing. + * Poll does the following: + * + *

1) Checks DB and extracts jobs that have to run based on the specs available + * + *

2) Does a diff with the current set of jobs, starts/updates job(s) if necessary + * + *

3) Updates job object in DB with status, feature sets + * + *

4) Updates Feature set statuses */ @Transactional - public JobInfo startOrUpdateJob( - List featureSetSpecs, SourceProto.Source sourceSpec, StoreProto.Store store) { - Source source = Source.fromProto(sourceSpec); - Optional job = getJob(source.getId(), store.getName()); - if (job.isPresent()) { - Set existingFeatureSetsPopulatedByJob = - job.get().getFeatureSets().stream().map(FeatureSet::getId).collect(Collectors.toSet()); - Set newFeatureSetsPopulatedByJob = + @Scheduled(fixedDelay = POLLING_INTERVAL_MILLISECONDS) + public void Poll() { + log.info("Polling for new jobs..."); + List jobUpdateTasks = new ArrayList<>(); + ListStoresResponse listStoresResponse = specService.listStores(Filter.newBuilder().build()); + for (StoreProto.Store store : listStoresResponse.getStoreList()) { + Set featureSetSpecs = new HashSet<>(); + try { + for (Subscription subscription : store.getSubscriptionsList()) { + featureSetSpecs.addAll( + specService + .listFeatureSets( + ListFeatureSetsRequest.Filter.newBuilder() + .setFeatureSetName(subscription.getName()) + .setFeatureSetVersion(subscription.getVersion()) + .build()) + .getFeatureSetsList().stream() + .map(FeatureSetProto.FeatureSet::getSpec) + .collect(Collectors.toList())); + } + if (!featureSetSpecs.isEmpty()) { featureSetSpecs.stream() - .map(fs -> fs.getName() + ":" + fs.getVersion()) - .collect(Collectors.toSet()); - if (existingFeatureSetsPopulatedByJob.size() == newFeatureSetsPopulatedByJob.size() - && existingFeatureSetsPopulatedByJob.containsAll(newFeatureSetsPopulatedByJob)) { - return job.get(); - } else { - return updateJob(job.get(), featureSetSpecs, store); + .collect(Collectors.groupingBy(FeatureSetSpec::getSource)) + .entrySet() + .stream() + .forEach( + kv -> { + Optional originalJob = + getJob(Source.fromProto(kv.getKey()), Store.fromProto(store)); + jobUpdateTasks.add( + new JobUpdateTask( + kv.getValue(), + kv.getKey(), + store, + originalJob, + jobManager, + jobUpdatesProperties.getTimeoutSeconds())); + }); 
+ } + } catch (InvalidProtocolBufferException e) { + log.warn("Unable to retrieve feature sets for store {}: {}", store, e.getMessage()); } - } else { - return startJob( - createJobId(source.getId(), store.getName()), featureSetSpecs, sourceSpec, store); } - } - - /** Get the non-terminal job associated with the given featureSet name and store name, if any. */ - private Optional getJob(String sourceId, String storeName) { - List jobs = jobInfoRepository.findBySourceIdAndStoreName(sourceId, storeName); - if (jobs.isEmpty()) { - return Optional.empty(); + if (jobUpdateTasks.size() == 0) { + log.info("No jobs found."); + return; } - return jobs.stream() - .filter(job -> !(JobStatus.getTerminalState().contains(job.getStatus()))) - .findFirst(); - } - - /** Start or update the job to ingest data to the sink. */ - private JobInfo startJob( - String jobId, - List featureSetSpecs, - SourceProto.Source source, - StoreProto.Store sinkSpec) { - try { - AuditLogger.log( - Resource.JOB, - jobId, - Action.SUBMIT, - "Building graph and submitting to %s", - jobManager.getRunnerType().getName()); - String extId = jobManager.startJob(jobId, featureSetSpecs, sinkSpec); - if (extId.isEmpty()) { - throw new RuntimeException( - String.format("Could not submit job: \n%s", "unable to retrieve job external id")); + log.info("Creating/Updating {} jobs...", jobUpdateTasks.size()); + ExecutorService executorService = Executors.newFixedThreadPool(jobUpdateTasks.size()); + ExecutorCompletionService ecs = new ExecutorCompletionService<>(executorService); + jobUpdateTasks.forEach(ecs::submit); + + int completedTasks = 0; + while (completedTasks < jobUpdateTasks.size()) { + try { + Job job = ecs.take().get(); + if (job != null) { + jobRepository.saveAndFlush(job); + } + } catch (ExecutionException | InterruptedException e) { + log.warn("Unable to start or update job: {}", e.getMessage()); } - - AuditLogger.log( - Resource.JOB, - jobId, - Action.STATUS_CHANGE, - "Job submitted to runner %s with ext 
id %s.", - jobManager.getRunnerType().getName(), - extId); - - List featureSets = new ArrayList<>(); - - for (FeatureSetSpec featureSetSpec : featureSetSpecs) { - FeatureSet featureSet = new FeatureSet(); - featureSet.setId(featureSetSpec.getName() + ":" + featureSetSpec.getVersion()); - featureSets.add(featureSet); - } - - JobInfo jobInfo = - new JobInfo( - jobId, - extId, - jobManager.getRunnerType().getName(), - Source.fromProto(source), - Store.fromProto(sinkSpec), - featureSets, - JobStatus.RUNNING); - - return jobInfoRepository.save(jobInfo); - } catch (Exception e) { - updateJobStatus(jobId, JobStatus.ERROR); - AuditLogger.log( - Resource.JOB, - jobId, - Action.STATUS_CHANGE, - "Job failed to be submitted to runner %s. Job status changed to ERROR.", - jobManager.getRunnerType().getName()); - throw new JobExecutionException(String.format("Error running ingestion job: %s", e), e); + completedTasks++; } - } - /** Update the given job */ - private JobInfo updateJob( - JobInfo jobInfo, List featureSetSpecs, StoreProto.Store store) { - jobInfo.setFeatureSets( - featureSetSpecs.stream() - .map(spec -> FeatureSet.fromProto(spec)) - .collect(Collectors.toList())); - jobInfo.setStore(Store.fromProto(store)); - String extId = jobManager.updateJob(jobInfo); - jobInfo.setExtId(extId); - return jobInfoRepository.save(jobInfo); + log.info("Updating feature set status"); + updateFeatureSetStatuses(jobUpdateTasks); } - /** - * Drain the given job. If this is successful, the job will start the draining process. When the - * draining process is complete, the job will be cleaned up and removed. - * - *

Batch jobs will be cancelled, as draining these jobs is not supported by beam. - * - * @param id feast-internal id of a job - */ - public void abortJob(String id) { - Optional jobOptional = jobInfoRepository.findById(id); - if (!jobOptional.isPresent()) { - throw new RetrievalException(Strings.lenientFormat("Unable to retrieve job with id %s", id)); - } - JobInfo job = jobOptional.get(); - if (JobStatus.getTerminalState().contains(job.getStatus())) { - throw new IllegalStateException("Unable to stop job already in terminal state"); + // TODO: make this more efficient + private void updateFeatureSetStatuses(List jobUpdateTasks) { + Set ready = new HashSet<>(); + Set pending = new HashSet<>(); + for (JobUpdateTask jobUpdateTask : jobUpdateTasks) { + Optional job = + getJob( + Source.fromProto(jobUpdateTask.getSourceSpec()), + Store.fromProto(jobUpdateTask.getStore())); + if (job.isPresent()) { + if (job.get().getStatus() == JobStatus.RUNNING) { + ready.addAll(job.get().getFeatureSets()); + } else { + pending.addAll(job.get().getFeatureSets()); + } + } } - jobManager.abortJob(job.getExtId()); - job.setStatus(JobStatus.ABORTING); - - AuditLogger.log(Resource.JOB, id, Action.ABORT, "Triggering draining of job"); - jobInfoRepository.saveAndFlush(job); + ready.removeAll(pending); + ready.forEach( + fs -> { + fs.setStatus(FeatureSetStatus.STATUS_READY.toString()); + featureSetRepository.save(fs); + }); + pending.forEach( + fs -> { + fs.setStatus(FeatureSetStatus.STATUS_PENDING.toString()); + featureSetRepository.save(fs); + }); + featureSetRepository.flush(); } - /** Update a given job's status */ - public void updateJobStatus(String jobId, JobStatus status) { - Optional jobRecordOptional = jobInfoRepository.findById(jobId); - if (jobRecordOptional.isPresent()) { - JobInfo jobRecord = jobRecordOptional.get(); - jobRecord.setStatus(status); - jobInfoRepository.save(jobRecord); + @Transactional + public Optional getJob(Source source, Store store) { + List jobs = + 
jobRepository.findBySourceIdAndStoreNameOrderByLastUpdatedDesc( + source.getId(), store.getName()); + jobs = + jobs.stream() + .filter(job -> !JobStatus.getTerminalState().contains(job.getStatus())) + .collect(Collectors.toList()); + if (jobs.size() == 0) { + return Optional.empty(); } - } - - public String createJobId(String sourceId, String storeName) { - String dateSuffix = String.valueOf(Instant.now().toEpochMilli()); - String sourceIdTrunc = sourceId.split("/")[0].toLowerCase(); - String jobId = String.format("%s-to-%s", sourceIdTrunc, storeName) + dateSuffix; - return jobId.replaceAll("_", "-"); + // return the latest + return Optional.of(jobs.get(0)); } } diff --git a/core/src/main/java/feast/core/service/JobStatusService.java b/core/src/main/java/feast/core/service/JobStatusService.java deleted file mode 100644 index db6cd41ee8b..00000000000 --- a/core/src/main/java/feast/core/service/JobStatusService.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2019 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package feast.core.service; - -import lombok.extern.slf4j.Slf4j; -import org.springframework.stereotype.Service; - -@Slf4j -@Service -public class JobStatusService { - // - // private JobInfoRepository jobInfoRepository; - // private MetricsRepository metricsRepository; - // - // @Autowired - // public JobStatusService( - // JobInfoRepository jobInfoRepository, - // MetricsRepository metricsRepository) { - // this.jobInfoRepository = jobInfoRepository; - // this.metricsRepository = metricsRepository; - // } - // - // /** - // * Lists all jobs registered to the db, sorted by provided orderBy - // * - // * @param orderBy list order - // * @return list of JobDetails - // */ - // @Transactional - // public List listJobs(Sort orderBy) { - // List jobs = jobInfoRepository.findAll(orderBy); - // return jobs.stream().map(JobInfo::getJobDetail).collect(Collectors.toList()); - // } - // - // /** - // * Lists all jobs registered to the db, sorted chronologically by creation time - // * - // * @return list of JobDetails - // */ - // @Transactional - // public List listJobs() { - // return listJobs(Sort.by(Sort.Direction.ASC, "created")); - // } - // - // /** - // * Gets information regarding a single job. 
- // * - // * @param id feast-internal job id - // * @return JobDetail for that job - // */ - // @Transactional - // public JobDetail getJob(String id) { - // Optional job = jobInfoRepository.findById(id); - // if (!job.isPresent()) { - // throw new RetrievalException(Strings.lenientFormat("Unable to retrieve job with id %s", - // id)); - // } - // JobDetail.Builder jobDetailBuilder = job.get().getJobDetail().toBuilder(); - // List metrics = metricsRepository.findByJobInfo_Id(id); - // for (Metrics metric : metrics) { - // jobDetailBuilder.putMetrics(metric.getName(), metric.getValue()); - // } - // return jobDetailBuilder.build(); - // } - -} diff --git a/core/src/main/java/feast/core/service/SpecService.java b/core/src/main/java/feast/core/service/SpecService.java index 4ea2d288f2e..937fc297171 100644 --- a/core/src/main/java/feast/core/service/SpecService.java +++ b/core/src/main/java/feast/core/service/SpecService.java @@ -32,6 +32,7 @@ import feast.core.CoreServiceProto.ListStoresResponse.Builder; import feast.core.CoreServiceProto.UpdateStoreRequest; import feast.core.CoreServiceProto.UpdateStoreResponse; +import feast.core.FeatureSetProto; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.SourceProto; import feast.core.StoreProto; @@ -110,8 +111,9 @@ public GetFeatureSetResponse getFeatureSet(GetFeatureSetRequest request) if (featureSet == null) { throw io.grpc.Status.NOT_FOUND - .withDescription(String.format("Feature set with name \"%s\" could not be found.", - request.getName())) + .withDescription( + String.format( + "Feature set with name \"%s\" could not be found.", request.getName())) .asRuntimeException(); } } else { @@ -121,13 +123,14 @@ public GetFeatureSetResponse getFeatureSet(GetFeatureSetRequest request) if (featureSet == null) { throw io.grpc.Status.NOT_FOUND - .withDescription(String.format("Feature set with name \"%s\" and version \"%s\" could " - + "not be found.", request.getName(), request.getVersion())) + 
.withDescription( + String.format( + "Feature set with name \"%s\" and version \"%s\" could " + "not be found.", + request.getName(), request.getVersion())) .asRuntimeException(); } } - // Only a single item in list, return successfully return GetFeatureSetResponse.newBuilder().setFeatureSet(featureSet.toProto()).build(); } @@ -154,7 +157,9 @@ public ListFeatureSetsResponse listFeatureSets(ListFeatureSetsRequest.Filter fil if (name.equals("")) { featureSets = featureSetRepository.findAllByOrderByNameAscVersionAsc(); } else { - featureSets = featureSetRepository.findByNameWithWildcardOrderByNameAscVersionAsc(name.replace('*', '%')); + featureSets = + featureSetRepository.findByNameWithWildcardOrderByNameAscVersionAsc( + name.replace('*', '%')); featureSets = featureSets.stream() .filter(getVersionFilter(filter.getFeatureSetVersion())) @@ -210,10 +215,11 @@ public ListStoresResponse listStores(ListStoresRequest.Filter filter) { * this method will update the incoming featureSet spec with the latest version stored in the * repository, and return that. * - * @param newFeatureSetSpec featureSet to add. + * @param newFeatureSet featureSet to add. 
*/ - public ApplyFeatureSetResponse applyFeatureSet(FeatureSetSpec newFeatureSetSpec) + public ApplyFeatureSetResponse applyFeatureSet(FeatureSetProto.FeatureSet newFeatureSet) throws InvalidProtocolBufferException { + FeatureSetSpec newFeatureSetSpec = newFeatureSet.getSpec(); FeatureSetValidator.validateSpec(newFeatureSetSpec); List existingFeatureSets = featureSetRepository.findByName(newFeatureSetSpec.getName()); @@ -223,7 +229,7 @@ public ApplyFeatureSetResponse applyFeatureSet(FeatureSetSpec newFeatureSetSpec) } else { existingFeatureSets = Ordering.natural().reverse().sortedCopy(existingFeatureSets); FeatureSet latest = existingFeatureSets.get(0); - FeatureSet featureSet = FeatureSet.fromProto(newFeatureSetSpec); + FeatureSet featureSet = FeatureSet.fromProto(newFeatureSet); // If the featureSet remains unchanged, we do nothing. if (featureSet.equalTo(latest)) { @@ -234,7 +240,8 @@ public ApplyFeatureSetResponse applyFeatureSet(FeatureSetSpec newFeatureSetSpec) } newFeatureSetSpec = newFeatureSetSpec.toBuilder().setVersion(latest.getVersion() + 1).build(); } - FeatureSet featureSet = FeatureSet.fromProto(newFeatureSetSpec); + newFeatureSet = newFeatureSet.toBuilder().setSpec(newFeatureSetSpec).build(); + FeatureSet featureSet = FeatureSet.fromProto(newFeatureSet); if (newFeatureSetSpec.getSource() == SourceProto.Source.getDefaultInstance()) { featureSet.setSource(defaultSource); } diff --git a/core/src/main/resources/application.yml b/core/src/main/resources/application.yml index ed11d6c9ebf..7c0d90e5f47 100644 --- a/core/src/main/resources/application.yml +++ b/core/src/main/resources/application.yml @@ -30,6 +30,9 @@ feast: runner: DirectRunner # Key-value dict of job options to be passed to the population jobs. options: {} + updates: + # Timeout in seconds for each attempt to update or submit a new job to the runner. + timeoutSeconds: 240 metrics: # Enable metrics pushing for all ingestion jobs. 
enabled: false diff --git a/core/src/test/java/feast/core/grpc/CoreServiceImplTest.java b/core/src/test/java/feast/core/grpc/CoreServiceImplTest.java deleted file mode 100644 index 5a3794cc654..00000000000 --- a/core/src/test/java/feast/core/grpc/CoreServiceImplTest.java +++ /dev/null @@ -1,153 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2019 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package feast.core.grpc; - -import static org.hamcrest.Matchers.containsInAnyOrder; -import static org.junit.Assert.*; -import static org.mockito.ArgumentMatchers.eq; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; -import static org.mockito.MockitoAnnotations.initMocks; - -import com.google.protobuf.InvalidProtocolBufferException; -import feast.core.CoreServiceProto.ApplyFeatureSetRequest; -import feast.core.CoreServiceProto.ApplyFeatureSetResponse; -import feast.core.CoreServiceProto.ApplyFeatureSetResponse.Status; -import feast.core.CoreServiceProto.ListFeatureSetsRequest; -import feast.core.CoreServiceProto.ListFeatureSetsResponse; -import feast.core.CoreServiceProto.ListStoresResponse; -import feast.core.FeatureSetProto.FeatureSetSpec; -import feast.core.SourceProto.KafkaSourceConfig; -import feast.core.SourceProto.Source; -import feast.core.SourceProto.SourceType; -import feast.core.StoreProto.Store; -import 
feast.core.StoreProto.Store.RedisConfig; -import feast.core.StoreProto.Store.StoreType; -import feast.core.StoreProto.Store.Subscription; -import feast.core.service.JobCoordinatorService; -import feast.core.service.SpecService; -import io.grpc.stub.StreamObserver; -import java.util.ArrayList; -import org.junit.Before; -import org.junit.Test; -import org.mockito.ArgumentCaptor; -import org.mockito.ArgumentMatchers; -import org.mockito.Captor; -import org.mockito.Mock; - -public class CoreServiceImplTest { - - @Mock private JobCoordinatorService jobCoordinatorService; - - @Mock private SpecService specService; - - @Captor private ArgumentCaptor> fsListArgCaptor; - - @Before - public void setUp() { - initMocks(this); - } - - @Test - public void shouldPassCorrectListOfFeatureSetsToJobService() - throws InvalidProtocolBufferException { - CoreServiceImpl coreService = new CoreServiceImpl(specService, jobCoordinatorService); - Store store = - Store.newBuilder() - .setType(StoreType.REDIS) - .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379).build()) - .addSubscriptions(Subscription.newBuilder().setName("*").setVersion(">0")) - .build(); - FeatureSetSpec fs1Sc1 = - FeatureSetSpec.newBuilder() - .setName("feature_set") - .setVersion(1) - .setSource( - Source.newBuilder() - .setType(SourceType.KAFKA) - .setKafkaSourceConfig( - KafkaSourceConfig.newBuilder() - .setBootstrapServers("kafka:9092") - .setTopic("topic1") - .build())) - .build(); - FeatureSetSpec fs2Sc1 = - FeatureSetSpec.newBuilder() - .setName("feature_set_other") - .setVersion(1) - .setSource( - Source.newBuilder() - .setType(SourceType.KAFKA) - .setKafkaSourceConfig( - KafkaSourceConfig.newBuilder() - .setBootstrapServers("kafka:9092") - .setTopic("topic1") - .build())) - .build(); - FeatureSetSpec fs3Sc2 = - FeatureSetSpec.newBuilder() - .setName("feature_set") - .setVersion(2) - .setSource( - Source.newBuilder() - .setType(SourceType.KAFKA) - .setKafkaSourceConfig( - 
KafkaSourceConfig.newBuilder() - .setBootstrapServers("kafka:9092") - .setTopic("topic2") - .build())) - .build(); - when(specService.applyFeatureSet(fs1Sc1)) - .thenReturn( - ApplyFeatureSetResponse.newBuilder() - .setStatus(Status.CREATED) - .setFeatureSet(fs1Sc1) - .build()); - when(specService.listStores(ArgumentMatchers.any())) - .thenReturn(ListStoresResponse.newBuilder().addStore(store).build()); - when(specService.listFeatureSets( - ListFeatureSetsRequest.Filter.newBuilder() - .setFeatureSetName("*") - .setFeatureSetVersion(">0") - .build())) - .thenReturn( - ListFeatureSetsResponse.newBuilder() - .addFeatureSets(fs1Sc1) - .addFeatureSets(fs3Sc2) - .addFeatureSets(fs2Sc1) - .build()); - - coreService.applyFeatureSet( - ApplyFeatureSetRequest.newBuilder().setFeatureSet(fs1Sc1).build(), - new StreamObserver() { - @Override - public void onNext(ApplyFeatureSetResponse applyFeatureSetResponse) {} - - @Override - public void onError(Throwable throwable) {} - - @Override - public void onCompleted() {} - }); - - verify(jobCoordinatorService, times(1)) - .startOrUpdateJob(fsListArgCaptor.capture(), eq(fs1Sc1.getSource()), eq(store)); - - assertThat(fsListArgCaptor.getValue(), containsInAnyOrder(fs1Sc1, fs2Sc1)); - } -} diff --git a/core/src/main/java/feast/core/job/JobMonitor.java b/core/src/test/java/feast/core/job/JobMatcher.java similarity index 64% rename from core/src/main/java/feast/core/job/JobMonitor.java rename to core/src/test/java/feast/core/job/JobMatcher.java index 740f4bdb879..87be05668fa 100644 --- a/core/src/main/java/feast/core/job/JobMonitor.java +++ b/core/src/test/java/feast/core/job/JobMatcher.java @@ -16,16 +16,23 @@ */ package feast.core.job; -import feast.core.model.JobInfo; -import feast.core.model.JobStatus; +import feast.core.model.Job; +import org.mockito.ArgumentMatcher; -public interface JobMonitor { +public class JobMatcher implements ArgumentMatcher { - /** - * Get status of a job given runner-specific job ID. - * - * @param job job. 
- * @return job status. - */ - JobStatus getJobStatus(JobInfo job); + private Job left; + + public JobMatcher(Job job) { + this.left = job; + } + + @Override + public boolean matches(Job right) { + if (right == null) { + return false; + } + left.setId(right.getId()); + return left.equals(right); + } } diff --git a/core/src/test/java/feast/core/job/JobUpdateTaskTest.java b/core/src/test/java/feast/core/job/JobUpdateTaskTest.java new file mode 100644 index 00000000000..a1b4cdbab2d --- /dev/null +++ b/core/src/test/java/feast/core/job/JobUpdateTaskTest.java @@ -0,0 +1,274 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2019 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package feast.core.job; + +import static org.hamcrest.core.Is.is; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.junit.Assert.assertThat; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; +import static org.mockito.MockitoAnnotations.initMocks; + +import feast.core.FeatureSetProto; +import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.SourceProto; +import feast.core.SourceProto.KafkaSourceConfig; +import feast.core.SourceProto.SourceType; +import feast.core.StoreProto; +import feast.core.StoreProto.Store.RedisConfig; +import feast.core.StoreProto.Store.StoreType; +import feast.core.StoreProto.Store.Subscription; +import feast.core.model.FeatureSet; +import feast.core.model.Job; +import feast.core.model.JobStatus; +import feast.core.model.Source; +import feast.core.model.Store; +import java.util.Arrays; +import java.util.Optional; +import org.hamcrest.core.IsNull; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mock; + +public class JobUpdateTaskTest { + + @Mock private JobManager jobManager; + + private StoreProto.Store store; + private SourceProto.Source source; + + @Before + public void setUp() { + initMocks(this); + store = + StoreProto.Store.newBuilder() + .setName("test") + .setType(StoreType.REDIS) + .setRedisConfig(RedisConfig.newBuilder().build()) + .addSubscriptions(Subscription.newBuilder().setName("*").setVersion(">0").build()) + .build(); + + source = + SourceProto.Source.newBuilder() + .setType(SourceType.KAFKA) + .setKafkaSourceConfig( + KafkaSourceConfig.newBuilder() + .setTopic("topic") + .setBootstrapServers("servers:9092") + .build()) + .build(); + } + + @Test + public void shouldUpdateJobIfPresent() { + FeatureSetProto.FeatureSet featureSet1 = + FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder().setName("featureSet1").setVersion(1).setSource(source)) + .build(); + 
FeatureSetProto.FeatureSet featureSet2 = + FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder().setName("featureSet2").setVersion(1).setSource(source)) + .build(); + Job originalJob = + new Job( + "job", + "old_ext", + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet1)), + JobStatus.RUNNING); + JobUpdateTask jobUpdateTask = + new JobUpdateTask( + Arrays.asList(featureSet1.getSpec(), featureSet2.getSpec()), + source, + store, + Optional.of(originalJob), + jobManager, + 100L); + Job submittedJob = + new Job( + "job", + "old_ext", + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet1), FeatureSet.fromProto(featureSet2)), + JobStatus.RUNNING); + + Job expected = + new Job( + "job", + "new_ext", + Runner.DATAFLOW.getName(), + Source.fromProto(source), + Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet1), FeatureSet.fromProto(featureSet2)), + JobStatus.PENDING); + when(jobManager.updateJob(submittedJob)).thenReturn(expected); + when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); + Job actual = jobUpdateTask.call(); + + assertThat(actual, equalTo(expected)); + } + + @Test + public void shouldCreateJobIfNotPresent() { + FeatureSetProto.FeatureSet featureSet1 = + FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder().setName("featureSet1").setVersion(1).setSource(source)) + .build(); + JobUpdateTask jobUpdateTask = + spy( + new JobUpdateTask( + Arrays.asList(featureSet1.getSpec()), + source, + store, + Optional.empty(), + jobManager, + 100L)); + doReturn("job").when(jobUpdateTask).createJobId("KAFKA/servers:9092/topic", "test"); + + Job expectedInput = + new Job( + "job", + "", + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source), + 
feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet1)), + JobStatus.PENDING); + + Job expected = + new Job( + "job", + "ext", + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet1)), + JobStatus.RUNNING); + + when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); + when(jobManager.startJob(expectedInput)).thenReturn(expected); + + Job actual = jobUpdateTask.call(); + assertThat(actual, equalTo(expected)); + } + + @Test + public void shouldUpdateJobStatusIfNotCreateOrUpdate() { + FeatureSetProto.FeatureSet featureSet1 = + FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder().setName("featureSet1").setVersion(1).setSource(source)) + .build(); + Job originalJob = + new Job( + "job", + "ext", + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet1)), + JobStatus.RUNNING); + JobUpdateTask jobUpdateTask = + new JobUpdateTask( + Arrays.asList(featureSet1.getSpec()), + source, + store, + Optional.of(originalJob), + jobManager, + 100L); + + when(jobManager.getJobStatus(originalJob)).thenReturn(JobStatus.ABORTING); + Job expected = + new Job( + "job", + "ext", + Runner.DATAFLOW.getName(), + Source.fromProto(source), + Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet1)), + JobStatus.ABORTING); + Job actual = jobUpdateTask.call(); + + assertThat(actual, equalTo(expected)); + } + + @Test + public void shouldReturnJobWithErrorStatusIfFailedToSubmit() { + FeatureSetProto.FeatureSet featureSet1 = + FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder().setName("featureSet1").setVersion(1).setSource(source)) + .build(); + JobUpdateTask jobUpdateTask = + spy( + new JobUpdateTask( + Arrays.asList(featureSet1.getSpec()), + source, + store, + 
Optional.empty(), + jobManager, + 100L)); + doReturn("job").when(jobUpdateTask).createJobId("KAFKA/servers:9092/topic", "test"); + + Job expectedInput = + new Job( + "job", + "", + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet1)), + JobStatus.PENDING); + + Job expected = + new Job( + "job", + "", + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet1)), + JobStatus.ERROR); + + when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); + when(jobManager.startJob(expectedInput)) + .thenThrow(new RuntimeException("Something went wrong")); + + Job actual = jobUpdateTask.call(); + assertThat(actual, equalTo(expected)); + } + + @Test + public void shouldTimeout() { + FeatureSetSpec featureSet1 = + FeatureSetSpec.newBuilder().setName("featureSet1").setVersion(1).setSource(source).build(); + JobUpdateTask jobUpdateTask = + spy( + new JobUpdateTask( + Arrays.asList(featureSet1), source, store, Optional.empty(), jobManager, 0L)); + Job actual = jobUpdateTask.call(); + assertThat(actual, is(IsNull.nullValue())); + } +} diff --git a/core/src/test/java/feast/core/job/ScheduledJobMonitorTest.java b/core/src/test/java/feast/core/job/ScheduledJobMonitorTest.java deleted file mode 100644 index 24d1747ce49..00000000000 --- a/core/src/test/java/feast/core/job/ScheduledJobMonitorTest.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2019 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package feast.core.job; - -import static org.hamcrest.core.IsEqual.equalTo; -import static org.junit.Assert.assertThat; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Mockito.never; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import feast.core.SourceProto.KafkaSourceConfig; -import feast.core.SourceProto.SourceType; -import feast.core.dao.JobInfoRepository; -import feast.core.model.JobInfo; -import feast.core.model.JobStatus; -import feast.core.model.Source; -import feast.core.model.Store; -import java.util.Collection; -import java.util.Collections; -import org.junit.Before; -import org.junit.Test; -import org.mockito.ArgumentCaptor; -import org.mockito.Mock; -import org.mockito.MockitoAnnotations; - -public class ScheduledJobMonitorTest { - - ScheduledJobMonitor scheduledJobMonitor; - - @Mock JobMonitor jobMonitor; - - @Mock JobInfoRepository jobInfoRepository; - - @Before - public void setUp() { - MockitoAnnotations.initMocks(this); - scheduledJobMonitor = new ScheduledJobMonitor(jobMonitor, jobInfoRepository); - } - - @Test - public void getJobStatus_shouldUpdateJobInfoForRunningJob() { - Source source = - new Source( - SourceType.KAFKA, - KafkaSourceConfig.newBuilder() - .setBootstrapServers("kafka:9092") - .setTopic("feast-topic") - .build(), - true); - JobInfo job = - new JobInfo( - "jobId", - "extId1", - "DataflowRunner", - source, - new Store(), - Collections.emptyList(), - Collections.emptyList(), - JobStatus.RUNNING); - - 
when(jobInfoRepository.findByStatusNotIn((Collection) any(Collection.class))) - .thenReturn(Collections.singletonList(job)); - when(jobMonitor.getJobStatus(job)).thenReturn(JobStatus.COMPLETED); - - scheduledJobMonitor.updateJobStatus(); - - ArgumentCaptor argCaptor = ArgumentCaptor.forClass(JobInfo.class); - verify(jobInfoRepository).save(argCaptor.capture()); - - JobInfo jobInfos = argCaptor.getValue(); - assertThat(jobInfos.getStatus(), equalTo(JobStatus.COMPLETED)); - } - - @Test - public void getJobStatus_shouldNotUpdateJobInfoForTerminalJob() { - when(jobInfoRepository.findByStatusNotIn((Collection) any(Collection.class))) - .thenReturn(Collections.emptyList()); - - scheduledJobMonitor.updateJobStatus(); - - verify(jobInfoRepository, never()).save(any(JobInfo.class)); - } -} diff --git a/core/src/test/java/feast/core/job/dataflow/DataflowJobManagerTest.java b/core/src/test/java/feast/core/job/dataflow/DataflowJobManagerTest.java index c2c47a8d035..5f72f0dd7a6 100644 --- a/core/src/test/java/feast/core/job/dataflow/DataflowJobManagerTest.java +++ b/core/src/test/java/feast/core/job/dataflow/DataflowJobManagerTest.java @@ -28,14 +28,26 @@ import com.google.api.services.dataflow.Dataflow; import com.google.common.collect.Lists; +import com.google.protobuf.Duration; import com.google.protobuf.util.JsonFormat; import com.google.protobuf.util.JsonFormat.Printer; +import feast.core.FeatureSetProto; import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.SourceProto; +import feast.core.SourceProto.KafkaSourceConfig; +import feast.core.SourceProto.SourceType; import feast.core.StoreProto; import feast.core.StoreProto.Store.RedisConfig; import feast.core.StoreProto.Store.StoreType; +import feast.core.StoreProto.Store.Subscription; import feast.core.config.FeastProperties.MetricsProperties; import feast.core.exception.JobExecutionException; +import feast.core.job.Runner; +import feast.core.model.FeatureSet; +import feast.core.model.Job; +import 
feast.core.model.JobStatus; +import feast.core.model.Source; +import feast.core.model.Store; import feast.ingestion.options.ImportOptions; import java.io.IOException; import java.util.HashMap; @@ -80,10 +92,26 @@ public void shouldStartJobWithCorrectPipelineOptions() throws IOException { .setName("SERVING") .setType(StoreType.REDIS) .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379).build()) + .addSubscriptions(Subscription.newBuilder().setName("*").setVersion(">0").build()) + .build(); + + SourceProto.Source source = + SourceProto.Source.newBuilder() + .setType(SourceType.KAFKA) + .setKafkaSourceConfig( + KafkaSourceConfig.newBuilder() + .setTopic("topic") + .setBootstrapServers("servers:9092") + .build()) .build(); FeatureSetSpec featureSetSpec = - FeatureSetSpec.newBuilder().setName("featureSet").setVersion(1).build(); + FeatureSetSpec.newBuilder() + .setName("featureSet") + .setVersion(1) + .setSource(source) + .setMaxAge(Duration.newBuilder().build()) + .build(); Printer printer = JsonFormat.printer(); String expectedExtJobId = "feast-job-0"; @@ -108,7 +136,18 @@ public void shouldStartJobWithCorrectPipelineOptions() throws IOException { when(mockPipelineResult.getJobId()).thenReturn(expectedExtJobId); doReturn(mockPipelineResult).when(dfJobManager).runPipeline(any()); - String jobId = dfJobManager.startJob(jobName, Lists.newArrayList(featureSetSpec), store); + Job job = + new Job( + jobName, + "", + Runner.DATAFLOW.getName(), + Source.fromProto(source), + Store.fromProto(store), + Lists.newArrayList( + FeatureSet.fromProto( + FeatureSetProto.FeatureSet.newBuilder().setSpec(featureSetSpec).build())), + JobStatus.PENDING); + Job actual = dfJobManager.startJob(job); verify(dfJobManager, times(1)).runPipeline(captor.capture()); ImportOptions actualPipelineOptions = captor.getValue(); @@ -129,7 +168,7 @@ public void shouldStartJobWithCorrectPipelineOptions() throws IOException { 
expectedPipelineOptions.setFilesToStage(actualPipelineOptions.getFilesToStage()); assertThat(actualPipelineOptions.toString(), equalTo(expectedPipelineOptions.toString())); - assertThat(jobId, equalTo(expectedExtJobId)); + assertThat(actual.getExtId(), equalTo(expectedExtJobId)); } @Test @@ -141,8 +180,18 @@ public void shouldThrowExceptionWhenJobStateTerminal() throws IOException { .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379).build()) .build(); + SourceProto.Source source = + SourceProto.Source.newBuilder() + .setType(SourceType.KAFKA) + .setKafkaSourceConfig( + KafkaSourceConfig.newBuilder() + .setTopic("topic") + .setBootstrapServers("servers:9092") + .build()) + .build(); + FeatureSetSpec featureSetSpec = - FeatureSetSpec.newBuilder().setName("featureSet").setVersion(1).build(); + FeatureSetSpec.newBuilder().setName("featureSet").setVersion(1).setSource(source).build(); dfJobManager = Mockito.spy(dfJobManager); @@ -151,7 +200,19 @@ public void shouldThrowExceptionWhenJobStateTerminal() throws IOException { doReturn(mockPipelineResult).when(dfJobManager).runPipeline(any()); + Job job = + new Job( + "job", + "", + Runner.DATAFLOW.getName(), + Source.fromProto(source), + Store.fromProto(store), + Lists.newArrayList( + FeatureSet.fromProto( + FeatureSetProto.FeatureSet.newBuilder().setSpec(featureSetSpec).build())), + JobStatus.PENDING); + expectedException.expect(JobExecutionException.class); - dfJobManager.startJob("job", Lists.newArrayList(featureSetSpec), store); + dfJobManager.startJob(job); } } diff --git a/core/src/test/java/feast/core/job/dataflow/DataflowJobMonitorTest.java b/core/src/test/java/feast/core/job/dataflow/DataflowJobMonitorTest.java deleted file mode 100644 index 1311fcbdfc4..00000000000 --- a/core/src/test/java/feast/core/job/dataflow/DataflowJobMonitorTest.java +++ /dev/null @@ -1,123 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2019 The Feast Authors - * - * Licensed under the Apache 
License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package feast.core.job.dataflow; - -import static org.hamcrest.Matchers.equalTo; -import static org.junit.Assert.assertThat; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import com.google.api.services.dataflow.Dataflow; -import com.google.api.services.dataflow.Dataflow.Projects; -import com.google.api.services.dataflow.Dataflow.Projects.Locations; -import com.google.api.services.dataflow.Dataflow.Projects.Locations.Jobs; -import com.google.api.services.dataflow.Dataflow.Projects.Locations.Jobs.Get; -import com.google.api.services.dataflow.model.Job; -import com.google.common.collect.Lists; -import feast.core.job.Runner; -import feast.core.model.JobInfo; -import feast.core.model.JobStatus; -import feast.types.FieldProto.Field; -import feast.types.ValueProto.BoolList; -import feast.types.ValueProto.Value; -import java.io.IOException; -import org.junit.Before; -import org.junit.Test; - -public class DataflowJobMonitorTest { - - private DataflowJobMonitor monitor; - private String location; - private String projectId; - private Jobs jobService; - - @Before - public void setUp() throws Exception { - projectId = "myProject"; - location = "asia-east1"; - Dataflow dataflow = mock(Dataflow.class); - Dataflow.Projects projects = mock(Projects.class); - Dataflow.Projects.Locations locations = mock(Locations.class); - jobService = mock(Jobs.class); - when(dataflow.projects()).thenReturn(projects); - 
when(projects.locations()).thenReturn(locations); - when(locations.jobs()).thenReturn(jobService); - - monitor = new DataflowJobMonitor(dataflow, projectId, location); - } - - @Test - public void getJobStatus_shouldReturnCorrectJobStatusForValidDataflowJobState() - throws IOException { - String jobId = "myJobId"; - - Get getOp = mock(Get.class); - Job job = mock(Job.class); - when(getOp.execute()).thenReturn(job); - when(job.getCurrentState()).thenReturn(DataflowJobState.JOB_STATE_RUNNING.toString()); - when(jobService.get(projectId, location, jobId)).thenReturn(getOp); - - JobInfo jobInfo = mock(JobInfo.class); - when(jobInfo.getExtId()).thenReturn(jobId); - when(jobInfo.getRunner()).thenReturn(Runner.DATAFLOW.getName()); - assertThat(monitor.getJobStatus(jobInfo), equalTo(JobStatus.RUNNING)); - } - - @Test - public void getJobStatus_shouldReturnUnknownStateForInvalidDataflowJobState() throws IOException { - String jobId = "myJobId"; - - Get getOp = mock(Get.class); - Job job = mock(Job.class); - when(getOp.execute()).thenReturn(job); - when(job.getCurrentState()).thenReturn("Random String"); - when(jobService.get(projectId, location, jobId)).thenReturn(getOp); - - JobInfo jobInfo = mock(JobInfo.class); - when(jobInfo.getExtId()).thenReturn(jobId); - when(jobInfo.getRunner()).thenReturn(Runner.DATAFLOW.getName()); - assertThat(monitor.getJobStatus(jobInfo), equalTo(JobStatus.UNKNOWN)); - } - - @Test - public void getJobStatus_shouldReturnUnknownStateWhenExceptionHappen() throws IOException { - String jobId = "myJobId"; - - when(jobService.get(projectId, location, jobId)) - .thenThrow(new RuntimeException("some thing wrong")); - - JobInfo jobInfo = mock(JobInfo.class); - when(jobInfo.getExtId()).thenReturn(jobId); - when(jobInfo.getRunner()).thenReturn(Runner.DATAFLOW.getName()); - assertThat(monitor.getJobStatus(jobInfo), equalTo(JobStatus.UNKNOWN)); - } - - @Test - public void test() { - Field field = - Field.newBuilder() - .setName("Hello") - .setValue( - 
Value.newBuilder() - .setBoolListVal( - BoolList.newBuilder() - .addAllVal(Lists.newArrayList(true, false, true, true)) - .build())) - .build(); - field.getName(); - } -} diff --git a/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java b/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java index f78060269c5..73cbd9030f2 100644 --- a/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java +++ b/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java @@ -26,13 +26,25 @@ import static org.mockito.MockitoAnnotations.initMocks; import com.google.common.collect.Lists; +import com.google.protobuf.Duration; import com.google.protobuf.util.JsonFormat; import com.google.protobuf.util.JsonFormat.Printer; +import feast.core.FeatureSetProto; import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.SourceProto; +import feast.core.SourceProto.KafkaSourceConfig; +import feast.core.SourceProto.SourceType; import feast.core.StoreProto; import feast.core.StoreProto.Store.RedisConfig; import feast.core.StoreProto.Store.StoreType; +import feast.core.StoreProto.Store.Subscription; import feast.core.config.FeastProperties.MetricsProperties; +import feast.core.job.Runner; +import feast.core.model.FeatureSet; +import feast.core.model.Job; +import feast.core.model.JobStatus; +import feast.core.model.Source; +import feast.core.model.Store; import feast.ingestion.options.ImportOptions; import java.io.IOException; import java.util.HashMap; @@ -74,10 +86,26 @@ public void shouldStartDirectJobAndRegisterPipelineResult() throws IOException { .setName("SERVING") .setType(StoreType.REDIS) .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379).build()) + .addSubscriptions(Subscription.newBuilder().setName("*").setVersion(">0").build()) + .build(); + + SourceProto.Source source = + SourceProto.Source.newBuilder() + .setType(SourceType.KAFKA) + .setKafkaSourceConfig( + 
KafkaSourceConfig.newBuilder() + .setTopic("topic") + .setBootstrapServers("servers:9092") + .build()) .build(); FeatureSetSpec featureSetSpec = - FeatureSetSpec.newBuilder().setName("featureSet").setVersion(1).build(); + FeatureSetSpec.newBuilder() + .setName("featureSet") + .setVersion(1) + .setMaxAge(Duration.newBuilder()) + .setSource(source) + .build(); Printer printer = JsonFormat.printer(); @@ -100,7 +128,18 @@ public void shouldStartDirectJobAndRegisterPipelineResult() throws IOException { PipelineResult mockPipelineResult = Mockito.mock(PipelineResult.class); doReturn(mockPipelineResult).when(drJobManager).runPipeline(any()); - String jobId = drJobManager.startJob(expectedJobId, Lists.newArrayList(featureSetSpec), store); + Job job = + new Job( + expectedJobId, + "", + Runner.DIRECT.getName(), + Source.fromProto(source), + Store.fromProto(store), + Lists.newArrayList( + FeatureSet.fromProto( + FeatureSetProto.FeatureSet.newBuilder().setSpec(featureSetSpec).build())), + JobStatus.PENDING); + Job actual = drJobManager.startJob(job); verify(drJobManager, times(1)).runPipeline(pipelineOptionsCaptor.capture()); verify(directJobRegistry, times(1)).add(directJobCaptor.capture()); @@ -112,7 +151,7 @@ public void shouldStartDirectJobAndRegisterPipelineResult() throws IOException { assertThat(actualPipelineOptions.toString(), equalTo(expectedPipelineOptions.toString())); assertThat(jobStarted.getPipelineResult(), equalTo(mockPipelineResult)); assertThat(jobStarted.getJobId(), equalTo(expectedJobId)); - assertThat(jobId, equalTo(expectedJobId)); + assertThat(actual.getExtId(), equalTo(expectedJobId)); } @Test diff --git a/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java b/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java index 9bc641f92b0..5b892d30aa7 100644 --- a/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java +++ b/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java @@ -16,179 +16,274 @@ 
*/ package feast.core.service; +import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.equalTo; -import static org.mockito.Mockito.spy; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.argThat; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import static org.mockito.MockitoAnnotations.initMocks; -import com.google.common.collect.Lists; +import com.google.protobuf.InvalidProtocolBufferException; +import feast.core.CoreServiceProto.ListFeatureSetsRequest.Filter; +import feast.core.CoreServiceProto.ListFeatureSetsResponse; +import feast.core.CoreServiceProto.ListStoresResponse; +import feast.core.FeatureSetProto; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.SourceProto.KafkaSourceConfig; +import feast.core.SourceProto.Source; import feast.core.SourceProto.SourceType; import feast.core.StoreProto; import feast.core.StoreProto.Store.RedisConfig; import feast.core.StoreProto.Store.StoreType; -import feast.core.dao.JobInfoRepository; +import feast.core.StoreProto.Store.Subscription; +import feast.core.config.FeastProperties.JobUpdatesProperties; +import feast.core.dao.FeatureSetRepository; +import feast.core.dao.JobRepository; import feast.core.job.JobManager; +import feast.core.job.JobMatcher; import feast.core.job.Runner; import feast.core.model.FeatureSet; -import feast.core.model.JobInfo; +import feast.core.model.Job; import feast.core.model.JobStatus; -import feast.core.model.Source; -import feast.core.model.Store; +import java.util.Arrays; +import java.util.List; import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import org.mockito.ArgumentCaptor; import org.mockito.Mock; public class JobCoordinatorServiceTest { @Rule public final ExpectedException exception = 
ExpectedException.none(); - @Mock JobInfoRepository jobInfoRepository; + @Mock + JobRepository jobRepository; @Mock JobManager jobManager; + @Mock SpecService specService; + @Mock FeatureSetRepository featureSetRepository; - private JobCoordinatorService jobCoordinatorService; - private JobInfo existingJob; - private Source defaultSource; + private JobUpdatesProperties jobUpdatesProperties; @Before public void setUp() { initMocks(this); + jobUpdatesProperties = new JobUpdatesProperties(); + jobUpdatesProperties.setTimeoutSeconds(5); + } - Store store = - Store.fromProto( - StoreProto.Store.newBuilder() - .setName("SERVING") - .setType(StoreType.REDIS) - .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379)) - .build()); - defaultSource = - new Source( - SourceType.KAFKA, - KafkaSourceConfig.newBuilder() - .setBootstrapServers("kafka:9092") - .setTopic("feast-topic") - .build(), - true); - FeatureSet featureSet1 = new FeatureSet(); - featureSet1.setId("featureSet1:1"); - featureSet1.setSource(defaultSource); - FeatureSet featureSet2 = new FeatureSet(); - featureSet2.setId("featureSet2:1"); - featureSet2.setSource(defaultSource); - existingJob = - new JobInfo( - "extid", - "name", - "DirectRunner", - defaultSource, - store, - Lists.newArrayList(featureSet1, featureSet2), - Lists.newArrayList(), - JobStatus.RUNNING); - when(jobInfoRepository.findBySourceIdAndStoreName(defaultSource.getId(), "SERVING")) - .thenReturn(Lists.newArrayList(existingJob)); - - jobCoordinatorService = new JobCoordinatorService(jobInfoRepository, jobManager); - jobCoordinatorService = spy(jobCoordinatorService); + @Test + public void shouldDoNothingIfNoStoresFound() { + when(specService.listStores(any())).thenReturn(ListStoresResponse.newBuilder().build()); + JobCoordinatorService jcs = + new JobCoordinatorService( + jobRepository, featureSetRepository, specService, jobManager, jobUpdatesProperties); + jcs.Poll(); + verify(jobRepository, times(0)).saveAndFlush(any()); } 
@Test - public void shouldNotStartOrUpdateJobIfNoChanges() { - FeatureSetSpec featureSet1 = - FeatureSetSpec.newBuilder() - .setName("featureSet1") - .setVersion(1) - .setSource(defaultSource.toProto()) - .build(); - FeatureSetSpec featureSet2 = - FeatureSetSpec.newBuilder() - .setName("featureSet2") - .setVersion(1) - .setSource(defaultSource.toProto()) - .build(); + public void shouldDoNothingIfNoMatchingFeatureSetsFound() throws InvalidProtocolBufferException { StoreProto.Store store = StoreProto.Store.newBuilder() - .setName("SERVING") + .setName("test") .setType(StoreType.REDIS) - .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379)) + .setRedisConfig(RedisConfig.newBuilder().build()) + .addSubscriptions(Subscription.newBuilder().setName("*").setVersion(">0").build()) .build(); - JobInfo jobInfo = - jobCoordinatorService.startOrUpdateJob( - Lists.newArrayList(featureSet1, featureSet2), defaultSource.toProto(), store); - assertThat(jobInfo, equalTo(existingJob)); + when(specService.listStores(any())) + .thenReturn(ListStoresResponse.newBuilder().addStore(store).build()); + when(specService.listFeatureSets( + Filter.newBuilder().setFeatureSetName("*").setFeatureSetVersion(">0").build())) + .thenReturn(ListFeatureSetsResponse.newBuilder().build()); + JobCoordinatorService jcs = + new JobCoordinatorService( + jobRepository, featureSetRepository, specService, jobManager, jobUpdatesProperties); + jcs.Poll(); + verify(jobRepository, times(0)).saveAndFlush(any()); } @Test - public void shouldStartJobIfNotExists() { - FeatureSetSpec featureSet = - FeatureSetSpec.newBuilder() - .setName("featureSet") - .setVersion(1) - .setSource(defaultSource.toProto()) - .build(); + public void shouldGenerateAndSubmitJobsIfAny() throws InvalidProtocolBufferException { StoreProto.Store store = StoreProto.Store.newBuilder() - .setName("SERVING") + .setName("test") .setType(StoreType.REDIS) - 
.setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379)) + .setRedisConfig(RedisConfig.newBuilder().build()) + .addSubscriptions( + Subscription.newBuilder().setName("features").setVersion(">0").build()) + .build(); + Source source = + Source.newBuilder() + .setType(SourceType.KAFKA) + .setKafkaSourceConfig( + KafkaSourceConfig.newBuilder() + .setTopic("topic") + .setBootstrapServers("servers:9092") + .build()) .build(); - String jobId = "featureSet-to-SERVING"; - String extJobId = "extId123"; - when(jobCoordinatorService.createJobId("featureSet", "SERVING")).thenReturn(jobId); - when(jobManager.startJob(jobId, Lists.newArrayList(featureSet), store)).thenReturn(extJobId); - when(jobManager.getRunnerType()).thenReturn(Runner.DIRECT); - FeatureSet expectedFeatureSet = new FeatureSet(); - expectedFeatureSet.setId("featureSet:1"); - JobInfo expectedJobInfo = - new JobInfo( - jobId, - extJobId, - "DirectRunner", - defaultSource, - Store.fromProto(store), - Lists.newArrayList(expectedFeatureSet), + + FeatureSetProto.FeatureSet featureSet1 = + FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder().setName("features").setVersion(1).setSource(source)) + .build(); + FeatureSetProto.FeatureSet featureSet2 = + FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder().setName("features").setVersion(2).setSource(source)) + .build(); + String extId = "ext"; + ArgumentCaptor jobArgCaptor = ArgumentCaptor.forClass(Job.class); + + Job expectedInput = + new Job( + "", + "", + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet1), FeatureSet.fromProto(featureSet2)), + JobStatus.PENDING); + + Job expected = + new Job( + "some_id", + extId, + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet1), 
FeatureSet.fromProto(featureSet2)), JobStatus.RUNNING); - when(jobInfoRepository.save(expectedJobInfo)).thenReturn(expectedJobInfo); - JobInfo jobInfo = - jobCoordinatorService.startOrUpdateJob( - Lists.newArrayList(featureSet), defaultSource.toProto(), store); - assertThat(jobInfo, equalTo(expectedJobInfo)); + + when(specService.listFeatureSets( + Filter.newBuilder().setFeatureSetName("features").setFeatureSetVersion(">0").build())) + .thenReturn( + ListFeatureSetsResponse.newBuilder() + .addFeatureSets(featureSet1) + .addFeatureSets(featureSet2) + .build()); + when(specService.listStores(any())) + .thenReturn(ListStoresResponse.newBuilder().addStore(store).build()); + + when(jobManager.startJob(argThat(new JobMatcher(expectedInput)))).thenReturn(expected); + when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); + + JobCoordinatorService jcs = + new JobCoordinatorService( + jobRepository, featureSetRepository, specService, jobManager, jobUpdatesProperties); + jcs.Poll(); + verify(jobRepository, times(1)).saveAndFlush(jobArgCaptor.capture()); + Job actual = jobArgCaptor.getValue(); + assertThat(actual, equalTo(expected)); } @Test - public void shouldUpdateJobIfAlreadyExistsButThereIsAChange() { - FeatureSetSpec featureSet = - FeatureSetSpec.newBuilder() - .setName("featureSet1") - .setVersion(1) - .setSource(defaultSource.toProto()) - .build(); + public void shouldGroupJobsBySource() throws InvalidProtocolBufferException { StoreProto.Store store = StoreProto.Store.newBuilder() - .setName("SERVING") + .setName("test") .setType(StoreType.REDIS) - .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379)) + .setRedisConfig(RedisConfig.newBuilder().build()) + .addSubscriptions( + Subscription.newBuilder().setName("features").setVersion(">0").build()) + .build(); + Source source1 = + Source.newBuilder() + .setType(SourceType.KAFKA) + .setKafkaSourceConfig( + KafkaSourceConfig.newBuilder() + .setTopic("topic") + 
.setBootstrapServers("servers:9092") + .build()) + .build(); + Source source2 = + Source.newBuilder() + .setType(SourceType.KAFKA) + .setKafkaSourceConfig( + KafkaSourceConfig.newBuilder() + .setTopic("topic") + .setBootstrapServers("other.servers:9092") + .build()) + .build(); + + FeatureSetProto.FeatureSet featureSet1 = + FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder().setName("features").setVersion(1).setSource(source1)) .build(); - String extId = "extId123"; - JobInfo modifiedJob = - new JobInfo( - existingJob.getId(), - existingJob.getExtId(), - existingJob.getRunner(), - defaultSource, - Store.fromProto(store), - Lists.newArrayList(FeatureSet.fromProto(featureSet)), + FeatureSetProto.FeatureSet featureSet2 = + FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder().setName("features").setVersion(2).setSource(source2)) + .build(); + + Job expectedInput1 = + new Job( + "name1", + "", + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source1), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet1)), + JobStatus.PENDING); + + Job expected1 = + new Job( + "name1", + "extId1", + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source1), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet1)), JobStatus.RUNNING); - when(jobManager.updateJob(modifiedJob)).thenReturn(extId); - JobInfo expectedJobInfo = modifiedJob; - expectedJobInfo.setExtId(extId); - when(jobInfoRepository.save(expectedJobInfo)).thenReturn(expectedJobInfo); - JobInfo jobInfo = - jobCoordinatorService.startOrUpdateJob( - Lists.newArrayList(featureSet), defaultSource.toProto(), store); - assertThat(jobInfo, equalTo(expectedJobInfo)); + + Job expectedInput2 = + new Job( + "", + "extId2", + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source2), + feast.core.model.Store.fromProto(store), + 
Arrays.asList(FeatureSet.fromProto(featureSet2)), + JobStatus.PENDING); + + Job expected2 = + new Job( + "name2", + "extId2", + Runner.DATAFLOW.getName(), + feast.core.model.Source.fromProto(source2), + feast.core.model.Store.fromProto(store), + Arrays.asList(FeatureSet.fromProto(featureSet2)), + JobStatus.RUNNING); + ArgumentCaptor jobArgCaptor = ArgumentCaptor.forClass(Job.class); + + when(specService.listFeatureSets( + Filter.newBuilder().setFeatureSetName("features").setFeatureSetVersion(">0").build())) + .thenReturn( + ListFeatureSetsResponse.newBuilder() + .addFeatureSets(featureSet1) + .addFeatureSets(featureSet2) + .build()); + when(specService.listStores(any())) + .thenReturn(ListStoresResponse.newBuilder().addStore(store).build()); + + when(jobManager.startJob(argThat(new JobMatcher(expectedInput1)))).thenReturn(expected1); + when(jobManager.startJob(argThat(new JobMatcher(expectedInput2)))).thenReturn(expected2); + when(jobManager.getRunnerType()).thenReturn(Runner.DATAFLOW); + + JobCoordinatorService jcs = + new JobCoordinatorService( + jobRepository, featureSetRepository, specService, jobManager, jobUpdatesProperties); + jcs.Poll(); + + verify(jobRepository, times(2)).saveAndFlush(jobArgCaptor.capture()); + List actual = jobArgCaptor.getAllValues(); + + assertThat(actual.get(0), equalTo(expected1)); + assertThat(actual.get(1), equalTo(expected2)); } } diff --git a/core/src/test/java/feast/core/service/SpecServiceTest.java b/core/src/test/java/feast/core/service/SpecServiceTest.java index a11adf022b9..dbf1290fb61 100644 --- a/core/src/test/java/feast/core/service/SpecServiceTest.java +++ b/core/src/test/java/feast/core/service/SpecServiceTest.java @@ -37,6 +37,7 @@ import feast.core.CoreServiceProto.UpdateStoreResponse; import feast.core.FeatureSetProto; import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.FeatureSetProto.FeatureSetStatus; import feast.core.FeatureSetProto.FeatureSpec; import feast.core.SourceProto.KafkaSourceConfig; 
import feast.core.SourceProto.SourceType; @@ -53,6 +54,8 @@ import feast.core.model.Store; import feast.types.ValueProto.ValueType.Enum; import io.grpc.StatusRuntimeException; +import java.sql.Date; +import java.time.Instant; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -68,14 +71,11 @@ public class SpecServiceTest { - @Mock - private FeatureSetRepository featureSetRepository; + @Mock private FeatureSetRepository featureSetRepository; - @Mock - private StoreRepository storeRepository; + @Mock private StoreRepository storeRepository; - @Rule - public final ExpectedException expectedException = ExpectedException.none(); + @Rule public final ExpectedException expectedException = ExpectedException.none(); private SpecService specService; private List featureSets; @@ -102,11 +102,18 @@ public void setUp() { Field f3f1 = new Field("f3", "f3f1", Enum.INT64); Field f3f2 = new Field("f3", "f3f2", Enum.INT64); Field f3e1 = new Field("f3", "f3e1", Enum.STRING); - FeatureSet featureSet3v1 = new FeatureSet( - "f3", 1, 100L, Arrays.asList(f3e1), Arrays.asList(f3f2, f3f1), defaultSource); - - featureSets = Arrays - .asList(featureSet1v1, featureSet1v2, featureSet1v3, featureSet2v1, featureSet3v1); + FeatureSet featureSet3v1 = + new FeatureSet( + "f3", + 1, + 100L, + Arrays.asList(f3e1), + Arrays.asList(f3f2, f3f1), + defaultSource, + FeatureSetStatus.STATUS_READY); + + featureSets = + Arrays.asList(featureSet1v1, featureSet1v2, featureSet1v3, featureSet2v1, featureSet3v1); when(featureSetRepository.findAll()).thenReturn(featureSets); when(featureSetRepository.findAllByOrderByNameAscVersionAsc()).thenReturn(featureSets); when(featureSetRepository.findByName("f1")).thenReturn(featureSets.subList(0, 3)); @@ -133,9 +140,9 @@ public void setUp() { public void shouldGetAllFeatureSetsIfNoFilterProvided() throws InvalidProtocolBufferException { ListFeatureSetsResponse actual = 
specService.listFeatureSets(Filter.newBuilder().setFeatureSetName("").build()); - List list = new ArrayList<>(); + List list = new ArrayList<>(); for (FeatureSet featureSet : featureSets) { - FeatureSetSpec toProto = featureSet.toProto(); + FeatureSetProto.FeatureSet toProto = featureSet.toProto(); list.add(toProto); } ListFeatureSetsResponse expected = @@ -150,9 +157,9 @@ public void shouldGetAllFeatureSetsMatchingNameIfNoVersionProvided() specService.listFeatureSets(Filter.newBuilder().setFeatureSetName("f1").build()); List expectedFeatureSets = featureSets.stream().filter(fs -> fs.getName().equals("f1")).collect(Collectors.toList()); - List list = new ArrayList<>(); + List list = new ArrayList<>(); for (FeatureSet expectedFeatureSet : expectedFeatureSets) { - FeatureSetSpec toProto = expectedFeatureSet.toProto(); + FeatureSetProto.FeatureSet toProto = expectedFeatureSet.toProto(); list.add(toProto); } ListFeatureSetsResponse expected = @@ -169,9 +176,9 @@ public void shouldGetAllFeatureSetsMatchingNameWithWildcardSearch() featureSets.stream() .filter(fs -> fs.getName().startsWith("f")) .collect(Collectors.toList()); - List list = new ArrayList<>(); + List list = new ArrayList<>(); for (FeatureSet expectedFeatureSet : expectedFeatureSets) { - FeatureSetSpec toProto = expectedFeatureSet.toProto(); + FeatureSetProto.FeatureSet toProto = expectedFeatureSet.toProto(); list.add(toProto); } ListFeatureSetsResponse expected = @@ -190,9 +197,9 @@ public void shouldGetAllFeatureSetsMatchingVersionIfNoComparator() .filter(fs -> fs.getName().equals("f1")) .filter(fs -> fs.getVersion() == 1) .collect(Collectors.toList()); - List list = new ArrayList<>(); + List list = new ArrayList<>(); for (FeatureSet expectedFeatureSet : expectedFeatureSets) { - FeatureSetSpec toProto = expectedFeatureSet.toProto(); + FeatureSetProto.FeatureSet toProto = expectedFeatureSet.toProto(); list.add(toProto); } ListFeatureSetsResponse expected = @@ -211,9 +218,9 @@ public void 
shouldGetAllFeatureSetsGivenVersionWithComparator() .filter(fs -> fs.getName().equals("f1")) .filter(fs -> fs.getVersion() > 1) .collect(Collectors.toList()); - List list = new ArrayList<>(); + List list = new ArrayList<>(); for (FeatureSet expectedFeatureSet : expectedFeatureSets) { - FeatureSetSpec toProto = expectedFeatureSet.toProto(); + FeatureSetProto.FeatureSet toProto = expectedFeatureSet.toProto(); list.add(toProto); } ListFeatureSetsResponse expected = @@ -302,10 +309,11 @@ public void shouldThrowRetrievalExceptionIfNoStoresFoundWithName() { @Test public void applyFeatureSetShouldReturnFeatureSetWithLatestVersionIfFeatureSetHasNotChanged() throws InvalidProtocolBufferException { - FeatureSetSpec incomingFeatureSet = - featureSets.get(2).toProto().toBuilder().clearVersion().build(); + FeatureSetSpec incomingFeatureSetSpec = + featureSets.get(2).toProto().getSpec().toBuilder().clearVersion().build(); ApplyFeatureSetResponse applyFeatureSetResponse = - specService.applyFeatureSet(incomingFeatureSet); + specService.applyFeatureSet( + FeatureSetProto.FeatureSet.newBuilder().setSpec(incomingFeatureSetSpec).build()); verify(featureSetRepository, times(0)).save(ArgumentMatchers.any(FeatureSet.class)); assertThat(applyFeatureSetResponse.getStatus(), equalTo(Status.NO_CHANGE)); @@ -316,15 +324,17 @@ public void applyFeatureSetShouldReturnFeatureSetWithLatestVersionIfFeatureSetHa public void applyFeatureSetShouldApplyFeatureSetWithInitVersionIfNotExists() throws InvalidProtocolBufferException { when(featureSetRepository.findByName("f2")).thenReturn(Lists.newArrayList()); - FeatureSetSpec incomingFeatureSet = - newDummyFeatureSet("f2", 1).toProto().toBuilder().clearVersion().build(); + FeatureSetSpec incomingFeatureSetSpec = + newDummyFeatureSet("f2", 1).toProto().getSpec().toBuilder().clearVersion().build(); + ApplyFeatureSetResponse applyFeatureSetResponse = - specService.applyFeatureSet(incomingFeatureSet); + specService.applyFeatureSet( + 
FeatureSetProto.FeatureSet.newBuilder().setSpec(incomingFeatureSetSpec).build()); verify(featureSetRepository).saveAndFlush(ArgumentMatchers.any(FeatureSet.class)); FeatureSetSpec expected = - incomingFeatureSet.toBuilder().setVersion(1).setSource(defaultSource.toProto()).build(); + incomingFeatureSetSpec.toBuilder().setVersion(1).setSource(defaultSource.toProto()).build(); assertThat(applyFeatureSetResponse.getStatus(), equalTo(Status.CREATED)); - assertThat(applyFeatureSetResponse.getFeatureSet(), equalTo(expected)); + assertThat(applyFeatureSetResponse.getFeatureSet().getSpec(), equalTo(expected)); } @Test @@ -334,6 +344,7 @@ public void applyFeatureSetShouldIncrementFeatureSetVersionIfAlreadyExists() featureSets .get(2) .toProto() + .getSpec() .toBuilder() .clearVersion() .addFeatures(FeatureSpec.newBuilder().setName("feature2").setValueType(Enum.STRING)) @@ -341,13 +352,13 @@ public void applyFeatureSetShouldIncrementFeatureSetVersionIfAlreadyExists() FeatureSetSpec expected = incomingFeatureSet.toBuilder().setVersion(4).setSource(defaultSource.toProto()).build(); ApplyFeatureSetResponse applyFeatureSetResponse = - specService.applyFeatureSet(incomingFeatureSet); + specService.applyFeatureSet( + FeatureSetProto.FeatureSet.newBuilder().setSpec(expected).build()); verify(featureSetRepository).saveAndFlush(ArgumentMatchers.any(FeatureSet.class)); assertThat(applyFeatureSetResponse.getStatus(), equalTo(Status.CREATED)); - assertThat(applyFeatureSetResponse.getFeatureSet(), equalTo(expected)); + assertThat(applyFeatureSetResponse.getFeatureSet().getSpec(), equalTo(expected)); } - @Test public void applyFeatureSetShouldNotCreateFeatureSetIfFieldsUnordered() throws InvalidProtocolBufferException { @@ -355,20 +366,32 @@ public void applyFeatureSetShouldNotCreateFeatureSetIfFieldsUnordered() Field f3f1 = new Field("f3", "f3f1", Enum.INT64); Field f3f2 = new Field("f3", "f3f2", Enum.INT64); Field f3e1 = new Field("f3", "f3e1", Enum.STRING); - 
FeatureSetProto.FeatureSetSpec incomingFeatureSet = (new FeatureSet( - "f3", 5, 100L, Arrays.asList(f3e1), Arrays.asList(f3f2, f3f1), defaultSource)).toProto(); - - FeatureSetSpec expected = incomingFeatureSet; + FeatureSetProto.FeatureSet incomingFeatureSet = + (new FeatureSet( + "f3", + 5, + 100L, + Arrays.asList(f3e1), + Arrays.asList(f3f2, f3f1), + defaultSource, + FeatureSetStatus.STATUS_READY)) + .toProto(); + + FeatureSetProto.FeatureSet expected = incomingFeatureSet; ApplyFeatureSetResponse applyFeatureSetResponse = specService.applyFeatureSet(incomingFeatureSet); assertThat(applyFeatureSetResponse.getStatus(), equalTo(Status.NO_CHANGE)); - assertThat(applyFeatureSetResponse.getFeatureSet().getMaxAge(), equalTo(expected.getMaxAge())); - assertThat(applyFeatureSetResponse.getFeatureSet().getEntities(0), - equalTo(expected.getEntities(0))); - assertThat(applyFeatureSetResponse.getFeatureSet().getName(), equalTo(expected.getName())); + assertThat( + applyFeatureSetResponse.getFeatureSet().getSpec().getMaxAge(), + equalTo(expected.getSpec().getMaxAge())); + assertThat( + applyFeatureSetResponse.getFeatureSet().getSpec().getEntities(0), + equalTo(expected.getSpec().getEntities(0))); + assertThat( + applyFeatureSetResponse.getFeatureSet().getSpec().getName(), + equalTo(expected.getSpec().getName())); } - @Test public void shouldUpdateStoreIfConfigChanges() throws InvalidProtocolBufferException { when(storeRepository.findById("SERVING")).thenReturn(Optional.of(stores.get(0))); @@ -410,8 +433,17 @@ public void shouldDoNothingIfNoChange() throws InvalidProtocolBufferException { private FeatureSet newDummyFeatureSet(String name, int version) { Field feature = new Field(name, "feature", Enum.INT64); Field entity = new Field(name, "entity", Enum.STRING); - return new FeatureSet( - name, version, 100L, Arrays.asList(entity), Arrays.asList(feature), defaultSource); + FeatureSet fs = + new FeatureSet( + name, + version, + 100L, + Arrays.asList(entity), + 
Arrays.asList(feature), + defaultSource, + FeatureSetStatus.STATUS_READY); + fs.setCreated(Date.from(Instant.ofEpochSecond(10L))); + return fs; } private Store newDummyStore(String name) { diff --git a/ingestion/src/main/java/feast/ingestion/transform/WriteToStore.java b/ingestion/src/main/java/feast/ingestion/transform/WriteToStore.java index 6f697f1c6fd..2e3a0a5ddee 100644 --- a/ingestion/src/main/java/feast/ingestion/transform/WriteToStore.java +++ b/ingestion/src/main/java/feast/ingestion/transform/WriteToStore.java @@ -50,7 +50,6 @@ import org.apache.beam.sdk.values.PCollection; import org.apache.beam.sdk.values.PDone; import org.apache.beam.sdk.values.TypeDescriptors; -import org.apache.beam.sdk.values.ValueInSingleWindow; import org.slf4j.Logger; @AutoValue @@ -61,8 +60,8 @@ public abstract class WriteToStore extends PTransform, P public static final String METRIC_NAMESPACE = "WriteToStore"; public static final String ELEMENTS_WRITTEN_METRIC = "elements_written"; - private static final Counter elementsWritten = Metrics - .counter(METRIC_NAMESPACE, ELEMENTS_WRITTEN_METRIC); + private static final Counter elementsWritten = + Metrics.counter(METRIC_NAMESPACE, ELEMENTS_WRITTEN_METRIC); public abstract Store getStore(); @@ -151,11 +150,14 @@ public void processElement(ProcessContext context) { break; } - input.apply("IncrementWriteToStoreElementsWrittenCounter", - MapElements.into(TypeDescriptors.booleans()).via((FeatureRow row) -> { - elementsWritten.inc(); - return true; - })); + input.apply( + "IncrementWriteToStoreElementsWrittenCounter", + MapElements.into(TypeDescriptors.booleans()) + .via( + (FeatureRow row) -> { + elementsWritten.inc(); + return true; + })); return PDone.in(input.getPipeline()); } diff --git a/ingestion/src/test/java/feast/ingestion/ImportJobTest.java b/ingestion/src/test/java/feast/ingestion/ImportJobTest.java index bd034341ec9..4a09bee82ff 100644 --- a/ingestion/src/test/java/feast/ingestion/ImportJobTest.java +++ 
b/ingestion/src/test/java/feast/ingestion/ImportJobTest.java @@ -170,12 +170,14 @@ public void runPipeline_ShouldWriteToRedisCorrectlyGivenValidSpecAndFeatureRow() Map expected = new HashMap<>(); LOGGER.info("Generating test data ..."); - IntStream.range(0, IMPORT_JOB_SAMPLE_FEATURE_ROW_SIZE).forEach(i -> { - FeatureRow randomRow = TestUtil.createRandomFeatureRow(spec); - RedisKey redisKey = TestUtil.createRedisKey(spec, randomRow); - input.add(randomRow); - expected.put(redisKey, randomRow); - }); + IntStream.range(0, IMPORT_JOB_SAMPLE_FEATURE_ROW_SIZE) + .forEach( + i -> { + FeatureRow randomRow = TestUtil.createRandomFeatureRow(spec); + RedisKey redisKey = TestUtil.createRedisKey(spec, randomRow); + input.add(randomRow); + expected.put(redisKey, randomRow); + }); LOGGER.info("Starting Import Job with the following options: {}", options.toString()); PipelineResult pipelineResult = ImportJob.runPipeline(options); @@ -183,43 +185,50 @@ public void runPipeline_ShouldWriteToRedisCorrectlyGivenValidSpecAndFeatureRow() Assert.assertEquals(pipelineResult.getState(), State.RUNNING); LOGGER.info("Publishing {} Feature Row messages to Kafka ...", input.size()); - TestUtil.publishFeatureRowsToKafka(KAFKA_BOOTSTRAP_SERVERS, KAFKA_TOPIC, input, - ByteArraySerializer.class, KAFKA_PUBLISH_TIMEOUT_SEC); - TestUtil.waitUntilAllElementsAreWrittenToStore(pipelineResult, + TestUtil.publishFeatureRowsToKafka( + KAFKA_BOOTSTRAP_SERVERS, + KAFKA_TOPIC, + input, + ByteArraySerializer.class, + KAFKA_PUBLISH_TIMEOUT_SEC); + TestUtil.waitUntilAllElementsAreWrittenToStore( + pipelineResult, Duration.standardSeconds(IMPORT_JOB_MAX_RUN_DURATION_SEC), Duration.standardSeconds(IMPORT_JOB_CHECK_INTERVAL_DURATION_SEC)); LOGGER.info("Validating the actual values written to Redis ..."); Jedis jedis = new Jedis(REDIS_HOST, REDIS_PORT); - expected.forEach((key, expectedValue) -> { - - // Ensure ingested key exists. 
- byte[] actualByteValue = jedis.get(key.toByteArray()); - if (actualByteValue == null) { - LOGGER.error("Key not found in Redis: " + key); - LOGGER.info("Redis INFO:"); - LOGGER.info(jedis.info()); - String randomKey = jedis.randomKey(); - if (randomKey != null) { - LOGGER.info("Sample random key, value (for debugging purpose):"); - LOGGER.info("Key: " + randomKey); - LOGGER.info("Value: " + jedis.get(randomKey)); - } - Assert.fail("Missing key in Redis."); - } - - // Ensure value is a valid serialized FeatureRow object. - FeatureRow actualValue = null; - try { - actualValue = FeatureRow.parseFrom(actualByteValue); - } catch (InvalidProtocolBufferException e) { - Assert.fail(String - .format("Actual Redis value cannot be parsed as FeatureRow, key: %s, value :%s", - key, new String(actualByteValue, StandardCharsets.UTF_8))); - } - - // Ensure the retrieved FeatureRow is equal to the ingested FeatureRow. - Assert.assertEquals(expectedValue, actualValue); - }); + expected.forEach( + (key, expectedValue) -> { + + // Ensure ingested key exists. + byte[] actualByteValue = jedis.get(key.toByteArray()); + if (actualByteValue == null) { + LOGGER.error("Key not found in Redis: " + key); + LOGGER.info("Redis INFO:"); + LOGGER.info(jedis.info()); + String randomKey = jedis.randomKey(); + if (randomKey != null) { + LOGGER.info("Sample random key, value (for debugging purpose):"); + LOGGER.info("Key: " + randomKey); + LOGGER.info("Value: " + jedis.get(randomKey)); + } + Assert.fail("Missing key in Redis."); + } + + // Ensure value is a valid serialized FeatureRow object. + FeatureRow actualValue = null; + try { + actualValue = FeatureRow.parseFrom(actualByteValue); + } catch (InvalidProtocolBufferException e) { + Assert.fail( + String.format( + "Actual Redis value cannot be parsed as FeatureRow, key: %s, value :%s", + key, new String(actualByteValue, StandardCharsets.UTF_8))); + } + + // Ensure the retrieved FeatureRow is equal to the ingested FeatureRow. 
+ Assert.assertEquals(expectedValue, actualValue); + }); } } diff --git a/ingestion/src/test/java/feast/test/TestUtil.java b/ingestion/src/test/java/feast/test/TestUtil.java index ef41f3950a5..d66ef4a97d9 100644 --- a/ingestion/src/test/java/feast/test/TestUtil.java +++ b/ingestion/src/test/java/feast/test/TestUtil.java @@ -352,15 +352,16 @@ public static Field field(String name, Object value, ValueType.Enum valueType) { /** * This blocking method waits until an ImportJob pipeline has written all elements to the store. - *

- * The pipeline must be in the RUNNING state before calling this method. * - * @param pipelineResult result of running the Pipeline + *

The pipeline must be in the RUNNING state before calling this method. + * + * @param pipelineResult result of running the Pipeline * @param maxWaitDuration wait until this max amount of duration * @throws InterruptedException if the thread is interruped while waiting */ - public static void waitUntilAllElementsAreWrittenToStore(PipelineResult pipelineResult, - Duration maxWaitDuration, Duration checkInterval) throws InterruptedException { + public static void waitUntilAllElementsAreWrittenToStore( + PipelineResult pipelineResult, Duration maxWaitDuration, Duration checkInterval) + throws InterruptedException { if (pipelineResult.getState().isTerminal()) { return; } diff --git a/protos/feast/core/CoreService.proto b/protos/feast/core/CoreService.proto index 2e0646e9730..9a9eaa64fdd 100644 --- a/protos/feast/core/CoreService.proto +++ b/protos/feast/core/CoreService.proto @@ -72,7 +72,7 @@ message GetFeatureSetRequest { // Response containing a single feature set message GetFeatureSetResponse { - feast.core.FeatureSetSpec feature_set = 1; + feast.core.FeatureSet feature_set = 1; } // Retrieves details for all versions of a specific feature set @@ -95,7 +95,7 @@ message ListFeatureSetsRequest { } message ListFeatureSetsResponse { - repeated feast.core.FeatureSetSpec feature_sets = 1; + repeated feast.core.FeatureSet feature_sets = 1; } message ListStoresRequest { @@ -113,7 +113,7 @@ message ListStoresResponse { message ApplyFeatureSetRequest { // Feature set version and source will be ignored - feast.core.FeatureSetSpec feature_set = 1; + feast.core.FeatureSet feature_set = 1; } message ApplyFeatureSetResponse { @@ -129,7 +129,7 @@ message ApplyFeatureSetResponse { } // Feature set response has been enriched with version and source information - feast.core.FeatureSetSpec feature_set = 1; + feast.core.FeatureSet feature_set = 1; Status status = 2; } diff --git a/protos/feast/core/FeatureSet.proto b/protos/feast/core/FeatureSet.proto index a80ae36f088..a5adf139bff 
100644 --- a/protos/feast/core/FeatureSet.proto +++ b/protos/feast/core/FeatureSet.proto @@ -25,6 +25,15 @@ option go_package = "github.com/gojek/feast/sdk/go/protos/feast/core"; import "feast/types/Value.proto"; import "feast/core/Source.proto"; import "google/protobuf/duration.proto"; +import "google/protobuf/timestamp.proto"; + +message FeatureSet { + // User-specified specifications of this feature set. + FeatureSetSpec spec = 1; + + // System-populated metadata for this feature set. + FeatureSetMeta meta = 2; +} message FeatureSetSpec { // Name of the featureSet. Must be unique. @@ -67,3 +76,23 @@ message FeatureSpec { // Value type of the feature. feast.types.ValueType.Enum value_type = 2; } + + +message FeatureSetMeta { + // Created timestamp of this specific feature set. + google.protobuf.Timestamp created_timestamp = 1; + + // Status of the feature set. + // Used to indicate whether the feature set is ready for consumption or ingestion. + // Currently supports 2 states: + // 1) STATUS_PENDING - A feature set is in pending state if Feast has not spun up the jobs + // necessary to push rows for this feature set to stores subscribing to this feature set. 
+ // 2) STATUS_READY - Feature set is ready for consumption or ingestion + FeatureSetStatus status = 2; +} + +enum FeatureSetStatus { + STATUS_INVALID = 0; + STATUS_PENDING = 1; + STATUS_READY = 2; +} \ No newline at end of file diff --git a/sdk/python/feast/client.py b/sdk/python/feast/client.py index 3af2e12a91e..20df828a0e8 100644 --- a/sdk/python/feast/client.py +++ b/sdk/python/feast/client.py @@ -20,6 +20,7 @@ from typing import Dict, Union from typing import List import grpc +import time import pandas as pd import pyarrow as pa import pyarrow.parquet as pq @@ -33,6 +34,7 @@ GetFeatureSetResponse, ) from feast.core.CoreService_pb2_grpc import CoreServiceStub +from feast.core.FeatureSet_pb2 import FeatureSetStatus from feast.exceptions import format_grpc_exception from feast.feature_set import FeatureSet, Entity from feast.job import Job @@ -519,10 +521,25 @@ def ingest( ref_df, discard_unused_fields=True, replace_existing_features=True ) self.apply(feature_set) - - feature_set = self.get_feature_set(name, version) + current_time = time.time() + + print("Waiting for feature set to be ready for ingestion...") + while True: + if timeout is not None and time.time() - current_time >= timeout: + raise TimeoutError("Timed out waiting for feature set to be ready") + feature_set = self.get_feature_set(name, version) + if ( + feature_set is not None + and feature_set.status == FeatureSetStatus.STATUS_READY + ): + break + time.sleep(3) + + if timeout is not None: + timeout = timeout - int(time.time() - current_time) if feature_set.source.source_type == "Kafka": + print("Ingesting to kafka...") ingest_table_to_kafka( feature_set=feature_set, table=table, diff --git a/sdk/python/feast/core/CoreService_pb2.py b/sdk/python/feast/core/CoreService_pb2.py index 69a5498d879..3185bece28e 100644 --- a/sdk/python/feast/core/CoreService_pb2.py +++ b/sdk/python/feast/core/CoreService_pb2.py @@ -22,7 +22,7 @@ package='feast.core', syntax='proto3', 
serialized_options=_b('\n\nfeast.coreB\020CoreServiceProtoZ/github.com/gojek/feast/sdk/go/protos/feast/core'), - serialized_pb=_b('\n\x1c\x66\x65\x61st/core/CoreService.proto\x12\nfeast.core\x1a\x1b\x66\x65\x61st/core/FeatureSet.proto\x1a\x16\x66\x65\x61st/core/Store.proto\"5\n\x14GetFeatureSetRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\x05\"H\n\x15GetFeatureSetResponse\x12/\n\x0b\x66\x65\x61ture_set\x18\x01 \x01(\x0b\x32\x1a.feast.core.FeatureSetSpec\"\x94\x01\n\x16ListFeatureSetsRequest\x12\x39\n\x06\x66ilter\x18\x01 \x01(\x0b\x32).feast.core.ListFeatureSetsRequest.Filter\x1a?\n\x06\x46ilter\x12\x18\n\x10\x66\x65\x61ture_set_name\x18\x01 \x01(\t\x12\x1b\n\x13\x66\x65\x61ture_set_version\x18\x02 \x01(\t\"K\n\x17ListFeatureSetsResponse\x12\x30\n\x0c\x66\x65\x61ture_sets\x18\x01 \x03(\x0b\x32\x1a.feast.core.FeatureSetSpec\"a\n\x11ListStoresRequest\x12\x34\n\x06\x66ilter\x18\x01 \x01(\x0b\x32$.feast.core.ListStoresRequest.Filter\x1a\x16\n\x06\x46ilter\x12\x0c\n\x04name\x18\x01 \x01(\t\"6\n\x12ListStoresResponse\x12 \n\x05store\x18\x01 \x03(\x0b\x32\x11.feast.core.Store\"I\n\x16\x41pplyFeatureSetRequest\x12/\n\x0b\x66\x65\x61ture_set\x18\x01 \x01(\x0b\x32\x1a.feast.core.FeatureSetSpec\"\xb7\x01\n\x17\x41pplyFeatureSetResponse\x12/\n\x0b\x66\x65\x61ture_set\x18\x01 \x01(\x0b\x32\x1a.feast.core.FeatureSetSpec\x12:\n\x06status\x18\x02 \x01(\x0e\x32*.feast.core.ApplyFeatureSetResponse.Status\"/\n\x06Status\x12\r\n\tNO_CHANGE\x10\x00\x12\x0b\n\x07\x43REATED\x10\x01\x12\t\n\x05\x45RROR\x10\x02\"\x1c\n\x1aGetFeastCoreVersionRequest\".\n\x1bGetFeastCoreVersionResponse\x12\x0f\n\x07version\x18\x01 \x01(\t\"6\n\x12UpdateStoreRequest\x12 \n\x05store\x18\x01 \x01(\x0b\x32\x11.feast.core.Store\"\x95\x01\n\x13UpdateStoreResponse\x12 \n\x05store\x18\x01 \x01(\x0b\x32\x11.feast.core.Store\x12\x36\n\x06status\x18\x02 
\x01(\x0e\x32&.feast.core.UpdateStoreResponse.Status\"$\n\x06Status\x12\r\n\tNO_CHANGE\x10\x00\x12\x0b\n\x07UPDATED\x10\x01\x32\xa0\x04\n\x0b\x43oreService\x12\x66\n\x13GetFeastCoreVersion\x12&.feast.core.GetFeastCoreVersionRequest\x1a\'.feast.core.GetFeastCoreVersionResponse\x12T\n\rGetFeatureSet\x12 .feast.core.GetFeatureSetRequest\x1a!.feast.core.GetFeatureSetResponse\x12Z\n\x0fListFeatureSets\x12\".feast.core.ListFeatureSetsRequest\x1a#.feast.core.ListFeatureSetsResponse\x12K\n\nListStores\x12\x1d.feast.core.ListStoresRequest\x1a\x1e.feast.core.ListStoresResponse\x12Z\n\x0f\x41pplyFeatureSet\x12\".feast.core.ApplyFeatureSetRequest\x1a#.feast.core.ApplyFeatureSetResponse\x12N\n\x0bUpdateStore\x12\x1e.feast.core.UpdateStoreRequest\x1a\x1f.feast.core.UpdateStoreResponseBO\n\nfeast.coreB\x10\x43oreServiceProtoZ/github.com/gojek/feast/sdk/go/protos/feast/coreb\x06proto3') + serialized_pb=_b('\n\x1c\x66\x65\x61st/core/CoreService.proto\x12\nfeast.core\x1a\x1b\x66\x65\x61st/core/FeatureSet.proto\x1a\x16\x66\x65\x61st/core/Store.proto\"5\n\x14GetFeatureSetRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\x05\"D\n\x15GetFeatureSetResponse\x12+\n\x0b\x66\x65\x61ture_set\x18\x01 \x01(\x0b\x32\x16.feast.core.FeatureSet\"\x94\x01\n\x16ListFeatureSetsRequest\x12\x39\n\x06\x66ilter\x18\x01 \x01(\x0b\x32).feast.core.ListFeatureSetsRequest.Filter\x1a?\n\x06\x46ilter\x12\x18\n\x10\x66\x65\x61ture_set_name\x18\x01 \x01(\t\x12\x1b\n\x13\x66\x65\x61ture_set_version\x18\x02 \x01(\t\"G\n\x17ListFeatureSetsResponse\x12,\n\x0c\x66\x65\x61ture_sets\x18\x01 \x03(\x0b\x32\x16.feast.core.FeatureSet\"a\n\x11ListStoresRequest\x12\x34\n\x06\x66ilter\x18\x01 \x01(\x0b\x32$.feast.core.ListStoresRequest.Filter\x1a\x16\n\x06\x46ilter\x12\x0c\n\x04name\x18\x01 \x01(\t\"6\n\x12ListStoresResponse\x12 \n\x05store\x18\x01 \x03(\x0b\x32\x11.feast.core.Store\"E\n\x16\x41pplyFeatureSetRequest\x12+\n\x0b\x66\x65\x61ture_set\x18\x01 
\x01(\x0b\x32\x16.feast.core.FeatureSet\"\xb3\x01\n\x17\x41pplyFeatureSetResponse\x12+\n\x0b\x66\x65\x61ture_set\x18\x01 \x01(\x0b\x32\x16.feast.core.FeatureSet\x12:\n\x06status\x18\x02 \x01(\x0e\x32*.feast.core.ApplyFeatureSetResponse.Status\"/\n\x06Status\x12\r\n\tNO_CHANGE\x10\x00\x12\x0b\n\x07\x43REATED\x10\x01\x12\t\n\x05\x45RROR\x10\x02\"\x1c\n\x1aGetFeastCoreVersionRequest\".\n\x1bGetFeastCoreVersionResponse\x12\x0f\n\x07version\x18\x01 \x01(\t\"6\n\x12UpdateStoreRequest\x12 \n\x05store\x18\x01 \x01(\x0b\x32\x11.feast.core.Store\"\x95\x01\n\x13UpdateStoreResponse\x12 \n\x05store\x18\x01 \x01(\x0b\x32\x11.feast.core.Store\x12\x36\n\x06status\x18\x02 \x01(\x0e\x32&.feast.core.UpdateStoreResponse.Status\"$\n\x06Status\x12\r\n\tNO_CHANGE\x10\x00\x12\x0b\n\x07UPDATED\x10\x01\x32\xa0\x04\n\x0b\x43oreService\x12\x66\n\x13GetFeastCoreVersion\x12&.feast.core.GetFeastCoreVersionRequest\x1a\'.feast.core.GetFeastCoreVersionResponse\x12T\n\rGetFeatureSet\x12 .feast.core.GetFeatureSetRequest\x1a!.feast.core.GetFeatureSetResponse\x12Z\n\x0fListFeatureSets\x12\".feast.core.ListFeatureSetsRequest\x1a#.feast.core.ListFeatureSetsResponse\x12K\n\nListStores\x12\x1d.feast.core.ListStoresRequest\x1a\x1e.feast.core.ListStoresResponse\x12Z\n\x0f\x41pplyFeatureSet\x12\".feast.core.ApplyFeatureSetRequest\x1a#.feast.core.ApplyFeatureSetResponse\x12N\n\x0bUpdateStore\x12\x1e.feast.core.UpdateStoreRequest\x1a\x1f.feast.core.UpdateStoreResponseBO\n\nfeast.coreB\x10\x43oreServiceProtoZ/github.com/gojek/feast/sdk/go/protos/feast/coreb\x06proto3') , dependencies=[feast_dot_core_dot_FeatureSet__pb2.DESCRIPTOR,feast_dot_core_dot_Store__pb2.DESCRIPTOR,]) @@ -49,8 +49,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=821, - serialized_end=868, + serialized_start=805, + serialized_end=852, ) _sym_db.RegisterEnumDescriptor(_APPLYFEATURESETRESPONSE_STATUS) @@ -71,8 +71,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=1118, - serialized_end=1154, + 
serialized_start=1102, + serialized_end=1138, ) _sym_db.RegisterEnumDescriptor(_UPDATESTORERESPONSE_STATUS) @@ -142,7 +142,7 @@ oneofs=[ ], serialized_start=152, - serialized_end=224, + serialized_end=220, ) @@ -179,8 +179,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=312, - serialized_end=375, + serialized_start=308, + serialized_end=371, ) _LISTFEATURESETSREQUEST = _descriptor.Descriptor( @@ -209,8 +209,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=227, - serialized_end=375, + serialized_start=223, + serialized_end=371, ) @@ -240,8 +240,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=377, - serialized_end=452, + serialized_start=373, + serialized_end=444, ) @@ -271,8 +271,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=529, - serialized_end=551, + serialized_start=521, + serialized_end=543, ) _LISTSTORESREQUEST = _descriptor.Descriptor( @@ -301,8 +301,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=454, - serialized_end=551, + serialized_start=446, + serialized_end=543, ) @@ -332,8 +332,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=553, - serialized_end=607, + serialized_start=545, + serialized_end=599, ) @@ -363,8 +363,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=609, - serialized_end=682, + serialized_start=601, + serialized_end=670, ) @@ -402,8 +402,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=685, - serialized_end=868, + serialized_start=673, + serialized_end=852, ) @@ -426,8 +426,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=870, - serialized_end=898, + serialized_start=854, + serialized_end=882, ) @@ -457,8 +457,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=900, - serialized_end=946, + serialized_start=884, + serialized_end=930, ) @@ -488,8 +488,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=948, - serialized_end=1002, + serialized_start=932, + serialized_end=986, ) @@ -527,19 +527,19 @@ extension_ranges=[], oneofs=[ ], - 
serialized_start=1005, - serialized_end=1154, + serialized_start=989, + serialized_end=1138, ) -_GETFEATURESETRESPONSE.fields_by_name['feature_set'].message_type = feast_dot_core_dot_FeatureSet__pb2._FEATURESETSPEC +_GETFEATURESETRESPONSE.fields_by_name['feature_set'].message_type = feast_dot_core_dot_FeatureSet__pb2._FEATURESET _LISTFEATURESETSREQUEST_FILTER.containing_type = _LISTFEATURESETSREQUEST _LISTFEATURESETSREQUEST.fields_by_name['filter'].message_type = _LISTFEATURESETSREQUEST_FILTER -_LISTFEATURESETSRESPONSE.fields_by_name['feature_sets'].message_type = feast_dot_core_dot_FeatureSet__pb2._FEATURESETSPEC +_LISTFEATURESETSRESPONSE.fields_by_name['feature_sets'].message_type = feast_dot_core_dot_FeatureSet__pb2._FEATURESET _LISTSTORESREQUEST_FILTER.containing_type = _LISTSTORESREQUEST _LISTSTORESREQUEST.fields_by_name['filter'].message_type = _LISTSTORESREQUEST_FILTER _LISTSTORESRESPONSE.fields_by_name['store'].message_type = feast_dot_core_dot_Store__pb2._STORE -_APPLYFEATURESETREQUEST.fields_by_name['feature_set'].message_type = feast_dot_core_dot_FeatureSet__pb2._FEATURESETSPEC -_APPLYFEATURESETRESPONSE.fields_by_name['feature_set'].message_type = feast_dot_core_dot_FeatureSet__pb2._FEATURESETSPEC +_APPLYFEATURESETREQUEST.fields_by_name['feature_set'].message_type = feast_dot_core_dot_FeatureSet__pb2._FEATURESET +_APPLYFEATURESETRESPONSE.fields_by_name['feature_set'].message_type = feast_dot_core_dot_FeatureSet__pb2._FEATURESET _APPLYFEATURESETRESPONSE.fields_by_name['status'].enum_type = _APPLYFEATURESETRESPONSE_STATUS _APPLYFEATURESETRESPONSE_STATUS.containing_type = _APPLYFEATURESETRESPONSE _UPDATESTOREREQUEST.fields_by_name['store'].message_type = feast_dot_core_dot_Store__pb2._STORE @@ -669,8 +669,8 @@ file=DESCRIPTOR, index=0, serialized_options=None, - serialized_start=1157, - serialized_end=1701, + serialized_start=1141, + serialized_end=1685, methods=[ _descriptor.MethodDescriptor( name='GetFeastCoreVersion', diff --git 
a/sdk/python/feast/core/CoreService_pb2.pyi b/sdk/python/feast/core/CoreService_pb2.pyi index 0bf897000bc..5cd6eaf6717 100644 --- a/sdk/python/feast/core/CoreService_pb2.pyi +++ b/sdk/python/feast/core/CoreService_pb2.pyi @@ -1,7 +1,7 @@ # @generated by generate_proto_mypy_stubs.py. Do not edit! import sys from feast.core.FeatureSet_pb2 import ( - FeatureSetSpec as feast___core___FeatureSet_pb2___FeatureSetSpec, + FeatureSet as feast___core___FeatureSet_pb2___FeatureSet, ) from feast.core.Store_pb2 import ( @@ -58,11 +58,11 @@ class GetFeatureSetResponse(google___protobuf___message___Message): DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... @property - def feature_set(self) -> feast___core___FeatureSet_pb2___FeatureSetSpec: ... + def feature_set(self) -> feast___core___FeatureSet_pb2___FeatureSet: ... def __init__(self, *, - feature_set : typing___Optional[feast___core___FeatureSet_pb2___FeatureSetSpec] = None, + feature_set : typing___Optional[feast___core___FeatureSet_pb2___FeatureSet] = None, ) -> None: ... @classmethod def FromString(cls, s: bytes) -> GetFeatureSetResponse: ... @@ -119,11 +119,11 @@ class ListFeatureSetsResponse(google___protobuf___message___Message): DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... @property - def feature_sets(self) -> google___protobuf___internal___containers___RepeatedCompositeFieldContainer[feast___core___FeatureSet_pb2___FeatureSetSpec]: ... + def feature_sets(self) -> google___protobuf___internal___containers___RepeatedCompositeFieldContainer[feast___core___FeatureSet_pb2___FeatureSet]: ... def __init__(self, *, - feature_sets : typing___Optional[typing___Iterable[feast___core___FeatureSet_pb2___FeatureSetSpec]] = None, + feature_sets : typing___Optional[typing___Iterable[feast___core___FeatureSet_pb2___FeatureSet]] = None, ) -> None: ... @classmethod def FromString(cls, s: bytes) -> ListFeatureSetsResponse: ... 
@@ -195,11 +195,11 @@ class ApplyFeatureSetRequest(google___protobuf___message___Message): DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... @property - def feature_set(self) -> feast___core___FeatureSet_pb2___FeatureSetSpec: ... + def feature_set(self) -> feast___core___FeatureSet_pb2___FeatureSet: ... def __init__(self, *, - feature_set : typing___Optional[feast___core___FeatureSet_pb2___FeatureSetSpec] = None, + feature_set : typing___Optional[feast___core___FeatureSet_pb2___FeatureSet] = None, ) -> None: ... @classmethod def FromString(cls, s: bytes) -> ApplyFeatureSetRequest: ... @@ -236,11 +236,11 @@ class ApplyFeatureSetResponse(google___protobuf___message___Message): status = ... # type: ApplyFeatureSetResponse.Status @property - def feature_set(self) -> feast___core___FeatureSet_pb2___FeatureSetSpec: ... + def feature_set(self) -> feast___core___FeatureSet_pb2___FeatureSet: ... def __init__(self, *, - feature_set : typing___Optional[feast___core___FeatureSet_pb2___FeatureSetSpec] = None, + feature_set : typing___Optional[feast___core___FeatureSet_pb2___FeatureSet] = None, status : typing___Optional[ApplyFeatureSetResponse.Status] = None, ) -> None: ... 
@classmethod diff --git a/sdk/python/feast/core/FeatureSet_pb2.py b/sdk/python/feast/core/FeatureSet_pb2.py index 8c331db16b1..0cb77f1a70b 100644 --- a/sdk/python/feast/core/FeatureSet_pb2.py +++ b/sdk/python/feast/core/FeatureSet_pb2.py @@ -4,6 +4,7 @@ import sys _b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +from google.protobuf.internal import enum_type_wrapper from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection @@ -16,6 +17,7 @@ from feast.types import Value_pb2 as feast_dot_types_dot_Value__pb2 from feast.core import Source_pb2 as feast_dot_core_dot_Source__pb2 from google.protobuf import duration_pb2 as google_dot_protobuf_dot_duration__pb2 +from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 DESCRIPTOR = _descriptor.FileDescriptor( @@ -23,11 +25,79 @@ package='feast.core', syntax='proto3', serialized_options=_b('\n\nfeast.coreB\017FeatureSetProtoZ/github.com/gojek/feast/sdk/go/protos/feast/core'), - serialized_pb=_b('\n\x1b\x66\x65\x61st/core/FeatureSet.proto\x12\nfeast.core\x1a\x17\x66\x65\x61st/types/Value.proto\x1a\x17\x66\x65\x61st/core/Source.proto\x1a\x1egoogle/protobuf/duration.proto\"\xd4\x01\n\x0e\x46\x65\x61tureSetSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\x05\x12(\n\x08\x65ntities\x18\x03 \x03(\x0b\x32\x16.feast.core.EntitySpec\x12)\n\x08\x66\x65\x61tures\x18\x04 \x03(\x0b\x32\x17.feast.core.FeatureSpec\x12*\n\x07max_age\x18\x05 \x01(\x0b\x32\x19.google.protobuf.Duration\x12\"\n\x06source\x18\x06 \x01(\x0b\x32\x12.feast.core.Source\"K\n\nEntitySpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12/\n\nvalue_type\x18\x02 \x01(\x0e\x32\x1b.feast.types.ValueType.Enum\"L\n\x0b\x46\x65\x61tureSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12/\n\nvalue_type\x18\x02 
\x01(\x0e\x32\x1b.feast.types.ValueType.EnumBN\n\nfeast.coreB\x0f\x46\x65\x61tureSetProtoZ/github.com/gojek/feast/sdk/go/protos/feast/coreb\x06proto3') + serialized_pb=_b('\n\x1b\x66\x65\x61st/core/FeatureSet.proto\x12\nfeast.core\x1a\x17\x66\x65\x61st/types/Value.proto\x1a\x17\x66\x65\x61st/core/Source.proto\x1a\x1egoogle/protobuf/duration.proto\x1a\x1fgoogle/protobuf/timestamp.proto\"`\n\nFeatureSet\x12(\n\x04spec\x18\x01 \x01(\x0b\x32\x1a.feast.core.FeatureSetSpec\x12(\n\x04meta\x18\x02 \x01(\x0b\x32\x1a.feast.core.FeatureSetMeta\"\xd4\x01\n\x0e\x46\x65\x61tureSetSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\x05\x12(\n\x08\x65ntities\x18\x03 \x03(\x0b\x32\x16.feast.core.EntitySpec\x12)\n\x08\x66\x65\x61tures\x18\x04 \x03(\x0b\x32\x17.feast.core.FeatureSpec\x12*\n\x07max_age\x18\x05 \x01(\x0b\x32\x19.google.protobuf.Duration\x12\"\n\x06source\x18\x06 \x01(\x0b\x32\x12.feast.core.Source\"K\n\nEntitySpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12/\n\nvalue_type\x18\x02 \x01(\x0e\x32\x1b.feast.types.ValueType.Enum\"L\n\x0b\x46\x65\x61tureSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12/\n\nvalue_type\x18\x02 \x01(\x0e\x32\x1b.feast.types.ValueType.Enum\"u\n\x0e\x46\x65\x61tureSetMeta\x12\x35\n\x11\x63reated_timestamp\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12,\n\x06status\x18\x02 \x01(\x0e\x32\x1c.feast.core.FeatureSetStatus*L\n\x10\x46\x65\x61tureSetStatus\x12\x12\n\x0eSTATUS_INVALID\x10\x00\x12\x12\n\x0eSTATUS_PENDING\x10\x01\x12\x10\n\x0cSTATUS_READY\x10\x02\x42N\n\nfeast.coreB\x0f\x46\x65\x61tureSetProtoZ/github.com/gojek/feast/sdk/go/protos/feast/coreb\x06proto3') , - dependencies=[feast_dot_types_dot_Value__pb2.DESCRIPTOR,feast_dot_core_dot_Source__pb2.DESCRIPTOR,google_dot_protobuf_dot_duration__pb2.DESCRIPTOR,]) + dependencies=[feast_dot_types_dot_Value__pb2.DESCRIPTOR,feast_dot_core_dot_Source__pb2.DESCRIPTOR,google_dot_protobuf_dot_duration__pb2.DESCRIPTOR,google_dot_protobuf_dot_timestamp__pb2.DESCRIPTOR,]) + 
+_FEATURESETSTATUS = _descriptor.EnumDescriptor( + name='FeatureSetStatus', + full_name='feast.core.FeatureSetStatus', + filename=None, + file=DESCRIPTOR, + values=[ + _descriptor.EnumValueDescriptor( + name='STATUS_INVALID', index=0, number=0, + serialized_options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='STATUS_PENDING', index=1, number=1, + serialized_options=None, + type=None), + _descriptor.EnumValueDescriptor( + name='STATUS_READY', index=2, number=2, + serialized_options=None, + type=None), + ], + containing_type=None, + serialized_options=None, + serialized_start=745, + serialized_end=821, +) +_sym_db.RegisterEnumDescriptor(_FEATURESETSTATUS) + +FeatureSetStatus = enum_type_wrapper.EnumTypeWrapper(_FEATURESETSTATUS) +STATUS_INVALID = 0 +STATUS_PENDING = 1 +STATUS_READY = 2 + +_FEATURESET = _descriptor.Descriptor( + name='FeatureSet', + full_name='feast.core.FeatureSet', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='spec', full_name='feast.core.FeatureSet.spec', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='meta', full_name='feast.core.FeatureSet.meta', index=1, + number=2, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=158, + serialized_end=254, +) _FEATURESETSPEC = _descriptor.Descriptor( @@ -91,8 +161,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=126, - 
serialized_end=338, + serialized_start=257, + serialized_end=469, ) @@ -129,8 +199,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=340, - serialized_end=415, + serialized_start=471, + serialized_end=546, ) @@ -167,21 +237,73 @@ extension_ranges=[], oneofs=[ ], - serialized_start=417, - serialized_end=493, + serialized_start=548, + serialized_end=624, ) + +_FEATURESETMETA = _descriptor.Descriptor( + name='FeatureSetMeta', + full_name='feast.core.FeatureSetMeta', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='created_timestamp', full_name='feast.core.FeatureSetMeta.created_timestamp', index=0, + number=1, type=11, cpp_type=10, label=1, + has_default_value=False, default_value=None, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='status', full_name='feast.core.FeatureSetMeta.status', index=1, + number=2, type=14, cpp_type=8, label=1, + has_default_value=False, default_value=0, + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=626, + serialized_end=743, +) + +_FEATURESET.fields_by_name['spec'].message_type = _FEATURESETSPEC +_FEATURESET.fields_by_name['meta'].message_type = _FEATURESETMETA _FEATURESETSPEC.fields_by_name['entities'].message_type = _ENTITYSPEC _FEATURESETSPEC.fields_by_name['features'].message_type = _FEATURESPEC _FEATURESETSPEC.fields_by_name['max_age'].message_type = google_dot_protobuf_dot_duration__pb2._DURATION _FEATURESETSPEC.fields_by_name['source'].message_type = feast_dot_core_dot_Source__pb2._SOURCE _ENTITYSPEC.fields_by_name['value_type'].enum_type = 
feast_dot_types_dot_Value__pb2._VALUETYPE_ENUM _FEATURESPEC.fields_by_name['value_type'].enum_type = feast_dot_types_dot_Value__pb2._VALUETYPE_ENUM +_FEATURESETMETA.fields_by_name['created_timestamp'].message_type = google_dot_protobuf_dot_timestamp__pb2._TIMESTAMP +_FEATURESETMETA.fields_by_name['status'].enum_type = _FEATURESETSTATUS +DESCRIPTOR.message_types_by_name['FeatureSet'] = _FEATURESET DESCRIPTOR.message_types_by_name['FeatureSetSpec'] = _FEATURESETSPEC DESCRIPTOR.message_types_by_name['EntitySpec'] = _ENTITYSPEC DESCRIPTOR.message_types_by_name['FeatureSpec'] = _FEATURESPEC +DESCRIPTOR.message_types_by_name['FeatureSetMeta'] = _FEATURESETMETA +DESCRIPTOR.enum_types_by_name['FeatureSetStatus'] = _FEATURESETSTATUS _sym_db.RegisterFileDescriptor(DESCRIPTOR) +FeatureSet = _reflection.GeneratedProtocolMessageType('FeatureSet', (_message.Message,), { + 'DESCRIPTOR' : _FEATURESET, + '__module__' : 'feast.core.FeatureSet_pb2' + # @@protoc_insertion_point(class_scope:feast.core.FeatureSet) + }) +_sym_db.RegisterMessage(FeatureSet) + FeatureSetSpec = _reflection.GeneratedProtocolMessageType('FeatureSetSpec', (_message.Message,), { 'DESCRIPTOR' : _FEATURESETSPEC, '__module__' : 'feast.core.FeatureSet_pb2' @@ -203,6 +325,13 @@ }) _sym_db.RegisterMessage(FeatureSpec) +FeatureSetMeta = _reflection.GeneratedProtocolMessageType('FeatureSetMeta', (_message.Message,), { + 'DESCRIPTOR' : _FEATURESETMETA, + '__module__' : 'feast.core.FeatureSet_pb2' + # @@protoc_insertion_point(class_scope:feast.core.FeatureSetMeta) + }) +_sym_db.RegisterMessage(FeatureSetMeta) + DESCRIPTOR._options = None # @@protoc_insertion_point(module_scope) diff --git a/sdk/python/feast/core/FeatureSet_pb2.pyi b/sdk/python/feast/core/FeatureSet_pb2.pyi index 5d93721fe16..6fa03ed3592 100644 --- a/sdk/python/feast/core/FeatureSet_pb2.pyi +++ b/sdk/python/feast/core/FeatureSet_pb2.pyi @@ -10,6 +10,7 @@ from feast.types.Value_pb2 import ( from google.protobuf.descriptor import ( Descriptor as 
google___protobuf___descriptor___Descriptor, + EnumDescriptor as google___protobuf___descriptor___EnumDescriptor, ) from google.protobuf.duration_pb2 import ( @@ -24,10 +25,17 @@ from google.protobuf.message import ( Message as google___protobuf___message___Message, ) +from google.protobuf.timestamp_pb2 import ( + Timestamp as google___protobuf___timestamp_pb2___Timestamp, +) + from typing import ( Iterable as typing___Iterable, + List as typing___List, Optional as typing___Optional, Text as typing___Text, + Tuple as typing___Tuple, + cast as typing___cast, ) from typing_extensions import ( @@ -35,6 +43,50 @@ from typing_extensions import ( ) +class FeatureSetStatus(int): + DESCRIPTOR: google___protobuf___descriptor___EnumDescriptor = ... + @classmethod + def Name(cls, number: int) -> str: ... + @classmethod + def Value(cls, name: str) -> FeatureSetStatus: ... + @classmethod + def keys(cls) -> typing___List[str]: ... + @classmethod + def values(cls) -> typing___List[FeatureSetStatus]: ... + @classmethod + def items(cls) -> typing___List[typing___Tuple[str, FeatureSetStatus]]: ... + STATUS_INVALID = typing___cast(FeatureSetStatus, 0) + STATUS_PENDING = typing___cast(FeatureSetStatus, 1) + STATUS_READY = typing___cast(FeatureSetStatus, 2) +STATUS_INVALID = typing___cast(FeatureSetStatus, 0) +STATUS_PENDING = typing___cast(FeatureSetStatus, 1) +STATUS_READY = typing___cast(FeatureSetStatus, 2) + +class FeatureSet(google___protobuf___message___Message): + DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... + + @property + def spec(self) -> FeatureSetSpec: ... + + @property + def meta(self) -> FeatureSetMeta: ... + + def __init__(self, + *, + spec : typing___Optional[FeatureSetSpec] = None, + meta : typing___Optional[FeatureSetMeta] = None, + ) -> None: ... + @classmethod + def FromString(cls, s: bytes) -> FeatureSet: ... + def MergeFrom(self, other_msg: google___protobuf___message___Message) -> None: ... 
+ def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ... + if sys.version_info >= (3,): + def HasField(self, field_name: typing_extensions___Literal[u"meta",u"spec"]) -> bool: ... + def ClearField(self, field_name: typing_extensions___Literal[u"meta",u"spec"]) -> None: ... + else: + def HasField(self, field_name: typing_extensions___Literal[u"meta",b"meta",u"spec",b"spec"]) -> bool: ... + def ClearField(self, field_name: typing_extensions___Literal[u"meta",b"meta",u"spec",b"spec"]) -> None: ... + class FeatureSetSpec(google___protobuf___message___Message): DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... name = ... # type: typing___Text @@ -109,3 +161,26 @@ class FeatureSpec(google___protobuf___message___Message): def ClearField(self, field_name: typing_extensions___Literal[u"name",u"value_type"]) -> None: ... else: def ClearField(self, field_name: typing_extensions___Literal[u"name",b"name",u"value_type",b"value_type"]) -> None: ... + +class FeatureSetMeta(google___protobuf___message___Message): + DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... + status = ... # type: FeatureSetStatus + + @property + def created_timestamp(self) -> google___protobuf___timestamp_pb2___Timestamp: ... + + def __init__(self, + *, + created_timestamp : typing___Optional[google___protobuf___timestamp_pb2___Timestamp] = None, + status : typing___Optional[FeatureSetStatus] = None, + ) -> None: ... + @classmethod + def FromString(cls, s: bytes) -> FeatureSetMeta: ... + def MergeFrom(self, other_msg: google___protobuf___message___Message) -> None: ... + def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ... + if sys.version_info >= (3,): + def HasField(self, field_name: typing_extensions___Literal[u"created_timestamp"]) -> bool: ... + def ClearField(self, field_name: typing_extensions___Literal[u"created_timestamp",u"status"]) -> None: ... 
+ else: + def HasField(self, field_name: typing_extensions___Literal[u"created_timestamp",b"created_timestamp"]) -> bool: ... + def ClearField(self, field_name: typing_extensions___Literal[u"created_timestamp",b"created_timestamp",u"status",b"status"]) -> None: ... diff --git a/sdk/python/feast/feature_set.py b/sdk/python/feast/feature_set.py index 893378e8fac..85d8e137530 100644 --- a/sdk/python/feast/feature_set.py +++ b/sdk/python/feast/feature_set.py @@ -22,7 +22,11 @@ from feast.entity import Entity from feast.feature import Feature, Field from feast.core.FeatureSet_pb2 import FeatureSetSpec as FeatureSetSpecProto +from feast.core.FeatureSet_pb2 import FeatureSetMeta as FeatureSetMetaProto +from feast.core.FeatureSet_pb2 import FeatureSet as FeatureSetProto +from feast.core.FeatureSet_pb2 import FeatureSetStatus from google.protobuf.duration_pb2 import Duration +from google.protobuf.timestamp_pb2 import Timestamp from feast.type_map import python_type_to_feast_value_type from google.protobuf.json_format import MessageToJson from google.protobuf import json_format @@ -41,7 +45,7 @@ def __init__( features: List[Feature] = None, entities: List[Entity] = None, source: Source = None, - max_age: Optional[Duration] = None, + max_age: Optional[Duration] = None ): self._name = name self._fields = OrderedDict() # type: Dict[str, Field] @@ -56,6 +60,8 @@ def __init__( self._max_age = max_age self._version = None self._client = None + self._status = None + self._created_timestamp = None def __eq__(self, other): if not isinstance(other, FeatureSet): @@ -195,6 +201,34 @@ def max_age(self, max_age): """ self._max_age = max_age + @property + def status(self): + """ + Returns the status of this feature set + """ + return self._status + + @status.setter + def status(self, status): + """ + Sets the status of this feature set + """ + self._status = status + + @property + def created_timestamp(self): + """ + Returns the created_timestamp of this feature set + """ + return 
self._created_timestamp + + @created_timestamp.setter + def created_timestamp(self, created_timestamp): + """ + Sets the status of this feature set + """ + self._created_timestamp = created_timestamp + def add(self, resource): """ Adds a resource (Feature, Entity) to this Feature Set. @@ -388,6 +422,8 @@ def update_from_feature_set(self, feature_set): self.features = feature_set.features self.entities = feature_set.entities self.source = feature_set.source + self.status = feature_set.status + self.created_timestamp = feature_set.created_timestamp def get_kafka_source_brokers(self) -> str: """ @@ -443,49 +479,56 @@ def from_dict(cls, fs_dict): if ("kind" not in fs_dict) and (fs_dict["kind"].strip() != "feature_set"): raise Exception(f"Resource kind is not a feature set {str(fs_dict)}") feature_set_proto = json_format.ParseDict( - fs_dict, FeatureSetSpecProto(), ignore_unknown_fields=True + fs_dict, FeatureSetProto(), ignore_unknown_fields=True ) return cls.from_proto(feature_set_proto) @classmethod - def from_proto(cls, feature_set_proto: FeatureSetSpecProto): + def from_proto(cls, feature_set_proto: FeatureSetProto): """ Creates a feature set from a protobuf representation of a feature set Args: - from_proto: A protobuf representation of a feature set + feature_set_proto: A protobuf representation of a feature set Returns: Returns a FeatureSet object based on the feature set protobuf """ feature_set = cls( - name=feature_set_proto.name, + name=feature_set_proto.spec.name, features=[ - Feature.from_proto(feature) for feature in feature_set_proto.features + Feature.from_proto(feature) + for feature in feature_set_proto.spec.features ], entities=[ - Entity.from_proto(entity) for entity in feature_set_proto.entities + Entity.from_proto(entity) for entity in feature_set_proto.spec.entities ], - max_age=feature_set_proto.max_age, + max_age=feature_set_proto.spec.max_age, source=( None - if feature_set_proto.source.type == 0 - else 
Source.from_proto(feature_set_proto.source) - ), + if feature_set_proto.spec.source.type == 0 + else Source.from_proto(feature_set_proto.spec.source) + ) ) - feature_set._version = feature_set_proto.version + feature_set._version = feature_set_proto.spec.version + feature_set._status = feature_set_proto.meta.status + feature_set._created_timestamp = feature_set_proto.meta.created_timestamp return feature_set - def to_proto(self) -> FeatureSetSpecProto: + def to_proto(self) -> FeatureSetProto: """ Converts a feature set object to its protobuf representation Returns: - FeatureSetSpec protobuf + FeatureSetProto protobuf """ - return FeatureSetSpecProto( + meta = FeatureSetMetaProto( + created_timestamp=self.created_timestamp, status=self.status + ) + + spec = FeatureSetSpecProto( name=self.name, version=self.version, max_age=self.max_age, @@ -502,6 +545,8 @@ def to_proto(self) -> FeatureSetSpecProto: ], ) + return FeatureSetProto(spec=spec, meta=meta) + def _infer_pd_column_type(column, series, rows_to_sample): dtype = None diff --git a/sdk/python/feast/loaders/ingest.py b/sdk/python/feast/loaders/ingest.py index 23ba2ecb3b4..a59c7c66b1a 100644 --- a/sdk/python/feast/loaders/ingest.py +++ b/sdk/python/feast/loaders/ingest.py @@ -200,8 +200,8 @@ def ingest_table_to_kafka( ): # Push rows onto a queue for the production process to pick up row_queue.put(row) - while row_queue.qsize() > chunk_size: - time.sleep(0.1) + # while row_queue.qsize() > chunk_size: + # time.sleep(0.1) row_queue.put(None) except Exception as ex: _logger.error(f"Exception occurred: {ex}") diff --git a/sdk/python/tests/feast_core_server.py b/sdk/python/tests/feast_core_server.py index f547ba273bc..61688f65044 100644 --- a/sdk/python/tests/feast_core_server.py +++ b/sdk/python/tests/feast_core_server.py @@ -10,11 +10,17 @@ ListFeatureSetsResponse, ListFeatureSetsRequest, ) -from feast.core.FeatureSet_pb2 import FeatureSetSpec as FeatureSetSpec +from google.protobuf.timestamp_pb2 import Timestamp +from 
feast.core.FeatureSet_pb2 import ( + FeatureSetSpec as FeatureSetSpec, + FeatureSetMeta, + FeatureSetStatus, +) from feast.core.Source_pb2 import ( SourceType as SourceTypeProto, KafkaSourceConfig as KafkaSourceConfigProto, ) +from feast.core.FeatureSet_pb2 import FeatureSet as FeatureSetProto from typing import List _logger = logging.getLogger(__name__) @@ -49,31 +55,37 @@ def ListFeatureSets(self, request: ListFeatureSetsRequest, context): def ApplyFeatureSet(self, request: ApplyFeatureSetRequest, context): feature_set = request.feature_set - if feature_set.version is None: - feature_set.version = 1 + if feature_set.spec.version is None: + feature_set.spec.version = 1 else: - feature_set.version = feature_set.version + 1 + feature_set.spec.version = feature_set.spec.version + 1 - if feature_set.source.type == SourceTypeProto.INVALID: - feature_set.source.kafka_source_config.CopyFrom( + if feature_set.spec.source.type == SourceTypeProto.INVALID: + feature_set.spec.source.kafka_source_config.CopyFrom( KafkaSourceConfigProto(bootstrap_servers="server.com", topic="topic1") ) - feature_set.source.type = SourceTypeProto.KAFKA + feature_set.spec.source.type = SourceTypeProto.KAFKA - self._feature_sets[feature_set.name] = feature_set + feature_set_meta = FeatureSetMeta( + status=FeatureSetStatus.STATUS_READY, + created_timestamp=Timestamp(seconds=10), + ) + applied_feature_set = FeatureSetProto(spec=feature_set.spec, meta=feature_set_meta) + self._feature_sets[feature_set.spec.name] = applied_feature_set _logger.info( "registered feature set " - + feature_set.name + + feature_set.spec.name + " with " - + str(len(feature_set.entities)) + + str(len(feature_set.spec.entities)) + " entities and " - + str(len(feature_set.features)) + + str(len(feature_set.spec.features)) + " features" ) return ApplyFeatureSetResponse( - feature_set=feature_set, status=ApplyFeatureSetResponse.Status.CREATED + feature_set=applied_feature_set, + status=ApplyFeatureSetResponse.Status.CREATED, ) 
diff --git a/sdk/python/tests/test_client.py b/sdk/python/tests/test_client.py index 2243ebfd1b3..8996b7543c3 100644 --- a/sdk/python/tests/test_client.py +++ b/sdk/python/tests/test_client.py @@ -26,7 +26,14 @@ from feast.entity import Entity from feast.feature_set import Feature from feast.source import KafkaSource -from feast.core.FeatureSet_pb2 import FeatureSetSpec, FeatureSpec, EntitySpec +from feast.core.FeatureSet_pb2 import ( + FeatureSetSpec, + FeatureSpec, + EntitySpec, + FeatureSetMeta, + FeatureSetStatus, +) +from feast.core.FeatureSet_pb2 import FeatureSet as FeatureSetProto from feast.core.Source_pb2 import SourceType, KafkaSourceConfig, Source from feast.core.CoreService_pb2 import ( GetFeastCoreVersionResponse, @@ -178,29 +185,34 @@ def test_get_feature_set(self, mock_client, mocker): mock_client._core_service_stub, "GetFeatureSet", return_value=GetFeatureSetResponse( - feature_set=FeatureSetSpec( - name="my_feature_set", - version=2, - max_age=Duration(seconds=3600), - features=[ - FeatureSpec( - name="my_feature_1", value_type=ValueProto.ValueType.FLOAT - ), - FeatureSpec( - name="my_feature_2", value_type=ValueProto.ValueType.FLOAT - ), - ], - entities=[ - EntitySpec( - name="my_entity_1", value_type=ValueProto.ValueType.INT64 - ) - ], - source=Source( - type=SourceType.KAFKA, - kafka_source_config=KafkaSourceConfig( - bootstrap_servers="localhost:9092", topic="topic" + feature_set=FeatureSetProto( + spec=FeatureSetSpec( + name="my_feature_set", + version=2, + max_age=Duration(seconds=3600), + features=[ + FeatureSpec( + name="my_feature_1", + value_type=ValueProto.ValueType.FLOAT, + ), + FeatureSpec( + name="my_feature_2", + value_type=ValueProto.ValueType.FLOAT, + ), + ], + entities=[ + EntitySpec( + name="my_entity_1", + value_type=ValueProto.ValueType.INT64, + ) + ], + source=Source( + type=SourceType.KAFKA, + kafka_source_config=KafkaSourceConfig( + bootstrap_servers="localhost:9092", topic="topic" + ), ), - ), + ) ) ), ) @@ -229,27 +241,31 
@@ def test_get_batch_features(self, mock_client, mocker): mock_client._core_service_stub, "GetFeatureSet", return_value=GetFeatureSetResponse( - feature_set=FeatureSetSpec( - name="customer_fs", - version=1, - entities=[ - EntitySpec( - name="customer", value_type=ValueProto.ValueType.INT64 - ), - EntitySpec( - name="transaction", value_type=ValueProto.ValueType.INT64 - ), - ], - features=[ - FeatureSpec( - name="customer_feature_1", - value_type=ValueProto.ValueType.FLOAT, - ), - FeatureSpec( - name="customer_feature_2", - value_type=ValueProto.ValueType.STRING, - ), - ], + feature_set=FeatureSetProto( + spec=FeatureSetSpec( + name="customer_fs", + version=1, + entities=[ + EntitySpec( + name="customer", value_type=ValueProto.ValueType.INT64 + ), + EntitySpec( + name="transaction", + value_type=ValueProto.ValueType.INT64, + ), + ], + features=[ + FeatureSpec( + name="customer_feature_1", + value_type=ValueProto.ValueType.FLOAT, + ), + FeatureSpec( + name="customer_feature_2", + value_type=ValueProto.ValueType.STRING, + ), + ], + ), + meta=FeatureSetMeta(status=FeatureSetStatus.STATUS_READY), ) ), ) @@ -373,11 +389,13 @@ def test_feature_set_ingest_success(self, dataframe, client, mocker): # Register with Feast core client.apply(driver_fs) + driver_fs = driver_fs.to_proto() + driver_fs.meta.status = FeatureSetStatus.STATUS_READY mocker.patch.object( client._core_service_stub, "GetFeatureSet", - return_value=GetFeatureSetResponse(feature_set=driver_fs.to_proto()), + return_value=GetFeatureSetResponse(feature_set=driver_fs), ) # Need to create a mock producer @@ -385,6 +403,36 @@ def test_feature_set_ingest_success(self, dataframe, client, mocker): # Ingest data into Feast client.ingest("driver-feature-set", dataframe) + @pytest.mark.parametrize("dataframe,exception", [(dataframes.GOOD, TimeoutError)]) + def test_feature_set_ingest_fail_if_pending( + self, dataframe, exception, client, mocker + ): + with pytest.raises(exception): + driver_fs = FeatureSet( + 
"driver-feature-set", + source=KafkaSource(brokers="kafka:9092", topic="test"), + ) + driver_fs.add(Feature(name="feature_1", dtype=ValueType.FLOAT)) + driver_fs.add(Feature(name="feature_2", dtype=ValueType.STRING)) + driver_fs.add(Feature(name="feature_3", dtype=ValueType.INT64)) + driver_fs.add(Entity(name="entity_id", dtype=ValueType.INT64)) + + # Register with Feast core + client.apply(driver_fs) + driver_fs = driver_fs.to_proto() + driver_fs.meta.status = FeatureSetStatus.STATUS_PENDING + + mocker.patch.object( + client._core_service_stub, + "GetFeatureSet", + return_value=GetFeatureSetResponse(feature_set=driver_fs), + ) + + # Need to create a mock producer + with patch("feast.loaders.ingest.KafkaProducer") as mocked_queue: + # Ingest data into Feast + client.ingest("driver-feature-set", dataframe, timeout=1) + @pytest.mark.parametrize( "dataframe,exception", [ diff --git a/sdk/python/tests/test_stores.py b/sdk/python/tests/test_stores.py index 92445ecb7a7..330f272dacd 100644 --- a/sdk/python/tests/test_stores.py +++ b/sdk/python/tests/test_stores.py @@ -38,9 +38,9 @@ def test_register_feature_set(self, sqlite_store): fs.add(Feature(name="my-feature-2", dtype=ValueType.INT64)) fs.add(Entity(name="my-entity-1", dtype=ValueType.INT64)) fs._version = 1 - feature_set_proto = fs.to_proto() + feature_set_spec_proto = fs.to_proto().spec - sqlite_store.register_feature_set(feature_set_proto) + sqlite_store.register_feature_set(feature_set_spec_proto) feature_row = FeatureRowProto.FeatureRow( feature_set="feature_set_1", event_timestamp=Timestamp(), diff --git a/serving/src/main/java/feast/serving/service/CachedSpecService.java b/serving/src/main/java/feast/serving/service/CachedSpecService.java index 64a2c6e084f..edf2da37a01 100644 --- a/serving/src/main/java/feast/serving/service/CachedSpecService.java +++ b/serving/src/main/java/feast/serving/service/CachedSpecService.java @@ -27,6 +27,7 @@ import feast.core.CoreServiceProto.ListFeatureSetsResponse; import 
feast.core.CoreServiceProto.UpdateStoreRequest; import feast.core.CoreServiceProto.UpdateStoreResponse; +import feast.core.FeatureSetProto.FeatureSet; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.StoreProto.Store; import feast.core.StoreProto.Store.Subscription; @@ -115,7 +116,7 @@ public FeatureSetSpec getFeatureSet(String name, int version) { "Unable to retrieve featureSet with id %s from core, featureSet does not exist", id)); } - return featureSets.getFeatureSets(0); + return featureSets.getFeatureSets(0).getSpec(); } catch (ExecutionException e) { throw new SpecRetrievalException( String.format("Unable to retrieve featureSet with id %s", id), e); @@ -157,7 +158,8 @@ private Map getFeatureSetSpecMap() { .setFeatureSetVersion(subscription.getVersion())) .build()); - for (FeatureSetSpec featureSetSpec : featureSetsResponse.getFeatureSetsList()) { + for (FeatureSet featureSet : featureSetsResponse.getFeatureSetsList()) { + FeatureSetSpec featureSetSpec = featureSet.getSpec(); featureSetSpecs.put( String.format("%s:%s", featureSetSpec.getName(), featureSetSpec.getVersion()), featureSetSpec); diff --git a/serving/src/main/java/feast/serving/store/bigquery/BatchRetrievalQueryRunnable.java b/serving/src/main/java/feast/serving/store/bigquery/BatchRetrievalQueryRunnable.java index 2d51547d0e7..e16f5060c2a 100644 --- a/serving/src/main/java/feast/serving/store/bigquery/BatchRetrievalQueryRunnable.java +++ b/serving/src/main/java/feast/serving/store/bigquery/BatchRetrievalQueryRunnable.java @@ -175,7 +175,6 @@ Job runBatchQuery(List featureSetQueries) ExecutorCompletionService executorCompletionService = new ExecutorCompletionService<>(executorService); - List featureSetInfos = new ArrayList<>(); for (int i = 0; i < featureSetQueries.size(); i++) { @@ -191,7 +190,8 @@ Job runBatchQuery(List featureSetQueries) for (int i = 0; i < featureSetQueries.size(); i++) { try { - FeatureSetInfo featureSetInfo = 
executorCompletionService.take().get(SUBQUERY_TIMEOUT_SECS, TimeUnit.SECONDS); + FeatureSetInfo featureSetInfo = + executorCompletionService.take().get(SUBQUERY_TIMEOUT_SECS, TimeUnit.SECONDS); featureSetInfos.add(featureSetInfo); } catch (InterruptedException | ExecutionException | TimeoutException e) { jobService() diff --git a/serving/src/test/java/feast/serving/service/CachedSpecServiceTest.java b/serving/src/test/java/feast/serving/service/CachedSpecServiceTest.java index 5b295e9ee79..5bd2038f2be 100644 --- a/serving/src/test/java/feast/serving/service/CachedSpecServiceTest.java +++ b/serving/src/test/java/feast/serving/service/CachedSpecServiceTest.java @@ -26,6 +26,7 @@ import feast.core.CoreServiceProto.ListFeatureSetsResponse; import feast.core.CoreServiceProto.UpdateStoreRequest; import feast.core.CoreServiceProto.UpdateStoreResponse; +import feast.core.FeatureSetProto; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.StoreProto.Store; import feast.core.StoreProto.Store.RedisConfig; @@ -94,9 +95,13 @@ public void setUp() throws IOException { featureSetSpecs.put("fs1:2", FeatureSetSpec.newBuilder().setName("fs1").setVersion(2).build()); featureSetSpecs.put("fs2:1", FeatureSetSpec.newBuilder().setName("fs2").setVersion(1).build()); - List fs1FeatureSets = - Lists.newArrayList(featureSetSpecs.get("fs1:1"), featureSetSpecs.get("fs1:2")); - List fs2FeatureSets = Lists.newArrayList(featureSetSpecs.get("fs2:1")); + List fs1FeatureSets = + Lists.newArrayList( + FeatureSetProto.FeatureSet.newBuilder().setSpec(featureSetSpecs.get("fs1:1")).build(), + FeatureSetProto.FeatureSet.newBuilder().setSpec(featureSetSpecs.get("fs1:2")).build()); + List fs2FeatureSets = + Lists.newArrayList( + FeatureSetProto.FeatureSet.newBuilder().setSpec(featureSetSpecs.get("fs2:1")).build()); when(coreService.listFeatureSets( ListFeatureSetsRequest.newBuilder() .setFilter( diff --git a/tests/e2e/all_types_parquet/all_types_parquet.yaml 
b/tests/e2e/all_types_parquet/all_types_parquet.yaml index 85dd8c2c226..cf5ea702357 100644 --- a/tests/e2e/all_types_parquet/all_types_parquet.yaml +++ b/tests/e2e/all_types_parquet/all_types_parquet.yaml @@ -1,31 +1,32 @@ -name: all_types_parquet kind: feature_set -entities: - - name: customer_id - valueType: INT64 -features: - - name: int32_feature - valueType: INT64 - - name: int64_feature - valueType: INT64 - - name: float_feature - valueType: DOUBLE - - name: double_feature - valueType: DOUBLE - - name: string_feature - valueType: STRING - - name: bytes_feature - valueType: BYTES - - name: int32_list_feature - valueType: INT64_LIST - - name: int64_list_feature - valueType: INT64_LIST - - name: float_list_feature - valueType: DOUBLE_LIST - - name: double_list_feature - valueType: DOUBLE_LIST - - name: string_list_feature - valueType: STRING_LIST - - name: bytes_list_feature - valueType: BYTES_LIST -maxAge: 0s +spec: + name: all_types_parquet + entities: + - name: customer_id + valueType: INT64 + features: + - name: int32_feature + valueType: INT64 + - name: int64_feature + valueType: INT64 + - name: float_feature + valueType: DOUBLE + - name: double_feature + valueType: DOUBLE + - name: string_feature + valueType: STRING + - name: bytes_feature + valueType: BYTES + - name: int32_list_feature + valueType: INT64_LIST + - name: int64_list_feature + valueType: INT64_LIST + - name: float_list_feature + valueType: DOUBLE_LIST + - name: double_list_feature + valueType: DOUBLE_LIST + - name: string_list_feature + valueType: STRING_LIST + - name: bytes_list_feature + valueType: BYTES_LIST + maxAge: 0s diff --git a/tests/e2e/basic-ingest-redis-serving.py b/tests/e2e/basic-ingest-redis-serving.py index 902b0985c14..f674363f36b 100644 --- a/tests/e2e/basic-ingest-redis-serving.py +++ b/tests/e2e/basic-ingest-redis-serving.py @@ -70,7 +70,7 @@ def basic_dataframe(): ) -@pytest.mark.timeout(300) +@pytest.mark.timeout(45) @pytest.mark.run(order=10) def 
test_basic_register_feature_set_success(client): # Load feature set from file @@ -96,7 +96,7 @@ def test_basic_register_feature_set_success(client): ) -@pytest.mark.timeout(45) +@pytest.mark.timeout(300) @pytest.mark.run(order=11) def test_basic_ingest_success(client, basic_dataframe): cust_trans_fs = client.get_feature_set(name="customer_transactions") @@ -202,7 +202,7 @@ def all_types_dataframe(): ) -@pytest.mark.timeout(300) +@pytest.mark.timeout(45) @pytest.mark.run(order=20) def test_all_types_register_feature_set_success(client): all_types_fs_expected = FeatureSet( @@ -246,7 +246,7 @@ def test_all_types_register_feature_set_success(client): ) -@pytest.mark.timeout(45) +@pytest.mark.timeout(300) @pytest.mark.run(order=21) def test_all_types_ingest_success(client, all_types_dataframe): # Get all_types feature set @@ -322,7 +322,7 @@ def large_volume_dataframe(): return customer_data -@pytest.mark.timeout(300) +@pytest.mark.timeout(45) @pytest.mark.run(order=30) def test_large_volume_register_feature_set_success(client): cust_trans_fs_expected = FeatureSet.from_yaml( diff --git a/tests/e2e/basic/cust_trans_fs.yaml b/tests/e2e/basic/cust_trans_fs.yaml index e72ee616eb8..14d46794a6d 100644 --- a/tests/e2e/basic/cust_trans_fs.yaml +++ b/tests/e2e/basic/cust_trans_fs.yaml @@ -1,11 +1,12 @@ -name: customer_transactions kind: feature_set -entities: -- name: customer_id - valueType: INT64 -features: -- name: daily_transactions - valueType: FLOAT -- name: total_transactions - valueType: FLOAT -maxAge: 3600s +spec: + name: customer_transactions + entities: + - name: customer_id + valueType: INT64 + features: + - name: daily_transactions + valueType: FLOAT + - name: total_transactions + valueType: FLOAT + maxAge: 3600s diff --git a/tests/e2e/bq-batch-retrieval.py b/tests/e2e/bq-batch-retrieval.py index 067dd14a2fb..639ca9f5595 100644 --- a/tests/e2e/bq-batch-retrieval.py +++ b/tests/e2e/bq-batch-retrieval.py @@ -174,11 +174,9 @@ def test_multiple_featureset_joins(client): 
) client.apply(fs1) - time.sleep(10) fs1 = client.get_feature_set(name="feature_set_1", version=1) client.apply(fs2) - time.sleep(10) fs2 = client.get_feature_set(name="feature_set_2", version=1) N_ROWS = 10 diff --git a/tests/e2e/large_volume/cust_trans_large_fs.yaml b/tests/e2e/large_volume/cust_trans_large_fs.yaml index 04707412aa6..54bf4cac28e 100644 --- a/tests/e2e/large_volume/cust_trans_large_fs.yaml +++ b/tests/e2e/large_volume/cust_trans_large_fs.yaml @@ -1,11 +1,12 @@ -name: customer_transactions_large kind: feature_set -entities: -- name: customer_id - valueType: INT64 -features: -- name: daily_transactions - valueType: FLOAT -- name: total_transactions - valueType: FLOAT -maxAge: 3600s +spec: + name: customer_transactions_large + entities: + - name: customer_id + valueType: INT64 + features: + - name: daily_transactions + valueType: FLOAT + - name: total_transactions + valueType: FLOAT + maxAge: 3600s From 03b3135d34090823efebaa28be5479ac6351140a Mon Sep 17 00:00:00 2001 From: Chen Zhiling Date: Tue, 17 Dec 2019 22:30:22 +0800 Subject: [PATCH 02/18] Catch errors thrown by BQ during entity table loading (#371) * Catch errors thrown by BQ during entity table loading * Apply spotless * Add logging at grpc service level --- .../ServingServiceGRpcController.java | 3 +++ .../serving/service/BigQueryServingService.java | 17 +++++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/serving/src/main/java/feast/serving/controller/ServingServiceGRpcController.java b/serving/src/main/java/feast/serving/controller/ServingServiceGRpcController.java index 01601505583..0eb9d1e3450 100644 --- a/serving/src/main/java/feast/serving/controller/ServingServiceGRpcController.java +++ b/serving/src/main/java/feast/serving/controller/ServingServiceGRpcController.java @@ -74,6 +74,7 @@ public void getOnlineFeatures( responseObserver.onNext(onlineFeatures); responseObserver.onCompleted(); } catch (Exception e) { + log.warn("Failed to get Online Features", 
e); responseObserver.onError(e); } span.finish(); @@ -88,6 +89,7 @@ public void getBatchFeatures( responseObserver.onNext(batchFeatures); responseObserver.onCompleted(); } catch (Exception e) { + log.warn("Failed to get Batch Features", e); responseObserver.onError(e); } } @@ -99,6 +101,7 @@ public void getJob(GetJobRequest request, StreamObserver respons responseObserver.onNext(response); responseObserver.onCompleted(); } catch (Exception e) { + log.warn("Failed to get Job", e); responseObserver.onError(e); } } diff --git a/serving/src/main/java/feast/serving/service/BigQueryServingService.java b/serving/src/main/java/feast/serving/service/BigQueryServingService.java index 701e146ee5d..d1658bde54d 100644 --- a/serving/src/main/java/feast/serving/service/BigQueryServingService.java +++ b/serving/src/main/java/feast/serving/service/BigQueryServingService.java @@ -123,7 +123,19 @@ public GetBatchFeaturesResponse getBatchFeatures(GetBatchFeaturesRequest getFeat .asRuntimeException(); } - Table entityTable = loadEntities(getFeaturesRequest.getDatasetSource()); + Table entityTable; + String entityTableName; + try { + entityTable = loadEntities(getFeaturesRequest.getDatasetSource()); + + TableId entityTableWithUUIDs = generateUUIDs(entityTable); + entityTableName = generateFullTableName(entityTableWithUUIDs); + } catch (Exception e) { + throw Status.INTERNAL + .withDescription("Unable to load entity dataset to Bigquery") + .asRuntimeException(); + } + Schema entityTableSchema = entityTable.getDefinition().getSchema(); List entityNames = entityTableSchema.getFields().stream() @@ -131,9 +143,6 @@ public GetBatchFeaturesResponse getBatchFeatures(GetBatchFeaturesRequest getFeat .filter(name -> !name.equals("event_timestamp")) .collect(Collectors.toList()); - TableId entityTableWithUUIDs = generateUUIDs(entityTable); - String entityTableName = generateFullTableName(entityTableWithUUIDs); - List featureSetInfos = QueryTemplater.getFeatureSetInfos(featureSetSpecs, 
getFeaturesRequest.getFeatureSetsList()); From 7571ba7241c6451f4dd2718b41e4102c058fe1a1 Mon Sep 17 00:00:00 2001 From: David Heryanto Date: Wed, 18 Dec 2019 14:38:20 +0800 Subject: [PATCH 03/18] Update helm dependency before building (#373) To ensure requirements.lock is in sync with requirements.yaml https://github.com/helm/helm/issues/2033 --- .prow/scripts/sync-helm-charts.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.prow/scripts/sync-helm-charts.sh b/.prow/scripts/sync-helm-charts.sh index 88fc04f7d14..8c242aeae69 100755 --- a/.prow/scripts/sync-helm-charts.sh +++ b/.prow/scripts/sync-helm-charts.sh @@ -31,7 +31,7 @@ fi exit_code=0 for dir in "$repo_dir"/*; do - if helm dependency build "$dir"; then + if helm dep update "$dir" && helm dep build "$dir"; then helm package --destination "$sync_dir" "$dir" else log_error "Problem building dependencies. Skipping packaging of '$dir'." From b36dff62799d84bc0a135c8b9e1428f4388c841d Mon Sep 17 00:00:00 2001 From: David Heryanto Date: Sun, 22 Dec 2019 10:43:29 +0800 Subject: [PATCH 04/18] Add documentation to default values.yaml in Feast chart (#376) --- .../feast/charts/feast-serving/values.yaml | 2 +- infra/charts/feast/values.yaml | 103 ++++++++++++++++++ 2 files changed, 104 insertions(+), 1 deletion(-) diff --git a/infra/charts/feast/charts/feast-serving/values.yaml b/infra/charts/feast/charts/feast-serving/values.yaml index b312a40692c..857c3fcfd50 100644 --- a/infra/charts/feast/charts/feast-serving/values.yaml +++ b/infra/charts/feast/charts/feast-serving/values.yaml @@ -107,7 +107,7 @@ application.yaml: # store.yaml will be mounted in the container. springConfigMountPath: /etc/feast/feast-serving -# gcpServiceAccount is the service account that Feast Core will use. +# gcpServiceAccount is the service account that Feast Serving will use. gcpServiceAccount: # useExistingSecret specifies Feast to use an existing secret containing Google # Cloud service account JSON key file. 
diff --git a/infra/charts/feast/values.yaml b/infra/charts/feast/values.yaml index 8a0e2ff0666..fd75a3fce6f 100644 --- a/infra/charts/feast/values.yaml +++ b/infra/charts/feast/values.yaml @@ -39,61 +39,164 @@ # --from-file=/home/user/key.json # +# ============================================================ +# Feast Core +# ============================================================ + feast-core: + # enabled specifies whether to install Feast Core component. + # + # Normally, this is set to "false" when Feast users need access to low latency + # Feast Serving, by deploying multiple instances of Feast Serving closest + # to the client. These instances of Feast Serving however can still use + # the same shared Feast Core. enabled: true + # jvmOptions are options that will be passed to the Java Virtual Machine (JVM) + # running Feast Core. + # + # For example, it is good practice to set min and max heap size in JVM. + # https://stackoverflow.com/questions/6902135/side-effect-for-increasing-maxpermsize-and-max-heap-size jvmOptions: - -Xms1024m - -Xmx1024m + # resources that should be allocated to Feast Core. resources: requests: cpu: 1000m memory: 1024Mi + limits: + memory: 2048Mi + # gcpServiceAccount is the Google service account that Feast Core will use. gcpServiceAccount: + # useExistingSecret specifies Feast to use an existing secret containing + # Google Cloud service account JSON key file. + # + # This is the only supported option for now to use a service account JSON. + # Feast admin is expected to create this secret before deploying Feast. useExistingSecret: true + existingSecret: + # name is the secret name of the existing secret for the service account. + name: feast-gcp-service-account + # key is the secret key of the existing secret for the service account. + # key is normally derived from the file name of the JSON key file. 
+ key: key.json + +# ============================================================ +# Feast Serving Online +# ============================================================ feast-serving-online: + # enabled specifies whether to install Feast Serving Online component. enabled: true + # redis.enabled specifies whether Redis should be installed as part of Feast Serving. + # + # If enabled is set to "false", Feast admin has to ensure there is an + # existing Redis running outside Feast, that Feast Serving can connect to. redis: enabled: true + # jvmOptions are options that will be passed to the Feast Serving JVM. jvmOptions: - -Xms1024m - -Xmx1024m + # resources that should be allocated to Feast Serving. resources: requests: cpu: 500m memory: 1024Mi + limits: + memory: 2048Mi + # store.yaml is the configuration for Feast Store. + # + # Refer to this link for more description: + # https://github.com/gojek/feast/blob/79eb4ab5fa3d37102c1dca9968162a98690526ba/protos/feast/core/Store.proto store.yaml: name: redis type: REDIS redis_config: + # If redis.enabled is set to false, Feast admin should uncomment and + # set the host value to an "existing" Redis instance Feast will use as + # online Store. + # + # Else, if redis.enabled is set to true, no additional configuration is + # required. + # + # host: redis-host port: 6379 subscriptions: - name: "*" version: ">0" +# ============================================================ +# Feast Serving Batch +# ============================================================ + feast-serving-batch: + # enabled specifies whether to install Feast Serving Batch component. enabled: true + # redis.enabled specifies whether Redis should be installed as part of Feast Serving. + # + # This is usually set to "false" for Feast Serving Batch because the default + # store is BigQuery. redis: enabled: false + # jvmOptions are options that will be passed to the Feast Serving JVM. 
jvmOptions: - -Xms1024m - -Xmx1024m + # resources that should be allocated to Feast Serving. resources: requests: cpu: 500m memory: 1024Mi + limits: + memory: 2048Mi + # gcpServiceAccount is the service account that Feast Serving will use. gcpServiceAccount: + # useExistingSecret specifies Feast to use an existing secret containing + # Google Cloud service account JSON key file. + # + # This is the only supported option for now to use a service account JSON. + # Feast admin is expected to create this secret before deploying Feast. useExistingSecret: true + existingSecret: + # name is the secret name of the existing secret for the service account. + name: feast-gcp-service-account + # key is the secret key of the existing secret for the service account. + # key is normally derived from the file name of the JSON key file. + key: key.json + # application.yaml is the main configuration for Feast Serving application. + # + # Feast Core is a Spring Boot app which uses this yaml configuration file. + # Refer to https://github.com/gojek/feast/blob/79eb4ab5fa3d37102c1dca9968162a98690526ba/serving/src/main/resources/application.yml + # for a complete list and description of the configuration. application.yaml: feast: jobs: + # staging-location specifies the URI to store intermediate files for + # batch serving (required if using BigQuery as Store). + # + # Please set the value to an "existing" Google Cloud Storage URI that + # Feast serving has write access to. staging-location: gs://bucket/path + # Type of store to store job metadata. + # + # This default configuration assumes that Feast Serving Online is + # enabled as well. So Feast Serving Batch will share the same + # Redis instance to store job statuses. store-type: REDIS + # store.yaml is the configuration for Feast Store. 
+ # + # Refer to this link for more description: + # https://github.com/gojek/feast/blob/79eb4ab5fa3d37102c1dca9968162a98690526ba/protos/feast/core/Store.proto store.yaml: name: bigquery type: BIGQUERY bigquery_config: + # project_id specifies the Google Cloud Project. Please set this to the + # project id you are using BigQuery in. project_id: PROJECT_ID + # dataset_id specifies an "existing" BigQuery dataset Feast Serving Batch + # will use. Please ensure this dataset is created beforehand. dataset_id: DATASET_ID subscriptions: - name: "*" From ccdaaf18ce7e677c0351c6fa43959fd395efad98 Mon Sep 17 00:00:00 2001 From: Willem Pienaar <6728866+woop@users.noreply.github.com> Date: Sun, 22 Dec 2019 10:59:19 +0800 Subject: [PATCH 05/18] Update CODEOWNERS to include khorshuheng --- .github/CODEOWNERS | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index c319bc99ffc..a30c747b135 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,8 +1,8 @@ # Each line is a file pattern followed by one or more owners. 
# https://help.github.com/en/articles/about-code-owners -* @zhilingc @pradithya @woop @tims @thirteen37 @davidheryanto -/core/ @zhilingc @pradithya @tims -/ingestion/ @zhilingc @pradithya @tims -/serving/ @zhilingc @pradithya @tims +* @zhilingc @pradithya @woop @davidheryanto @khorshuheng +/core/ @zhilingc @pradithya +/ingestion/ @zhilingc @pradithya +/serving/ @zhilingc @pradithya /cli/ @zhilingc @pradithya From b20c1cde8dd1b342f7d6ad3af731eff225ffc1e4 Mon Sep 17 00:00:00 2001 From: Willem Pienaar <6728866+woop@users.noreply.github.com> Date: Sun, 24 Nov 2019 12:42:40 +0530 Subject: [PATCH 06/18] Create CHANGELOG.md --- CHANGELOG.md | 333 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 333 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000000..c9fe47a7975 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,333 @@ +# Changelog + +## [v0.3.0](https://github.com/gojek/feast/tree/v0.3.0) (2019-11-19) + +[Full Changelog](https://github.com/gojek/feast/compare/v0.1.8...v0.3.0) + + +**Merged pull requests:** + +- Regenerate go protos [\#313](https://github.com/gojek/feast/pull/313) ([zhilingc](https://github.com/zhilingc)) +- Bump chart version to 0.3.0 [\#311](https://github.com/gojek/feast/pull/311) ([zhilingc](https://github.com/zhilingc)) +- Refactored Core API: ListFeatureSets, ListStore, and GetFeatureSet [\#309](https://github.com/gojek/feast/pull/309) ([woop](https://github.com/woop)) +- Use Maven's --also-make by default [\#308](https://github.com/gojek/feast/pull/308) ([ches](https://github.com/ches)) +- Python SDK Ingestion and schema inference updates [\#307](https://github.com/gojek/feast/pull/307) ([woop](https://github.com/woop)) +- Batch ingestion fix [\#299](https://github.com/gojek/feast/pull/299) ([zhilingc](https://github.com/zhilingc)) +- Update values-demo.yaml to make Minikube installation simpler [\#298](https://github.com/gojek/feast/pull/298) 
([woop](https://github.com/woop)) +- Fix bug in core not setting default Kafka source [\#297](https://github.com/gojek/feast/pull/297) ([woop](https://github.com/woop)) +- Replace Prometheus logging in ingestion with StatsD logging [\#293](https://github.com/gojek/feast/pull/293) ([woop](https://github.com/woop)) +- Feast Core: Stage files manually when launching Dataflow jobs [\#291](https://github.com/gojek/feast/pull/291) ([davidheryanto](https://github.com/davidheryanto)) +- Database tweaks [\#290](https://github.com/gojek/feast/pull/290) ([smadarasmi](https://github.com/smadarasmi)) +- Feast Helm charts and build script [\#289](https://github.com/gojek/feast/pull/289) ([davidheryanto](https://github.com/davidheryanto)) +- Fix max\_age changes not updating specs and add TQDM silencing flag [\#292](https://github.com/gojek/feast/pull/292) ([woop](https://github.com/woop)) +- Zl/ingestion fixes [\#286](https://github.com/gojek/feast/pull/286) ([zhilingc](https://github.com/zhilingc)) +- Consolidate jobs [\#279](https://github.com/gojek/feast/pull/279) ([zhilingc](https://github.com/zhilingc)) +- Import Spring Boot's dependency BOM, fix spring-boot:run at parent project level [\#276](https://github.com/gojek/feast/pull/276) ([ches](https://github.com/ches)) +- Feast 0.3 Continuous Integration \(CI\) Update [\#271](https://github.com/gojek/feast/pull/271) ([davidheryanto](https://github.com/davidheryanto)) +- Add batch feature retrieval to Python SDK [\#268](https://github.com/gojek/feast/pull/268) ([woop](https://github.com/woop)) +- Set Maven build requirements and some project POM metadata [\#267](https://github.com/gojek/feast/pull/267) ([ches](https://github.com/ches)) +- Python SDK enhancements [\#264](https://github.com/gojek/feast/pull/264) ([woop](https://github.com/woop)) +- Use a symlink for Java SDK's protos [\#263](https://github.com/gojek/feast/pull/263) ([ches](https://github.com/ches)) +- Clean up the Maven build 
[\#262](https://github.com/gojek/feast/pull/262) ([ches](https://github.com/ches)) +- Add golang SDK [\#261](https://github.com/gojek/feast/pull/261) ([zhilingc](https://github.com/zhilingc)) +- Move storage configuration to serving [\#254](https://github.com/gojek/feast/pull/254) ([zhilingc](https://github.com/zhilingc)) +- 0.3 dev serving api change [\#253](https://github.com/gojek/feast/pull/253) ([zhilingc](https://github.com/zhilingc)) + +## [v0.1.8](https://github.com/gojek/feast/tree/v0.1.8) (2019-10-30) + +[Full Changelog](https://github.com/gojek/feast/compare/v0.1.2...v0.1.8) + +**Implemented enhancements:** + +- Feast cli config file should be settable by an env var [\#149](https://github.com/gojek/feast/issues/149) +- Helm chart for deploying feast using Flink as runner [\#64](https://github.com/gojek/feast/issues/64) +- Get ingestion metrics when running on Flink runner [\#63](https://github.com/gojek/feast/issues/63) +- Move source types into their own package and discover them using java.util.ServiceLoader [\#61](https://github.com/gojek/feast/issues/61) +- Change config to yaml [\#51](https://github.com/gojek/feast/issues/51) +- Ability to pass runner option during ingestion job submission [\#50](https://github.com/gojek/feast/issues/50) + +**Fixed bugs:** + +- Fix Print Method in Feast CLI [\#211](https://github.com/gojek/feast/issues/211) +- Dataflow monitoring by core is failing with incorrect job id [\#153](https://github.com/gojek/feast/issues/153) +- Feast core crashes without logger set [\#150](https://github.com/gojek/feast/issues/150) + +**Closed issues:** + +- Update Prow for Feast 0.3 to run tests for PRs on GitHub [\#275](https://github.com/gojek/feast/issues/275) +- Remove JavaFX \(Oracle\) dependency and move to OpenJDK [\#266](https://github.com/gojek/feast/issues/266) +- Update Go Client to Feast 0.3 for Online Serving [\#260](https://github.com/gojek/feast/issues/260) +- Update Java Client to Feast 0.3 API for Online Serving 
[\#257](https://github.com/gojek/feast/issues/257) +- Create end-to-end test for Feast 0.3 [\#256](https://github.com/gojek/feast/issues/256) +- Move storage configuration to serving deployment [\#255](https://github.com/gojek/feast/issues/255) +- StorageInfoRepository [\#233](https://github.com/gojek/feast/issues/233) +- Error when running feast apply entity integration-tests/testdata/entity\_specs/entity\_1.yaml [\#226](https://github.com/gojek/feast/issues/226) +- Does Feast help in feature engineering as a whole, i.e, calculating/ extracting the features or is this going to be developed in the near future? [\#220](https://github.com/gojek/feast/issues/220) +- Redis Error saying JedisConnectionException:connection timed out [\#219](https://github.com/gojek/feast/issues/219) +- Entity & feature deletion [\#202](https://github.com/gojek/feast/issues/202) +- REDIS1 error [\#193](https://github.com/gojek/feast/issues/193) +- Not able to push to feature store [\#192](https://github.com/gojek/feast/issues/192) +- Add nodeSelector to helm charts [\#186](https://github.com/gojek/feast/issues/186) +- Multiple feature specs in a single yaml file [\#181](https://github.com/gojek/feast/issues/181) +- Update continuous integration/deployment \(CI/CD\) process [\#180](https://github.com/gojek/feast/issues/180) +- Integration test for streaming data [\#179](https://github.com/gojek/feast/issues/179) +- Quickstart.ipynb shows Description field cannot be empty [\#178](https://github.com/gojek/feast/issues/178) +- Default service type for Helm chart should not be LoadBalancer [\#161](https://github.com/gojek/feast/issues/161) +- Support for ingesting flat json from text files and streams [\#140](https://github.com/gojek/feast/issues/140) +- Ability to correct mistakes made by ingesting the wrong data [\#139](https://github.com/gojek/feast/issues/139) +- Add ability to start job from yaml using python sdk [\#124](https://github.com/gojek/feast/issues/124) +- Add build/test 
triggering for every PR and on the master branch [\#102](https://github.com/gojek/feast/issues/102) +- Create Getting Started documentation [\#98](https://github.com/gojek/feast/issues/98) +- Create a release [\#65](https://github.com/gojek/feast/issues/65) +- Add python tests to unit testing script [\#58](https://github.com/gojek/feast/issues/58) +- Create proper OWNERS files for each sub-component [\#41](https://github.com/gojek/feast/issues/41) +- Option to create a resource without overwriting existing records [\#37](https://github.com/gojek/feast/issues/37) + +**Merged pull requests:** + +- Remove redis transaction [\#280](https://github.com/gojek/feast/pull/280) ([pradithya](https://github.com/pradithya)) +- Fix tracing to continue from existing trace created by grpc client [\#245](https://github.com/gojek/feast/pull/245) ([pradithya](https://github.com/pradithya)) + +## [v0.1.2](https://github.com/gojek/feast/tree/v0.1.2) (2019-08-23) + +[Full Changelog](https://github.com/gojek/feast/compare/v0.1.1...v0.1.2) + +**Fixed bugs:** + +- Batch Import, feature with datetime format issue [\#203](https://github.com/gojek/feast/issues/203) +- Serving not correctly report readiness check if there is no activity [\#190](https://github.com/gojek/feast/issues/190) +- Serving stop periodically reloading feature specification after a while [\#188](https://github.com/gojek/feast/issues/188) + +**Closed issues:** + +- \[question\] I just confused why FlinkJobManager always use DirectRunner as runner. 
[\#231](https://github.com/gojek/feast/issues/231) +- dtype 'datetime64\[ns, UTC\]' importer fails because key looks like 'datetime64\[ns, tz\]' [\#229](https://github.com/gojek/feast/issues/229) +- Add filtering capability to create dataset api [\#214](https://github.com/gojek/feast/issues/214) +- DataFlow throws out " java.io.FileNotFoundException: No files matched spec: bucket/ingestion\_1.csv" while executing [\#212](https://github.com/gojek/feast/issues/212) +- Python tests for sdk are broken [\#204](https://github.com/gojek/feast/issues/204) +- Python SDK create\_dataset is actually creating dataset in BQ [\#201](https://github.com/gojek/feast/issues/201) +- Python SDK Importer unable to stage dataframe without timestamp [\#195](https://github.com/gojek/feast/issues/195) +- Remove requirement to specify a staging bucket. [\#177](https://github.com/gojek/feast/issues/177) + +**Merged pull requests:** + +- Add `romanwozniak` to prow owners config [\#216](https://github.com/gojek/feast/pull/216) ([romanwozniak](https://github.com/romanwozniak)) +- Implement filter for create dataset api [\#215](https://github.com/gojek/feast/pull/215) ([pradithya](https://github.com/pradithya)) +- expand raw column to accomodate more features ingested in one go [\#213](https://github.com/gojek/feast/pull/213) ([budi](https://github.com/budi)) +- update feast installation docs [\#210](https://github.com/gojek/feast/pull/210) ([budi](https://github.com/budi)) +- Add Prow job for unit testing Python SDK [\#209](https://github.com/gojek/feast/pull/209) ([davidheryanto](https://github.com/davidheryanto)) +- fix create\_dataset [\#208](https://github.com/gojek/feast/pull/208) ([budi](https://github.com/budi)) +- Update Feast installation doc [\#207](https://github.com/gojek/feast/pull/207) ([davidheryanto](https://github.com/davidheryanto)) +- Fix unit test cli in prow script not returning correct exit code [\#206](https://github.com/gojek/feast/pull/206) 
([davidheryanto](https://github.com/davidheryanto)) +- Fix pytests and make TS conversion conditional [\#205](https://github.com/gojek/feast/pull/205) ([zhilingc](https://github.com/zhilingc)) +- Use full prow build id as dataset name during test [\#200](https://github.com/gojek/feast/pull/200) ([davidheryanto](https://github.com/davidheryanto)) +- Add Feast CLI / python SDK documentation [\#199](https://github.com/gojek/feast/pull/199) ([romanwozniak](https://github.com/romanwozniak)) +- Update library version to fix security vulnerabilities in dependencies [\#198](https://github.com/gojek/feast/pull/198) ([davidheryanto](https://github.com/davidheryanto)) +- Update Prow configuration for Feast CI [\#197](https://github.com/gojek/feast/pull/197) ([davidheryanto](https://github.com/davidheryanto)) +- \[budi\] update python sdk quickstart [\#196](https://github.com/gojek/feast/pull/196) ([budi](https://github.com/budi)) +- Readiness probe [\#191](https://github.com/gojek/feast/pull/191) ([pradithya](https://github.com/pradithya)) +- Fix periodic feature spec reload [\#189](https://github.com/gojek/feast/pull/189) ([pradithya](https://github.com/pradithya)) +- Fixed a typo in environment variable in installation [\#187](https://github.com/gojek/feast/pull/187) ([gauravkumar37](https://github.com/gauravkumar37)) +- Revert "Update Quickstart" [\#185](https://github.com/gojek/feast/pull/185) ([zhilingc](https://github.com/zhilingc)) +- Update Quickstart [\#184](https://github.com/gojek/feast/pull/184) ([pradithya](https://github.com/pradithya)) +- Continuous integration and deployment \(CI/CD\) update [\#183](https://github.com/gojek/feast/pull/183) ([davidheryanto](https://github.com/davidheryanto)) +- Remove feature specs being able to declare their serving or warehouse stores [\#159](https://github.com/gojek/feast/pull/159) ([tims](https://github.com/tims)) + +## [v0.1.1](https://github.com/gojek/feast/tree/v0.1.1) (2019-04-18) + +[Full 
Changelog](https://github.com/gojek/feast/compare/v0.1.0...v0.1.1) + +**Fixed bugs:** + +- Fix BigQuery query template to retrieve training data [\#182](https://github.com/gojek/feast/pull/182) ([davidheryanto](https://github.com/davidheryanto)) + +**Closed issues:** + +- "pip install Feast" does not work properly [\#175](https://github.com/gojek/feast/issues/175) +- Push Feast Python SDK to https://pypi.org [\#121](https://github.com/gojek/feast/issues/121) +- Toggle data stores using flags in feature specifications [\#38](https://github.com/gojek/feast/issues/38) + +**Merged pull requests:** + +- Add python init files [\#176](https://github.com/gojek/feast/pull/176) ([zhilingc](https://github.com/zhilingc)) +- Change pypi package from Feast to feast [\#173](https://github.com/gojek/feast/pull/173) ([zhilingc](https://github.com/zhilingc)) + +## [v0.1.0](https://github.com/gojek/feast/tree/v0.1.0) (2019-04-09) + +[Full Changelog](https://github.com/gojek/feast/compare/v0.0.2...v0.1.0) + +**Implemented enhancements:** + +- Removal of storing historical value of feature in serving storage [\#53](https://github.com/gojek/feast/issues/53) +- Remove feature "granularity" and relegate to metadata [\#17](https://github.com/gojek/feast/issues/17) + +**Closed issues:** + +- Add ability to name an import job [\#167](https://github.com/gojek/feast/issues/167) +- Ingestion retrying an invalid FeatureRow endlessly [\#163](https://github.com/gojek/feast/issues/163) +- Ability to associate data ingested in Warehouse store to its ingestion job [\#145](https://github.com/gojek/feast/issues/145) +- Missing \(Fixing\) unit test for FeatureRowKafkaIO [\#132](https://github.com/gojek/feast/issues/132) + +**Merged pull requests:** + +- Catch all kind of exception to avoid retrying [\#171](https://github.com/gojek/feast/pull/171) ([pradithya](https://github.com/pradithya)) +- Integration test [\#170](https://github.com/gojek/feast/pull/170) ([zhilingc](https://github.com/zhilingc)) +- 
Proto error [\#169](https://github.com/gojek/feast/pull/169) ([pradithya](https://github.com/pradithya)) +- Add --name flag to submit job [\#168](https://github.com/gojek/feast/pull/168) ([pradithya](https://github.com/pradithya)) +- Prevent throwing RuntimeException when invalid proto is received [\#166](https://github.com/gojek/feast/pull/166) ([pradithya](https://github.com/pradithya)) +- Add davidheryanto to OWNER file [\#165](https://github.com/gojek/feast/pull/165) ([pradithya](https://github.com/pradithya)) +- Check validity of event timestamp in ValidateFeatureRowDoFn [\#164](https://github.com/gojek/feast/pull/164) ([pradithya](https://github.com/pradithya)) +- Remove granularity [\#162](https://github.com/gojek/feast/pull/162) ([pradithya](https://github.com/pradithya)) +- Better Kafka test [\#160](https://github.com/gojek/feast/pull/160) ([tims](https://github.com/tims)) +- Simplify and document CLI building steps [\#158](https://github.com/gojek/feast/pull/158) ([thirteen37](https://github.com/thirteen37)) +- Fix link typo in README.md [\#156](https://github.com/gojek/feast/pull/156) ([pradithya](https://github.com/pradithya)) +- Add Feast admin quickstart guide [\#155](https://github.com/gojek/feast/pull/155) ([thirteen37](https://github.com/thirteen37)) +- Pass all specs to ingestion by file [\#154](https://github.com/gojek/feast/pull/154) ([tims](https://github.com/tims)) +- Preload spec in serving cache [\#152](https://github.com/gojek/feast/pull/152) ([pradithya](https://github.com/pradithya)) +- Add job identifier to FeatureRow [\#147](https://github.com/gojek/feast/pull/147) ([mansiib](https://github.com/mansiib)) +- Fix unit tests [\#146](https://github.com/gojek/feast/pull/146) ([mansiib](https://github.com/mansiib)) +- Add thirteen37 to OWNERS [\#144](https://github.com/gojek/feast/pull/144) ([thirteen37](https://github.com/thirteen37)) +- Fix import spec created from Importer.from\_csv [\#143](https://github.com/gojek/feast/pull/143) 
([pradithya](https://github.com/pradithya)) +- Regenerate go [\#142](https://github.com/gojek/feast/pull/142) ([zhilingc](https://github.com/zhilingc)) +- Flat JSON for pubsub and text files [\#141](https://github.com/gojek/feast/pull/141) ([tims](https://github.com/tims)) +- Add wait flag for jobs, fix go proto path for dataset service [\#138](https://github.com/gojek/feast/pull/138) ([zhilingc](https://github.com/zhilingc)) +- Fix Python SDK importer's ability to apply features [\#135](https://github.com/gojek/feast/pull/135) ([woop](https://github.com/woop)) +- Refactor stores [\#110](https://github.com/gojek/feast/pull/110) ([tims](https://github.com/tims)) +- Coalesce rows [\#89](https://github.com/gojek/feast/pull/89) ([tims](https://github.com/tims)) +- Remove historical feature in serving store [\#87](https://github.com/gojek/feast/pull/87) ([pradithya](https://github.com/pradithya)) + +## [v0.0.2](https://github.com/gojek/feast/tree/v0.0.2) (2019-03-11) + +[Full Changelog](https://github.com/gojek/feast/compare/v0.0.1...v0.0.2) + +**Implemented enhancements:** + +- Coalesce FeatureRows for improved "latest" value consistency in serving stores [\#88](https://github.com/gojek/feast/issues/88) +- Kafka source [\#22](https://github.com/gojek/feast/issues/22) + +**Closed issues:** + +- Preload Feast's spec in serving cache [\#151](https://github.com/gojek/feast/issues/151) +- Feast csv data upload job [\#137](https://github.com/gojek/feast/issues/137) +- Blocking call to start feast ingestion job [\#136](https://github.com/gojek/feast/issues/136) +- Python SDK fails to apply feature when submitting job [\#134](https://github.com/gojek/feast/issues/134) +- Default dump format should be changed for Python SDK [\#133](https://github.com/gojek/feast/issues/133) +- Listing resources and finding out system state [\#131](https://github.com/gojek/feast/issues/131) +- Reorganise ingestion store classes to match architecture 
[\#109](https://github.com/gojek/feast/issues/109) + +## [v0.0.1](https://github.com/gojek/feast/tree/v0.0.1) (2019-02-11) + +[Full Changelog](https://github.com/gojek/feast/compare/ec9def2bbb06dc759538e4424caadd70f548ea64...v0.0.1) + +**Implemented enhancements:** + +- Spring boot CLI logs show up as JSON [\#104](https://github.com/gojek/feast/issues/104) +- Allow for registering feature that doesn't have warehouse store [\#5](https://github.com/gojek/feast/issues/5) + +**Fixed bugs:** + +- Error when submitting large import spec [\#125](https://github.com/gojek/feast/issues/125) +- Ingestion is not ignoring unknown feature in streaming source [\#99](https://github.com/gojek/feast/issues/99) +- Vulnerability in dependency \(core - jackson-databind \) [\#92](https://github.com/gojek/feast/issues/92) +- TF file for cloud build trigger broken [\#72](https://github.com/gojek/feast/issues/72) +- Job Execution Failure with NullPointerException [\#46](https://github.com/gojek/feast/issues/46) +- Runtime Dependency Error After Upgrade to Beam 2.9.0 [\#44](https://github.com/gojek/feast/issues/44) +- \[FlinkRunner\] Core should not follow remote flink runner job to completion [\#21](https://github.com/gojek/feast/issues/21) +- Go packages in protos use incorrect repo [\#16](https://github.com/gojek/feast/issues/16) + +**Closed issues:** + +- Build failure in cloudbuild [\#128](https://github.com/gojek/feast/issues/128) +- Add Redis to Feast Helm chart [\#122](https://github.com/gojek/feast/issues/122) +- RedisIO fails to ingest all of the data [\#118](https://github.com/gojek/feast/issues/118) +- Error thrown by core when requesting feature spec is not clear [\#116](https://github.com/gojek/feast/issues/116) +- Add filter to ingress for internal load balancer defined in helm chart [\#113](https://github.com/gojek/feast/issues/113) +- Build on master branch [\#111](https://github.com/gojek/feast/issues/111) +- Cleanup warning while compiling protobuf 
[\#107](https://github.com/gojek/feast/issues/107) +- Include ui into core's build [\#105](https://github.com/gojek/feast/issues/105) +- Go tests failing for CLI [\#96](https://github.com/gojek/feast/issues/96) +- Update CLI to reflect recent changes in API [\#84](https://github.com/gojek/feast/issues/84) +- Jackson dependency issues [\#81](https://github.com/gojek/feast/issues/81) +- Branch conflicts with tag when using cloud build trigger [\#78](https://github.com/gojek/feast/issues/78) +- Newest \(latest?\) value of a feature [\#77](https://github.com/gojek/feast/issues/77) +- Create helm package repo to host helm charts [\#70](https://github.com/gojek/feast/issues/70) +- Option to add service account to core deployment. [\#69](https://github.com/gojek/feast/issues/69) +- Deduplicate list of storages in specs service [\#66](https://github.com/gojek/feast/issues/66) +- Vulnerability in dependency \(webpack-dev-server\) [\#60](https://github.com/gojek/feast/issues/60) +- Add build process for docker images [\#55](https://github.com/gojek/feast/issues/55) +- Python SDK [\#48](https://github.com/gojek/feast/issues/48) +- Bump Apache Beam SDK version [\#42](https://github.com/gojek/feast/issues/42) +- Fix unit tests script [\#36](https://github.com/gojek/feast/issues/36) +- Create CONTRIBUTING documentation [\#32](https://github.com/gojek/feast/issues/32) +- Errors during kafka deserializer \(passing\) test execution [\#31](https://github.com/gojek/feast/issues/31) +- Removal of ingestion's profile for different runner [\#28](https://github.com/gojek/feast/issues/28) +- Error Store should not require a storage spec [\#27](https://github.com/gojek/feast/issues/27) +- \[FlinkRunner\] Ingestion job tries to connect to every store available in core [\#20](https://github.com/gojek/feast/issues/20) +- FeatureRow proto to wrap FeatureRowKey and FeatureRowMessage [\#13](https://github.com/gojek/feast/issues/13) +- Ingestion should fail immediately when there are no valid 
stores [\#12](https://github.com/gojek/feast/issues/12) +- Create CI [\#2](https://github.com/gojek/feast/issues/2) + +**Merged pull requests:** + +- Disable test during docker image creation [\#129](https://github.com/gojek/feast/pull/129) ([pradithya](https://github.com/pradithya)) +- Repackage helm chart [\#127](https://github.com/gojek/feast/pull/127) ([pradithya](https://github.com/pradithya)) +- Increase the column size for storing raw import spec [\#126](https://github.com/gojek/feast/pull/126) ([pradithya](https://github.com/pradithya)) +- Update Helm Charts \(Redis, Logging\) [\#123](https://github.com/gojek/feast/pull/123) ([woop](https://github.com/woop)) +- Added LOG\_TYPE environmental variable [\#120](https://github.com/gojek/feast/pull/120) ([woop](https://github.com/woop)) +- Fix missing Redis write [\#119](https://github.com/gojek/feast/pull/119) ([pradithya](https://github.com/pradithya)) +- add logging when error on request feature [\#117](https://github.com/gojek/feast/pull/117) ([pradithya](https://github.com/pradithya)) +- run yarn run build during generate-resource [\#115](https://github.com/gojek/feast/pull/115) ([pradithya](https://github.com/pradithya)) +- Add loadBalancerSourceRanges option for both serving and core [\#114](https://github.com/gojek/feast/pull/114) ([zhilingc](https://github.com/zhilingc)) +- Build master [\#112](https://github.com/gojek/feast/pull/112) ([pradithya](https://github.com/pradithya)) +- Cleanup warning while building proto files [\#108](https://github.com/gojek/feast/pull/108) ([pradithya](https://github.com/pradithya)) +- Embed ui build & packaging into core's build [\#106](https://github.com/gojek/feast/pull/106) ([pradithya](https://github.com/pradithya)) +- Add build badge to README [\#103](https://github.com/gojek/feast/pull/103) ([woop](https://github.com/woop)) +- Ignore features in FeatureRow if it's not requested in import spec [\#101](https://github.com/gojek/feast/pull/101) 
([pradithya](https://github.com/pradithya)) +- Add override for serving service static ip [\#100](https://github.com/gojek/feast/pull/100) ([zhilingc](https://github.com/zhilingc)) +- Fix go test [\#97](https://github.com/gojek/feast/pull/97) ([zhilingc](https://github.com/zhilingc)) +- add missing copyright headers and fix test fail due to previous merge [\#95](https://github.com/gojek/feast/pull/95) ([tims](https://github.com/tims)) +- Allow submission of kafka jobs [\#94](https://github.com/gojek/feast/pull/94) ([zhilingc](https://github.com/zhilingc)) +- upgrade jackson databind for security vulnerability [\#93](https://github.com/gojek/feast/pull/93) ([tims](https://github.com/tims)) +- Version revert [\#91](https://github.com/gojek/feast/pull/91) ([zhilingc](https://github.com/zhilingc)) +- Fix validating feature row when the associated feature spec has no warehouse store [\#90](https://github.com/gojek/feast/pull/90) ([pradithya](https://github.com/pradithya)) +- Add get command [\#85](https://github.com/gojek/feast/pull/85) ([zhilingc](https://github.com/zhilingc)) +- Avoid error thrown when no storage for warehouse/serving is registered [\#83](https://github.com/gojek/feast/pull/83) ([pradithya](https://github.com/pradithya)) +- Fix jackson dependency issue [\#82](https://github.com/gojek/feast/pull/82) ([zhilingc](https://github.com/zhilingc)) +- Allow registration of feature without warehouse store [\#80](https://github.com/gojek/feast/pull/80) ([pradithya](https://github.com/pradithya)) +- Remove branch from cloud build trigger [\#79](https://github.com/gojek/feast/pull/79) ([woop](https://github.com/woop)) +- move read transforms into "source" package as FeatureSources [\#74](https://github.com/gojek/feast/pull/74) ([tims](https://github.com/tims)) +- Fix tag regex in tf file [\#73](https://github.com/gojek/feast/pull/73) ([zhilingc](https://github.com/zhilingc)) +- Update charts [\#71](https://github.com/gojek/feast/pull/71) 
([mansiib](https://github.com/mansiib)) +- Deduplicate storage ids before we fetch them [\#68](https://github.com/gojek/feast/pull/68) ([tims](https://github.com/tims)) +- Check the size of result against deduplicated request [\#67](https://github.com/gojek/feast/pull/67) ([pradithya](https://github.com/pradithya)) +- Add ability to submit ingestion job using Flink [\#62](https://github.com/gojek/feast/pull/62) ([pradithya](https://github.com/pradithya)) +- Fix vulnerabilities for webpack-dev [\#59](https://github.com/gojek/feast/pull/59) ([budi](https://github.com/budi)) +- Build push [\#56](https://github.com/gojek/feast/pull/56) ([zhilingc](https://github.com/zhilingc)) +- Fix github vulnerability issue with webpack [\#54](https://github.com/gojek/feast/pull/54) ([budi](https://github.com/budi)) +- Only lookup storage specs that we actually need [\#52](https://github.com/gojek/feast/pull/52) ([tims](https://github.com/tims)) +- Link Python SDK RFC to PR and Issue [\#49](https://github.com/gojek/feast/pull/49) ([woop](https://github.com/woop)) +- Python SDK [\#47](https://github.com/gojek/feast/pull/47) ([zhilingc](https://github.com/zhilingc)) +- Update com.google.httpclient to be same as Beam's dependency [\#45](https://github.com/gojek/feast/pull/45) ([pradithya](https://github.com/pradithya)) +- Bump Beam SDK to 2.9.0 [\#43](https://github.com/gojek/feast/pull/43) ([pradithya](https://github.com/pradithya)) +- Add fix for tests failing in docker image [\#40](https://github.com/gojek/feast/pull/40) ([zhilingc](https://github.com/zhilingc)) +- Change error store to be part of configuration instead [\#39](https://github.com/gojek/feast/pull/39) ([zhilingc](https://github.com/zhilingc)) +- Fix location of Prow's Tide configuration [\#35](https://github.com/gojek/feast/pull/35) ([woop](https://github.com/woop)) +- Add testing folder for deploying test infrastructure and running tests [\#34](https://github.com/gojek/feast/pull/34) ([woop](https://github.com/woop)) 
+- skeleton contributing guide [\#33](https://github.com/gojek/feast/pull/33) ([tims](https://github.com/tims)) +- allow empty string to select a NoOp write transform [\#30](https://github.com/gojek/feast/pull/30) ([tims](https://github.com/tims)) +- Remove packaging ingestion as separate profile \(fix \#28\) [\#29](https://github.com/gojek/feast/pull/29) ([pradithya](https://github.com/pradithya)) +- Change gopath to point to gojek repo [\#26](https://github.com/gojek/feast/pull/26) ([zhilingc](https://github.com/zhilingc)) +- Fixes \#31 - errors during kafka deserializer \(passing\) test execution [\#25](https://github.com/gojek/feast/pull/25) ([baskaranz](https://github.com/baskaranz)) +- Kafka IO fixes [\#23](https://github.com/gojek/feast/pull/23) ([tims](https://github.com/tims)) +- KafkaIO implementation for feast [\#19](https://github.com/gojek/feast/pull/19) ([baskaranz](https://github.com/baskaranz)) +- Return same type string for warehouse and serving NoOp stores [\#18](https://github.com/gojek/feast/pull/18) ([tims](https://github.com/tims)) +- \#12: prefetch specs and validate on job expansion [\#15](https://github.com/gojek/feast/pull/15) ([tims](https://github.com/tims)) +- Added RFC for Feast Python SDK [\#14](https://github.com/gojek/feast/pull/14) ([woop](https://github.com/woop)) +- Add more validation in feature spec registration [\#11](https://github.com/gojek/feast/pull/11) ([pradithya](https://github.com/pradithya)) +- Added rfcs/ folder with readme and template [\#10](https://github.com/gojek/feast/pull/10) ([woop](https://github.com/woop)) +- Expose ui service rpc [\#9](https://github.com/gojek/feast/pull/9) ([pradithya](https://github.com/pradithya)) +- Add Feast overview to README [\#8](https://github.com/gojek/feast/pull/8) ([woop](https://github.com/woop)) +- Directory structure changes [\#7](https://github.com/gojek/feast/pull/7) ([zhilingc](https://github.com/zhilingc)) +- Change register to apply 
[\#4](https://github.com/gojek/feast/pull/4) ([zhilingc](https://github.com/zhilingc)) +- Empty response handling in serving api [\#3](https://github.com/gojek/feast/pull/3) ([pradithya](https://github.com/pradithya)) +- Proto file fixes [\#1](https://github.com/gojek/feast/pull/1) ([pradithya](https://github.com/pradithya)) From 197c668e462a0a50ee485a57b45216f5f8207619 Mon Sep 17 00:00:00 2001 From: Willem Pienaar <6728866+woop@users.noreply.github.com> Date: Sun, 24 Nov 2019 13:07:46 +0530 Subject: [PATCH 07/18] Update CHANGELOG.md with a summary for 0.3 --- CHANGELOG.md | 96 +++++++--------------------------------------------- 1 file changed, 13 insertions(+), 83 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c9fe47a7975..f6ad89e0c0d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,16 @@ [Full Changelog](https://github.com/gojek/feast/compare/v0.1.8...v0.3.0) +**Summary:** + +* Introduced "Feature Sets" as a concept with a new [Feast Core API](https://github.com/gojek/feast/blob/v0.3.0/protos/feast/core/CoreService.proto), [Feast Serving API](https://github.com/gojek/feast/blob/v0.3.0/protos/feast/serving/ServingService.proto) +* Upgraded [Python SDK](https://github.com/gojek/feast/tree/v0.3.0/sdk/python) to support new Feast API. Allows for management of Feast as a library or through the command line. +* Implemented a [Golang SDK](https://github.com/gojek/feast/tree/v0.3.0/sdk/go) and [Java SDK](https://github.com/gojek/feast/tree/v0.3.0/sdk/java) to support the new Feast Core and Feast Serving APIs. +* Added support for multi-feature set retrieval and joins. +* Added point-in-time correct retrieval for both batch and online serving. +* Added support for an external source in Kafka. 
+* Added job management to Feast Core to manage ingestion/population jobs to remote Feast deployments +* Added metric support through Prometheus **Merged pull requests:** @@ -20,7 +30,7 @@ - Database tweaks [\#290](https://github.com/gojek/feast/pull/290) ([smadarasmi](https://github.com/smadarasmi)) - Feast Helm charts and build script [\#289](https://github.com/gojek/feast/pull/289) ([davidheryanto](https://github.com/davidheryanto)) - Fix max\_age changes not updating specs and add TQDM silencing flag [\#292](https://github.com/gojek/feast/pull/292) ([woop](https://github.com/woop)) -- Zl/ingestion fixes [\#286](https://github.com/gojek/feast/pull/286) ([zhilingc](https://github.com/zhilingc)) +- Ingestion fixes [\#286](https://github.com/gojek/feast/pull/286) ([zhilingc](https://github.com/zhilingc)) - Consolidate jobs [\#279](https://github.com/gojek/feast/pull/279) ([zhilingc](https://github.com/zhilingc)) - Import Spring Boot's dependency BOM, fix spring-boot:run at parent project level [\#276](https://github.com/gojek/feast/pull/276) ([ches](https://github.com/ches)) - Feast 0.3 Continuous Integration \(CI\) Update [\#271](https://github.com/gojek/feast/pull/271) ([davidheryanto](https://github.com/davidheryanto)) @@ -31,7 +41,7 @@ - Clean up the Maven build [\#262](https://github.com/gojek/feast/pull/262) ([ches](https://github.com/ches)) - Add golang SDK [\#261](https://github.com/gojek/feast/pull/261) ([zhilingc](https://github.com/zhilingc)) - Move storage configuration to serving [\#254](https://github.com/gojek/feast/pull/254) ([zhilingc](https://github.com/zhilingc)) -- 0.3 dev serving api change [\#253](https://github.com/gojek/feast/pull/253) ([zhilingc](https://github.com/zhilingc)) +- Serving API changes for 0.3 [\#253](https://github.com/gojek/feast/pull/253) ([zhilingc](https://github.com/zhilingc)) ## [v0.1.8](https://github.com/gojek/feast/tree/v0.1.8) (2019-10-30) @@ -52,37 +62,6 @@ - Dataflow monitoring by core is failing with incorrect job 
id [\#153](https://github.com/gojek/feast/issues/153) - Feast core crashes without logger set [\#150](https://github.com/gojek/feast/issues/150) -**Closed issues:** - -- Update Prow for Feast 0.3 to run tests for PRs on GitHub [\#275](https://github.com/gojek/feast/issues/275) -- Remove JavaFX \(Oracle\) dependency and move to OpenJDK [\#266](https://github.com/gojek/feast/issues/266) -- Update Go Client to Feast 0.3 for Online Serving [\#260](https://github.com/gojek/feast/issues/260) -- Update Java Client to Feast 0.3 API for Online Serving [\#257](https://github.com/gojek/feast/issues/257) -- Create end-to-end test for Feast 0.3 [\#256](https://github.com/gojek/feast/issues/256) -- Move storage configuration to serving deployment [\#255](https://github.com/gojek/feast/issues/255) -- StorageInfoRepository [\#233](https://github.com/gojek/feast/issues/233) -- Error when running feast apply entity integration-tests/testdata/entity\_specs/entity\_1.yaml [\#226](https://github.com/gojek/feast/issues/226) -- Does Feast help in feature engineering as a whole, i.e, calculating/ extracting the features or is this going to be developed in the near future? 
[\#220](https://github.com/gojek/feast/issues/220) -- Redis Error saying JedisConnectionException:connection timed out [\#219](https://github.com/gojek/feast/issues/219) -- Entity & feature deletion [\#202](https://github.com/gojek/feast/issues/202) -- REDIS1 error [\#193](https://github.com/gojek/feast/issues/193) -- Not able to push to feature store [\#192](https://github.com/gojek/feast/issues/192) -- Add nodeSelector to helm charts [\#186](https://github.com/gojek/feast/issues/186) -- Multiple feature specs in a single yaml file [\#181](https://github.com/gojek/feast/issues/181) -- Update continuous integration/deployment \(CI/CD\) process [\#180](https://github.com/gojek/feast/issues/180) -- Integration test for streaming data [\#179](https://github.com/gojek/feast/issues/179) -- Quickstart.ipynb shows Description field cannot be empty [\#178](https://github.com/gojek/feast/issues/178) -- Default service type for Helm chart should not be LoadBalancer [\#161](https://github.com/gojek/feast/issues/161) -- Support for ingesting flat json from text files and streams [\#140](https://github.com/gojek/feast/issues/140) -- Ability to correct mistakes made by ingesting the wrong data [\#139](https://github.com/gojek/feast/issues/139) -- Add ability to start job from yaml using python sdk [\#124](https://github.com/gojek/feast/issues/124) -- Add build/test triggering for every PR and on the master branch [\#102](https://github.com/gojek/feast/issues/102) -- Create Getting Started documentation [\#98](https://github.com/gojek/feast/issues/98) -- Create a release [\#65](https://github.com/gojek/feast/issues/65) -- Add python tests to unit testing script [\#58](https://github.com/gojek/feast/issues/58) -- Create proper OWNERS files for each sub-component [\#41](https://github.com/gojek/feast/issues/41) -- Option to create a resource without overwriting existing records [\#37](https://github.com/gojek/feast/issues/37) - **Merged pull requests:** - Remove redis transaction 
[\#280](https://github.com/gojek/feast/pull/280) ([pradithya](https://github.com/pradithya)) @@ -95,20 +74,9 @@ **Fixed bugs:** - Batch Import, feature with datetime format issue [\#203](https://github.com/gojek/feast/issues/203) -- Serving not correctly report readiness check if there is no activity [\#190](https://github.com/gojek/feast/issues/190) +- Serving not correctly reporting readiness check if there is no activity [\#190](https://github.com/gojek/feast/issues/190) - Serving stop periodically reloading feature specification after a while [\#188](https://github.com/gojek/feast/issues/188) -**Closed issues:** - -- \[question\] I just confused why FlinkJobManager always use DirectRunner as runner. [\#231](https://github.com/gojek/feast/issues/231) -- dtype 'datetime64\[ns, UTC\]' importer fails because key looks like 'datetime64\[ns, tz\]' [\#229](https://github.com/gojek/feast/issues/229) -- Add filtering capability to create dataset api [\#214](https://github.com/gojek/feast/issues/214) -- DataFlow throws out " java.io.FileNotFoundException: No files matched spec: bucket/ingestion\_1.csv" while executing [\#212](https://github.com/gojek/feast/issues/212) -- Python tests for sdk are broken [\#204](https://github.com/gojek/feast/issues/204) -- Python SDK create\_dataset is actually creating dataset in BQ [\#201](https://github.com/gojek/feast/issues/201) -- Python SDK Importer unable to stage dataframe without timestamp [\#195](https://github.com/gojek/feast/issues/195) -- Remove requirement to specify a staging bucket. 
[\#177](https://github.com/gojek/feast/issues/177) - **Merged pull requests:** - Add `romanwozniak` to prow owners config [\#216](https://github.com/gojek/feast/pull/216) ([romanwozniak](https://github.com/romanwozniak)) @@ -141,12 +109,6 @@ - Fix BigQuery query template to retrieve training data [\#182](https://github.com/gojek/feast/pull/182) ([davidheryanto](https://github.com/davidheryanto)) -**Closed issues:** - -- "pip install Feast" does not work properly [\#175](https://github.com/gojek/feast/issues/175) -- Push Feast Python SDK to https://pypi.org [\#121](https://github.com/gojek/feast/issues/121) -- Toggle data stores using flags in feature specifications [\#38](https://github.com/gojek/feast/issues/38) - **Merged pull requests:** - Add python init files [\#176](https://github.com/gojek/feast/pull/176) ([zhilingc](https://github.com/zhilingc)) @@ -235,38 +197,6 @@ - \[FlinkRunner\] Core should not follow remote flink runner job to completion [\#21](https://github.com/gojek/feast/issues/21) - Go packages in protos use incorrect repo [\#16](https://github.com/gojek/feast/issues/16) -**Closed issues:** - -- Build failure in cloudbuild [\#128](https://github.com/gojek/feast/issues/128) -- Add Redis to Feast Helm chart [\#122](https://github.com/gojek/feast/issues/122) -- RedisIO fails to ingest all of the data [\#118](https://github.com/gojek/feast/issues/118) -- Error thrown by core when requesting feature spec is not clear [\#116](https://github.com/gojek/feast/issues/116) -- Add filter to ingress for internal load balancer defined in helm chart [\#113](https://github.com/gojek/feast/issues/113) -- Build on master branch [\#111](https://github.com/gojek/feast/issues/111) -- Cleanup warning while compiling protobuf [\#107](https://github.com/gojek/feast/issues/107) -- Include ui into core's build [\#105](https://github.com/gojek/feast/issues/105) -- Go tests failing for CLI [\#96](https://github.com/gojek/feast/issues/96) -- Update CLI to reflect recent 
changes in API [\#84](https://github.com/gojek/feast/issues/84) -- Jackson dependency issues [\#81](https://github.com/gojek/feast/issues/81) -- Branch conflicts with tag when using cloud build trigger [\#78](https://github.com/gojek/feast/issues/78) -- Newest \(latest?\) value of a feature [\#77](https://github.com/gojek/feast/issues/77) -- Create helm package repo to host helm charts [\#70](https://github.com/gojek/feast/issues/70) -- Option to add service account to core deployment. [\#69](https://github.com/gojek/feast/issues/69) -- Deduplicate list of storages in specs service [\#66](https://github.com/gojek/feast/issues/66) -- Vulnerability in dependency \(webpack-dev-server\) [\#60](https://github.com/gojek/feast/issues/60) -- Add build process for docker images [\#55](https://github.com/gojek/feast/issues/55) -- Python SDK [\#48](https://github.com/gojek/feast/issues/48) -- Bump Apache Beam SDK version [\#42](https://github.com/gojek/feast/issues/42) -- Fix unit tests script [\#36](https://github.com/gojek/feast/issues/36) -- Create CONTRIBUTING documentation [\#32](https://github.com/gojek/feast/issues/32) -- Errors during kafka deserializer \(passing\) test execution [\#31](https://github.com/gojek/feast/issues/31) -- Removal of ingestion's profile for different runner [\#28](https://github.com/gojek/feast/issues/28) -- Error Store should not require a storage spec [\#27](https://github.com/gojek/feast/issues/27) -- \[FlinkRunner\] Ingestion job tries to connect to every store available in core [\#20](https://github.com/gojek/feast/issues/20) -- FeatureRow proto to wrap FeatureRowKey and FeatureRowMessage [\#13](https://github.com/gojek/feast/issues/13) -- Ingestion should fail immediately when there are no valid stores [\#12](https://github.com/gojek/feast/issues/12) -- Create CI [\#2](https://github.com/gojek/feast/issues/2) - **Merged pull requests:** - Disable test during docker image creation [\#129](https://github.com/gojek/feast/pull/129) 
([pradithya](https://github.com/pradithya)) From 98ea90186f5bf5f36509938cc2b53ad4bdd6859c Mon Sep 17 00:00:00 2001 From: Shu Heng Date: Fri, 20 Dec 2019 17:33:13 +0800 Subject: [PATCH 08/18] Allow user to override job options --- .../charts/feast/charts/feast-serving/templates/configmap.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infra/charts/feast/charts/feast-serving/templates/configmap.yaml b/infra/charts/feast/charts/feast-serving/templates/configmap.yaml index 7ea36fdfb83..0ec80252c16 100644 --- a/infra/charts/feast/charts/feast-serving/templates/configmap.yaml +++ b/infra/charts/feast/charts/feast-serving/templates/configmap.yaml @@ -19,7 +19,7 @@ data: {{- end }} {{- $store := index .Values "store.yaml" }} -{{- if eq $store.type "BIGQUERY" }} +{{- if and (eq $store.type "BIGQUERY") (not (hasKey $config.feast.jobs "store-options")) }} {{- $jobStore := dict "host" (printf "%s-redis-headless" .Release.Name) "port" 6379 }} {{- $newConfig := dict "feast" (dict "jobs" (dict "store-options" $jobStore)) }} {{- $config := mergeOverwrite $config $newConfig }} From 437b3508d216abcadc669cbb038c086dae6ab99b Mon Sep 17 00:00:00 2001 From: voonhous Date: Sun, 22 Dec 2019 11:23:29 +0800 Subject: [PATCH 09/18] Rebasing changes (#355) --- .prow/config.yaml | 22 +- sdk/__init__.py | 0 sdk/python/feast/client.py | 196 +++++++---- sdk/python/feast/feature_set.py | 212 +++++++++++- sdk/python/feast/loaders/abstract_producer.py | 248 ++++++++++++++ sdk/python/feast/loaders/ingest.py | 305 ++++++------------ sdk/python/feast/type_map.py | 166 +++++++++- sdk/python/setup.py | 1 + sdk/python/tests/test_client.py | 6 +- 9 files changed, 857 insertions(+), 299 deletions(-) create mode 100644 sdk/__init__.py create mode 100644 sdk/python/feast/loaders/abstract_producer.py diff --git a/.prow/config.yaml b/.prow/config.yaml index 41f95180fbb..4b6e352a12f 100644 --- a/.prow/config.yaml +++ b/.prow/config.yaml @@ -145,18 +145,18 @@ presubmits: postsubmits: 
gojek/feast: - name: publish-python-sdk - decorate: true + decorate: true spec: containers: - image: python:3 command: - sh - - -c + - -c - | .prow/scripts/publish-python-sdk.sh \ --directory-path sdk/python --repository pypi volumeMounts: - - name: pypirc + - name: pypirc mountPath: /root/.pypirc subPath: .pypirc readOnly: true @@ -170,7 +170,7 @@ postsubmits: - ^v(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(-(0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(\.(0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*)?(\+[0-9a-zA-Z-]+(\.[0-9a-zA-Z-]+)*)?$ - name: publish-docker-images - decorate: true + decorate: true spec: containers: - image: google/cloud-sdk:273.0.0 @@ -182,14 +182,14 @@ postsubmits: --archive-uri gs://feast-templocation-kf-feast/.m2.2019-10-24.tar \ --output-dir $PWD/ - if [ $PULL_BASE_REF == "master" ]; then - + if [ $PULL_BASE_REF == "master" ]; then + .prow/scripts/publish-docker-image.sh \ --repository gcr.io/kf-feast/feast-core \ --tag dev \ --file infra/docker/core/Dockerfile \ --google-service-account-file /etc/gcloud/service-account.json - + .prow/scripts/publish-docker-image.sh \ --repository gcr.io/kf-feast/feast-serving \ --tag dev \ @@ -203,13 +203,13 @@ postsubmits: docker push gcr.io/kf-feast/feast-serving:${PULL_BASE_SHA} else - + .prow/scripts/publish-docker-image.sh \ --repository gcr.io/kf-feast/feast-core \ --tag ${PULL_BASE_REF:1} \ --file infra/docker/core/Dockerfile \ --google-service-account-file /etc/gcloud/service-account.json - + .prow/scripts/publish-docker-image.sh \ --repository gcr.io/kf-feast/feast-serving \ --tag ${PULL_BASE_REF:1} \ @@ -244,7 +244,7 @@ postsubmits: - ^v(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(-(0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(\.(0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*)?(\+[0-9a-zA-Z-]+(\.[0-9a-zA-Z-]+)*)?$ - name: publish-helm-chart - decorate: true + decorate: true spec: containers: - image: google/cloud-sdk:273.0.0-slim @@ -253,7 +253,7 @@ postsubmits: - -c - | gcloud auth activate-service-account --key-file 
/etc/gcloud/service-account.json - + curl -s https://get.helm.sh/helm-v2.16.1-linux-amd64.tar.gz | tar -C /tmp -xz mv /tmp/linux-amd64/helm /usr/bin/helm helm init --client-only diff --git a/sdk/__init__.py b/sdk/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sdk/python/feast/client.py b/sdk/python/feast/client.py index 20df828a0e8..78596ee3eba 100644 --- a/sdk/python/feast/client.py +++ b/sdk/python/feast/client.py @@ -15,12 +15,12 @@ import logging import os -import sys +import time from collections import OrderedDict from typing import Dict, Union from typing import List + import grpc -import time import pandas as pd import pyarrow as pa import pyarrow.parquet as pq @@ -35,11 +35,12 @@ ) from feast.core.CoreService_pb2_grpc import CoreServiceStub from feast.core.FeatureSet_pb2 import FeatureSetStatus -from feast.exceptions import format_grpc_exception from feast.feature_set import FeatureSet, Entity from feast.job import Job +from feast.loaders.abstract_producer import get_producer from feast.loaders.file import export_dataframe_to_staging_location -from feast.loaders.ingest import ingest_table_to_kafka +from feast.loaders.ingest import KAFKA_CHUNK_PRODUCTION_TIMEOUT +from feast.loaders.ingest import get_feature_row_chunks from feast.serving.ServingService_pb2 import GetFeastServingInfoResponse from feast.serving.ServingService_pb2 import ( GetOnlineFeaturesRequest, @@ -259,7 +260,7 @@ def _apply_feature_set(self, feature_set: FeatureSet): print(f"No change detected or applied: {feature_set.name}") # Deep copy from the returned feature set to the local feature set - feature_set.update_from_feature_set(applied_fs) + feature_set._update_from_feature_set(applied_fs) def list_feature_sets(self) -> List[FeatureSet]: """ @@ -472,35 +473,55 @@ def get_online_features( ) # type: GetOnlineFeaturesResponse def ingest( - self, - feature_set: Union[str, FeatureSet], - source: Union[pd.DataFrame, str], - version: int = None, - force_update: bool 
= False, - max_workers: int = CPU_COUNT, - disable_progress_bar: bool = False, - chunk_size: int = 5000, - timeout: int = None, - ): + self, + feature_set: Union[str, FeatureSet], + source: Union[pd.DataFrame, str], + chunk_size: int = 10000, + version: int = None, + force_update: bool = False, + max_workers: int = max(CPU_COUNT - 1, 1), + disable_progress_bar: bool = False, + timeout: int = KAFKA_CHUNK_PRODUCTION_TIMEOUT + ) -> None: """ Loads feature data into Feast for a specific feature set. Args: - feature_set: Name of feature set or a feature set object - source: Either a file path or Pandas Dataframe to ingest into Feast + feature_set (typing.Union[str, FeatureSet]): + Feature set object or the string name of the feature set + (without a version). + + source (typing.Union[pd.DataFrame, str]): + Either a file path or Pandas Dataframe to ingest into Feast Files that are currently supported: - * parquet - * csv - * json - version: Feature set version - force_update: Automatically update feature set based on source data - prior to ingesting. This will also register changes to Feast - max_workers: Number of worker processes to use to encode values - disable_progress_bar: Disable printing of progress statistics - chunk_size: Maximum amount of rows to load into memory and ingest at - a time - timeout: Seconds to wait before ingestion times out + * parquet + * csv + * json + + chunk_size (int): + Amount of rows to load and ingest at a time. + + version (int): + Feature set version. + + force_update (bool): + Automatically update feature set based on source data prior to + ingesting. This will also register changes to Feast. + + max_workers (int): + Number of worker processes to use to encode values. + + disable_progress_bar (bool): + Disable printing of progress statistics. + + timeout (int): + Timeout in seconds to wait for completion. 
+ + Returns: + None: + None """ + if isinstance(feature_set, FeatureSet): name = feature_set.name if version is None: @@ -510,15 +531,21 @@ def ingest( else: raise Exception(f"Feature set name must be provided") - table = _read_table_from_source(source) + # Read table and get row count + tmp_table_name = _read_table_from_source( + source, chunk_size, max_workers + ) + + pq_file = pq.ParquetFile(tmp_table_name) - # Update the feature set based on DataFrame schema - if force_update: - # Use a small as reference DataFrame to infer fields - ref_df = table.to_batches(max_chunksize=20)[0].to_pandas() + row_count = pq_file.metadata.num_rows - feature_set.infer_fields_from_df( - ref_df, discard_unused_fields=True, replace_existing_features=True + # Update the feature set based on PyArrow table of first row group + if force_update: + feature_set.infer_fields_from_pa( + table=pq_file.read_row_group(0), + discard_unused_fields=True, + replace_existing_features=True ) self.apply(feature_set) current_time = time.time() @@ -538,22 +565,49 @@ def ingest( if timeout is not None: timeout = timeout - int(time.time() - current_time) - if feature_set.source.source_type == "Kafka": - print("Ingesting to kafka...") - ingest_table_to_kafka( - feature_set=feature_set, - table=table, - max_workers=max_workers, - disable_pbar=disable_progress_bar, - chunk_size=chunk_size, - timeout=timeout, - ) - else: - raise Exception( - f"Could not determine source type for feature set " - f'"{feature_set.name}" with source type ' - f'"{feature_set.source.source_type}"' - ) + try: + # Kafka configs + brokers = feature_set.get_kafka_source_brokers() + topic = feature_set.get_kafka_source_topic() + producer = get_producer(brokers, row_count, disable_progress_bar) + + # Loop optimization declarations + produce = producer.produce + flush = producer.flush + + # Transform and push data to Kafka + if feature_set.source.source_type == "Kafka": + for chunk in get_feature_row_chunks( + file=tmp_table_name, + 
 row_groups=list(range(pq_file.num_row_groups)), + fs=feature_set, + max_workers=max_workers): + + # Push FeatureRow one chunk at a time to kafka + for serialized_row in chunk: + produce(topic=topic, value=serialized_row) + + # Force a flush after each chunk + flush(timeout=timeout) + + # Remove chunk from memory + del chunk + + else: + raise Exception( + f"Could not determine source type for feature set " + f'"{feature_set.name}" with source type ' + f'"{feature_set.source.source_type}"' + ) + + # Print ingestion statistics + producer.print_results() + finally: + # Remove parquet file(s) that were created earlier + print("Removing temporary file(s)...") + os.remove(tmp_table_name) + + return None def _build_feature_set_request(feature_ids: List[str]) -> List[FeatureSetRequest]: @@ -583,18 +637,38 @@ def _build_feature_set_request(feature_ids: List[str]) -> List[FeatureSetRequest return list(feature_set_request.values()) -def _read_table_from_source(source: Union[pd.DataFrame, str]) -> pa.lib.Table: +def _read_table_from_source( + source: Union[pd.DataFrame, str], + chunk_size: int, + max_workers: int +) -> str: """ Infers a data source type (path or Pandas Dataframe) and reads it in as a PyArrow Table. + The PyArrow Table that is read will be written to a parquet file with row + group size determined by the minimum of: + * (table.num_rows / max_workers) + * chunk_size + + The parquet file that is created will be passed as file path to the + multiprocessing pool workers. + Args: - source: Either a string path or Pandas Dataframe + source (Union[pd.DataFrame, str]): + Either a string path or Pandas DataFrame. + + chunk_size (int): + Amount of rows to load and ingest at a time. + + max_workers (int): + Number of worker processes to use to encode values. Returns: - PyArrow table + str: Path to parquet file that was created.
""" - # Pandas dataframe detected + + # Pandas DataFrame detected if isinstance(source, pd.DataFrame): table = pa.Table.from_pandas(df=source) @@ -618,4 +692,14 @@ def _read_table_from_source(source: Union[pd.DataFrame, str]) -> pa.lib.Table: # Ensure that PyArrow table is initialised assert isinstance(table, pa.lib.Table) - return table + + # Write table as parquet file with a specified row_group_size + tmp_table_name = f"{int(time.time())}.parquet" + row_group_size = min(int(table.num_rows/max_workers), chunk_size) + pq.write_table(table=table, where=tmp_table_name, + row_group_size=row_group_size) + + # Remove table from memory + del table + + return tmp_table_name diff --git a/sdk/python/feast/feature_set.py b/sdk/python/feast/feature_set.py index 85d8e137530..b402ef3acd5 100644 --- a/sdk/python/feast/feature_set.py +++ b/sdk/python/feast/feature_set.py @@ -13,25 +13,27 @@ # limitations under the License. -import pandas as pd -from typing import List, Optional from collections import OrderedDict from typing import Dict -from feast.source import Source -from pandas.api.types import is_datetime64_ns_dtype +from typing import List, Optional + +import pandas as pd +import pyarrow as pa +from feast.core.FeatureSet_pb2 import FeatureSet as FeatureSetProto +from feast.core.FeatureSet_pb2 import FeatureSetMeta as FeatureSetMetaProto +from feast.core.FeatureSet_pb2 import FeatureSetSpec as FeatureSetSpecProto from feast.entity import Entity from feast.feature import Feature, Field -from feast.core.FeatureSet_pb2 import FeatureSetSpec as FeatureSetSpecProto -from feast.core.FeatureSet_pb2 import FeatureSetMeta as FeatureSetMetaProto -from feast.core.FeatureSet_pb2 import FeatureSet as FeatureSetProto -from feast.core.FeatureSet_pb2 import FeatureSetStatus -from google.protobuf.duration_pb2 import Duration -from google.protobuf.timestamp_pb2 import Timestamp +from feast.loaders import yaml as feast_yaml +from feast.source import Source +from feast.type_map import 
DATETIME_COLUMN +from feast.type_map import pa_to_feast_value_type from feast.type_map import python_type_to_feast_value_type -from google.protobuf.json_format import MessageToJson from google.protobuf import json_format -from feast.type_map import DATETIME_COLUMN -from feast.loaders import yaml as feast_yaml +from google.protobuf.duration_pb2 import Duration +from google.protobuf.json_format import MessageToJson +from pandas.api.types import is_datetime64_ns_dtype +from pyarrow.lib import TimestampType class FeatureSet: @@ -290,7 +292,6 @@ def infer_fields_from_df( rows_to_sample: int = 100, ): """ - Adds fields (Features or Entities) to a feature set based on the schema of a Datatframe. Only Pandas dataframes are supported. All columns are detected as features, so setting at least one entity manually is @@ -317,6 +318,7 @@ def infer_fields_from_df( must have consistent types, even values within list types must be homogeneous """ + if entities is None: entities = list() if features is None: @@ -407,7 +409,187 @@ def infer_fields_from_df( self._fields = new_fields print(output_log) - def update_from_feature_set(self, feature_set): + def infer_fields_from_pa( + self, table: pa.lib.Table, + entities: Optional[List[Entity]] = None, + features: Optional[List[Feature]] = None, + replace_existing_features: bool = False, + replace_existing_entities: bool = False, + discard_unused_fields: bool = False + ) -> None: + """ + Adds fields (Features or Entities) to a feature set based on the schema + of a PyArrow table. Only PyArrow tables are supported. All columns are + detected as features, so setting at least one entity manually is + advised. + + + Args: + table (pyarrow.lib.Table): + PyArrow table to read schema from. + + entities (Optional[List[Entity]]): + List of entities that will be set manually and not inferred. + These will take precedence over any existing entities or + entities found in the PyArrow table. 
+ + features (Optional[List[Feature]]): + List of features that will be set manually and not inferred. + These will take precedence over any existing feature or features + found in the PyArrow table. + + replace_existing_features (bool): + Boolean flag. If true, will replace existing features in this + feature set with features found in dataframe. If false, will + skip conflicting features. + + replace_existing_entities (bool): + Boolean flag. If true, will replace existing entities in this + feature set with features found in dataframe. If false, will + skip conflicting entities. + + discard_unused_fields (bool): + Boolean flag. Setting this to True will discard any existing + fields that are not found in the dataset or provided by the + user. + + Returns: + None: + None + """ + if entities is None: + entities = list() + if features is None: + features = list() + + # Validate whether the datetime column exists with the right name + if DATETIME_COLUMN not in table.column_names: + raise Exception("No column 'datetime'") + + # Validate the date type for the datetime column + if not isinstance(table.column(DATETIME_COLUMN).type, TimestampType): + raise Exception( + "Column 'datetime' does not have the correct type: datetime64[ms]" + ) + + # Create dictionary of fields that will not be inferred (manually set) + provided_fields = OrderedDict() + + for field in entities + features: + if not isinstance(field, Field): + raise Exception(f"Invalid field object type provided {type(field)}") + if field.name not in provided_fields: + provided_fields[field.name] = field + else: + raise Exception(f"Duplicate field name detected {field.name}.") + + new_fields = self._fields.copy() + output_log = "" + + # Add in provided fields + for name, field in provided_fields.items(): + if name in new_fields.keys(): + upsert_message = "created" + else: + upsert_message = "updated (replacing an existing field)" + + output_log += ( + f"{type(field).__name__} {field.name}" + f"({field.dtype}) 
manually {upsert_message}.\n" + ) + new_fields[name] = field + + # Iterate over all of the column names and create features + for column in table.column_names: + column = column.strip() + + # Skip datetime column + if DATETIME_COLUMN in column: + continue + + # Skip user provided fields + if column in provided_fields.keys(): + continue + + # Only overwrite conflicting fields if replacement is allowed + if column in new_fields: + if ( + isinstance(self._fields[column], Feature) + and not replace_existing_features + ): + continue + + if ( + isinstance(self._fields[column], Entity) + and not replace_existing_entities + ): + continue + + # Store this fields as a feature + # TODO: (Minor) Change the parameter name from dtype to patype + new_fields[column] = Feature( + name=column, + dtype=self._infer_pa_column_type(table.column(column)) + ) + + output_log += f"{type(new_fields[column]).__name__} {new_fields[column].name} ({new_fields[column].dtype}) added from PyArrow Table.\n" + + # Discard unused fields from feature set + if discard_unused_fields: + keys_to_remove = [] + for key in new_fields.keys(): + if not (key in table.column_names or key in provided_fields.keys()): + output_log += f"{type(new_fields[key]).__name__} {new_fields[key].name} ({new_fields[key].dtype}) removed because it is unused.\n" + keys_to_remove.append(key) + for key in keys_to_remove: + del new_fields[key] + + # Update feature set + self._fields = new_fields + print(output_log) + + def _infer_pd_column_type(self, column, series, rows_to_sample): + dtype = None + sample_count = 0 + + # Loop over all rows for this column to infer types + for key, value in series.iteritems(): + sample_count += 1 + # Stop sampling at the row limit + if sample_count > rows_to_sample: + continue + + # Infer the specific type for this row + current_dtype = python_type_to_feast_value_type(name=column, value=value) + + # Make sure the type is consistent for column + if dtype: + if dtype != current_dtype: + raise 
ValueError( + f"Type mismatch detected in column {column}. Both " + f"the types {current_dtype} and {dtype} " + f"have been found." + ) + else: + # Store dtype in field to type map if it isnt already + dtype = current_dtype + + return dtype + + def _infer_pa_column_type(self, column: pa.lib.ChunkedArray): + """ + Infers the PyArrow column type. + + :param column: Column from a PyArrow table + :type column: pa.lib.ChunkedArray + :return: + :rtype: + """ + # Validates the column to ensure that value types are consistent + column.validate() + return pa_to_feast_value_type(column) + + def _update_from_feature_set(self, feature_set): """ Deep replaces one feature set with another diff --git a/sdk/python/feast/loaders/abstract_producer.py b/sdk/python/feast/loaders/abstract_producer.py new file mode 100644 index 00000000000..884ae49984c --- /dev/null +++ b/sdk/python/feast/loaders/abstract_producer.py @@ -0,0 +1,248 @@ +# Copyright 2019 The Feast Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Optional, Union + +from tqdm import tqdm + + +class AbstractProducer: + """ + Abstract class for Kafka producers + """ + + def __init__( + self, + brokers: str, + row_count: int, + disable_progress_bar: bool + ): + self.brokers = brokers + self.row_count = row_count + self.error_count = 0 + self.last_exception = "" + + # Progress bar will always display average rate + self.pbar = tqdm( + total=row_count, + unit="rows", + smoothing=0, + disable=disable_progress_bar + ) + + def produce(self, topic: str, data: str): + message = "{} should implement a produce method".format( + self.__class__.__name__) + raise NotImplementedError(message) + + def flush(self, timeout: int): + message = "{} should implement a flush method".format( + self.__class__.__name__) + raise NotImplementedError(message) + + def _inc_pbar(self, meta): + self.pbar.update(1) + + def _set_error(self, exception: str): + self.error_count += 1 + self.last_exception = exception + + def print_results(self) -> None: + """ + Print ingestion statistics. 
+ + Returns: + None: None + """ + # Refresh and close tqdm progress bar + self.pbar.refresh() + + self.pbar.close() + + print("Ingestion complete!") + + failed_message = ( + "" + if self.error_count == 0 + else f"\nFail: {self.error_count / self.row_count}" + ) + + last_exception_message = ( + "" + if self.last_exception == "" + else f"\nLast exception:\n{self.last_exception}" + ) + + print( + f"\nIngestion statistics:" + f"\nSuccess: {self.pbar.n}/{self.row_count}" + f"{failed_message}" + f"{last_exception_message}" + ) + return None + + +class ConfluentProducer(AbstractProducer): + """ + Concrete implementation of Confluent Kafka producer (confluent-kafka) + """ + + def __init__( + self, + brokers: str, + row_count: int, + disable_progress_bar: bool + ): + from confluent_kafka import Producer + self.producer = Producer({"bootstrap.servers": brokers}) + super().__init__(brokers, row_count, disable_progress_bar) + + def produce(self, topic: str, value: bytes) -> None: + """ + Generic produce that implements confluent-kafka's produce method to + push a byte encoded object into a Kafka topic. + + Args: + topic (str): Kafka topic. + value (bytes): Byte encoded object. + + Returns: + None: None. + """ + + try: + self.producer.produce( + topic, value=value, callback=self._delivery_callback) + # Serve delivery callback queue. + # NOTE: Since produce() is an asynchronous API this poll() call + # will most likely not serve the delivery callback for the + # last produce()d message. + self.producer.poll(0) + except Exception as ex: + self._set_error(str(ex)) + + return None + + def flush(self, timeout: Optional[int]): + """ + Generic flush that implements confluent-kafka's flush method. + + Args: + timeout (Optional[int]): Timeout in seconds to wait for completion. + + Returns: + int: Number of messages still in queue. 
+ """ + return self.producer.flush(timeout=timeout) + + def _delivery_callback(self, err: str, msg) -> None: + """ + Optional per-message delivery callback (triggered by poll() or flush()) + when a message has been successfully delivered or permanently failed + delivery (after retries). + + Although the msg argument is not used, the current method signature is + required as specified in the confluent-kafka documentation. + + Args: + err (str): Error message. + msg (): Kafka message. + + Returns: + None + """ + if err: + self._set_error(err) + else: + self._inc_pbar(None) + + +class KafkaPythonProducer(AbstractProducer): + """ + Concrete implementation of Python Kafka producer (kafka-python) + """ + + def __init__( + self, + brokers: str, + row_count: int, + disable_progress_bar: bool + ): + from kafka import KafkaProducer + self.producer = KafkaProducer(bootstrap_servers=[brokers]) + super().__init__(brokers, row_count, disable_progress_bar) + + def produce(self, topic: str, value: bytes): + """ + Generic produce that implements kafka-python's send method to push a + byte encoded object into a Kafka topic. + + Args: + topic (str): Kafka topic. + value (bytes): Byte encoded object. + + Returns: + FutureRecordMetadata: resolves to RecordMetadata + + Raises: + KafkaTimeoutError: if unable to fetch topic metadata, or unable + to obtain memory buffer prior to configured max_block_ms + """ + return self.producer.send(topic, value=value).add_callback( + self._inc_pbar).add_errback(self._set_error) + + def flush(self, timeout: Optional[int]): + """ + Generic flush that implements kafka-python's flush method. + + Args: + timeout (Optional[int]): timeout in seconds to wait for completion. 
+ + Returns: + None + + Raises: + KafkaTimeoutError: failure to flush buffered records within the + provided timeout + """ + return self.producer.flush(timeout=timeout) + + +def get_producer( + brokers: str, row_count: int, disable_progress_bar: bool +) -> Union[ConfluentProducer, KafkaPythonProducer]: + """ + Simple context helper function that returns a AbstractProducer object when + invoked. + + This helper function will try to import confluent-kafka as a producer first. + + This helper function will fallback to kafka-python if it fails to import + confluent-kafka. + + Args: + brokers (str): Kafka broker information with hostname and port. + row_count (int): Number of rows in table + + Returns: + Union[ConfluentProducer, KafkaPythonProducer]: + Concrete implementation of a Kafka producer. Ig can be: + * confluent-kafka producer + * kafka-python producer + """ + try: + return ConfluentProducer(brokers, row_count, disable_progress_bar) + except ImportError as e: + print("Unable to import confluent-kafka, falling back to kafka-python") + return KafkaPythonProducer(brokers, row_count, disable_progress_bar) diff --git a/sdk/python/feast/loaders/ingest.py b/sdk/python/feast/loaders/ingest.py index a59c7c66b1a..527ab481fe0 100644 --- a/sdk/python/feast/loaders/ingest.py +++ b/sdk/python/feast/loaders/ingest.py @@ -1,18 +1,16 @@ import logging -import multiprocessing -import os -import time from functools import partial -from multiprocessing import Process, Queue, Pool -from typing import Iterable +from multiprocessing import Pool +from typing import Iterable, List + import pandas as pd -import pyarrow as pa +import pyarrow.parquet as pq +from feast.constants import DATETIME_COLUMN from feast.feature_set import FeatureSet -from feast.type_map import convert_dict_to_proto_values +from feast.type_map import pa_column_to_timestamp_proto_column, \ + pa_column_to_proto_column +from feast.types import Field_pb2 as FieldProto from feast.types.FeatureRow_pb2 import FeatureRow 
-from kafka import KafkaProducer -from tqdm import tqdm -from feast.constants import DATETIME_COLUMN _logger = logging.getLogger(__name__) @@ -21,221 +19,120 @@ FEAST_SERVING_URL_ENV_KEY = "FEAST_SERVING_URL" # type: str FEAST_CORE_URL_ENV_KEY = "FEAST_CORE_URL" # type: str BATCH_FEATURE_REQUEST_WAIT_TIME_SECONDS = 300 -CPU_COUNT = os.cpu_count() # type: int KAFKA_CHUNK_PRODUCTION_TIMEOUT = 120 # type: int -def _kafka_feature_row_producer( - feature_row_queue: Queue, row_count: int, brokers, topic, ctx: dict, pbar: tqdm -): +def _encode_pa_tables( + file: str, + fs: FeatureSet, + row_group_idx: int, +) -> List[bytes]: """ - Pushes Feature Rows to Kafka. Reads rows from a queue. Function will run - until total row_count is reached. + Helper function to encode a PyArrow table(s) read from parquet file(s) into + FeatureRows. - Args: - feature_row_queue: Queue containing feature rows. - row_count: Total row count to process - brokers: Broker to push to - topic: Topic to push to - ctx: Context dict used to communicate with primary process - pbar: Progress bar object - """ - - # Callback for failed production to Kafka - def on_error(e): - # Save last exception - ctx["last_exception"] = e - - # Increment error count - if "error_count" in ctx: - ctx["error_count"] += 1 - else: - ctx["error_count"] = 1 - - # Callback for succeeded production to Kafka - def on_success(meta): - pbar.update() - - producer = KafkaProducer(bootstrap_servers=brokers) - processed_rows = 0 - - # Loop through feature rows until all rows are processed - while processed_rows < row_count: - # Wait if queue is empty - if feature_row_queue.empty(): - time.sleep(1) - producer.flush(timeout=KAFKA_CHUNK_PRODUCTION_TIMEOUT) - else: - while not feature_row_queue.empty(): - row = feature_row_queue.get() - if row is not None: - # Push row to Kafka - producer.send(topic, row.SerializeToString()).add_callback( - on_success - ).add_errback(on_error) - processed_rows += 1 - - # Force an occasional flush - if 
processed_rows % 10000 == 0: - producer.flush(timeout=KAFKA_CHUNK_PRODUCTION_TIMEOUT) - del row - pbar.refresh() - - # Ensure that all rows are pushed - producer.flush(timeout=KAFKA_CHUNK_PRODUCTION_TIMEOUT) - - # Using progress bar as counter is much faster than incrementing dict - ctx["success_count"] = pbar.n - pbar.close() - - -def _encode_pa_chunks( - tbl: pa.lib.Table, - fs: FeatureSet, - max_workers: int, - df_datetime_dtype: pd.DataFrame.dtypes, - chunk_size: int = 5000, -) -> Iterable[FeatureRow]: - """ - Generator function to encode rows in PyArrow table to FeatureRows by - breaking up the table into batches. + This function accepts a list of file directory pointing to many parquet + files. All parquet files must have the same schema. - Each batch will have its rows spread accross a pool of workers to be - transformed into FeatureRow objects. + Each parquet file will be read into as a table and encoded into FeatureRows + using a pool of max_workers workers. Args: - tbl: PyArrow table to be processed. - fs: FeatureSet describing PyArrow table. - max_workers: Maximum number of workers. - df_datetime_dtype: Pandas dtype of datetime column. - chunk_size: Maximum size of each chunk when PyArrow table is batched. - - Returns: - Iterable FeatureRow object. - """ - - pool = Pool(max_workers) - - # Create a partial function with static non-iterable arguments - func = partial( - convert_dict_to_proto_values, - df_datetime_dtype=df_datetime_dtype, - feature_set=fs, - ) - - for batch in tbl.to_batches(max_chunksize=chunk_size): - m_df = batch.to_pandas() - results = pool.map_async(func, m_df.to_dict("records")) - yield from results.get() + file (str): + File directory of all the parquet file to encode. + Parquet file must have more than one row group. - pool.close() - pool.join() - return + fs (feast.feature_set.FeatureSet): + FeatureSet describing parquet files. + row_group_idx(int): + Row group index to read and encode into byte like FeatureRow + protobuf objects. 
-def ingest_table_to_kafka( - feature_set: FeatureSet, - table: pa.lib.Table, - max_workers: int, - chunk_size: int = 5000, - disable_pbar: bool = False, - timeout: int = None, -) -> None: + Returns: + List[bytes]: + List of byte encoded FeatureRows from the parquet file. """ - Ingest a PyArrow Table to a Kafka topic based for a Feature Set + pq_file = pq.ParquetFile(file) + # Read parquet file as a PyArrow table + table = pq_file.read_row_group(row_group_idx) + + # Add datetime column + datetime_col = pa_column_to_timestamp_proto_column( + table.column(DATETIME_COLUMN)) + + # Preprocess the columns by converting all its values to Proto values + proto_columns = { + field_name: pa_column_to_proto_column(field.dtype, + table.column(field_name)) + for field_name, field in fs.fields.items() + } + + feature_set = f"{fs.name}:{fs.version}" + + # List to store result + feature_rows = [] + + # Loop optimization declaration(s) + field = FieldProto.Field + proto_items = proto_columns.items() + append = feature_rows.append + + # Iterate through the rows + for row_idx in range(table.num_rows): + feature_row = FeatureRow(event_timestamp=datetime_col[row_idx], + feature_set=feature_set) + # Loop optimization declaration + ext = feature_row.fields.extend + + # Insert field from each column + for k, v in proto_items: + ext([field(name=k, value=v[row_idx])]) + + # Append FeatureRow in byte string form + append(feature_row.SerializeToString()) + + return feature_rows + + +def get_feature_row_chunks( + file: str, + row_groups: List[int], + fs: FeatureSet, + max_workers: int +) -> Iterable[List[bytes]]: + """ + Iterator function to encode a PyArrow table read from a parquet file to + FeatureRow(s). Args: - feature_set: FeatureSet describing PyArrow table. - table: PyArrow table to be processed. - max_workers: Maximum number of workers. - chunk_size: Maximum size of each chunk when PyArrow table is batched. - disable_pbar: Flag to indicate if tqdm progress bar should be disabled. 
- timeout: Maximum time before method times out - """ + file (str): + File directory of the parquet file. The parquet file must have more + than one row group. - pbar = tqdm(unit="rows", total=table.num_rows, disable=disable_pbar) - - # Use a small DataFrame to validate feature set schema - ref_df = table.to_batches(max_chunksize=100)[0].to_pandas() - df_datetime_dtype = ref_df[DATETIME_COLUMN].dtype - - # Validate feature set schema - _validate_dataframe(ref_df, feature_set) - - # Create queue through which encoding and production will coordinate - row_queue = Queue() - - # Create a context object to send and receive information across processes - ctx = multiprocessing.Manager().dict( - {"success_count": 0, "error_count": 0, "last_exception": ""} - ) - - # Create producer to push feature rows to Kafka - ingestion_process = Process( - target=_kafka_feature_row_producer, - args=( - row_queue, - table.num_rows, - feature_set.get_kafka_source_brokers(), - feature_set.get_kafka_source_topic(), - ctx, - pbar, - ), - ) - - try: - # Start ingestion process - print( - f"\n(ingest table to kafka) Ingestion started for {feature_set.name}:{feature_set.version}" - ) - ingestion_process.start() - - # Iterate over chunks in the table and return feature rows - for row in _encode_pa_chunks( - tbl=table, - fs=feature_set, - max_workers=max_workers, - chunk_size=chunk_size, - df_datetime_dtype=df_datetime_dtype, - ): - # Push rows onto a queue for the production process to pick up - row_queue.put(row) - # while row_queue.qsize() > chunk_size: - # time.sleep(0.1) - row_queue.put(None) - except Exception as ex: - _logger.error(f"Exception occurred: {ex}") - finally: - # Wait for the Kafka production to complete - ingestion_process.join(timeout=timeout) - failed_message = ( - "" - if ctx["error_count"] == 0 - else f"\nFail: {ctx['error_count']}/{table.num_rows}" - ) + row_groups (List[int]): + Specific row group indexes to be read and transformed in the parquet + file. 
- last_exception_message = ( - "" - if ctx["last_exception"] == "" - else f"\nLast exception:\n{ctx['last_exception']}" - ) - print( - f"\nIngestion statistics:" - f"\nSuccess: {ctx['success_count']}/{table.num_rows}" - f"{failed_message}" - f"{last_exception_message}" - ) + fs (feast.feature_set.FeatureSet): + FeatureSet describing parquet files. + max_workers (int): + Maximum number of workers to spawn. -def _validate_dataframe(dataframe: pd.DataFrame, feature_set: FeatureSet): + Returns: + Iterable[List[bytes]]: + Iterable list of byte encoded FeatureRow(s). """ - Validates a Pandas dataframe based on a feature set - Args: - dataframe: Pandas dataframe - feature_set: Feature Set instance - """ + pool = Pool(max_workers) + func = partial(_encode_pa_tables, file, fs) + for chunk in pool.imap_unordered(func, row_groups): + yield chunk + return + +def validate_dataframe(dataframe: pd.DataFrame, feature_set: FeatureSet): if "datetime" not in dataframe.columns: raise ValueError( f'Dataframe does not contain entity "datetime" in columns {dataframe.columns}' diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index 7573276d74a..ca13c2573bc 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -12,12 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from datetime import datetime, timezone +from typing import List + import numpy as np import pandas as pd -from datetime import datetime, timezone -from feast.value_type import ValueType +import pyarrow as pa +from feast.constants import DATETIME_COLUMN +from feast.types import ( + FeatureRow_pb2 as FeatureRowProto, + Field_pb2 as FieldProto, +) from feast.types.Value_pb2 import ( Value as ProtoValue, + ValueType as ProtoValueType, Int64List, Int32List, BoolList, @@ -26,9 +34,9 @@ StringList, FloatList, ) -from feast.types import FeatureRow_pb2 as FeatureRowProto, Field_pb2 as FieldProto +from feast.value_type import ValueType from google.protobuf.timestamp_pb2 import Timestamp -from feast.constants import DATETIME_COLUMN +from pyarrow.lib import TimestampType def python_type_to_feast_value_type( @@ -104,9 +112,9 @@ def python_type_to_feast_value_type( return ValueType[common_item_value_type.name + "_LIST"] else: raise ValueError( - f"Value type for field {name} is {value.dtype.__str__()} " - f"but recursion is not allowed. Array types can only be one " - f"level deep." + f"Value type for field {name} is {value.dtype.__str__()} but " + f"recursion is not allowed. Array types can only be one level " + f"deep." ) return type_map[value.dtype.__str__()] @@ -160,7 +168,7 @@ def convert_series_to_proto_values(row: pd.Series): def convert_dict_to_proto_values( - row: dict, df_datetime_dtype: pd.DataFrame.dtypes, feature_set + row: dict, df_datetime_dtype: pd.DataFrame.dtypes, feature_set ) -> FeatureRowProto.FeatureRow: """ Encode a dictionary describing a feature row into a FeatureRows object. 
@@ -211,12 +219,14 @@ def _pd_datetime_to_timestamp_proto(dtype, value) -> Timestamp: # If timestamp does not contain timezone, we assume it is of local # timezone and adjust it to UTC local_timezone = datetime.now(timezone.utc).astimezone().tzinfo - value = value.tz_localize(local_timezone).tz_convert("UTC").tz_localize(None) + value = value.tz_localize(local_timezone).tz_convert("UTC").tz_localize( + None) return Timestamp(seconds=int(value.timestamp())) if dtype.__str__() == "datetime64[ns, UTC]": return Timestamp(seconds=int(value.timestamp())) else: - return Timestamp(seconds=np.datetime64(value).astype("int64") // 1000000) + return Timestamp( + seconds=np.datetime64(value).astype("int64") // 1000000) def _type_err(item, dtype): @@ -344,3 +354,139 @@ def _python_value_to_proto_value(feast_value_type, value) -> ProtoValue: return ProtoValue(bool_val=value) raise Exception(f"Unsupported data type: ${str(type(value))}") + +def pa_to_feast_value_attr(pa_type: object): + """ + Returns the equivalent Feast ValueType string for the given pa.lib type. + + Args: + pa_type (object): + PyArrow type. + + Returns: + str: + Feast attribute name in Feast ValueType string-ed representation. + """ + # Mapping of PyArrow type to attribute name in Feast ValueType strings + type_map = { + "timestamp[ms]": "int64_val", + "int32": "int32_val", + "int64": "int64_val", + "double": "double_val", + "float": "float_val", + "string": "string_val", + "binary": "bytes_val", + "bool": "bool_val", + "list": "int32_list_val", + "list": "int64_list_val", + "list": "double_list_val", + "list": "float_list_val", + "list": "string_list_val", + "list": "bytes_list_val", + "list": "bool_list_val", + } + + return type_map[pa_type.__str__()] + + +def pa_to_value_type(pa_type: object): + """ + Returns the equivalent Feast ValueType for the given pa.lib type. + + Args: + pa_type (object): + PyArrow type. + + Returns: + feast.types.Value_pb2.ValueType: + Feast ValueType. 
+ + """ + + # Mapping of PyArrow to attribute name in Feast ValueType + type_map = { + "timestamp[ms]": ProtoValueType.INT64, + "int32": ProtoValueType.INT32, + "int64": ProtoValueType.INT64, + "double": ProtoValueType.DOUBLE, + "float": ProtoValueType.FLOAT, + "string": ProtoValueType.STRING, + "binary": ProtoValueType.BYTES, + "bool": ProtoValueType.BOOL, + "list": ProtoValueType.INT32_LIST, + "list": ProtoValueType.INT64_LIST, + "list": ProtoValueType.DOUBLE_LIST, + "list": ProtoValueType.FLOAT_LIST, + "list": ProtoValueType.STRING_LIST, + "list": ProtoValueType.BYTES_LIST, + "list": ProtoValueType.BOOL_LIST, + } + return type_map[pa_type.__str__()] + + +def pa_to_feast_value_type( + value: object +) -> ValueType: + type_map = { + "timestamp[ms]": ValueType.INT64, + "int32": ValueType.INT32, + "int64": ValueType.INT64, + "double": ValueType.DOUBLE, + "float": ValueType.FLOAT, + "string": ValueType.STRING, + "binary": ValueType.BYTES, + "bool": ValueType.BOOL, + "list": ValueType.INT32_LIST, + "list": ValueType.INT64_LIST, + "list": ValueType.DOUBLE_LIST, + "list": ValueType.FLOAT_LIST, + "list": ValueType.STRING_LIST, + "list": ValueType.BYTES_LIST, + "list": ValueType.BOOL_LIST, + } + return type_map[value.type.__str__()] + + +def pa_column_to_timestamp_proto_column( + column: pa.lib.ChunkedArray +) -> Timestamp: + if not isinstance(column.type, TimestampType): + raise Exception("Only TimestampType columns are allowed") + + proto_column = [] + for val in column: + timestamp = Timestamp() + timestamp.FromMicroseconds( + micros=int(val.as_py().timestamp() * 1_000_000)) + proto_column.append(timestamp) + return proto_column + + +def pa_column_to_proto_column( + feast_value_type, + column: pa.lib.ChunkedArray +) -> List[ProtoValue]: + type_map = {ValueType.INT32: "int32_val", + ValueType.INT64: "int64_val", + ValueType.FLOAT: "float_val", + ValueType.DOUBLE: "double_val", + ValueType.STRING: "string_val", + ValueType.BYTES: "bytes_val", + ValueType.BOOL: 
"bool_val", + ValueType.BOOL_LIST: {"bool_list_val": BoolList}, + ValueType.BYTES_LIST: {"bytes_list_val": BytesList}, + ValueType.STRING_LIST: {"string_list_val": StringList}, + ValueType.FLOAT_LIST: {"float_list_val": FloatList}, + ValueType.DOUBLE_LIST: {"double_list_val": DoubleList}, + ValueType.INT32_LIST: {"int32_list_val": Int32List}, + ValueType.INT64_LIST: {"int64_list_val": Int64List}, } + + value = type_map[feast_value_type] + # Process list types + if type(value) == dict: + list_param_name = list(value.keys())[0] + return [ProtoValue( + **{list_param_name: value[list_param_name](val=x.as_py())}) + for x in column] + else: + return [ProtoValue(**{value: x.as_py()}) for x in column] diff --git a/sdk/python/setup.py b/sdk/python/setup.py index 66cad904b01..9ac7225e80e 100644 --- a/sdk/python/setup.py +++ b/sdk/python/setup.py @@ -44,6 +44,7 @@ "pyarrow>=0.15.1", "numpy", "google", + "confluent_kafka" ] # README file from Feast repo root directory diff --git a/sdk/python/tests/test_client.py b/sdk/python/tests/test_client.py index 8996b7543c3..c5a98485fef 100644 --- a/sdk/python/tests/test_client.py +++ b/sdk/python/tests/test_client.py @@ -399,7 +399,7 @@ def test_feature_set_ingest_success(self, dataframe, client, mocker): ) # Need to create a mock producer - with patch("feast.loaders.ingest.KafkaProducer") as mocked_queue: + with patch("feast.client.get_producer") as mocked_queue: # Ingest data into Feast client.ingest("driver-feature-set", dataframe) @@ -429,7 +429,7 @@ def test_feature_set_ingest_fail_if_pending( ) # Need to create a mock producer - with patch("feast.loaders.ingest.KafkaProducer") as mocked_queue: + with patch("feast.client.get_producer") as mocked_queue: # Ingest data into Feast client.ingest("driver-feature-set", dataframe, timeout=1) @@ -493,6 +493,6 @@ def test_feature_set_types_success(self, client, dataframe, mocker): ) # Need to create a mock producer - with patch("feast.loaders.ingest.KafkaProducer") as mocked_queue: + with 
patch("feast.client.get_producer") as mocked_queue: # Ingest data into Feast client.ingest(all_types_fs, dataframe) From 30c2e0fbfdeb4a4c062aa603de63936c496ee9d4 Mon Sep 17 00:00:00 2001 From: voonhous Date: Sun, 22 Dec 2019 16:11:29 +0800 Subject: [PATCH 10/18] Added support to accept local avro files, GCS avro files and GCS wildcard paths (#375) --- sdk/python/feast/client.py | 201 +++++++++++++++++++++------- sdk/python/feast/job.py | 93 +++++++++++-- sdk/python/feast/loaders/file.py | 216 ++++++++++++++++++++++++++----- tests/e2e/bq-batch-retrieval.py | 96 ++++++++++++++ tests/e2e/conftest.py | 1 + tests/e2e/requirements.txt | 1 + 6 files changed, 518 insertions(+), 90 deletions(-) diff --git a/sdk/python/feast/client.py b/sdk/python/feast/client.py index 78596ee3eba..4e5d63e2d86 100644 --- a/sdk/python/feast/client.py +++ b/sdk/python/feast/client.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. - +import json import logging import os import time from collections import OrderedDict from typing import Dict, Union from typing import List +from urllib.parse import urlparse +import fastavro import grpc import pandas as pd import pyarrow as pa @@ -38,7 +40,7 @@ from feast.feature_set import FeatureSet, Entity from feast.job import Job from feast.loaders.abstract_producer import get_producer -from feast.loaders.file import export_dataframe_to_staging_location +from feast.loaders.file import export_source_to_staging_location from feast.loaders.ingest import KAFKA_CHUNK_PRODUCTION_TIMEOUT from feast.loaders.ingest import get_feature_row_chunks from feast.serving.ServingService_pb2 import GetFeastServingInfoResponse @@ -322,22 +324,28 @@ def list_entities(self) -> Dict[str, Entity]: return entities_dict def get_batch_features( - self, feature_ids: List[str], entity_rows: pd.DataFrame + self, feature_ids: List[str], entity_rows: Union[pd.DataFrame, str] ) -> Job: """ Retrieves historical 
features from a Feast Serving deployment. Args: - feature_ids: List of feature ids that will be returned for each - entity. Each feature id should have the following format + feature_ids (List[str]): + List of feature ids that will be returned for each entity. + Each feature id should have the following format "feature_set_name:version:feature_name". - entity_rows: Pandas dataframe containing entities and a 'datetime' - column. Each entity in a feature set must be present as a column - in this dataframe. The datetime column must + + entity_rows (Union[pd.DataFrame, str]): + Pandas dataframe containing entities and a 'datetime' column. + Each entity in a feature set must be present as a column in this + dataframe. The datetime column must contain timestamps in + datetime64 format. Returns: - Returns a job object that can be used to monitor retrieval progress - asynchronously, and can be used to materialize the results + feast.job.Job: + Returns a job object that can be used to monitor retrieval + progress asynchronously, and can be used to materialize the + results. 
Examples: >>> from feast import Client @@ -360,21 +368,11 @@ def get_batch_features( fs_request = _build_feature_set_request(feature_ids) - # Validate entity rows based on entities in Feast Core - self._validate_entity_rows_for_batch_retrieval(entity_rows, fs_request) - - # Remove timezone from datetime column - if isinstance( - entity_rows["datetime"].dtype, pd.core.dtypes.dtypes.DatetimeTZDtype - ): - entity_rows["datetime"] = pd.DatetimeIndex( - entity_rows["datetime"] - ).tz_localize(None) - # Retrieve serving information to determine store type and # staging location serving_info = self._serving_service_stub.GetFeastServingInfo( - GetFeastServingInfoRequest(), timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT + GetFeastServingInfoRequest(), + timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT ) # type: GetFeastServingInfoResponse if serving_info.type != FeastServingType.FEAST_SERVING_TYPE_BATCH: @@ -383,17 +381,50 @@ def get_batch_features( f"does not support batch retrieval " ) - # Export and upload entity row dataframe to staging location + if isinstance(entity_rows, pd.DataFrame): + # Pandas DataFrame detected + # Validate entity rows to based on entities in Feast Core + self._validate_dataframe_for_batch_retrieval( + entity_rows=entity_rows, + feature_sets_request=fs_request + ) + + # Remove timezone from datetime column + if isinstance( + entity_rows["datetime"].dtype, + pd.core.dtypes.dtypes.DatetimeTZDtype + ): + entity_rows["datetime"] = pd.DatetimeIndex( + entity_rows["datetime"] + ).tz_localize(None) + elif isinstance(entity_rows, str): + # String based source + if entity_rows.endswith((".avro", "*")): + # Validate Avro entity rows to based on entities in Feast Core + self._validate_avro_for_batch_retrieval( + source=entity_rows, + feature_sets_request=fs_request + ) + else: + raise Exception( + f"Only .avro and wildcard paths are accepted as entity_rows" + ) + else: + raise Exception(f"Only pandas.DataFrame and str types are allowed" + f" as entity_rows, but got 
{type(entity_rows)}.") + + # Export and upload entity row DataFrame to staging location # provided by Feast - staged_file = export_dataframe_to_staging_location( + staged_files = export_source_to_staging_location( entity_rows, serving_info.job_staging_location - ) # type: str + ) # type: List[str] request = GetBatchFeaturesRequest( feature_sets=fs_request, dataset_source=DatasetSource( file_source=DatasetSource.FileSource( - file_uris=[staged_file], data_format=DataFormat.DATA_FORMAT_AVRO + file_uris=staged_files, + data_format=DataFormat.DATA_FORMAT_AVRO ) ), ) @@ -402,28 +433,107 @@ def get_batch_features( response = self._serving_service_stub.GetBatchFeatures(request) return Job(response.job, self._serving_service_stub) - def _validate_entity_rows_for_batch_retrieval( - self, entity_rows, feature_sets_request + def _validate_dataframe_for_batch_retrieval( + self, entity_rows: pd.DataFrame, feature_sets_request ): """ - Validate whether an entity_row dataframe contains the correct - information for batch retrieval + Validate whether an the entity rows in a DataFrame contains the correct + information for batch retrieval. + + Datetime column must be present in the DataFrame. Args: - entity_rows: Pandas dataframe containing entities and datetime - column. Each entity in a feature set must be present as a - column in this dataframe. - feature_sets_request: Feature sets that will be requested + entity_rows (pd.DataFrame): + Pandas DataFrame containing entities and datetime column. Each + entity in a feature set must be present as a column in this + DataFrame. + + feature_sets_request: + Feature sets that will be requested. """ + self._validate_columns( + columns=entity_rows.columns, + feature_sets_request=feature_sets_request, + datetime_field="datetime" + ) + + def _validate_avro_for_batch_retrieval( + self, source: str, feature_sets_request + ): + """ + Validate whether the entity rows in an Avro source file contains the + correct information for batch retrieval. 
+ + Only gs:// and local files (file://) uri schemes are allowed. + + Avro file must have a column named "event_timestamp". + + No checks will be done if a GCS path is provided. + + Args: + source (str): + File path to Avro. + + feature_sets_request: + Feature sets that will be requested. + """ + p = urlparse(source) + + if p.scheme == "gs": + # GCS path provided (Risk is delegated to user) + # No validation if GCS path is provided + return + elif p.scheme == "file" or not p.scheme: + # Local file (file://) provided + file_path = os.path.abspath(os.path.join(p.netloc, p.path)) + else: + raise Exception(f"Unsupported uri scheme provided {p.scheme}, only " + f"local files (file://), and gs:// schemes are " + f"allowed") + + with open(file_path, "rb") as f: + reader = fastavro.reader(f) + schema = json.loads(reader.metadata["avro.schema"]) + columns = [x["name"] for x in schema["fields"]] + self._validate_columns( + columns=columns, + feature_sets_request=feature_sets_request, + datetime_field="event_timestamp" + ) + + def _validate_columns( + self, columns: List[str], + feature_sets_request, + datetime_field: str + ) -> None: + """ + Check if the required column contains the correct values for batch + retrieval. + + Args: + columns (List[str]): + List of columns to validate against feature_sets_request. + + feature_sets_request (): + Feature sets that will be requested. + + datetime_field (str): + Name of the datetime field that must be enforced and present as + a column in the data source. 
+ + Returns: + None: + None + """ # Ensure datetime column exists - if "datetime" not in entity_rows.columns: + if datetime_field not in columns: raise ValueError( - f'Entity rows does not contain "datetime" column in columns ' - f"{entity_rows.columns}" + f'Entity rows does not contain "{datetime_field}" column in ' + f'columns {columns}' ) - # Validate dataframe columns based on feature set entities + # Validate Avro columns based on feature set entities for feature_set in feature_sets_request: fs = self.get_feature_set( name=feature_set.name, version=feature_set.version @@ -434,10 +544,10 @@ def _validate_entity_rows_for_batch_retrieval( f"could not be found" ) for entity_type in fs.entities: - if entity_type.name not in entity_rows.columns: + if entity_type.name not in columns: raise ValueError( - f'Dataframe does not contain entity "{entity_type.name}"' - f' column in columns "{entity_rows.columns}"' + f'Input does not contain entity' + f' "{entity_type.name}" column in columns "{columns}"' ) def get_online_features( @@ -610,7 +720,9 @@ def ingest( return None -def _build_feature_set_request(feature_ids: List[str]) -> List[FeatureSetRequest]: +def _build_feature_set_request( + feature_ids: List[str] +) -> List[FeatureSetRequest]: """ Builds a list of FeatureSet objects from feature set ids in order to retrieve feature data from Feast Serving @@ -643,7 +755,7 @@ def _read_table_from_source( max_workers: int ) -> str: """ - Infers a data source type (path or Pandas Dataframe) and reads it in as + Infers a data source type (path or Pandas DataFrame) and reads it in as a PyArrow Table. 
The PyArrow Table that is read will be written to a parquet file with row @@ -688,7 +800,8 @@ def _read_table_from_source( else: table = pq.read_table(file_path) else: - raise ValueError(f"Unknown data source provided for ingestion: {source}") + raise ValueError( + f"Unknown data source provided for ingestion: {source}") # Ensure that PyArrow table is initialised assert isinstance(table, pa.lib.Table) diff --git a/sdk/python/feast/job.py b/sdk/python/feast/job.py index 4273f86ea84..26f6181ee2d 100644 --- a/sdk/python/feast/job.py +++ b/sdk/python/feast/job.py @@ -1,12 +1,11 @@ import tempfile import time from datetime import datetime, timedelta -from typing import List +from typing import Iterable from urllib.parse import urlparse import fastavro import pandas as pd -from fastavro import reader as fastavro_reader from google.cloud import storage from feast.serving.ServingService_pb2 import GetJobRequest @@ -62,15 +61,18 @@ def reload(self): """ self.job_proto = self.serving_stub.GetJob(GetJobRequest(job=self.job_proto)).job - def result(self, timeout_sec: int = DEFAULT_TIMEOUT_SEC): + def get_avro_files(self, timeout_sec: int = DEFAULT_TIMEOUT_SEC): """ - Wait until job is done to get an iterable rows of result. - The row can only represent an Avro row in Feast 0.3. + Wait until job is done to get the file uri to Avro result files on + Google Cloud Storage. Args: - timeout_sec: max no of seconds to wait until job is done. If "timeout_sec" is exceeded, an exception will be raised. + timeout_sec (int): + Max no of seconds to wait until job is done. If "timeout_sec" + is exceeded, an exception will be raised. - Returns: Iterable of Avro rows + Returns: + str: Google Cloud Storage file uris of the returned Avro files. 
""" max_wait_datetime = datetime.now() + timedelta(seconds=timeout_sec) wait_duration_sec = 2 @@ -78,11 +80,13 @@ def result(self, timeout_sec: int = DEFAULT_TIMEOUT_SEC): while self.status != JOB_STATUS_DONE: if datetime.now() > max_wait_datetime: raise Exception( - "Timeout exceeded while waiting for result. Please retry this method or use a longer timeout value." + "Timeout exceeded while waiting for result. Please retry " + "this method or use a longer timeout value." ) self.reload() time.sleep(wait_duration_sec) + # Backoff the wait duration exponentially up till MAX_WAIT_INTERVAL_SEC wait_duration_sec = min(wait_duration_sec * 2, MAX_WAIT_INTERVAL_SEC) @@ -95,7 +99,22 @@ def result(self, timeout_sec: int = DEFAULT_TIMEOUT_SEC): "your Feast Serving deployment." ) - uris = [urlparse(uri) for uri in self.job_proto.file_uris] + return [urlparse(uri) for uri in self.job_proto.file_uris] + + def result(self, timeout_sec: int = DEFAULT_TIMEOUT_SEC): + """ + Wait until job is done to get an iterable rows of result. The row can + only represent an Avro row in Feast 0.3. + + Args: + timeout_sec (int): + Max no of seconds to wait until job is done. If "timeout_sec" + is exceeded, an exception will be raised. + + Returns: + Iterable of Avro rows. + """ + uris = self.get_avro_files(timeout_sec) for file_uri in uris: if file_uri.scheme == "gs": file_obj = tempfile.TemporaryFile() @@ -113,16 +132,64 @@ def result(self, timeout_sec: int = DEFAULT_TIMEOUT_SEC): for record in avro_reader: yield record - def to_dataframe(self, timeout_sec: int = DEFAULT_TIMEOUT_SEC): + def to_dataframe( + self, + timeout_sec: int = DEFAULT_TIMEOUT_SEC + ) -> pd.DataFrame: """ - Wait until job is done to get an interable rows of result + Wait until a job is done to get an iterable rows of result. This method + will split the response into chunked DataFrame of a specified size to + to be yielded to the instance calling it. Args: - timeout_sec: max no of seconds to wait until job is done. 
If "timeout_sec" is exceeded, an exception will be raised. - Returns: pandas Dataframe of the feature values + max_chunk_size (int): + Maximum number of rows that the DataFrame should contain. + + timeout_sec (int): + Max no of seconds to wait until job is done. If "timeout_sec" + is exceeded, an exception will be raised. + + Returns: + pd.DataFrame: + Pandas DataFrame of the feature values. """ records = [r for r in self.result(timeout_sec=timeout_sec)] return pd.DataFrame.from_records(records) + def to_chunked_dataframe( + self, + max_chunk_size: int = -1, + timeout_sec: int = DEFAULT_TIMEOUT_SEC + ) -> pd.DataFrame: + """ + Wait until a job is done to get an iterable rows of result. This method + will split the response into chunked DataFrame of a specified size to + to be yielded to the instance calling it. + + Args: + max_chunk_size (int): + Maximum number of rows that the DataFrame should contain. + + timeout_sec (int): + Max no of seconds to wait until job is done. If "timeout_sec" + is exceeded, an exception will be raised. + + Returns: + pd.DataFrame: + Pandas DataFrame of the feature values. + """ + # Max chunk size defined by user + records = [] + for result in self.result(timeout_sec=timeout_sec): + result.append(records) + if len(records) == max_chunk_size: + df = pd.DataFrame.from_records(records) + records.clear() # Empty records array + yield df + + # Handle for last chunk that is < max_chunk_size + if not records: + yield pd.DataFrame.from_records(records) + def __iter__(self): return iter(self.result()) diff --git a/sdk/python/feast/loaders/file.py b/sdk/python/feast/loaders/file.py index 8dd6b503a74..108f2790dd8 100644 --- a/sdk/python/feast/loaders/file.py +++ b/sdk/python/feast/loaders/file.py @@ -1,70 +1,159 @@ +# Copyright 2019 The Feast Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re import shutil import tempfile -from typing import Optional -from urllib.parse import urlparse import uuid -import pandas as pd from datetime import datetime +from typing import List, Optional, Tuple, Union +from urllib.parse import urlparse, ParseResult + +import pandas as pd from google.cloud import storage from pandavro import to_avro -def export_dataframe_to_staging_location( - df: pd.DataFrame, staging_location_uri: str -) -> str: +def export_source_to_staging_location( + source: Union[pd.DataFrame, str], staging_location_uri: str +) -> List[str]: """ - Uploads a dataframe to a remote staging location + Uploads a DataFrame as an Avro file to a remote staging location. + + The local staging location specified in this function is used for E2E + tests, please do not use it. Args: - df: Pandas dataframe - staging_location_uri: Remote staging location where dataframe should be written + source (Union[pd.DataFrame, str]: + Source of data to be staged. Can be a pandas DataFrame or a file + path. + + Only three types of source are allowed: + * Pandas DataFrame + * Local Avro file + * GCS Avro file + + + staging_location_uri (str): + Remote staging location where DataFrame should be written. Examples: - gs://bucket/path/ - file:///data/subfolder/ + * gs://bucket/path/ + * file:///data/subfolder/ Returns: - Returns the full path to the file in the remote staging location + List[str]: + Returns a list containing the full path to the file(s) in the + remote staging location. 
""" - # Validate staging location uri = urlparse(staging_location_uri) + + # Prepare Avro file to be exported to staging location + if isinstance(source, pd.DataFrame): + # DataFrame provided as a source + if uri.scheme == "file": + uri_path = uri.path + else: + uri_path = None + + # Remote gs staging location provided by serving + dir_path, file_name, source_path = export_dataframe_to_local( + source, + uri_path + ) + elif urlparse(source).scheme in ["", "file"]: + # Local file provided as a source + dir_path = None + file_name = os.path.basename(source) + source_path = os.path.abspath(os.path.join( + urlparse(source).netloc, urlparse(source).path)) + elif urlparse(source).scheme == "gs": + # Google Cloud Storage path provided + input_source_uri = urlparse(source) + if "*" in source: + # Wildcard path + return _get_files( + bucket=input_source_uri.hostname, + uri=input_source_uri + ) + else: + return [source] + else: + raise Exception(f"Only string and DataFrame types are allowed as a " + f"source, {type(source)} was provided.") + + # Push data to required staging location if uri.scheme == "gs": - dir_path, file_name, source_path = export_dataframe_to_local(df) + # Staging location is a Google Cloud Storage path upload_file_to_gcs( - source_path, uri.hostname, str(uri.path).strip("/") + "/" + file_name + source_path, + uri.hostname, + str(uri.path).strip("/") + "/" + file_name ) - if len(str(dir_path)) < 5: - raise Exception(f"Export location {dir_path} dangerous. Stopping.") - shutil.rmtree(dir_path) elif uri.scheme == "file": - dir_path, file_name, source_path = export_dataframe_to_local(df, uri.path) + # Staging location is a file path + # Used for end-to-end test + pass else: raise Exception( - f"Staging location {staging_location_uri} does not have a valid URI. Only gs:// and file:// are supported" + f"Staging location {staging_location_uri} does not have a " + f"valid URI. Only gs:// and file:// uri scheme are supported." 
) - return staging_location_uri.rstrip("/") + "/" + file_name + # Clean up, remove local staging file + if isinstance(source, pd.DataFrame) and len(str(dir_path)) > 4: + shutil.rmtree(dir_path) + + return [staging_location_uri.rstrip("/") + "/" + file_name] -def export_dataframe_to_local(df: pd.DataFrame, dir_path: Optional[str] = None): +def export_dataframe_to_local( + df: pd.DataFrame, + dir_path: Optional[str] = None +) -> Tuple[str, str, str]: """ - Exports a pandas dataframe to the local filesystem + Exports a pandas DataFrame to the local filesystem. Args: - df: Pandas dataframe to save - dir_path: (optional) Absolute directory path '/data/project/subfolder/' + df (pd.DataFrame): + Pandas DataFrame to save. + + dir_path (Optional[str]): + Absolute directory path '/data/project/subfolder/'. + + Returns: + Tuple[str, str, str]: + Tuple of directory path, file name and destination path. The + destination path can be obtained by concatenating the directory + path and file name. """ # Create local staging location if not provided if dir_path is None: dir_path = tempfile.mkdtemp() - file_name = f'{datetime.now().strftime("%d-%m-%Y_%I-%M-%S_%p")}_{str(uuid.uuid4())[:8]}.avro' + file_name = _get_file_name() dest_path = f"{dir_path}/{file_name}" # Temporarily rename datetime column to event_timestamp. Ideally we would # force the schema with our avro writer instead. 
- df.columns = ["event_timestamp" if col == "datetime" else col for col in df.columns] + df.columns = [ + "event_timestamp" + if col == "datetime" else col + for col in df.columns + ] try: # Export dataset to file in local path @@ -74,23 +163,84 @@ def export_dataframe_to_local(df: pd.DataFrame, dir_path: Optional[str] = None): finally: # Revert event_timestamp column to datetime df.columns = [ - "datetime" if col == "event_timestamp" else col for col in df.columns + "datetime" + if col == "event_timestamp" else col + for col in df.columns ] return dir_path, file_name, dest_path -def upload_file_to_gcs(local_path: str, bucket: str, remote_path: str): +def upload_file_to_gcs(local_path: str, bucket: str, remote_path: str) -> None: """ - Upload a file from the local file system to Google Cloud Storage (GCS) + Upload a file from the local file system to Google Cloud Storage (GCS). Args: - local_path: Local filesystem path of file to upload - bucket: GCS bucket to upload to - remote_path: Path within GCS bucket to upload file to, includes file name + local_path (str): + Local filesystem path of file to upload. + + bucket (str): + GCS bucket destination to upload to. + + remote_path (str): + Path within GCS bucket to upload file to, includes file name. + + Returns: + None: + None """ storage_client = storage.Client(project=None) bucket = storage_client.get_bucket(bucket) blob = bucket.blob(remote_path) blob.upload_from_filename(local_path) + + +def _get_files(bucket: str, uri: ParseResult) -> List[str]: + """ + List all available files within a Google storage bucket that matches a wild + card path. + + Args: + bucket (str): + Google Storage bucket to reference. + + uri (urllib.parse.ParseResult): + Wild card uri path containing the "*" character. + Example: + * gs://feast/staging_location/* + * gs://feast/staging_location/file_*.avro + + Returns: + List[str]: + List of all available files matching the wildcard path. 
+ """ + + storage_client = storage.Client(project=None) + bucket = storage_client.get_bucket(bucket) + path = uri.path + + if "*" in path: + regex = re.compile(path.replace("*", ".*?").strip("/")) + blob_list = bucket.list_blobs( + prefix=path.strip("/").split("*")[0], + delimiter="/" + ) + # File path should not be in path (file path must be longer than path) + return [f"{uri.scheme}://{uri.hostname}/{file}" + for file in [x.name for x in blob_list] + if re.match(regex, file) and file not in path] + else: + raise Exception(f"{path} is not a wildcard path") + + +def _get_file_name() -> str: + """ + Create a random file name. + + Returns: + str: + Randomised file name. + """ + + return f'{datetime.now().strftime("%d-%m-%Y_%I-%M-%S_%p")}_{str(uuid.uuid4())[:8]}.avro' diff --git a/tests/e2e/bq-batch-retrieval.py b/tests/e2e/bq-batch-retrieval.py index 639ca9f5595..3458eb4740b 100644 --- a/tests/e2e/bq-batch-retrieval.py +++ b/tests/e2e/bq-batch-retrieval.py @@ -2,6 +2,7 @@ import time from datetime import datetime from datetime import timedelta +from urllib.parse import urlparse import numpy as np import pandas as pd @@ -12,7 +13,9 @@ from feast.feature import Feature from feast.feature_set import FeatureSet from feast.type_map import ValueType +from google.cloud import storage from google.protobuf.duration_pb2 import Duration +from pandavro import to_avro @pytest.fixture(scope="module") @@ -30,6 +33,11 @@ def allow_dirty(pytestconfig): return True if pytestconfig.getoption("allow_dirty").lower() == "true" else False +@pytest.fixture(scope="module") +def gcs_path(pytestconfig): + return pytestconfig.getoption("gcs_path") + + @pytest.fixture(scope="module") def client(core_url, serving_url, allow_dirty): # Get client for core and serving @@ -44,6 +52,94 @@ def client(core_url, serving_url, allow_dirty): return client +def test_get_batch_features_with_file(client): + file_fs1 = FeatureSet( + "file_feature_set", + features=[Feature("feature_value", ValueType.STRING)], + 
entities=[Entity("entity_id", ValueType.INT64)], + max_age=Duration(seconds=100), + ) + + client.apply(file_fs1) + file_fs1 = client.get_feature_set(name="file_feature_set", version=1) + + N_ROWS = 10 + time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) + features_1_df = pd.DataFrame( + { + "datetime": [time_offset] * N_ROWS, + "entity_id": [i for i in range(N_ROWS)], + "feature_value": [f"{i}" for i in range(N_ROWS)], + } + ) + client.ingest(file_fs1, features_1_df) + + # Rename column (datetime -> event_timestamp) + features_1_df = features_1_df.rename(columns={"datetime": "event_timestamp"}) + + to_avro(df=features_1_df, file_path_or_buffer="file_feature_set.avro") + + feature_retrieval_job = client.get_batch_features( + entity_rows="file://file_feature_set.avro", feature_ids=["file_feature_set:1:feature_value"] + ) + + output = feature_retrieval_job.to_dataframe() + print(output.head()) + + assert output["entity_id"].to_list() == [int(i) for i in output["file_feature_set_v1_feature_value"].to_list()] + + +def test_get_batch_features_with_gs_path(client, gcs_path): + gcs_fs1 = FeatureSet( + "gcs_feature_set", + features=[Feature("feature_value", ValueType.STRING)], + entities=[Entity("entity_id", ValueType.INT64)], + max_age=Duration(seconds=100), + ) + + client.apply(gcs_fs1) + gcs_fs1 = client.get_feature_set(name="gcs_feature_set", version=1) + + N_ROWS = 10 + time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) + features_1_df = pd.DataFrame( + { + "datetime": [time_offset] * N_ROWS, + "entity_id": [i for i in range(N_ROWS)], + "feature_value": [f"{i}" for i in range(N_ROWS)], + } + ) + client.ingest(gcs_fs1, features_1_df) + + # Rename column (datetime -> event_timestamp) + features_1_df = features_1_df.rename(columns={"datetime": "event_timestamp"}) + + # Output file to local + file_name = "gcs_feature_set.avro" + to_avro(df=features_1_df, file_path_or_buffer=file_name) + + uri = urlparse(gcs_path) + bucket = uri.hostname + ts = int(time.time()) 
+ remote_path = str(uri.path).strip("/") + f"{ts}/{file_name}" + + # Upload file to gcs + storage_client = storage.Client(project=None) + bucket = storage_client.get_bucket(bucket) + blob = bucket.blob(remote_path) + blob.upload_from_filename(file_name) + + feature_retrieval_job = client.get_batch_features( + entity_rows=f"{gcs_path}{ts}/*", + feature_ids=["gcs_feature_set:1:feature_value"] + ) + + output = feature_retrieval_job.to_dataframe() + print(output.head()) + + assert output["entity_id"].to_list() == [int(i) for i in output["gcs_feature_set_v1_feature_value"].to_list()] + + def test_order_by_creation_time(client): proc_time_fs = FeatureSet( "processing_time", diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py index b37770a83f9..8ea472b6620 100644 --- a/tests/e2e/conftest.py +++ b/tests/e2e/conftest.py @@ -2,3 +2,4 @@ def pytest_addoption(parser): parser.addoption("--core_url", action="store", default="localhost:6565") parser.addoption("--serving_url", action="store", default="localhost:6566") parser.addoption("--allow_dirty", action="store", default="False") + parser.addoption("--gcs_path", action="store", default="gs://feast-templocation-kf-feast/") diff --git a/tests/e2e/requirements.txt b/tests/e2e/requirements.txt index 6b999421c04..0ba345a000f 100644 --- a/tests/e2e/requirements.txt +++ b/tests/e2e/requirements.txt @@ -1,6 +1,7 @@ mock==2.0.0 numpy==1.16.4 pandas==0.24.2 +pandavro==1.5.* pytest==5.2.1 pytest-benchmark==3.2.2 pytest-mock==1.10.4 From 84280d956791b510e0ba0ebaeeb35faab582c295 Mon Sep 17 00:00:00 2001 From: David Heryanto Date: Sun, 22 Dec 2019 18:19:12 +0800 Subject: [PATCH 11/18] Add readiness checks for Feast services in end to end test (#337) * Add readiness checks for Kafka, Feast Core and Feast Serving in test-end-to-end So that any error is discovered early and the last log message is more representative of the actual errors * Use kafkacat to check Kafka connection Netcat cannot check it reliably * Reduce timeout duration 
for job to 1h To free up resources more quickly and since we our tests do not require more than 1 hour to complete * Increase resource request for tests - Values are estimated based on previous actual runs of the tests * Increase wait time for Kafka to start * Increase default Kafka request timeout value to 15s from 1s Value of 1s seems to cause end-to-end test to keep failing due to timeout * Increase cpu request and set timeout for netcat * Use revision=dev for end-to-end-test * Correct the target jar filename --- .prow/config.yaml | 10 ++++----- .prow/scripts/test-end-to-end-batch.sh | 4 ++-- .prow/scripts/test-end-to-end.sh | 21 ++++++++++++++----- .../core/config/FeatureStreamConfig.java | 5 ++++- 4 files changed, 27 insertions(+), 13 deletions(-) diff --git a/.prow/config.yaml b/.prow/config.yaml index 4b6e352a12f..940e2562d72 100644 --- a/.prow/config.yaml +++ b/.prow/config.yaml @@ -70,8 +70,8 @@ presubmits: command: [".prow/scripts/test-core-ingestion.sh"] resources: requests: - cpu: "1000m" - memory: "512Mi" + cpu: "1500m" + memory: "1536Mi" limit: memory: "4096Mi" @@ -116,10 +116,10 @@ presubmits: command: [".prow/scripts/test-end-to-end.sh"] resources: requests: - cpu: "1000m" - memory: "1024Mi" - limit: + cpu: "3000m" memory: "4096Mi" + limit: + memory: "6144Mi" - name: test-end-to-end-batch decorate: true diff --git a/.prow/scripts/test-end-to-end-batch.sh b/.prow/scripts/test-end-to-end-batch.sh index f25c0720edc..4ae7ae1e54b 100755 --- a/.prow/scripts/test-end-to-end-batch.sh +++ b/.prow/scripts/test-end-to-end-batch.sh @@ -73,10 +73,10 @@ Installing Kafka at localhost:9092 wget -qO- https://www-eu.apache.org/dist/kafka/2.3.0/kafka_2.12-2.3.0.tgz | tar xz mv kafka_2.12-2.3.0/ /tmp/kafka nohup /tmp/kafka/bin/zookeeper-server-start.sh /tmp/kafka/config/zookeeper.properties &> /var/log/zookeeper.log 2>&1 & -sleep 5 +sleep 10 tail -n10 /var/log/zookeeper.log nohup /tmp/kafka/bin/kafka-server-start.sh /tmp/kafka/config/server.properties &> 
/var/log/kafka.log 2>&1 & -sleep 20 +sleep 30 tail -n10 /var/log/kafka.log echo " diff --git a/.prow/scripts/test-end-to-end.sh b/.prow/scripts/test-end-to-end.sh index f6ebd8c6eef..0206472aae2 100755 --- a/.prow/scripts/test-end-to-end.sh +++ b/.prow/scripts/test-end-to-end.sh @@ -3,6 +3,8 @@ set -e set -o pipefail +export REVISION=dev + if ! cat /etc/*release | grep -q stretch; then echo ${BASH_SOURCE} only supports Debian stretch. echo Please change your operating system to use this script. @@ -19,15 +21,18 @@ This script will run end-to-end tests for Feast Core and Online Serving. tests/e2e via pytest. " +apt-get -qq update +apt-get -y install wget netcat kafkacat + echo " ============================================================ Installing Redis at localhost:6379 ============================================================ " -apt-get -qq update + # Allow starting serving in this Maven Docker image. Default set to not allowed. echo "exit 0" > /usr/sbin/policy-rc.d -apt-get -y install redis-server wget > /var/log/redis.install.log +apt-get -y install redis-server > /var/log/redis.install.log redis-server --daemonize yes redis-cli ping @@ -61,6 +66,7 @@ tail -n10 /var/log/zookeeper.log nohup /tmp/kafka/bin/kafka-server-start.sh /tmp/kafka/config/server.properties &> /var/log/kafka.log 2>&1 & sleep 20 tail -n10 /var/log/kafka.log +kafkacat -b localhost:9092 -L echo " ============================================================ @@ -73,7 +79,10 @@ Building jars for Feast --output-dir /root/ # Build jars for Feast -mvn --quiet --batch-mode --define skipTests=true clean package +mvn --quiet --batch-mode --define skipTests=true --define revision=$REVISION clean package + +ls -lh core/target/*jar +ls -lh serving/target/*jar echo " ============================================================ @@ -123,11 +132,12 @@ management: enabled: false EOF -nohup java -jar core/target/feast-core-0.3.2-SNAPSHOT.jar \ +nohup java -jar core/target/feast-core-$REVISION.jar \ 
--spring.config.location=file:///tmp/core.application.yml \ &> /var/log/feast-core.log & sleep 35 tail -n10 /var/log/feast-core.log +nc -w2 localhost 6565 < /dev/null echo " ============================================================ @@ -174,11 +184,12 @@ spring: web-environment: false EOF -nohup java -jar serving/target/feast-serving-0.3.2-SNAPSHOT.jar \ +nohup java -jar serving/target/feast-serving-$REVISION.jar \ --spring.config.location=file:///tmp/serving.online.application.yml \ &> /var/log/feast-serving-online.log & sleep 15 tail -n10 /var/log/feast-serving-online.log +nc -w2 localhost 6566 < /dev/null echo " ============================================================ diff --git a/core/src/main/java/feast/core/config/FeatureStreamConfig.java b/core/src/main/java/feast/core/config/FeatureStreamConfig.java index ca8240d7805..1671a08f28d 100644 --- a/core/src/main/java/feast/core/config/FeatureStreamConfig.java +++ b/core/src/main/java/feast/core/config/FeatureStreamConfig.java @@ -39,6 +39,8 @@ @Configuration public class FeatureStreamConfig { + String DEFAULT_KAFKA_REQUEST_TIMEOUT_MS_CONFIG = "15000"; + @Autowired @Bean public Source getDefaultSource(FeastProperties feastProperties) { @@ -50,7 +52,8 @@ public Source getDefaultSource(FeastProperties feastProperties) { String topicName = streamProperties.getOptions().get("topic"); Map map = new HashMap<>(); map.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); - map.put(AdminClientConfig.REQUEST_TIMEOUT_MS_CONFIG, "1000"); + map.put(AdminClientConfig.REQUEST_TIMEOUT_MS_CONFIG, + DEFAULT_KAFKA_REQUEST_TIMEOUT_MS_CONFIG); AdminClient client = AdminClient.create(map); NewTopic newTopic = From 01c4b4427544f9d90ab97995d573b900dfb54796 Mon Sep 17 00:00:00 2001 From: voonhous Date: Mon, 23 Dec 2019 09:27:57 +0800 Subject: [PATCH 12/18] Added a few minor changes: (#383) - Use tmp dir during ingest - Tweak int to ceil for row_group_size - Changed imap_unordered to imap so that results are ordered --- 
sdk/python/feast/client.py | 29 +++++++++++++++++------------ sdk/python/feast/loaders/ingest.py | 2 +- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/sdk/python/feast/client.py b/sdk/python/feast/client.py index 4e5d63e2d86..719022ea7a3 100644 --- a/sdk/python/feast/client.py +++ b/sdk/python/feast/client.py @@ -15,10 +15,12 @@ import json import logging import os +import shutil +import tempfile import time from collections import OrderedDict -from typing import Dict, Union -from typing import List +from math import ceil +from typing import Dict, List, Tuple, Union from urllib.parse import urlparse import fastavro @@ -642,11 +644,11 @@ def ingest( raise Exception(f"Feature set name must be provided") # Read table and get row count - tmp_table_name = _read_table_from_source( + dir_path, dest_path = _read_table_from_source( source, chunk_size, max_workers ) - pq_file = pq.ParquetFile(tmp_table_name) + pq_file = pq.ParquetFile(dest_path) row_count = pq_file.metadata.num_rows @@ -688,7 +690,7 @@ def ingest( # Transform and push data to Kafka if feature_set.source.source_type == "Kafka": for chunk in get_feature_row_chunks( - file=tmp_table_name, + file=dest_path, row_groups=list(range(pq_file.num_row_groups)), fs=feature_set, max_workers=max_workers): @@ -715,7 +717,7 @@ def ingest( finally: # Remove parquet file(s) that were created earlier print("Removing temporary file(s)...") - os.remove(tmp_table_name) + shutil.rmtree(dir_path) return None @@ -753,7 +755,7 @@ def _read_table_from_source( source: Union[pd.DataFrame, str], chunk_size: int, max_workers: int -) -> str: +) -> Tuple[str, str]: """ Infers a data source type (path or Pandas DataFrame) and reads it in as a PyArrow Table. @@ -777,7 +779,9 @@ def _read_table_from_source( Amount of rows to load and ingest at a time. Returns: - str: Path to parquet file that was created. + Tuple[str, str]: + Tuple containing parent directory path and destination path to + parquet file. 
""" # Pandas DataFrame detected @@ -807,12 +811,13 @@ def _read_table_from_source( assert isinstance(table, pa.lib.Table) # Write table as parquet file with a specified row_group_size + dir_path = tempfile.mkdtemp() tmp_table_name = f"{int(time.time())}.parquet" - row_group_size = min(int(table.num_rows/max_workers), chunk_size) - pq.write_table(table=table, where=tmp_table_name, - row_group_size=row_group_size) + dest_path = f"{dir_path}/{tmp_table_name}" + row_group_size = min(ceil(table.num_rows/max_workers), chunk_size) + pq.write_table(table=table, where=dest_path, row_group_size=row_group_size) # Remove table from memory del table - return tmp_table_name + return dir_path, dest_path diff --git a/sdk/python/feast/loaders/ingest.py b/sdk/python/feast/loaders/ingest.py index 527ab481fe0..cbe80086e67 100644 --- a/sdk/python/feast/loaders/ingest.py +++ b/sdk/python/feast/loaders/ingest.py @@ -127,7 +127,7 @@ def get_feature_row_chunks( pool = Pool(max_workers) func = partial(_encode_pa_tables, file, fs) - for chunk in pool.imap_unordered(func, row_groups): + for chunk in pool.imap(func, row_groups): yield chunk return From f669e51030825fa55a6ef436e8c73868ad4682dc Mon Sep 17 00:00:00 2001 From: Willem Pienaar Date: Mon, 23 Dec 2019 05:07:57 +0000 Subject: [PATCH 13/18] GitBook: [master] 2 pages modified --- docs/README.md | 20 --- docs/getting-started/installing-feast.md | 160 ++++++++++++----------- 2 files changed, 85 insertions(+), 95 deletions(-) diff --git a/docs/README.md b/docs/README.md index 76790929af1..62d152ca1e0 100644 --- a/docs/README.md +++ b/docs/README.md @@ -17,23 +17,3 @@ Feast aims to: This means that new ML projects start with a process of feature selection from a catalog instead of having to do feature engineering from scratch. 
-```python -# Setting things up -fs = feast.Client('feast.example.com') -customer_ids = ['1001', '1002', '1003'] -customer_features = ['CreditScore', 'Balance', 'Age', 'NumOfProducts', 'IsActive'] -from_date = '2019-01-01' -to_date = '2019-12-31' - -# Training your model (typically from a notebook or pipeline) -data = fs.get_batch_features(customer_features, customer_ids, from_date, to_date) -my_model = ml.fit(data.to_train(), data.to_train()) - -# Serving predictions (when serving the model in production) -prediction = my_model.predict(fs.get_online_features(customer_features, customer_ids)) -``` - -The code above is for illustrative purposes. Please see our getting started guide for more realistic examples. - -For more reasons to use Feast, please see [Why Feast?](why-feast.md#why-feast) - diff --git a/docs/getting-started/installing-feast.md b/docs/getting-started/installing-feast.md index a88c3e547e9..0b8b42f26f6 100644 --- a/docs/getting-started/installing-feast.md +++ b/docs/getting-started/installing-feast.md @@ -8,44 +8,102 @@ This installation guide will demonstrate three ways of installing Feast: * [**Minikube**](installing-feast.md#minikube)**:** This installation has no external dependencies, but does not have a historical feature store installed. It allows users to quickly get a feel for Feast. * [**Google Kubernetes Engine:**](installing-feast.md#google-kubernetes-engine) This guide installs a single cluster Feast installation on Google's GKE. It has Google Cloud specific dependencies like BigQuery, Dataflow, and Google Cloud Storage. -## Docker Compose +## Docker Compose \(Quickstart\) ### Overview -A docker compose file is provided to quickly test Feast with the official docker images. There is no hard dependency on GCP, unless batch serving is required. +A docker compose file is provided to quickly test Feast with the official docker images. There is no hard dependency on GCP, unless batch serving is required. 
Once you have set up Feast using Docker Compose, you will be able to: -* Define and register feature set -* Feature ingestion +* Create, register, and manage feature sets +* Ingest feature data into Feast * Retrieve features for online serving -* Updating the feature set -The docker compose setup uses Direct Runner for the Apache Beam jobs. +{% hint style="info" %} +The docker compose setup uses Direct Runner for the Apache Beam jobs. Running Beam with the Direct Runner means it does not need a dedicated runner like Flink or Dataflow, but this comes at the cost of performance. We recommend the use of a full runner when running Feast with very large workloads. +{% endhint %} ### 0. Requirements -1. [Docker compose](https://docs.docker.com/compose/install/) should be installed. -2. TCP ports 6565, 6566, 8888, and 9094 are not in use. Otherwise, modify the port mappings in `infra/docker-compose/docker-compose.yml` to use unoccupied ports. -3. \(optional\) For batch serving you will also need a [GCP service account key](https://cloud.google.com/iam/docs/creating-managing-service-account-keys) that has access to GCS and BigQuery. Port 6567 will be used for the batch serving endpoint. +* [Docker compose](https://docs.docker.com/compose/install/) should be installed. +* TCP ports 6565, 6566, 8888, and 9094 should not be in use. Otherwise, modify the port mappings in `infra/docker-compose/docker-compose.yml` to use unoccupied ports. +* \(for batch serving only\) For batch serving you will also need a [GCP service account key](https://cloud.google.com/iam/docs/creating-managing-service-account-keys) that has access to GCS and BigQuery. Port 6567 will be used for the batch serving endpoint. +* \(for batch serving only\) [Google Cloud SDK ](https://cloud.google.com/sdk/install)installed, authenticated, and configured to the project you will use. + +### 1. 
Step-by-step guide \(Online serving only\) + +Clone the [Feast repository](https://github.com/gojek/feast/) and navigate to the `docker-compose` sub-directory: + +```bash +git clone https://github.com/gojek/feast.git && \ +cd feast && export FEAST_HOME_DIR=$(pwd) && \ +cd infra/docker-compose +``` + +Make a copy of the `.env.sample` file: + +```bash +cp .env.sample .env +``` + +Start Feast: + +```javascript +docker-compose up -d +``` + +A Jupyter notebook is now available to use Feast: + +[http://localhost:8888/notebooks/feast-notebooks/feast-quickstart.ipynb](http://localhost:8888/notebooks/feast-notebooks/feast-quickstart.ipynb) + +### 2. Step-by-step guide \(Batch and online serving\) + +Clone the [Feast repository](https://github.com/gojek/feast/) and navigate to the `docker-compose` sub-directory: + +```bash +git clone https://github.com/gojek/feast.git && \ +cd feast && export FEAST_HOME_DIR=$(pwd) && \ +cd infra/docker-compose +``` + +Create a [service account ](https://cloud.google.com/iam/docs/creating-managing-service-accounts)from the GCP console and copy it to the `gcp-service-accounts` folder: + +```javascript +cp my-service-account.json ${FEAST_HOME_DIR}/infra/docker-compose/gcp-service-accounts +``` + +Create a Google Cloud Storage bucket. Make sure that your service account above has read/write permissions to this bucket: + +```bash +gsutil mb gs://my-feast-staging-bucket +``` + +Make a copy of the `.env.sample` file: -### 1. Step-by-step guide \(Online serving\) +```bash +cp .env.sample .env +``` -1. Navigate to `infra/docker-compose`. -2. Copy `.env.sample` to `.env`. -3. `docker-compose up -d` -4. A Jupyter Notebook server should be accessible via `localhost:8888` -5. Please wait a minute or two for the Feast services to be ready before running the notebook. You will know that the services are ready when port `6565` and `6566` starts listening. +Customize the `.env` file based on your environment. 
At the very least you have to modify: + +* **FEAST\_CORE\_GCP\_SERVICE\_ACCOUNT\_KEY:** This should be your service account file name without the .json extension. +* **FEAST\_BATCH\_SERVING\_GCP\_SERVICE\_ACCOUNT\_KEY:** This should be your service account file name without the .json extension. +* **FEAST\_JUPYTER\_GCP\_SERVICE\_ACCOUNT\_KEY:** This should be your service account file name without the .json extension. +* **FEAST\_JOB\_STAGING\_LOCATION:** Google Cloud Storage bucket that Feast will use to stage data exports and batch retrieval requests. + +We will also need to customize the `bq-store.yml` file inside `infra/docker-compose/serving/` to configure the BigQuery storage configuration as well as the feature sets that the store subscribes to. At a minimum you will need to set: + +* **project\_id:** This is your GCP project id. +* **dataset\_id:** This is the name of the BigQuery dataset that tables will be created in. Each feature set will have one table in BigQuery. + +Start Feast: + +```javascript +docker-compose -f docker-compose.yml -f docker-compose.batch.yml up -d +``` -### 2. Step-by-step guide \(Batch serving\) +A Jupyter notebook is now available to use Feast: -1. Navigate to `infra/docker-compose`. -2. Copy `.env.sample` to `.env`. -3. Copy your GCP account service key\(s\) to `infra/docker-compose/gcp-service-accounts`. -4. Modify the value of `FEAST__GCP_SERVICE_ACCOUNT_KEY` in your `.env` file. It should be the json file name without extension. -5. Modify the value of `infra/docker-compose/serving/bq-store.yml`. Alternatively, you can also point to a different store configuration file by modifying `FEAST_BATCH_STORE_CONFIG` in your `.env` file. -6. `docker-compose -f docker-compose.yml -f docker-compose.batch.yml up -d` -7. A jupyter notebook server should be accessible via `localhost:8888` -8. Please wait a minute or two for the Feast services to be ready before running the notebook. 
You will know that the services are ready when port `6565` and `6567` starts listening. -9. When you are done, run `docker-compose -f docker-compose.yml -f docker-compose.batch.yml down` to shutdown the services. +[http://localhost:8888/notebooks/feast-notebooks](http://localhost:8888/tree/feast-notebooks) ## Minikube @@ -67,7 +125,7 @@ The following software should be installed prior to starting: 1. [Minikube](https://kubernetes.io/docs/tasks/tools/install-minikube/) should be installed. 2. [Kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) installed and configured to work with Minikube. -3. [Helm](https://helm.sh/3) \(2.16.0 or greater\). +3. [Helm](https://helm.sh/3) \(2.16.0 or greater\). Helm 3 has not been tested yet. ### 1. Set up Minikube @@ -154,30 +212,6 @@ feast config set core_url ${FEAST_CORE_URL} feast config set serving_url ${FEAST_SERVING_URL} ``` -Make sure that both Feast Core and Feast Serving are connected: - -```bash -feast version -``` - -```javascript -{ - "sdk": { - "version": "feast 0.3.2" - }, - "core": { - "url": "192.168.99.100:32090", - "version": "0.3", - "status": "connected" - }, - "serving": { - "url": "192.168.99.100:32091", - "version": "0.3", - "status": "connected" - } -} -``` - That's it! You can now start to use Feast! ## Google Kubernetes Engine @@ -203,7 +237,7 @@ This guide requires [Google Cloud Platform](https://cloud.google.com/) for insta 1. [Google Cloud SDK ](https://cloud.google.com/sdk/install)installed, authenticated, and configured to the project you will use. 2. [Kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) installed. -3. [Helm](https://helm.sh/3) \(2.16.0 or greater\) installed on your local machine with Tiller installed in your cluster. +3. [Helm](https://helm.sh/3) \(2.16.0 or greater\) installed on your local machine with Tiller installed in your cluster. Helm 3 has not been tested yet. ### 1. 
Set up GCP @@ -386,29 +420,5 @@ feast config set core_url ${FEAST_CORE_URL} feast config set serving_url ${FEAST_ONLINE_SERVING_URL} ``` -Make sure that both Feast Core and Feast Serving are connected: - -```bash -feast version -``` - -```javascript -{ - "sdk": { - "version": "feast 0.3.2" - }, - "core": { - "url": "192.168.99.100:32090", - "version": "0.3", - "status": "connected" - }, - "serving": { - "url": "192.168.99.100:32091", - "version": "0.3", - "status": "connected" - } -} -``` - That's it! You can now start to use Feast! From 5fc779cd5db6fd6570b6f793c6fca54b2501705e Mon Sep 17 00:00:00 2001 From: Khor Shu Heng <32997938+khorshuheng@users.noreply.github.com> Date: Mon, 23 Dec 2019 17:05:57 +0800 Subject: [PATCH 14/18] Remove alpha v1 from java package name (#387) --- .../main/java/com/gojek/feast/{v1alpha1 => }/FeastClient.java | 2 +- .../main/java/com/gojek/feast/{v1alpha1 => }/RequestUtil.java | 2 +- sdk/java/src/main/java/com/gojek/feast/{v1alpha1 => }/Row.java | 2 +- .../java/com/gojek/feast/{v1alpha1 => }/RequestUtilTest.java | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) rename sdk/java/src/main/java/com/gojek/feast/{v1alpha1 => }/FeastClient.java (99%) rename sdk/java/src/main/java/com/gojek/feast/{v1alpha1 => }/RequestUtil.java (98%) rename sdk/java/src/main/java/com/gojek/feast/{v1alpha1 => }/Row.java (99%) rename sdk/java/src/test/java/com/gojek/feast/{v1alpha1 => }/RequestUtilTest.java (99%) diff --git a/sdk/java/src/main/java/com/gojek/feast/v1alpha1/FeastClient.java b/sdk/java/src/main/java/com/gojek/feast/FeastClient.java similarity index 99% rename from sdk/java/src/main/java/com/gojek/feast/v1alpha1/FeastClient.java rename to sdk/java/src/main/java/com/gojek/feast/FeastClient.java index b7a3e78ab13..91ddd2a442c 100644 --- a/sdk/java/src/main/java/com/gojek/feast/v1alpha1/FeastClient.java +++ b/sdk/java/src/main/java/com/gojek/feast/FeastClient.java @@ -14,7 +14,7 @@ * See the License for the specific language governing 
permissions and * limitations under the License. */ -package com.gojek.feast.v1alpha1; +package com.gojek.feast; import feast.serving.ServingAPIProto.FeatureSetRequest; import feast.serving.ServingAPIProto.GetFeastServingInfoRequest; diff --git a/sdk/java/src/main/java/com/gojek/feast/v1alpha1/RequestUtil.java b/sdk/java/src/main/java/com/gojek/feast/RequestUtil.java similarity index 98% rename from sdk/java/src/main/java/com/gojek/feast/v1alpha1/RequestUtil.java rename to sdk/java/src/main/java/com/gojek/feast/RequestUtil.java index 72fbe289f2f..e80b40bad9c 100644 --- a/sdk/java/src/main/java/com/gojek/feast/v1alpha1/RequestUtil.java +++ b/sdk/java/src/main/java/com/gojek/feast/RequestUtil.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.gojek.feast.v1alpha1; +package com.gojek.feast; import feast.serving.ServingAPIProto.FeatureSetRequest; import java.util.ArrayList; diff --git a/sdk/java/src/main/java/com/gojek/feast/v1alpha1/Row.java b/sdk/java/src/main/java/com/gojek/feast/Row.java similarity index 99% rename from sdk/java/src/main/java/com/gojek/feast/v1alpha1/Row.java rename to sdk/java/src/main/java/com/gojek/feast/Row.java index 77f9f298873..9366fe1bb03 100644 --- a/sdk/java/src/main/java/com/gojek/feast/v1alpha1/Row.java +++ b/sdk/java/src/main/java/com/gojek/feast/Row.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.gojek.feast.v1alpha1; +package com.gojek.feast; import com.google.protobuf.ByteString; import com.google.protobuf.Timestamp; diff --git a/sdk/java/src/test/java/com/gojek/feast/v1alpha1/RequestUtilTest.java b/sdk/java/src/test/java/com/gojek/feast/RequestUtilTest.java similarity index 99% rename from sdk/java/src/test/java/com/gojek/feast/v1alpha1/RequestUtilTest.java rename to sdk/java/src/test/java/com/gojek/feast/RequestUtilTest.java index 5f87ba01535..21c8bde15ec 100644 --- a/sdk/java/src/test/java/com/gojek/feast/v1alpha1/RequestUtilTest.java +++ b/sdk/java/src/test/java/com/gojek/feast/RequestUtilTest.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package com.gojek.feast.v1alpha1; +package com.gojek.feast; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; From 51cbdc784cea34ebee4b0bc7014263701e1ad682 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=B5=E6=B3=B0=E7=91=8B=28Chang=20Tai=20Wei=29?= Date: Tue, 24 Dec 2019 12:28:32 +0800 Subject: [PATCH 15/18] (concepts): change data types to upper case because lower case would fail when you want to apply this FeatureSet (#389) --- docs/concepts.md | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/concepts.md b/docs/concepts.md index 5d15e28ba1f..860515c3699 100644 --- a/docs/concepts.md +++ b/docs/concepts.md @@ -99,20 +99,20 @@ Entities in Feast are defined within Feature Sets and are not treated as standal Feast supports the following types for feature values -* Bytes -* String -* Int32 -* Int64 -* Double -* Float -* Bool -* Bytes List -* String List -* Int32 List -* Int64 List -* Double List -* Float List -* Bool List +* BYTES +* STRING +* INT32 +* INT64 +* DOUBLE +* FLOAT +* BOOL +* BYTES_LIST +* STRING_LIST +* INT32_LIST +* INT64_LIST +* DOUBLE_LIST +* FLOAT_LIST +* BOOL_LIST ## Glossary From 
5b69331ea5589976093a9b7bae60a135036ddaa1 Mon Sep 17 00:00:00 2001 From: Willem Pienaar <6728866+woop@users.noreply.github.com> Date: Fri, 27 Dec 2019 16:55:23 +0800 Subject: [PATCH 16/18] Project Namespacing (#393) * Implement project namespacing (without auth) * Update Protos, Java SDK, Golang SDK to support namespacing * Fixed Python SDK to support project namespacing protos * Add integration with projects, update code to be compliant with new protos * Move name, version and project back to spec * Update Feast Core and Feast Ingestion to support project namespacing * Update Core and Ingestion based on refactored FeatureSet proto * Remove entity dataset validation * Register feature sets first to speed up tests * Apply PR #392 * Apply spotless * Order test output Co-authored-by: Chen Zhiling --- .prow/scripts/test-end-to-end-batch.sh | 7 +- .prow/scripts/test-end-to-end.sh | 9 +- CONTRIBUTING.md | 6 +- .../core/config/FeatureStreamConfig.java | 4 +- .../feast/core/dao/FeatureSetRepository.java | 31 +- .../feast/core/dao/ProjectRepository.java | 27 + .../java/feast/core/grpc/CoreServiceImpl.java | 87 ++- .../java/feast/core/job/JobUpdateTask.java | 39 +- .../core/job/dataflow/DataflowJobManager.java | 53 +- .../job/direct/DirectRunnerJobManager.java | 18 +- .../java/feast/core/model/FeatureSet.java | 154 +++++- .../src/main/java/feast/core/model/Field.java | 41 +- core/src/main/java/feast/core/model/Job.java | 7 +- .../main/java/feast/core/model/Project.java | 84 +++ .../main/java/feast/core/model/Source.java | 18 + .../src/main/java/feast/core/model/Store.java | 14 +- .../core/service/AccessManagementService.java | 78 +++ .../core/service/JobCoordinatorService.java | 68 ++- .../java/feast/core/service/SpecService.java | 294 ++++++---- .../core/validators/FeatureSetValidator.java | 21 +- .../java/feast/core/validators/Matchers.java | 6 +- core/src/main/resources/application.yml | 6 +- .../java/feast/core/CoreApplicationTest.java | 132 +---- 
.../feast/core/job/JobUpdateTaskTest.java | 75 ++- .../job/dataflow/DataflowJobManagerTest.java | 42 +- .../direct/DirectRunnerJobManagerTest.java | 26 +- .../service/JobCoordinatorServiceTest.java | 65 ++- .../feast/core/service/SpecServiceTest.java | 308 +++++++---- .../feast/charts/feast-serving/values.yaml | 10 +- infra/charts/feast/values-demo.yaml | 3 +- infra/charts/feast/values.yaml | 6 +- infra/docker-compose/serving/bq-store.yml | 5 +- infra/docker-compose/serving/redis-store.yml | 5 +- .../example/core_specs/entity/product.json | 5 - ingestion/example/core_specs/entity/user.json | 5 - .../feature/product.day.completed_orders.json | 19 - .../core_specs/feature/user.none.age.json | 19 - .../feature/user.none.completed_orders.json | 19 - .../core_specs/storage/example_errors.json | 7 - .../core_specs/storage/example_serving.json | 7 - .../core_specs/storage/example_warehouse.json | 7 - ingestion/example/import_products.yaml | 15 - ingestion/example/import_users.yaml | 16 - .../example/sample_data/daily_products.csv | 5 - ingestion/example/sample_data/users.csv | 3 - .../main/java/feast/ingestion/ImportJob.java | 41 +- .../ingestion/options/ImportOptions.java | 10 +- .../transform/ValidateFeatureRows.java | 15 +- .../ingestion/transform/WriteToStore.java | 8 +- .../transform/fn/ValidateFeatureRowDoFn.java | 27 +- .../WriteDeadletterRowMetricsDoFn.java | 2 + .../metrics/WriteRowMetricsDoFn.java | 15 +- .../java/feast/ingestion/utils/SpecUtil.java | 72 ++- .../java/feast/ingestion/utils/StoreUtil.java | 16 +- .../feast/ingestion/values/FailedElement.java | 5 + .../{FeatureSetSpec.java => FeatureSet.java} | 15 +- .../serving/bigquery/GetTableDestination.java | 4 +- .../redis/FeatureRowToRedisMutationDoFn.java | 12 +- .../java/feast/ingestion/ImportJobTest.java | 13 +- .../transform/ValidateFeatureRowsTest.java | 104 ++-- .../feast/ingestion/util/StoreUtilTest.java | 21 +- .../src/test/java/feast/test/TestUtil.java | 42 +- .../import-job-specs/invalid-empty.yaml 
| 2 - .../invalid-source-spec-1.yaml | 34 -- .../resources/import-job-specs/valid-1.yaml | 34 -- .../resources/import-job-specs/valid-2.yaml | 33 -- .../resources/import-specs/csv_to_store1.yaml | 16 - .../test/resources/specs/importJobSpecs.yaml | 44 -- protos/feast/core/CoreService.proto | 86 ++- protos/feast/core/FeatureSet.proto | 17 +- protos/feast/core/Store.proto | 28 +- protos/feast/serving/ServingService.proto | 22 +- sdk/go/README.md | 49 ++ sdk/go/go.mod | 2 +- sdk/go/go.sum | 2 + sdk/go/protos/feast/core/CoreService.pb.go | 516 +++++++++++++++--- sdk/go/protos/feast/core/FeatureSet.pb.go | 211 ++++++- sdk/go/protos/feast/core/Store.pb.go | 92 ++-- .../protos/feast/serving/ServingService.pb.go | 221 ++++---- sdk/go/request.go | 104 ++-- sdk/go/request_test.go | 80 ++- sdk/go/response_test.go | 18 +- .../java/com/gojek/feast/FeastClient.java | 36 +- .../java/com/gojek/feast/RequestUtil.java | 90 +-- .../java/com/gojek/feast/RequestUtilTest.java | 76 +-- sdk/python/feast/__init__.py | 1 + sdk/python/feast/client.py | 460 +++++++++------- sdk/python/feast/core/CoreService_pb2.py | 330 +++++++++-- sdk/python/feast/core/CoreService_pb2.pyi | 94 +++- sdk/python/feast/core/CoreService_pb2_grpc.py | 56 ++ sdk/python/feast/core/FeatureSet_pb2.py | 39 +- sdk/python/feast/core/FeatureSet_pb2.pyi | 6 +- sdk/python/feast/core/Store_pb2.py | 21 +- sdk/python/feast/core/Store_pb2.pyi | 6 +- sdk/python/feast/exceptions.py | 20 - sdk/python/feast/feature_set.py | 75 ++- sdk/python/feast/job.py | 9 +- sdk/python/feast/loaders/abstract_producer.py | 46 +- sdk/python/feast/loaders/file.py | 51 +- sdk/python/feast/loaders/ingest.py | 30 +- .../feast/serving/ServingService_pb2.py | 122 ++--- .../feast/serving/ServingService_pb2.pyi | 28 +- sdk/python/feast/type_map.py | 71 ++- sdk/python/feast/types/__init__.py | 0 sdk/python/setup.py | 2 +- sdk/python/tests/fake_kafka.py | 21 - sdk/python/tests/feast_core_server.py | 11 +- sdk/python/tests/feast_serving_server.py | 39 +- 
sdk/python/tests/stores.py | 98 ---- sdk/python/tests/test_client.py | 93 ++-- sdk/python/tests/test_stores.py | 60 -- serving/sample_redis_config.yml | 3 +- .../configuration/JobServiceConfig.java | 2 +- .../configuration/ServingServiceConfig.java | 2 +- .../configuration/SpecServiceConfig.java | 4 +- .../controller/HealthServiceController.java | 2 +- .../service/BigQueryServingService.java | 77 +-- .../serving/service/CachedSpecService.java | 199 ------- .../serving/service/RedisServingService.java | 109 ++-- .../serving/specs/CachedSpecService.java | 262 +++++++++ .../{service => specs}/CoreSpecService.java | 8 +- .../serving/specs/FeatureSetRequest.java | 53 ++ .../bigquery/BatchRetrievalQueryRunnable.java | 30 +- .../store/bigquery/QueryTemplater.java | 38 +- .../store/bigquery/SubqueryCallable.java | 16 + .../store/bigquery/model/FeatureSetInfo.java | 12 +- .../main/java/feast/serving/util/Metrics.java | 10 +- .../main/java/feast/serving/util/RefUtil.java | 38 ++ .../feast/serving/util/RequestHelper.java | 13 + .../resources/templates/join_featuresets.sql | 4 +- .../templates/single_featureset_pit_join.sql | 18 +- .../ServingServiceGRpcControllerTest.java | 17 +- .../service/CachedSpecServiceTest.java | 173 +++++- .../service/RedisServingServiceTest.java | 222 +++++--- .../util/mappers/YamlToProtoMapperTest.java | 8 +- .../all_types_parquet/all_types_parquet.yaml | 24 +- tests/e2e/basic-ingest-redis-serving.py | 96 ++-- tests/e2e/bq-batch-retrieval.py | 177 +++--- .../e2e/large_volume/cust_trans_large_fs.yaml | 4 +- 139 files changed, 4666 insertions(+), 2855 deletions(-) create mode 100644 core/src/main/java/feast/core/dao/ProjectRepository.java create mode 100644 core/src/main/java/feast/core/model/Project.java create mode 100644 core/src/main/java/feast/core/service/AccessManagementService.java delete mode 100644 ingestion/example/core_specs/entity/product.json delete mode 100644 ingestion/example/core_specs/entity/user.json delete mode 100644 
ingestion/example/core_specs/feature/product.day.completed_orders.json delete mode 100644 ingestion/example/core_specs/feature/user.none.age.json delete mode 100644 ingestion/example/core_specs/feature/user.none.completed_orders.json delete mode 100644 ingestion/example/core_specs/storage/example_errors.json delete mode 100644 ingestion/example/core_specs/storage/example_serving.json delete mode 100644 ingestion/example/core_specs/storage/example_warehouse.json delete mode 100644 ingestion/example/import_products.yaml delete mode 100644 ingestion/example/import_users.yaml delete mode 100644 ingestion/example/sample_data/daily_products.csv delete mode 100644 ingestion/example/sample_data/users.csv rename ingestion/src/main/java/feast/ingestion/values/{FeatureSetSpec.java => FeatureSet.java} (76%) delete mode 100644 ingestion/src/test/resources/import-job-specs/invalid-empty.yaml delete mode 100644 ingestion/src/test/resources/import-job-specs/invalid-source-spec-1.yaml delete mode 100644 ingestion/src/test/resources/import-job-specs/valid-1.yaml delete mode 100644 ingestion/src/test/resources/import-job-specs/valid-2.yaml delete mode 100644 ingestion/src/test/resources/import-specs/csv_to_store1.yaml delete mode 100644 ingestion/src/test/resources/specs/importJobSpecs.yaml create mode 100644 sdk/go/README.md delete mode 100644 sdk/python/feast/exceptions.py delete mode 100644 sdk/python/feast/types/__init__.py delete mode 100644 sdk/python/tests/fake_kafka.py delete mode 100644 sdk/python/tests/stores.py delete mode 100644 sdk/python/tests/test_stores.py delete mode 100644 serving/src/main/java/feast/serving/service/CachedSpecService.java create mode 100644 serving/src/main/java/feast/serving/specs/CachedSpecService.java rename serving/src/main/java/feast/serving/{service => specs}/CoreSpecService.java (85%) create mode 100644 serving/src/main/java/feast/serving/specs/FeatureSetRequest.java create mode 100644 serving/src/main/java/feast/serving/util/RefUtil.java 
diff --git a/.prow/scripts/test-end-to-end-batch.sh b/.prow/scripts/test-end-to-end-batch.sh index 4ae7ae1e54b..ac282a0c33a 100755 --- a/.prow/scripts/test-end-to-end-batch.sh +++ b/.prow/scripts/test-end-to-end-batch.sh @@ -123,7 +123,9 @@ feast: spring: jpa: - properties.hibernate.format_sql: true + properties.hibernate: + format_sql: true + event.merge.entity_copy_observer: allow hibernate.naming.physical-strategy=org.hibernate.boot.model.naming: PhysicalNamingStrategyStandardImpl hibernate.ddl-auto: update datasource: @@ -167,7 +169,8 @@ bigquery_config: datasetId: $DATASET_NAME subscriptions: - name: "*" - version: ">0" + version: "*" + project: "*" EOF cat < /tmp/serving.warehouse.application.yml diff --git a/.prow/scripts/test-end-to-end.sh b/.prow/scripts/test-end-to-end.sh index 0206472aae2..c7ff461ec31 100755 --- a/.prow/scripts/test-end-to-end.sh +++ b/.prow/scripts/test-end-to-end.sh @@ -115,9 +115,12 @@ feast: spring: jpa: - properties.hibernate.format_sql: true + properties.hibernate: + format_sql: true + event.merge.entity_copy_observer: allow hibernate.naming.physical-strategy=org.hibernate.boot.model.naming: PhysicalNamingStrategyStandardImpl hibernate.ddl-auto: update + datasource: url: jdbc:postgresql://localhost:5432/postgres username: postgres @@ -153,7 +156,8 @@ redis_config: port: 6379 subscriptions: - name: "*" - version: ">0" + version: "*" + project: "*" EOF cat < /tmp/serving.online.application.yml @@ -182,6 +186,7 @@ grpc: spring: main: web-environment: false + EOF nohup java -jar serving/target/feast-serving-$REVISION.jar \ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 06476c0156d..eb38db30080 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -64,8 +64,9 @@ store { name: "SERVING" type: REDIS subscriptions { + project: "*" name: "*" - version: ">0" + version: "*" } redis_config { host: "localhost" @@ -76,8 +77,9 @@ store { name: "WAREHOUSE" type: BIGQUERY subscriptions { + project: "*" name: "*" - version: ">0" + version: "*" 
} bigquery_config { project_id: "my-google-project-id" diff --git a/core/src/main/java/feast/core/config/FeatureStreamConfig.java b/core/src/main/java/feast/core/config/FeatureStreamConfig.java index 1671a08f28d..45de359ac76 100644 --- a/core/src/main/java/feast/core/config/FeatureStreamConfig.java +++ b/core/src/main/java/feast/core/config/FeatureStreamConfig.java @@ -52,8 +52,8 @@ public Source getDefaultSource(FeastProperties feastProperties) { String topicName = streamProperties.getOptions().get("topic"); Map map = new HashMap<>(); map.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers); - map.put(AdminClientConfig.REQUEST_TIMEOUT_MS_CONFIG, - DEFAULT_KAFKA_REQUEST_TIMEOUT_MS_CONFIG); + map.put( + AdminClientConfig.REQUEST_TIMEOUT_MS_CONFIG, DEFAULT_KAFKA_REQUEST_TIMEOUT_MS_CONFIG); AdminClient client = AdminClient.create(map); NewTopic newTopic = diff --git a/core/src/main/java/feast/core/dao/FeatureSetRepository.java b/core/src/main/java/feast/core/dao/FeatureSetRepository.java index fd996b331c2..3eba2108889 100644 --- a/core/src/main/java/feast/core/dao/FeatureSetRepository.java +++ b/core/src/main/java/feast/core/dao/FeatureSetRepository.java @@ -19,28 +19,31 @@ import feast.core.model.FeatureSet; import java.util.List; import org.springframework.data.jpa.repository.JpaRepository; -import org.springframework.data.jpa.repository.Query; /** JPA repository supplying FeatureSet objects keyed by id. */ public interface FeatureSetRepository extends JpaRepository { long count(); - // Find feature set by name and version - FeatureSet findFeatureSetByNameAndVersion(String name, Integer version); + // Find single feature set by project, name, and version + FeatureSet findFeatureSetByNameAndProject_NameAndVersion( + String name, String project, Integer version); - // Find latest version of a feature set by name - FeatureSet findFirstFeatureSetByNameOrderByVersionDesc(String name); - - // find all versions of featureSets matching the given name. 
- List findByName(String name); - - // find all versions of featureSets with names matching the regex - @Query( - nativeQuery = true, - value = "SELECT * FROM feature_sets " + "WHERE name LIKE ?1 ORDER BY name ASC, version ASC") - List findByNameWithWildcardOrderByNameAscVersionAsc(String name); + // Find single latest version of a feature set by project and name (LIKE) + FeatureSet findFirstFeatureSetByNameLikeAndProject_NameOrderByVersionDesc( + String name, String project); // find all feature sets and order by name and version List findAllByOrderByNameAscVersionAsc(); + + // find all feature sets within a project and order by name and version + List findAllByProject_NameOrderByNameAscVersionAsc(String project_name); + + // find all versions of feature sets matching the given name pattern with a specific project. + List findAllByNameLikeAndProject_NameOrderByNameAscVersionAsc( + String name, String project_name); + + // find all versions of feature sets matching the given name pattern and project pattern + List findAllByNameLikeAndProject_NameLikeOrderByNameAscVersionAsc( + String name, String project_name); } diff --git a/core/src/main/java/feast/core/dao/ProjectRepository.java b/core/src/main/java/feast/core/dao/ProjectRepository.java new file mode 100644 index 00000000000..5adb7d44c2e --- /dev/null +++ b/core/src/main/java/feast/core/dao/ProjectRepository.java @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2019 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.core.dao; + +import feast.core.model.Project; +import java.util.List; +import org.springframework.data.jpa.repository.JpaRepository; + +/** JPA repository supplying Project objects keyed by id. */ +public interface ProjectRepository extends JpaRepository { + + List findAllByArchivedIsFalse(); +} diff --git a/core/src/main/java/feast/core/grpc/CoreServiceImpl.java b/core/src/main/java/feast/core/grpc/CoreServiceImpl.java index 1d42cfb3554..b8d0670d0d2 100644 --- a/core/src/main/java/feast/core/grpc/CoreServiceImpl.java +++ b/core/src/main/java/feast/core/grpc/CoreServiceImpl.java @@ -16,25 +16,35 @@ */ package feast.core.grpc; -import com.google.protobuf.InvalidProtocolBufferException; import feast.core.CoreServiceGrpc.CoreServiceImplBase; import feast.core.CoreServiceProto.ApplyFeatureSetRequest; import feast.core.CoreServiceProto.ApplyFeatureSetResponse; +import feast.core.CoreServiceProto.ArchiveProjectRequest; +import feast.core.CoreServiceProto.ArchiveProjectResponse; +import feast.core.CoreServiceProto.CreateProjectRequest; +import feast.core.CoreServiceProto.CreateProjectResponse; import feast.core.CoreServiceProto.GetFeastCoreVersionRequest; import feast.core.CoreServiceProto.GetFeastCoreVersionResponse; import feast.core.CoreServiceProto.GetFeatureSetRequest; import feast.core.CoreServiceProto.GetFeatureSetResponse; import feast.core.CoreServiceProto.ListFeatureSetsRequest; import feast.core.CoreServiceProto.ListFeatureSetsResponse; +import feast.core.CoreServiceProto.ListProjectsRequest; +import feast.core.CoreServiceProto.ListProjectsResponse; import feast.core.CoreServiceProto.ListStoresRequest; import feast.core.CoreServiceProto.ListStoresResponse; import feast.core.CoreServiceProto.UpdateStoreRequest; import feast.core.CoreServiceProto.UpdateStoreResponse; import feast.core.exception.RetrievalException; import 
feast.core.grpc.interceptors.MonitoringInterceptor; +import feast.core.model.Project; +import feast.core.service.AccessManagementService; import feast.core.service.SpecService; +import io.grpc.Status; import io.grpc.StatusRuntimeException; import io.grpc.stub.StreamObserver; +import java.util.List; +import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; import org.lognet.springboot.grpc.GRpcService; import org.springframework.beans.factory.annotation.Autowired; @@ -45,10 +55,12 @@ public class CoreServiceImpl extends CoreServiceImplBase { private SpecService specService; + private AccessManagementService accessManagementService; @Autowired - public CoreServiceImpl(SpecService specService) { + public CoreServiceImpl(SpecService specService, AccessManagementService accessManagementService) { this.specService = specService; + this.accessManagementService = accessManagementService; } @Override @@ -65,9 +77,10 @@ public void getFeatureSet( GetFeatureSetResponse response = specService.getFeatureSet(request); responseObserver.onNext(response); responseObserver.onCompleted(); - } catch (RetrievalException | InvalidProtocolBufferException | StatusRuntimeException e) { + } catch (RetrievalException | StatusRuntimeException e) { log.error("Exception has occurred in GetFeatureSet method: ", e); - responseObserver.onError(e); + responseObserver.onError( + Status.INTERNAL.withDescription(e.getMessage()).withCause(e).asRuntimeException()); } } @@ -78,9 +91,10 @@ public void listFeatureSets( ListFeatureSetsResponse response = specService.listFeatureSets(request.getFilter()); responseObserver.onNext(response); responseObserver.onCompleted(); - } catch (RetrievalException | InvalidProtocolBufferException e) { + } catch (RetrievalException | IllegalArgumentException e) { log.error("Exception has occurred in ListFeatureSet method: ", e); - responseObserver.onError(e); + responseObserver.onError( + 
Status.INTERNAL.withDescription(e.getMessage()).withCause(e).asRuntimeException()); } } @@ -93,7 +107,8 @@ public void listStores( responseObserver.onCompleted(); } catch (RetrievalException e) { log.error("Exception has occurred in ListStores method: ", e); - responseObserver.onError(e); + responseObserver.onError( + Status.INTERNAL.withDescription(e.getMessage()).withCause(e).asRuntimeException()); } } @@ -104,9 +119,17 @@ public void applyFeatureSet( ApplyFeatureSetResponse response = specService.applyFeatureSet(request.getFeatureSet()); responseObserver.onNext(response); responseObserver.onCompleted(); + } catch (org.hibernate.exception.ConstraintViolationException e) { + log.error( + "Unable to persist this feature set due to a constraint violation. Please ensure that" + + " field names are unique within the project namespace: ", + e); + responseObserver.onError( + Status.ALREADY_EXISTS.withDescription(e.getMessage()).withCause(e).asRuntimeException()); } catch (Exception e) { log.error("Exception has occurred in ApplyFeatureSet method: ", e); - responseObserver.onError(e); + responseObserver.onError( + Status.INTERNAL.withDescription(e.getMessage()).withCause(e).asRuntimeException()); } } @@ -119,7 +142,53 @@ public void updateStore( responseObserver.onCompleted(); } catch (Exception e) { log.error("Exception has occurred in UpdateStore method: ", e); - responseObserver.onError(e); + responseObserver.onError( + Status.INTERNAL.withDescription(e.getMessage()).withCause(e).asRuntimeException()); + } + } + + @Override + public void createProject( + CreateProjectRequest request, StreamObserver responseObserver) { + try { + accessManagementService.createProject(request.getName()); + responseObserver.onNext(CreateProjectResponse.getDefaultInstance()); + responseObserver.onCompleted(); + } catch (Exception e) { + log.error("Exception has occurred in the createProject method: ", e); + responseObserver.onError( + 
Status.INTERNAL.withDescription(e.getMessage()).withCause(e).asRuntimeException()); + } + } + + @Override + public void archiveProject( + ArchiveProjectRequest request, StreamObserver<ArchiveProjectResponse> responseObserver) { + try { + accessManagementService.archiveProject(request.getName()); + responseObserver.onNext(ArchiveProjectResponse.getDefaultInstance()); + responseObserver.onCompleted(); + } catch (Exception e) { + log.error("Exception has occurred in the archiveProject method: ", e); + responseObserver.onError( + Status.INTERNAL.withDescription(e.getMessage()).withCause(e).asRuntimeException()); + } + } + + @Override + public void listProjects( + ListProjectsRequest request, StreamObserver<ListProjectsResponse> responseObserver) { + try { + List<Project> projects = accessManagementService.listProjects(); + responseObserver.onNext( + ListProjectsResponse.newBuilder() + .addAllProjects(projects.stream().map(Project::getName).collect(Collectors.toList())) + .build()); + responseObserver.onCompleted(); + } catch (Exception e) { + log.error("Exception has occurred in the listProjects method: ", e); + responseObserver.onError( + Status.INTERNAL.withDescription(e.getMessage()).withCause(e).asRuntimeException()); } } } diff --git a/core/src/main/java/feast/core/job/JobUpdateTask.java b/core/src/main/java/feast/core/job/JobUpdateTask.java index 373a4a113d7..57b2dfee4f3 100644 --- a/core/src/main/java/feast/core/job/JobUpdateTask.java +++ b/core/src/main/java/feast/core/job/JobUpdateTask.java @@ -17,7 +17,6 @@ package feast.core.job; import feast.core.FeatureSetProto; -import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.SourceProto; import feast.core.StoreProto; import feast.core.log.Action; @@ -53,7 +52,7 @@ @Getter public class JobUpdateTask implements Callable<Job> { - private final List<FeatureSetSpec> featureSetSpecs; + private final List<FeatureSetProto.FeatureSet> featureSets; private final SourceProto.Source sourceSpec; private final StoreProto.Store store; private final Optional<Job> currentJob; @@ -61,14 +60,14 @@ public class JobUpdateTask
implements Callable { private long jobUpdateTimeoutSeconds; public JobUpdateTask( - List featureSetSpecs, + List featureSets, SourceProto.Source sourceSpec, StoreProto.Store store, Optional currentJob, JobManager jobManager, long jobUpdateTimeoutSeconds) { - this.featureSetSpecs = featureSetSpecs; + this.featureSets = featureSets; this.sourceSpec = sourceSpec; this.store = store; this.currentJob = currentJob; @@ -87,8 +86,8 @@ public Job call() { .map(FeatureSet::getId) .collect(Collectors.toSet()); Set newFeatureSetsPopulatedByJob = - featureSetSpecs.stream() - .map(fs -> fs.getName() + ":" + fs.getVersion()) + featureSets.stream() + .map(fs -> FeatureSet.fromProto(fs).getId()) .collect(Collectors.toSet()); if (existingFeatureSetsPopulatedByJob.size() == newFeatureSetsPopulatedByJob.size() && existingFeatureSetsPopulatedByJob.containsAll(newFeatureSetsPopulatedByJob)) { @@ -107,12 +106,11 @@ public Job call() { return job; } else { submittedJob = - executorService.submit(() -> updateJob(currentJob.get(), featureSetSpecs, store)); + executorService.submit(() -> updateJob(currentJob.get(), featureSets, store)); } } else { String jobId = createJobId(source.getId(), store.getName()); - submittedJob = - executorService.submit(() -> startJob(jobId, featureSetSpecs, sourceSpec, store)); + submittedJob = executorService.submit(() -> startJob(jobId, featureSets, sourceSpec, store)); } Job job = null; @@ -128,16 +126,19 @@ public Job call() { /** Start or update the job to ingest data to the sink. 
*/ private Job startJob( String jobId, - List featureSetSpecs, + List featureSetProtos, SourceProto.Source source, StoreProto.Store sinkSpec) { List featureSets = - featureSetSpecs.stream() + featureSetProtos.stream() .map( - spec -> + fsp -> FeatureSet.fromProto( - FeatureSetProto.FeatureSet.newBuilder().setSpec(spec).build())) + FeatureSetProto.FeatureSet.newBuilder() + .setSpec(fsp.getSpec()) + .setMeta(fsp.getMeta()) + .build())) .collect(Collectors.toList()); Job job = new Job( @@ -185,13 +186,17 @@ private Job startJob( } /** Update the given job */ - private Job updateJob(Job job, List featureSetSpecs, StoreProto.Store store) { + private Job updateJob( + Job job, List featureSets, StoreProto.Store store) { job.setFeatureSets( - featureSetSpecs.stream() + featureSets.stream() .map( - spec -> + fs -> FeatureSet.fromProto( - FeatureSetProto.FeatureSet.newBuilder().setSpec(spec).build())) + FeatureSetProto.FeatureSet.newBuilder() + .setSpec(fs.getSpec()) + .setMeta(fs.getMeta()) + .build())) .collect(Collectors.toList())); job.setStore(feast.core.model.Store.fromProto(store)); AuditLogger.log( diff --git a/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java b/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java index 92763e7971f..2de46ae1f2d 100644 --- a/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java +++ b/core/src/main/java/feast/core/job/dataflow/DataflowJobManager.java @@ -23,7 +23,7 @@ import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.util.JsonFormat; import com.google.protobuf.util.JsonFormat.Printer; -import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.FeatureSetProto; import feast.core.SourceProto; import feast.core.StoreProto; import feast.core.config.FeastProperties.MetricsProperties; @@ -33,6 +33,7 @@ import feast.core.model.FeatureSet; import feast.core.model.Job; import feast.core.model.JobStatus; +import feast.core.model.Project; import 
feast.core.model.Source; import feast.core.model.Store; import feast.core.util.TypeConversion; @@ -77,13 +78,15 @@ public Runner getRunnerType() { @Override public Job startJob(Job job) { - List featureSetSpecs = - job.getFeatureSets().stream() - .map(fs -> fs.toProto().getSpec()) - .collect(Collectors.toList()); + List featureSetProtos = + job.getFeatureSets().stream().map(FeatureSet::toProto).collect(Collectors.toList()); try { return submitDataflowJob( - job.getId(), featureSetSpecs, job.getSource().toProto(), job.getStore().toProto(), false); + job.getId(), + featureSetProtos, + job.getSource().toProto(), + job.getStore().toProto(), + false); } catch (InvalidProtocolBufferException e) { throw new RuntimeException(String.format("Unable to start job %s", job.getId()), e); } @@ -98,13 +101,11 @@ public Job startJob(Job job) { @Override public Job updateJob(Job job) { try { - List featureSetSpecs = - job.getFeatureSets().stream() - .map(fs -> fs.toProto().getSpec()) - .collect(Collectors.toList()); + List featureSetProtos = + job.getFeatureSets().stream().map(FeatureSet::toProto).collect(Collectors.toList()); return submitDataflowJob( - job.getId(), featureSetSpecs, job.getSource().toProto(), job.getStore().toProto(), true); + job.getId(), featureSetProtos, job.getSource().toProto(), job.getStore().toProto(), true); } catch (InvalidProtocolBufferException e) { throw new RuntimeException(String.format("Unable to update job %s", job.getId()), e); @@ -155,12 +156,7 @@ public JobStatus getJobStatus(Job job) { try { com.google.api.services.dataflow.model.Job dataflowJob = - dataflow - .projects() - .locations() - .jobs() - .get(projectId, location, job.getExtId()) - .execute(); + dataflow.projects().locations().jobs().get(projectId, location, job.getExtId()).execute(); return DataflowJobStateMapper.map(dataflowJob.getCurrentState()); } catch (Exception e) { log.error( @@ -173,19 +169,21 @@ public JobStatus getJobStatus(Job job) { private Job submitDataflowJob( String 
jobName, - List featureSetSpecs, + List featureSetProtos, SourceProto.Source source, StoreProto.Store sink, boolean update) { try { - ImportOptions pipelineOptions = getPipelineOptions(jobName, featureSetSpecs, sink, update); + ImportOptions pipelineOptions = getPipelineOptions(jobName, featureSetProtos, sink, update); DataflowPipelineJob pipelineResult = runPipeline(pipelineOptions); List featureSets = - featureSetSpecs.stream() + featureSetProtos.stream() .map( - spec -> { + fsp -> { FeatureSet featureSet = new FeatureSet(); - featureSet.setId(spec.getName() + ":" + spec.getVersion()); + featureSet.setName(fsp.getSpec().getName()); + featureSet.setVersion(fsp.getSpec().getVersion()); + featureSet.setProject(new Project(fsp.getSpec().getProject())); return featureSet; }) .collect(Collectors.toList()); @@ -205,16 +203,19 @@ private Job submitDataflowJob( } private ImportOptions getPipelineOptions( - String jobName, List featureSets, StoreProto.Store sink, boolean update) + String jobName, + List featureSets, + StoreProto.Store sink, + boolean update) throws IOException { String[] args = TypeConversion.convertMapToArgs(defaultOptions); ImportOptions pipelineOptions = PipelineOptionsFactory.fromArgs(args).as(ImportOptions.class); Printer printer = JsonFormat.printer(); List featureSetsJson = new ArrayList<>(); - for (FeatureSetSpec featureSet : featureSets) { - featureSetsJson.add(printer.print(featureSet)); + for (FeatureSetProto.FeatureSet featureSet : featureSets) { + featureSetsJson.add(printer.print(featureSet.getSpec())); } - pipelineOptions.setFeatureSetSpecJson(featureSetsJson); + pipelineOptions.setFeatureSetJson(featureSetsJson); pipelineOptions.setStoreJson(Collections.singletonList(printer.print(sink))); pipelineOptions.setProject(projectId); pipelineOptions.setUpdate(update); diff --git a/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java b/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java index 
89c6dc38488..fdf3aad9bc3 100644 --- a/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java +++ b/core/src/main/java/feast/core/job/direct/DirectRunnerJobManager.java @@ -20,6 +20,7 @@ import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.util.JsonFormat; import com.google.protobuf.util.JsonFormat.Printer; +import feast.core.FeatureSetProto; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.StoreProto; import feast.core.config.FeastProperties.MetricsProperties; @@ -74,11 +75,10 @@ public Runner getRunnerType() { @Override public Job startJob(Job job) { try { - List featureSetSpecs = - job.getFeatureSets().stream() - .map(fs -> fs.toProto().getSpec()) - .collect(Collectors.toList()); - ImportOptions pipelineOptions = getPipelineOptions(featureSetSpecs, job.getStore().toProto()); + List featureSetProtos = + job.getFeatureSets().stream().map(FeatureSet::toProto).collect(Collectors.toList()); + ImportOptions pipelineOptions = + getPipelineOptions(featureSetProtos, job.getStore().toProto()); PipelineResult pipelineResult = runPipeline(pipelineOptions); DirectJob directJob = new DirectJob(job.getId(), pipelineResult); jobs.add(directJob); @@ -92,16 +92,16 @@ public Job startJob(Job job) { } private ImportOptions getPipelineOptions( - List featureSetSpecs, StoreProto.Store sink) + List featureSets, StoreProto.Store sink) throws InvalidProtocolBufferException { String[] args = TypeConversion.convertMapToArgs(defaultOptions); ImportOptions pipelineOptions = PipelineOptionsFactory.fromArgs(args).as(ImportOptions.class); Printer printer = JsonFormat.printer(); List featureSetsJson = new ArrayList<>(); - for (FeatureSetSpec featureSetSpec : featureSetSpecs) { - featureSetsJson.add(printer.print(featureSetSpec)); + for (FeatureSetProto.FeatureSet featureSet : featureSets) { + featureSetsJson.add(printer.print(featureSet.getSpec())); } - pipelineOptions.setFeatureSetSpecJson(featureSetsJson); + 
pipelineOptions.setFeatureSetJson(featureSetsJson); pipelineOptions.setStoreJson(Collections.singletonList(printer.print(sink))); pipelineOptions.setRunner(DirectRunner.class); pipelineOptions.setProject(""); // set to default value to satisfy validation diff --git a/core/src/main/java/feast/core/model/FeatureSet.java b/core/src/main/java/feast/core/model/FeatureSet.java index 388b27cb04a..e4687050208 100644 --- a/core/src/main/java/feast/core/model/FeatureSet.java +++ b/core/src/main/java/feast/core/model/FeatureSet.java @@ -27,19 +27,24 @@ import feast.types.ValueProto.ValueType; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import javax.persistence.CascadeType; +import javax.persistence.CollectionTable; import javax.persistence.Column; +import javax.persistence.ElementCollection; import javax.persistence.Entity; import javax.persistence.FetchType; import javax.persistence.Id; import javax.persistence.JoinColumn; import javax.persistence.ManyToOne; -import javax.persistence.OneToMany; import javax.persistence.Table; +import javax.persistence.UniqueConstraint; import lombok.Getter; import lombok.Setter; +import org.apache.commons.lang3.builder.HashCodeBuilder; import org.hibernate.annotations.Fetch; import org.hibernate.annotations.FetchMode; @@ -49,7 +54,7 @@ @Table(name = "feature_sets") public class FeatureSet extends AbstractTimestampEntity implements Comparable { - // Id of the featureSet, defined as name:version + // Id of the featureSet, defined as project/feature_set_name:feature_set_version @Id @Column(name = "id", nullable = false, unique = true) private String id; @@ -62,20 +67,29 @@ public class FeatureSet extends AbstractTimestampEntity implements Comparable entities; + // Entity fields inside this feature set + @ElementCollection(fetch = FetchType.EAGER) + @CollectionTable(name = "entities", joinColumns = @JoinColumn(name = "feature_set_id")) + 
@Fetch(FetchMode.SUBSELECT) + private Set entities; - // Features inside this featureSet - @OneToMany(cascade = CascadeType.ALL, fetch = FetchType.EAGER) - @Fetch(value = FetchMode.SUBSELECT) - @JoinColumn(name = "features") - private List features; + // Feature fields inside this feature set + @ElementCollection(fetch = FetchType.EAGER) + @CollectionTable( + name = "features", + joinColumns = @JoinColumn(name = "feature_set_id"), + uniqueConstraints = @UniqueConstraint(columnNames = {"name", "project", "version"})) + @Fetch(FetchMode.SUBSELECT) + private Set features; // Source on which feature rows can be found @ManyToOne(cascade = CascadeType.ALL, fetch = FetchType.EAGER) @@ -92,38 +106,71 @@ public FeatureSet() { public FeatureSet( String name, + String project, int version, long maxAgeSeconds, List entities, List features, Source source, FeatureSetStatus status) { - this.id = String.format("%s:%s", name, version); - this.name = name; - this.version = version; this.maxAgeSeconds = maxAgeSeconds; - this.entities = entities; - this.features = features; this.source = source; this.status = status.toString(); + this.entities = new HashSet<>(); + this.features = new HashSet<>(); + this.name = name; + this.project = new Project(project); + this.version = version; + this.setId(project, name, version); + addEntities(entities); + addFeatures(features); + } + + private void setId(String project, String name, int version) { + this.id = project + "/" + name + ":" + version; + } + + public void setVersion(int version) { + this.version = version; + this.setId(getProjectName(), getName(), version); + } + + public void setName(String name) { + this.name = name; + this.setId(getProjectName(), name, getVersion()); + } + + private String getProjectName() { + if (getProject() != null) { + return getProject().getName(); + } else { + return ""; + } + } + + public void setProject(Project project) { + this.project = project; + this.setId(project.getName(), getName(), getVersion()); } 
public static FeatureSet fromProto(FeatureSetProto.FeatureSet featureSetProto) { FeatureSetSpec featureSetSpec = featureSetProto.getSpec(); Source source = Source.fromProto(featureSetSpec.getSource()); - String id = String.format("%s:%d", featureSetSpec.getName(), featureSetSpec.getVersion()); + List features = new ArrayList<>(); for (FeatureSpec feature : featureSetSpec.getFeaturesList()) { - features.add(new Field(id, feature.getName(), feature.getValueType())); + features.add(new Field(feature.getName(), feature.getValueType())); } + List entities = new ArrayList<>(); for (EntitySpec entity : featureSetSpec.getEntitiesList()) { - entities.add(new Field(id, entity.getName(), entity.getValueType())); + entities.add(new Field(entity.getName(), entity.getValueType())); } return new FeatureSet( - featureSetSpec.getName(), - featureSetSpec.getVersion(), + featureSetProto.getSpec().getName(), + featureSetProto.getSpec().getProject(), + featureSetProto.getSpec().getVersion(), featureSetSpec.getMaxAge().getSeconds(), entities, features, @@ -131,6 +178,30 @@ public static FeatureSet fromProto(FeatureSetProto.FeatureSet featureSetProto) { featureSetProto.getMeta().getStatus()); } + public void addEntities(List fields) { + for (Field field : fields) { + addEntity(field); + } + } + + public void addEntity(Field field) { + field.setProject(this.project.getName()); + field.setVersion(this.getVersion()); + entities.add(field); + } + + public void addFeatures(List fields) { + for (Field field : fields) { + addFeature(field); + } + } + + public void addFeature(Field field) { + field.setProject(this.project.getName()); + field.setVersion(this.getVersion()); + features.add(field); + } + public FeatureSetProto.FeatureSet toProto() { List entitySpecs = new ArrayList<>(); for (Field entity : entities) { @@ -157,8 +228,9 @@ public FeatureSetProto.FeatureSet toProto() { FeatureSetSpec.Builder spec = FeatureSetSpec.newBuilder() - .setName(name) - .setVersion(version) + 
.setName(getName()) + .setVersion(getVersion()) + .setProject(project.getName()) .setMaxAge(Duration.newBuilder().setSeconds(maxAgeSeconds)) .addAllEntities(entitySpecs) .addAllFeatures(featureSpecs) @@ -174,7 +246,11 @@ public FeatureSetProto.FeatureSet toProto() { * @return boolean denoting if the source or schema have changed. */ public boolean equalTo(FeatureSet other) { - if (!name.equals(other.getName())) { + if (!getName().equals(other.getName())) { + return false; + } + + if (!project.getName().equals(other.project.getName())) { return false; } @@ -198,12 +274,12 @@ public boolean equalTo(FeatureSet other) { } // Ensure map size is consistent with existing fields - if (fields.size() != other.features.size() + other.entities.size()) { + if (fields.size() != other.getFeatures().size() + other.getEntities().size()) { return false; } - // Ensure the other entities and fields exist in the field map - for (Field e : other.entities) { + // Ensure the other entities and features exist in the field map + for (Field e : other.getEntities()) { if (!fields.containsKey(e.getName())) { return false; } @@ -212,7 +288,7 @@ public boolean equalTo(FeatureSet other) { } } - for (Field f : features) { + for (Field f : other.getFeatures()) { if (!fields.containsKey(f.getName())) { return false; } @@ -224,8 +300,28 @@ public boolean equalTo(FeatureSet other) { return true; } + @Override + public int hashCode() { + HashCodeBuilder hcb = new HashCodeBuilder(); + hcb.append(project.getName()); + hcb.append(getName()); + hcb.append(getVersion()); + return hcb.toHashCode(); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (!(obj instanceof FeatureSet)) { + return false; + } + return this.equalTo(((FeatureSet) obj)); + } + @Override public int compareTo(FeatureSet o) { - return Integer.compare(version, o.version); + return Integer.compare(getVersion(), o.getVersion()); } } diff --git a/core/src/main/java/feast/core/model/Field.java 
b/core/src/main/java/feast/core/model/Field.java index 3eaeb93e27a..7573fcbf5e3 100644 --- a/core/src/main/java/feast/core/model/Field.java +++ b/core/src/main/java/feast/core/model/Field.java @@ -19,31 +19,15 @@ import feast.types.ValueProto.ValueType; import java.util.Objects; import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.FetchType; -import javax.persistence.Id; -import javax.persistence.JoinColumn; -import javax.persistence.ManyToOne; -import javax.persistence.Table; +import javax.persistence.Embeddable; import lombok.Getter; import lombok.Setter; @Getter @Setter -@Entity -@Table(name = "fields") +@Embeddable public class Field { - // Id of the field, defined as featureSetId.name - @Id - @Column(name = "id", nullable = false, unique = true) - private String id; - - // FeatureSet this feature belongs to - @ManyToOne(fetch = FetchType.LAZY) - @JoinColumn(name = "feature_set_id") - private FeatureSet featureSet; - // Name of the feature @Column(name = "name", nullable = false) private String name; @@ -52,16 +36,17 @@ public class Field { @Column(name = "type", nullable = false) private String type; - public Field() { - super(); - } + // Version of the field + @Column(name = "version") + private int version; + + // Project that this field belongs to + @Column(name = "project") + private String project; + + public Field() {} - public Field(String featureSetId, String name, ValueType.Enum type) { - // TODO: Remove all mention of feature sets inside of this class! 
- FeatureSet featureSet = new FeatureSet(); - featureSet.setId(featureSetId); - this.featureSet = featureSet; - this.id = String.format("%s:%s", featureSetId, name); + public Field(String name, ValueType.Enum type) { this.name = name; this.type = type.toString(); } @@ -80,6 +65,6 @@ public boolean equals(Object o) { @Override public int hashCode() { - return Objects.hash(super.hashCode(), id, featureSet, name, type); + return Objects.hash(super.hashCode(), name, type); } } diff --git a/core/src/main/java/feast/core/model/Job.java b/core/src/main/java/feast/core/model/Job.java index 851e68367b5..bbd661309d1 100644 --- a/core/src/main/java/feast/core/model/Job.java +++ b/core/src/main/java/feast/core/model/Job.java @@ -24,7 +24,6 @@ import javax.persistence.Enumerated; import javax.persistence.Id; import javax.persistence.JoinColumn; -import javax.persistence.JoinTable; import javax.persistence.ManyToMany; import javax.persistence.ManyToOne; import javax.persistence.OneToMany; @@ -64,11 +63,7 @@ public class Job extends AbstractTimestampEntity { private Store store; // FeatureSets populated by the job - @ManyToMany - @JoinTable( - joinColumns = {@JoinColumn(name = "job_id")}, - inverseJoinColumns = {@JoinColumn(name = "feature_set_id")}) - private List featureSets; + @ManyToMany private List featureSets; // Job Metrics @OneToMany(mappedBy = "job", cascade = CascadeType.ALL) diff --git a/core/src/main/java/feast/core/model/Project.java b/core/src/main/java/feast/core/model/Project.java new file mode 100644 index 00000000000..d6e6149394b --- /dev/null +++ b/core/src/main/java/feast/core/model/Project.java @@ -0,0 +1,84 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2019 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.core.model; + +import java.util.HashSet; +import java.util.Objects; +import java.util.Set; +import javax.persistence.CascadeType; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.FetchType; +import javax.persistence.Id; +import javax.persistence.OneToMany; +import javax.persistence.Table; +import lombok.Getter; +import lombok.Setter; + +@Getter +@Setter +@Entity +@Table(name = "projects") +public class Project { + + // Name of the project + @Id + @Column(name = "name", nullable = false, unique = true) + private String name; + + // Flag to set whether the project has been archived + @Column(name = "archived", nullable = false) + private boolean archived; + + @OneToMany( + cascade = CascadeType.ALL, + fetch = FetchType.EAGER, + orphanRemoval = true, + mappedBy = "project") + private Set featureSets; + + public Project() { + super(); + } + + public Project(String name) { + this.name = name; + this.featureSets = new HashSet<>(); + } + + public void addFeatureSet(FeatureSet featureSet) { + featureSet.setProject(this); + featureSets.add(featureSet); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + Project field = (Project) o; + return name.equals(field.getName()); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), name); + } +} diff --git a/core/src/main/java/feast/core/model/Source.java b/core/src/main/java/feast/core/model/Source.java index 
7aa6378dee3..28db1e9a5ba 100644 --- a/core/src/main/java/feast/core/model/Source.java +++ b/core/src/main/java/feast/core/model/Source.java @@ -23,6 +23,7 @@ import feast.core.SourceProto.Source.Builder; import feast.core.SourceProto.SourceType; import io.grpc.Status; +import java.util.Objects; import java.util.Set; import javax.persistence.Column; import javax.persistence.Entity; @@ -199,4 +200,21 @@ private String generateId() { return ""; } } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + Source source = (Source) o; + return id.equals(source.id); + } + + @Override + public int hashCode() { + return Objects.hash(id); + } } diff --git a/core/src/main/java/feast/core/model/Store.java b/core/src/main/java/feast/core/model/Store.java index 9bfc27db1fc..9dc44bdc73a 100644 --- a/core/src/main/java/feast/core/model/Store.java +++ b/core/src/main/java/feast/core/model/Store.java @@ -118,14 +118,22 @@ public List getSubscriptions() { } private static String convertSubscriptionToString(Subscription sub) { - return String.format("%s:%s", sub.getName(), sub.getVersion()); + if (sub.getVersion().isEmpty() || sub.getName().isEmpty() || sub.getProject().isEmpty()) { + throw new IllegalArgumentException( + String.format("Missing arguments in subscription string: %s", sub.toString())); + } + return String.format("%s:%s:%s", sub.getProject(), sub.getName(), sub.getVersion()); } private Subscription convertStringToSubscription(String sub) { if (sub.equals("")) { return Subscription.newBuilder().build(); } - String[] split = sub.split(":"); - return Subscription.newBuilder().setName(split[0]).setVersion(split[1]).build(); + String[] split = sub.split(":", 3); + return Subscription.newBuilder() + .setProject(split[0]) + .setName(split[1]) + .setVersion(split[2]) + .build(); } } diff --git a/core/src/main/java/feast/core/service/AccessManagementService.java 
b/core/src/main/java/feast/core/service/AccessManagementService.java new file mode 100644 index 00000000000..6f627df33d6 --- /dev/null +++ b/core/src/main/java/feast/core/service/AccessManagementService.java @@ -0,0 +1,78 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2019 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.core.service; + +import feast.core.dao.ProjectRepository; +import feast.core.model.Project; +import java.util.List; +import java.util.Optional; +import lombok.extern.slf4j.Slf4j; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; + +@Slf4j +@Service +public class AccessManagementService { + + private ProjectRepository projectRepository; + + @Autowired + public AccessManagementService(ProjectRepository projectRepository) { + this.projectRepository = projectRepository; + } + + /** + * Creates a project + * + * @param name Name of project to be created + */ + @Transactional + public void createProject(String name) { + if (projectRepository.existsById(name)) { + throw new IllegalArgumentException(String.format("Project already exists: %s", name)); + } + Project project = new Project(name); + projectRepository.saveAndFlush(project); + } + + /** + * Archives a project + * + * @param name Name of the project to be archived + */ + @Transactional + public void 
archiveProject(String name) { + Optional project = projectRepository.findById(name); + if (!project.isPresent()) { + throw new IllegalArgumentException(String.format("Could not find project: \"%s\"", name)); + } + Project p = project.get(); + p.setArchived(true); + projectRepository.saveAndFlush(p); + } + + /** + * List all active projects + * + * @return List of active projects + */ + @Transactional + public List listProjects() { + return projectRepository.findAllByArchivedIsFalse(); + } +} diff --git a/core/src/main/java/feast/core/service/JobCoordinatorService.java b/core/src/main/java/feast/core/service/JobCoordinatorService.java index 76a1cc27dd8..23ad041b81d 100644 --- a/core/src/main/java/feast/core/service/JobCoordinatorService.java +++ b/core/src/main/java/feast/core/service/JobCoordinatorService.java @@ -16,12 +16,10 @@ */ package feast.core.service; -import com.google.protobuf.InvalidProtocolBufferException; import feast.core.CoreServiceProto.ListFeatureSetsRequest; import feast.core.CoreServiceProto.ListStoresRequest.Filter; import feast.core.CoreServiceProto.ListStoresResponse; import feast.core.FeatureSetProto; -import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.FeatureSetProto.FeatureSetStatus; import feast.core.StoreProto; import feast.core.StoreProto.Store.Subscription; @@ -94,41 +92,37 @@ public void Poll() { List jobUpdateTasks = new ArrayList<>(); ListStoresResponse listStoresResponse = specService.listStores(Filter.newBuilder().build()); for (StoreProto.Store store : listStoresResponse.getStoreList()) { - Set featureSetSpecs = new HashSet<>(); - try { - for (Subscription subscription : store.getSubscriptionsList()) { - featureSetSpecs.addAll( - specService - .listFeatureSets( - ListFeatureSetsRequest.Filter.newBuilder() - .setFeatureSetName(subscription.getName()) - .setFeatureSetVersion(subscription.getVersion()) - .build()) - .getFeatureSetsList().stream() - .map(FeatureSetProto.FeatureSet::getSpec) - 
.collect(Collectors.toList())); - } - if (!featureSetSpecs.isEmpty()) { - featureSetSpecs.stream() - .collect(Collectors.groupingBy(FeatureSetSpec::getSource)) - .entrySet() - .stream() - .forEach( - kv -> { - Optional originalJob = - getJob(Source.fromProto(kv.getKey()), Store.fromProto(store)); - jobUpdateTasks.add( - new JobUpdateTask( - kv.getValue(), - kv.getKey(), - store, - originalJob, - jobManager, - jobUpdatesProperties.getTimeoutSeconds())); - }); - } - } catch (InvalidProtocolBufferException e) { - log.warn("Unable to retrieve feature sets for store {}: {}", store, e.getMessage()); + Set featureSets = new HashSet<>(); + for (Subscription subscription : store.getSubscriptionsList()) { + featureSets.addAll( + new ArrayList<>( + specService + .listFeatureSets( + ListFeatureSetsRequest.Filter.newBuilder() + .setFeatureSetName(subscription.getName()) + .setFeatureSetVersion(subscription.getVersion()) + .setProject(subscription.getProject()) + .build()) + .getFeatureSetsList())); + } + if (!featureSets.isEmpty()) { + featureSets.stream() + .collect(Collectors.groupingBy(fs -> fs.getSpec().getSource())) + .entrySet() + .stream() + .forEach( + kv -> { + Optional originalJob = + getJob(Source.fromProto(kv.getKey()), Store.fromProto(store)); + jobUpdateTasks.add( + new JobUpdateTask( + kv.getValue(), + kv.getKey(), + store, + originalJob, + jobManager, + jobUpdatesProperties.getTimeoutSeconds())); + }); } } if (jobUpdateTasks.size() == 0) { diff --git a/core/src/main/java/feast/core/service/SpecService.java b/core/src/main/java/feast/core/service/SpecService.java index 937fc297171..2d977fcd3f5 100644 --- a/core/src/main/java/feast/core/service/SpecService.java +++ b/core/src/main/java/feast/core/service/SpecService.java @@ -17,7 +17,7 @@ package feast.core.service; import static feast.core.validators.Matchers.checkValidCharacters; -import static feast.core.validators.Matchers.checkValidFeatureSetFilterName; +import static 
feast.core.validators.Matchers.checkValidCharactersAllowAsterisk; import com.google.common.collect.Ordering; import com.google.protobuf.InvalidProtocolBufferException; @@ -33,22 +33,22 @@ import feast.core.CoreServiceProto.UpdateStoreRequest; import feast.core.CoreServiceProto.UpdateStoreResponse; import feast.core.FeatureSetProto; -import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.SourceProto; import feast.core.StoreProto; +import feast.core.StoreProto.Store.Subscription; import feast.core.dao.FeatureSetRepository; +import feast.core.dao.ProjectRepository; import feast.core.dao.StoreRepository; import feast.core.exception.RetrievalException; import feast.core.model.FeatureSet; +import feast.core.model.Project; import feast.core.model.Source; import feast.core.model.Store; import feast.core.validators.FeatureSetValidator; +import java.util.ArrayList; import java.util.List; -import java.util.function.Predicate; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.StringUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; @@ -62,44 +62,43 @@ public class SpecService { private final FeatureSetRepository featureSetRepository; + private final ProjectRepository projectRepository; private final StoreRepository storeRepository; private final Source defaultSource; - private final Pattern versionPattern = - Pattern.compile("^(?[\\>\\<\\=]{0,2})(?\\d*)$"); - @Autowired public SpecService( FeatureSetRepository featureSetRepository, StoreRepository storeRepository, + ProjectRepository projectRepository, Source defaultSource) { this.featureSetRepository = featureSetRepository; this.storeRepository = storeRepository; + this.projectRepository = projectRepository; this.defaultSource = defaultSource; } /** - * Get a feature 
set matching the feature name and version provided in the filter. The name is - * required. If the version is provided then it will be used for the lookup. If the version is - * omitted then the latest version will be returned. + * Get a feature set matching the feature name and version and project. The feature set name and + * project are required, but version can be omitted by providing 0 for its value. If the version + * is omitted, the latest feature set will be provided. * - * @param GetFeatureSetRequest containing the name and version of the feature set - * @return GetFeatureSetResponse containing a single feature set + * @param request: GetFeatureSetRequest Request containing filter parameters. + * @return Returns a GetFeatureSetResponse containing a feature set.. */ - public GetFeatureSetResponse getFeatureSet(GetFeatureSetRequest request) - throws InvalidProtocolBufferException { + public GetFeatureSetResponse getFeatureSet(GetFeatureSetRequest request) { // Validate input arguments checkValidCharacters(request.getName(), "featureSetName"); + if (request.getName().isEmpty()) { - throw io.grpc.Status.INVALID_ARGUMENT - .withDescription("No feature set name provided") - .asRuntimeException(); + throw new IllegalArgumentException("No feature set name provided"); + } + if (request.getProject().isEmpty()) { + throw new IllegalArgumentException("No project provided"); } if (request.getVersion() < 0) { - throw io.grpc.Status.INVALID_ARGUMENT - .withDescription("Version number cannot be less than 0") - .asRuntimeException(); + throw new IllegalArgumentException("Version number cannot be less than 0"); } FeatureSet featureSet; @@ -107,27 +106,23 @@ public GetFeatureSetResponse getFeatureSet(GetFeatureSetRequest request) // Filter the list based on version if (request.getVersion() == 0) { featureSet = - featureSetRepository.findFirstFeatureSetByNameOrderByVersionDesc(request.getName()); + 
featureSetRepository.findFirstFeatureSetByNameLikeAndProject_NameOrderByVersionDesc( + request.getName(), request.getProject()); if (featureSet == null) { - throw io.grpc.Status.NOT_FOUND - .withDescription( - String.format( - "Feature set with name \"%s\" could not be found.", request.getName())) - .asRuntimeException(); + throw new RetrievalException( + String.format("Feature set with name \"%s\" could not be found.", request.getName())); } } else { featureSet = - featureSetRepository.findFeatureSetByNameAndVersion( - request.getName(), request.getVersion()); + featureSetRepository.findFeatureSetByNameAndProject_NameAndVersion( + request.getName(), request.getProject(), request.getVersion()); if (featureSet == null) { - throw io.grpc.Status.NOT_FOUND - .withDescription( - String.format( - "Feature set with name \"%s\" and version \"%s\" could " + "not be found.", - request.getName(), request.getVersion())) - .asRuntimeException(); + throw new RetrievalException( + String.format( + "Feature set with name \"%s\" and version \"%s\" could " + "not be found.", + request.getName(), request.getVersion())); } } @@ -136,39 +131,108 @@ public GetFeatureSetResponse getFeatureSet(GetFeatureSetRequest request) } /** - * Get featureSets matching the feature name and version provided in the filter. If the feature - * name is not provided, the method will return all featureSets currently registered to Feast. + * Return a list of feature sets matching the feature set name, version, and project provided in + * the filter. All fields are requried. Use '*' for all three arguments in order to return all + * feature sets and versions in all projects. * - *

The feature set name in the filter accepts any valid regex string. All matching featureSets - * will be returned. + *

Project name can be explicitly provided, or an asterisk can be provided to match all + * projects. It is not possible to provide a combination of asterisks/wildcards and text. * - *

The version filter is optional; If not provided, this method will return all featureSet - * versions of the featureSet name provided. Valid version filters should optionally contain a - * comparator (<, <=, >, etc) and a version number, e.g. 10, <10, >=1 + *

The feature set name in the filter accepts an asterisk as a wildcard. All matching feature + * sets will be returned. Regex is not supported. Explicitly defining a feature set name is not + * possible if a project name is not set explicitly + * + *

The version field can be one of - '*' - This will match all versions - 'latest' - This will + * match the latest feature set version - '' - This will match a specific feature set + * version. This property can only be set if both the feature set name and project name are + * explicitly set. * * @param filter filter containing the desired featureSet name and version filter * @return ListFeatureSetsResponse with list of featureSets found matching the filter */ - public ListFeatureSetsResponse listFeatureSets(ListFeatureSetsRequest.Filter filter) - throws InvalidProtocolBufferException { + public ListFeatureSetsResponse listFeatureSets(ListFeatureSetsRequest.Filter filter) { String name = filter.getFeatureSetName(); - checkValidFeatureSetFilterName(name, "featureSetName"); - List featureSets; - if (name.equals("")) { - featureSets = featureSetRepository.findAllByOrderByNameAscVersionAsc(); + String project = filter.getProject(); + String version = filter.getFeatureSetVersion(); + + if (project.isEmpty() || name.isEmpty() || version.isEmpty()) { + throw new IllegalArgumentException( + String.format( + "Invalid listFeatureSetRequest, missing arguments. Must provide project, feature set name, and version.", + filter.toString())); + } + + checkValidCharactersAllowAsterisk(name, "featureSetName"); + checkValidCharactersAllowAsterisk(project, "projectName"); + + List featureSets = new ArrayList() {}; + + if (project.equals("*")) { + // Matching all projects + + if (name.equals("*") && version.equals("*")) { + featureSets = + featureSetRepository.findAllByNameLikeAndProject_NameLikeOrderByNameAscVersionAsc( + name.replace('*', '%'), project.replace('*', '%')); + } else { + throw new IllegalArgumentException( + String.format( + "Invalid listFeatureSetRequest. 
Version and feature set name must be set to " + + "\"*\" if the project name and feature set name aren't set explicitly: \n%s", + filter.toString())); + } + } else if (!project.contains("*")) { + // Matching a specific project + + if (name.contains("*") && version.equals("*")) { + // Find all feature sets matching a pattern and versions in a specific project + featureSets = + featureSetRepository.findAllByNameLikeAndProject_NameOrderByNameAscVersionAsc( + name.replace('*', '%'), project); + + } else if (!name.contains("*") && version.equals("*")) { + // Find all versions of a specific feature set in a specific project + featureSets = + featureSetRepository.findAllByNameLikeAndProject_NameOrderByNameAscVersionAsc( + name, project); + + } else if (version.equals("latest")) { + // Find the latest version of a feature set matching a specific pattern in a specific + // project + FeatureSet latestFeatureSet = + featureSetRepository.findFirstFeatureSetByNameLikeAndProject_NameOrderByVersionDesc( + name.replace('*', '%'), project); + featureSets.add(latestFeatureSet); + + } else if (!name.contains("*") && StringUtils.isNumeric(version)) { + // Find a specific version of a feature set matching a specific name in a specific project + FeatureSet specificFeatureSet = + featureSetRepository.findFeatureSetByNameAndProject_NameAndVersion( + name, project, Integer.parseInt(version)); + featureSets.add(specificFeatureSet); + + } else { + throw new IllegalArgumentException( + String.format( + "Invalid listFeatureSetRequest. 
Version must be set to \"*\" if the project " + + "name and feature set name aren't set explicitly: \n%s", + filter.toString())); + } } else { - featureSets = - featureSetRepository.findByNameWithWildcardOrderByNameAscVersionAsc( - name.replace('*', '%')); - featureSets = - featureSets.stream() - .filter(getVersionFilter(filter.getFeatureSetVersion())) - .collect(Collectors.toList()); + throw new IllegalArgumentException( + String.format( + "Invalid listFeatureSetRequest. Project name cannot be a pattern. It may only be" + + "a specific project name or an asterisk: \n%s", + filter.toString())); } + ListFeatureSetsResponse.Builder response = ListFeatureSetsResponse.newBuilder(); - for (FeatureSet featureSet : featureSets) { - response.addFeatureSets(featureSet.toProto()); + if (featureSets.size() > 0) { + for (FeatureSet featureSet : featureSets) { + response.addFeatureSets(featureSet.toProto()); + } } + return response.build(); } @@ -207,26 +271,51 @@ public ListStoresResponse listStores(ListStoresRequest.Filter filter) { } /** - * Adds the featureSet to the repository, and prepares the sink for the feature creator to write - * to. If there is a change in the featureSet's schema or source, the featureSet version will be - * incremented. + * Creates or updates a feature set in the repository. If there is a change in the feature set + * schema, then the feature set version will be incremented. * *

This function is idempotent. If no changes are detected in the incoming featureSet's schema, * this method will update the incoming featureSet spec with the latest version stored in the * repository, and return that. * - * @param newFeatureSet featureSet to add. + * @param newFeatureSet Feature set that will be created or updated. */ - public ApplyFeatureSetResponse applyFeatureSet(FeatureSetProto.FeatureSet newFeatureSet) - throws InvalidProtocolBufferException { - FeatureSetSpec newFeatureSetSpec = newFeatureSet.getSpec(); - FeatureSetValidator.validateSpec(newFeatureSetSpec); + public ApplyFeatureSetResponse applyFeatureSet(FeatureSetProto.FeatureSet newFeatureSet) { + + // Validate incoming feature set + FeatureSetValidator.validateSpec(newFeatureSet); + + // Ensure that the project already exists + String project_name = newFeatureSet.getSpec().getProject(); + Project project = + projectRepository + .findById(newFeatureSet.getSpec().getProject()) + .orElseThrow( + () -> + new IllegalArgumentException( + String.format( + "Project name does not exist. 
Please create a project first: %s", + project_name))); + + // Ensure that the project is not archived + if (project.isArchived()) { + throw new IllegalArgumentException(String.format("Project is archived: %s", project_name)); + } + + // Retrieve all existing FeatureSet objects List existingFeatureSets = - featureSetRepository.findByName(newFeatureSetSpec.getName()); + featureSetRepository.findAllByNameLikeAndProject_NameOrderByNameAscVersionAsc( + newFeatureSet.getSpec().getName(), project_name); if (existingFeatureSets.size() == 0) { - newFeatureSetSpec = newFeatureSetSpec.toBuilder().setVersion(1).build(); + // Create new feature set since it doesn't exist + newFeatureSet = + newFeatureSet + .toBuilder() + .setSpec(newFeatureSet.getSpec().toBuilder().setVersion(1)) + .build(); } else { + // Retrieve the latest feature set if the name does exist existingFeatureSets = Ordering.natural().reverse().sortedCopy(existingFeatureSets); FeatureSet latest = existingFeatureSets.get(0); FeatureSet featureSet = FeatureSet.fromProto(newFeatureSet); @@ -238,15 +327,25 @@ public ApplyFeatureSetResponse applyFeatureSet(FeatureSetProto.FeatureSet newFea .setStatus(Status.NO_CHANGE) .build(); } - newFeatureSetSpec = newFeatureSetSpec.toBuilder().setVersion(latest.getVersion() + 1).build(); + // TODO: There is a race condition here with incrementing the version + newFeatureSet = + newFeatureSet + .toBuilder() + .setSpec(newFeatureSet.getSpec().toBuilder().setVersion(latest.getVersion() + 1)) + .build(); } - newFeatureSet = newFeatureSet.toBuilder().setSpec(newFeatureSetSpec).build(); + + // Build a new FeatureSet object which includes the new properties FeatureSet featureSet = FeatureSet.fromProto(newFeatureSet); - if (newFeatureSetSpec.getSource() == SourceProto.Source.getDefaultInstance()) { + if (newFeatureSet.getSpec().getSource() == SourceProto.Source.getDefaultInstance()) { featureSet.setSource(defaultSource); } - featureSetRepository.saveAndFlush(featureSet); + // Persist the 
FeatureSet object + project.addFeatureSet(featureSet); + projectRepository.saveAndFlush(project); + + // Build ApplyFeatureSetResponse return ApplyFeatureSetResponse.newBuilder() .setFeatureSet(featureSet.toProto()) .setStatus(Status.CREATED) @@ -258,12 +357,20 @@ public ApplyFeatureSetResponse applyFeatureSet(FeatureSetProto.FeatureSet newFea * * @param updateStoreRequest containing the new store definition * @return UpdateStoreResponse containing the new store definition - * @throws InvalidProtocolBufferException */ @Transactional public UpdateStoreResponse updateStore(UpdateStoreRequest updateStoreRequest) throws InvalidProtocolBufferException { StoreProto.Store newStoreProto = updateStoreRequest.getStore(); + + List subs = newStoreProto.getSubscriptionsList(); + for (Subscription sub : subs) { + // Ensure that all fields in a subscription contain values + if ((sub.getVersion().isEmpty() || sub.getName().isEmpty()) || sub.getProject().isEmpty()) { + throw new IllegalArgumentException( + String.format("Missing parameter in subscription: %s", sub)); + } + } Store existingStore = storeRepository.findById(newStoreProto.getName()).orElse(null); // Do nothing if no change @@ -281,45 +388,4 @@ public UpdateStoreResponse updateStore(UpdateStoreRequest updateStoreRequest) .setStore(updateStoreRequest.getStore()) .build(); } - - private Predicate getVersionFilter(String versionFilter) { - if (versionFilter.equals("")) { - return v -> true; - } - Matcher match = versionPattern.matcher(versionFilter); - match.find(); - - if (!match.matches()) { - throw io.grpc.Status.INVALID_ARGUMENT - .withDescription( - String.format( - "Invalid version string '%s' provided. Version string may either " - + "be a fixed version, e.g. 10, or contain a comparator, e.g. 
>10.", - versionFilter)) - .asRuntimeException(); - } - - int versionNumber = Integer.valueOf(match.group("version")); - String comparator = match.group("comparator"); - switch (comparator) { - case "<": - return v -> v.getVersion() < versionNumber; - case ">": - return v -> v.getVersion() > versionNumber; - case "<=": - return v -> v.getVersion() <= versionNumber; - case ">=": - return v -> v.getVersion() >= versionNumber; - case "": - return v -> v.getVersion() == versionNumber; - default: - throw io.grpc.Status.INVALID_ARGUMENT - .withDescription( - String.format( - "Invalid comparator '%s' provided. Version string may either " - + "be a fixed version, e.g. 10, or contain a comparator, e.g. >10.", - comparator)) - .asRuntimeException(); - } - } } diff --git a/core/src/main/java/feast/core/validators/FeatureSetValidator.java b/core/src/main/java/feast/core/validators/FeatureSetValidator.java index e14fde72cb7..213e3898d51 100644 --- a/core/src/main/java/feast/core/validators/FeatureSetValidator.java +++ b/core/src/main/java/feast/core/validators/FeatureSetValidator.java @@ -20,20 +20,29 @@ import com.google.common.collect.Sets; import feast.core.FeatureSetProto.EntitySpec; -import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.FeatureSetProto.FeatureSet; import feast.core.FeatureSetProto.FeatureSpec; import java.util.HashSet; import java.util.List; import java.util.stream.Collectors; public class FeatureSetValidator { - public static void validateSpec(FeatureSetSpec featureSetSpec) { - checkValidCharacters(featureSetSpec.getName(), "name"); - checkUniqueColumns(featureSetSpec.getEntitiesList(), featureSetSpec.getFeaturesList()); - for (EntitySpec entitySpec : featureSetSpec.getEntitiesList()) { + public static void validateSpec(FeatureSet featureSet) { + if (featureSet.getSpec().getProject().isEmpty()) { + throw new IllegalArgumentException("Project name must be provided"); + } + if (featureSet.getSpec().getName().isEmpty()) { + throw new 
IllegalArgumentException("Feature set name must be provided"); + } + + checkValidCharacters(featureSet.getSpec().getProject(), "project"); + checkValidCharacters(featureSet.getSpec().getName(), "name"); + checkUniqueColumns( + featureSet.getSpec().getEntitiesList(), featureSet.getSpec().getFeaturesList()); + for (EntitySpec entitySpec : featureSet.getSpec().getEntitiesList()) { checkValidCharacters(entitySpec.getName(), "entities::name"); } - for (FeatureSpec featureSpec : featureSetSpec.getFeaturesList()) { + for (FeatureSpec featureSpec : featureSet.getSpec().getFeaturesList()) { checkValidCharacters(featureSpec.getName(), "features::name"); } } diff --git a/core/src/main/java/feast/core/validators/Matchers.java b/core/src/main/java/feast/core/validators/Matchers.java index 03bafafdbfe..87e2b126f02 100644 --- a/core/src/main/java/feast/core/validators/Matchers.java +++ b/core/src/main/java/feast/core/validators/Matchers.java @@ -23,7 +23,7 @@ public class Matchers { private static Pattern UPPER_SNAKE_CASE_REGEX = Pattern.compile("^[A-Z0-9]+(_[A-Z0-9]+)*$"); private static Pattern LOWER_SNAKE_CASE_REGEX = Pattern.compile("^[a-z0-9]+(_[a-z0-9]+)*$"); private static Pattern VALID_CHARACTERS_REGEX = Pattern.compile("^[a-zA-Z0-9\\-_]*$"); - private static Pattern VALID_CHARACTERS_FSET_FILTER_REGEX = + private static Pattern VALID_CHARACTERS_REGEX_WITH_ASTERISK_WILDCARD = Pattern.compile("^[a-zA-Z0-9\\-_*]*$"); private static String ERROR_MESSAGE_TEMPLATE = "invalid value for field %s: %s"; @@ -61,9 +61,9 @@ public static void checkValidCharacters(String input, String fieldName) } } - public static void checkValidFeatureSetFilterName(String input, String fieldName) + public static void checkValidCharactersAllowAsterisk(String input, String fieldName) throws IllegalArgumentException { - if (!VALID_CHARACTERS_FSET_FILTER_REGEX.matcher(input).matches()) { + if (!VALID_CHARACTERS_REGEX_WITH_ASTERISK_WILDCARD.matcher(input).matches()) { throw new IllegalArgumentException( 
String.format( ERROR_MESSAGE_TEMPLATE, diff --git a/core/src/main/resources/application.yml b/core/src/main/resources/application.yml index 7c0d90e5f47..dc78719f22e 100644 --- a/core/src/main/resources/application.yml +++ b/core/src/main/resources/application.yml @@ -55,7 +55,11 @@ feast: spring: jpa: - properties.hibernate.format_sql: true + properties.hibernate: + format_sql: true + event: + merge: + entity_copy_observer: allow hibernate.naming.physical-strategy=org.hibernate.boot.model.naming: PhysicalNamingStrategyStandardImpl hibernate.ddl-auto: update datasource: diff --git a/core/src/test/java/feast/core/CoreApplicationTest.java b/core/src/test/java/feast/core/CoreApplicationTest.java index 59c4dfdaaa9..7a35fc4369a 100644 --- a/core/src/test/java/feast/core/CoreApplicationTest.java +++ b/core/src/test/java/feast/core/CoreApplicationTest.java @@ -15,133 +15,5 @@ * limitations under the License. */ package feast.core; -// -// import static feast.core.config.StorageConfig.DEFAULT_SERVING_ID; -// import static feast.core.config.StorageConfig.DEFAULT_WAREHOUSE_ID; -// import static org.junit.Assert.assertEquals; -// import static org.mockito.ArgumentMatchers.any; -// import static org.mockito.Mockito.when; -// -// import feast.core.config.ImportJobDefaults; -// import feast.core.job.JobManager; -// import feast.core.model.StorageInfo; -// import feast.core.service.SpecService; -// import feast.core.stream.FeatureStream; -// import feast.specs.EntitySpecProto.EntitySpec; -// import feast.specs.FeatureSpecProto.FeatureSpec; -// import feast.specs.StorageSpecProto.StorageSpec; -// import feast.types.ValueProto.ValueType; -// import io.grpc.ManagedChannel; -// import io.grpc.ManagedChannelBuilder; -// import java.io.IOException; -// import java.nio.file.Files; -// import java.nio.file.Paths; -// import java.util.Collections; -// import java.util.HashMap; -// import java.util.List; -// import java.util.Map; -// import org.junit.Test; -// import 
org.junit.runner.RunWith; -// import org.mockito.ArgumentMatchers; -// import org.mockito.Mockito; -// import org.mockito.stubbing.Answer; -// import org.springframework.beans.factory.annotation.Autowired; -// import org.springframework.boot.test.context.SpringBootTest; -// import org.springframework.boot.test.context.TestConfiguration; -// import org.springframework.context.annotation.Bean; -// import org.springframework.test.annotation.DirtiesContext; -// import org.springframework.test.context.junit4.SpringRunner; -// -/// ** -// * Starts the application context with some properties -// */ -// @RunWith(SpringRunner.class) -// @SpringBootTest(properties = { -// "feast.jobs.workspace=${java.io.tmpdir}/${random.uuid}", -// "spring.datasource.url=jdbc:h2:mem:testdb", -// "feast.store.warehouse.type=FILE.JSON", -// "feast.store.warehouse.options={\"path\":\"/tmp/foobar\"}", -// "feast.store.serving.type=REDIS", -// "feast.store.serving.options={\"host\":\"localhost\",\"port\":1234}", -// "feast.store.errors.type=STDERR", -// "feast.stream.type=kafka", -// "feast.stream.options={\"bootstrapServers\":\"localhost:8081\"}" -// }) -// @DirtiesContext -public class CoreApplicationTest { - // - // @Autowired - // SpecService specService; - // @Autowired - // ImportJobDefaults jobDefaults; - // @Autowired - // JobManager jobManager; - // @Autowired - // FeatureStream featureStream; - // - // @Test - // public void test_withProperties_systemServingAndWarehouseStoresRegistered() throws IOException - // { - // Files.createDirectory(Paths.get(jobDefaults.getWorkspace())); - // - // List warehouseStorageInfo = specService - // .getStorage(Collections.singletonList(DEFAULT_WAREHOUSE_ID)); - // assertEquals(warehouseStorageInfo.size(), 1); - // assertEquals(warehouseStorageInfo.get(0).getStorageSpec(), StorageSpec.newBuilder() - // .setId(DEFAULT_WAREHOUSE_ID).setType("FILE.JSON").putOptions("path", "/tmp/foobar") - // .build()); - // - // List servingStorageInfo = specService - // 
.getStorage(Collections.singletonList(DEFAULT_SERVING_ID)); - // assertEquals(servingStorageInfo.size(), 1); - // assertEquals(servingStorageInfo.get(0).getStorageSpec(), StorageSpec.newBuilder() - // .setId(DEFAULT_SERVING_ID).setType("REDIS") - // .putOptions("host", "localhost") - // .putOptions("port", "1234") - // .build()); - // - // ManagedChannelBuilder channelBuilder = ManagedChannelBuilder.forAddress("localhost", - // 6565); - // ManagedChannel channel = channelBuilder.usePlaintext(true).build(); - // CoreServiceGrpc.CoreServiceBlockingStub coreService = - // CoreServiceGrpc.newBlockingStub(channel); - // - // EntitySpec entitySpec = EntitySpec.newBuilder().setName("test").build(); - // FeatureSpec featureSpec = FeatureSpec.newBuilder() - // .setId("test.int64") - // .setName("int64") - // .setEntity("test") - // .setValueType(ValueType.Enum.INT64) - // .setOwner("hermione@example.com") - // .setDescription("Test is a test") - // .setUri("http://example.com/test.int64").build(); - // - // when(featureStream.generateTopicName(ArgumentMatchers.anyString())).thenReturn("my-topic"); - // when(featureStream.getType()).thenReturn("kafka"); - // - // coreService.applyEntity(entitySpec); - // - // Map args = new HashMap<>(); - // when(jobManager.startJob(any(), any())).thenAnswer((Answer) invocation -> { - // args.put(0, invocation.getArgument(0)); - // args.put(1, invocation.getArgument(1)); - // return "externalJobId1234"; - // }); - // - // coreService.applyFeature(featureSpec); - // } - // - // @TestConfiguration - // public static class MockProvider { - // - // @Bean - // public JobManager jobManager() { - // return Mockito.mock(JobManager.class); - // } - // - // @Bean - // public FeatureStream featureStream() { - // return Mockito.mock(FeatureStream.class); - // } - // } -} + +public class CoreApplicationTest {} diff --git a/core/src/test/java/feast/core/job/JobUpdateTaskTest.java b/core/src/test/java/feast/core/job/JobUpdateTaskTest.java index 
a1b4cdbab2d..19ce0858b20 100644 --- a/core/src/test/java/feast/core/job/JobUpdateTaskTest.java +++ b/core/src/test/java/feast/core/job/JobUpdateTaskTest.java @@ -25,6 +25,7 @@ import static org.mockito.MockitoAnnotations.initMocks; import feast.core.FeatureSetProto; +import feast.core.FeatureSetProto.FeatureSetMeta; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.SourceProto; import feast.core.SourceProto.KafkaSourceConfig; @@ -60,7 +61,8 @@ public void setUp() { .setName("test") .setType(StoreType.REDIS) .setRedisConfig(RedisConfig.newBuilder().build()) - .addSubscriptions(Subscription.newBuilder().setName("*").setVersion(">0").build()) + .addSubscriptions( + Subscription.newBuilder().setProject("*").setName("*").setVersion("*").build()) .build(); source = @@ -79,12 +81,22 @@ public void shouldUpdateJobIfPresent() { FeatureSetProto.FeatureSet featureSet1 = FeatureSetProto.FeatureSet.newBuilder() .setSpec( - FeatureSetSpec.newBuilder().setName("featureSet1").setVersion(1).setSource(source)) + FeatureSetSpec.newBuilder() + .setSource(source) + .setProject("project1") + .setName("featureSet1") + .setVersion(1)) + .setMeta(FeatureSetMeta.newBuilder()) .build(); FeatureSetProto.FeatureSet featureSet2 = FeatureSetProto.FeatureSet.newBuilder() .setSpec( - FeatureSetSpec.newBuilder().setName("featureSet2").setVersion(1).setSource(source)) + FeatureSetSpec.newBuilder() + .setSource(source) + .setProject("project1") + .setName("featureSet2") + .setVersion(1)) + .setMeta(FeatureSetMeta.newBuilder()) .build(); Job originalJob = new Job( @@ -97,7 +109,7 @@ public void shouldUpdateJobIfPresent() { JobStatus.RUNNING); JobUpdateTask jobUpdateTask = new JobUpdateTask( - Arrays.asList(featureSet1.getSpec(), featureSet2.getSpec()), + Arrays.asList(featureSet1, featureSet2), source, store, Optional.of(originalJob), @@ -134,17 +146,17 @@ public void shouldCreateJobIfNotPresent() { FeatureSetProto.FeatureSet featureSet1 = FeatureSetProto.FeatureSet.newBuilder() 
.setSpec( - FeatureSetSpec.newBuilder().setName("featureSet1").setVersion(1).setSource(source)) + FeatureSetSpec.newBuilder() + .setSource(source) + .setProject("project1") + .setName("featureSet1") + .setVersion(1)) + .setMeta(FeatureSetMeta.newBuilder()) .build(); JobUpdateTask jobUpdateTask = spy( new JobUpdateTask( - Arrays.asList(featureSet1.getSpec()), - source, - store, - Optional.empty(), - jobManager, - 100L)); + Arrays.asList(featureSet1), source, store, Optional.empty(), jobManager, 100L)); doReturn("job").when(jobUpdateTask).createJobId("KAFKA/servers:9092/topic", "test"); Job expectedInput = @@ -179,7 +191,12 @@ public void shouldUpdateJobStatusIfNotCreateOrUpdate() { FeatureSetProto.FeatureSet featureSet1 = FeatureSetProto.FeatureSet.newBuilder() .setSpec( - FeatureSetSpec.newBuilder().setName("featureSet1").setVersion(1).setSource(source)) + FeatureSetSpec.newBuilder() + .setSource(source) + .setProject("project1") + .setName("featureSet1") + .setVersion(1)) + .setMeta(FeatureSetMeta.newBuilder()) .build(); Job originalJob = new Job( @@ -192,12 +209,7 @@ public void shouldUpdateJobStatusIfNotCreateOrUpdate() { JobStatus.RUNNING); JobUpdateTask jobUpdateTask = new JobUpdateTask( - Arrays.asList(featureSet1.getSpec()), - source, - store, - Optional.of(originalJob), - jobManager, - 100L); + Arrays.asList(featureSet1), source, store, Optional.of(originalJob), jobManager, 100L); when(jobManager.getJobStatus(originalJob)).thenReturn(JobStatus.ABORTING); Job expected = @@ -219,17 +231,17 @@ public void shouldReturnJobWithErrorStatusIfFailedToSubmit() { FeatureSetProto.FeatureSet featureSet1 = FeatureSetProto.FeatureSet.newBuilder() .setSpec( - FeatureSetSpec.newBuilder().setName("featureSet1").setVersion(1).setSource(source)) + FeatureSetSpec.newBuilder() + .setSource(source) + .setProject("project1") + .setName("featureSet1") + .setVersion(1)) + .setMeta(FeatureSetMeta.newBuilder()) .build(); JobUpdateTask jobUpdateTask = spy( new JobUpdateTask( - 
Arrays.asList(featureSet1.getSpec()), - source, - store, - Optional.empty(), - jobManager, - 100L)); + Arrays.asList(featureSet1), source, store, Optional.empty(), jobManager, 100L)); doReturn("job").when(jobUpdateTask).createJobId("KAFKA/servers:9092/topic", "test"); Job expectedInput = @@ -262,8 +274,17 @@ public void shouldReturnJobWithErrorStatusIfFailedToSubmit() { @Test public void shouldTimeout() { - FeatureSetSpec featureSet1 = - FeatureSetSpec.newBuilder().setName("featureSet1").setVersion(1).setSource(source).build(); + FeatureSetProto.FeatureSet featureSet1 = + FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder() + .setSource(source) + .setProject("project1") + .setName("featureSet1") + .setVersion(1)) + .setMeta(FeatureSetMeta.newBuilder()) + .build(); + JobUpdateTask jobUpdateTask = spy( new JobUpdateTask( diff --git a/core/src/test/java/feast/core/job/dataflow/DataflowJobManagerTest.java b/core/src/test/java/feast/core/job/dataflow/DataflowJobManagerTest.java index 5f72f0dd7a6..c263515ed08 100644 --- a/core/src/test/java/feast/core/job/dataflow/DataflowJobManagerTest.java +++ b/core/src/test/java/feast/core/job/dataflow/DataflowJobManagerTest.java @@ -32,6 +32,7 @@ import com.google.protobuf.util.JsonFormat; import com.google.protobuf.util.JsonFormat.Printer; import feast.core.FeatureSetProto; +import feast.core.FeatureSetProto.FeatureSetMeta; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.SourceProto; import feast.core.SourceProto.KafkaSourceConfig; @@ -92,7 +93,8 @@ public void shouldStartJobWithCorrectPipelineOptions() throws IOException { .setName("SERVING") .setType(StoreType.REDIS) .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379).build()) - .addSubscriptions(Subscription.newBuilder().setName("*").setVersion(">0").build()) + .addSubscriptions( + Subscription.newBuilder().setProject("*").setName("*").setVersion("*").build()) .build(); SourceProto.Source source = @@ 
-105,12 +107,15 @@ public void shouldStartJobWithCorrectPipelineOptions() throws IOException { .build()) .build(); - FeatureSetSpec featureSetSpec = - FeatureSetSpec.newBuilder() - .setName("featureSet") - .setVersion(1) - .setSource(source) - .setMaxAge(Duration.newBuilder().build()) + FeatureSetProto.FeatureSet featureSet = + FeatureSetProto.FeatureSet.newBuilder() + .setMeta(FeatureSetMeta.newBuilder()) + .setSpec( + FeatureSetSpec.newBuilder() + .setSource(source) + .setName("featureSet") + .setVersion(1) + .setMaxAge(Duration.newBuilder().build())) .build(); Printer printer = JsonFormat.printer(); @@ -126,8 +131,8 @@ public void shouldStartJobWithCorrectPipelineOptions() throws IOException { expectedPipelineOptions.setAppName("DataflowJobManager"); expectedPipelineOptions.setJobName(jobName); expectedPipelineOptions.setStoreJson(Lists.newArrayList(printer.print(store))); - expectedPipelineOptions.setFeatureSetSpecJson( - Lists.newArrayList(printer.print(featureSetSpec))); + expectedPipelineOptions.setFeatureSetJson( + Lists.newArrayList(printer.print(featureSet.getSpec()))); ArgumentCaptor captor = ArgumentCaptor.forClass(ImportOptions.class); @@ -143,9 +148,7 @@ public void shouldStartJobWithCorrectPipelineOptions() throws IOException { Runner.DATAFLOW.getName(), Source.fromProto(source), Store.fromProto(store), - Lists.newArrayList( - FeatureSet.fromProto( - FeatureSetProto.FeatureSet.newBuilder().setSpec(featureSetSpec).build())), + Lists.newArrayList(FeatureSet.fromProto(featureSet)), JobStatus.PENDING); Job actual = dfJobManager.startJob(job); @@ -190,8 +193,15 @@ public void shouldThrowExceptionWhenJobStateTerminal() throws IOException { .build()) .build(); - FeatureSetSpec featureSetSpec = - FeatureSetSpec.newBuilder().setName("featureSet").setVersion(1).setSource(source).build(); + FeatureSetProto.FeatureSet featureSet = + FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder() + .setName("featureSet") + .setVersion(1) + 
.setSource(source) + .build()) + .build(); dfJobManager = Mockito.spy(dfJobManager); @@ -207,9 +217,7 @@ public void shouldThrowExceptionWhenJobStateTerminal() throws IOException { Runner.DATAFLOW.getName(), Source.fromProto(source), Store.fromProto(store), - Lists.newArrayList( - FeatureSet.fromProto( - FeatureSetProto.FeatureSet.newBuilder().setSpec(featureSetSpec).build())), + Lists.newArrayList(FeatureSet.fromProto(featureSet)), JobStatus.PENDING); expectedException.expect(JobExecutionException.class); diff --git a/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java b/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java index 73cbd9030f2..2dd87cfc6e3 100644 --- a/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java +++ b/core/src/test/java/feast/core/job/direct/DirectRunnerJobManagerTest.java @@ -86,7 +86,8 @@ public void shouldStartDirectJobAndRegisterPipelineResult() throws IOException { .setName("SERVING") .setType(StoreType.REDIS) .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379).build()) - .addSubscriptions(Subscription.newBuilder().setName("*").setVersion(">0").build()) + .addSubscriptions( + Subscription.newBuilder().setProject("*").setName("*").setVersion("*").build()) .build(); SourceProto.Source source = @@ -99,12 +100,15 @@ public void shouldStartDirectJobAndRegisterPipelineResult() throws IOException { .build()) .build(); - FeatureSetSpec featureSetSpec = - FeatureSetSpec.newBuilder() - .setName("featureSet") - .setVersion(1) - .setMaxAge(Duration.newBuilder()) - .setSource(source) + FeatureSetProto.FeatureSet featureSet = + FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder() + .setName("featureSet") + .setVersion(1) + .setMaxAge(Duration.newBuilder()) + .setSource(source) + .build()) .build(); Printer printer = JsonFormat.printer(); @@ -117,8 +121,8 @@ public void shouldStartDirectJobAndRegisterPipelineResult() throws IOException { 
expectedPipelineOptions.setProject(""); expectedPipelineOptions.setStoreJson(Lists.newArrayList(printer.print(store))); expectedPipelineOptions.setProject(""); - expectedPipelineOptions.setFeatureSetSpecJson( - Lists.newArrayList(printer.print(featureSetSpec))); + expectedPipelineOptions.setFeatureSetJson( + Lists.newArrayList(printer.print(featureSet.getSpec()))); String expectedJobId = "feast-job-0"; ArgumentCaptor pipelineOptionsCaptor = @@ -135,9 +139,7 @@ public void shouldStartDirectJobAndRegisterPipelineResult() throws IOException { Runner.DIRECT.getName(), Source.fromProto(source), Store.fromProto(store), - Lists.newArrayList( - FeatureSet.fromProto( - FeatureSetProto.FeatureSet.newBuilder().setSpec(featureSetSpec).build())), + Lists.newArrayList(FeatureSet.fromProto(featureSet)), JobStatus.PENDING); Job actual = drJobManager.startJob(job); verify(drJobManager, times(1)).runPipeline(pipelineOptionsCaptor.capture()); diff --git a/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java b/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java index 5b892d30aa7..775cb028b02 100644 --- a/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java +++ b/core/src/test/java/feast/core/service/JobCoordinatorServiceTest.java @@ -30,6 +30,7 @@ import feast.core.CoreServiceProto.ListFeatureSetsResponse; import feast.core.CoreServiceProto.ListStoresResponse; import feast.core.FeatureSetProto; +import feast.core.FeatureSetProto.FeatureSetMeta; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.SourceProto.KafkaSourceConfig; import feast.core.SourceProto.Source; @@ -59,8 +60,7 @@ public class JobCoordinatorServiceTest { @Rule public final ExpectedException exception = ExpectedException.none(); - @Mock - JobRepository jobRepository; + @Mock JobRepository jobRepository; @Mock JobManager jobManager; @Mock SpecService specService; @Mock FeatureSetRepository featureSetRepository; @@ -91,12 +91,17 @@ public void 
shouldDoNothingIfNoMatchingFeatureSetsFound() throws InvalidProtocol .setName("test") .setType(StoreType.REDIS) .setRedisConfig(RedisConfig.newBuilder().build()) - .addSubscriptions(Subscription.newBuilder().setName("*").setVersion(">0").build()) + .addSubscriptions( + Subscription.newBuilder().setProject("*").setName("*").setVersion("*").build()) .build(); when(specService.listStores(any())) .thenReturn(ListStoresResponse.newBuilder().addStore(store).build()); when(specService.listFeatureSets( - Filter.newBuilder().setFeatureSetName("*").setFeatureSetVersion(">0").build())) + Filter.newBuilder() + .setProject("*") + .setFeatureSetName("*") + .setFeatureSetVersion("*") + .build())) .thenReturn(ListFeatureSetsResponse.newBuilder().build()); JobCoordinatorService jcs = new JobCoordinatorService( @@ -113,7 +118,11 @@ public void shouldGenerateAndSubmitJobsIfAny() throws InvalidProtocolBufferExcep .setType(StoreType.REDIS) .setRedisConfig(RedisConfig.newBuilder().build()) .addSubscriptions( - Subscription.newBuilder().setName("features").setVersion(">0").build()) + Subscription.newBuilder() + .setProject("project1") + .setName("features") + .setVersion("*") + .build()) .build(); Source source = Source.newBuilder() @@ -128,12 +137,22 @@ public void shouldGenerateAndSubmitJobsIfAny() throws InvalidProtocolBufferExcep FeatureSetProto.FeatureSet featureSet1 = FeatureSetProto.FeatureSet.newBuilder() .setSpec( - FeatureSetSpec.newBuilder().setName("features").setVersion(1).setSource(source)) + FeatureSetSpec.newBuilder() + .setSource(source) + .setProject("project1") + .setName("features") + .setVersion(1)) + .setMeta(FeatureSetMeta.newBuilder()) .build(); FeatureSetProto.FeatureSet featureSet2 = FeatureSetProto.FeatureSet.newBuilder() .setSpec( - FeatureSetSpec.newBuilder().setName("features").setVersion(2).setSource(source)) + FeatureSetSpec.newBuilder() + .setSource(source) + .setProject("project1") + .setName("features") + .setVersion(2)) + 
.setMeta(FeatureSetMeta.newBuilder()) .build(); String extId = "ext"; ArgumentCaptor jobArgCaptor = ArgumentCaptor.forClass(Job.class); @@ -159,7 +178,11 @@ public void shouldGenerateAndSubmitJobsIfAny() throws InvalidProtocolBufferExcep JobStatus.RUNNING); when(specService.listFeatureSets( - Filter.newBuilder().setFeatureSetName("features").setFeatureSetVersion(">0").build())) + Filter.newBuilder() + .setProject("project1") + .setFeatureSetName("features") + .setFeatureSetVersion("*") + .build())) .thenReturn( ListFeatureSetsResponse.newBuilder() .addFeatureSets(featureSet1) @@ -188,7 +211,11 @@ public void shouldGroupJobsBySource() throws InvalidProtocolBufferException { .setType(StoreType.REDIS) .setRedisConfig(RedisConfig.newBuilder().build()) .addSubscriptions( - Subscription.newBuilder().setName("features").setVersion(">0").build()) + Subscription.newBuilder() + .setProject("project1") + .setName("features") + .setVersion("*") + .build()) .build(); Source source1 = Source.newBuilder() @@ -212,12 +239,22 @@ public void shouldGroupJobsBySource() throws InvalidProtocolBufferException { FeatureSetProto.FeatureSet featureSet1 = FeatureSetProto.FeatureSet.newBuilder() .setSpec( - FeatureSetSpec.newBuilder().setName("features").setVersion(1).setSource(source1)) + FeatureSetSpec.newBuilder() + .setSource(source1) + .setProject("project1") + .setName("features") + .setVersion(1)) + .setMeta(FeatureSetMeta.newBuilder()) .build(); FeatureSetProto.FeatureSet featureSet2 = FeatureSetProto.FeatureSet.newBuilder() .setSpec( - FeatureSetSpec.newBuilder().setName("features").setVersion(2).setSource(source2)) + FeatureSetSpec.newBuilder() + .setSource(source2) + .setProject("project1") + .setName("features") + .setVersion(2)) + .setMeta(FeatureSetMeta.newBuilder()) .build(); Job expectedInput1 = @@ -262,7 +299,11 @@ public void shouldGroupJobsBySource() throws InvalidProtocolBufferException { ArgumentCaptor jobArgCaptor = ArgumentCaptor.forClass(Job.class); 
when(specService.listFeatureSets( - Filter.newBuilder().setFeatureSetName("features").setFeatureSetVersion(">0").build())) + Filter.newBuilder() + .setProject("project1") + .setFeatureSetName("features") + .setFeatureSetVersion("*") + .build())) .thenReturn( ListFeatureSetsResponse.newBuilder() .addFeatureSets(featureSet1) diff --git a/core/src/test/java/feast/core/service/SpecServiceTest.java b/core/src/test/java/feast/core/service/SpecServiceTest.java index dbf1290fb61..edd99aa4940 100644 --- a/core/src/test/java/feast/core/service/SpecServiceTest.java +++ b/core/src/test/java/feast/core/service/SpecServiceTest.java @@ -18,6 +18,7 @@ import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.core.IsEqual.equalTo; +import static org.junit.Assert.assertEquals; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -46,18 +47,20 @@ import feast.core.StoreProto.Store.StoreType; import feast.core.StoreProto.Store.Subscription; import feast.core.dao.FeatureSetRepository; +import feast.core.dao.ProjectRepository; import feast.core.dao.StoreRepository; import feast.core.exception.RetrievalException; import feast.core.model.FeatureSet; import feast.core.model.Field; +import feast.core.model.Project; import feast.core.model.Source; import feast.core.model.Store; import feast.types.ValueProto.ValueType.Enum; -import io.grpc.StatusRuntimeException; import java.sql.Date; import java.time.Instant; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.Optional; import java.util.stream.Collectors; @@ -75,6 +78,8 @@ public class SpecServiceTest { @Mock private StoreRepository storeRepository; + @Mock private ProjectRepository projectRepository; + @Rule public final ExpectedException expectedException = ExpectedException.none(); private SpecService specService; @@ -94,17 +99,18 @@ public void setUp() { .build(), true); - 
FeatureSet featureSet1v1 = newDummyFeatureSet("f1", 1); - FeatureSet featureSet1v2 = newDummyFeatureSet("f1", 2); - FeatureSet featureSet1v3 = newDummyFeatureSet("f1", 3); - FeatureSet featureSet2v1 = newDummyFeatureSet("f2", 1); + FeatureSet featureSet1v1 = newDummyFeatureSet("f1", 1, "project1"); + FeatureSet featureSet1v2 = newDummyFeatureSet("f1", 2, "project1"); + FeatureSet featureSet1v3 = newDummyFeatureSet("f1", 3, "project1"); + FeatureSet featureSet2v1 = newDummyFeatureSet("f2", 1, "project1"); - Field f3f1 = new Field("f3", "f3f1", Enum.INT64); - Field f3f2 = new Field("f3", "f3f2", Enum.INT64); - Field f3e1 = new Field("f3", "f3e1", Enum.STRING); + Field f3f1 = new Field("f3f1", Enum.INT64); + Field f3f2 = new Field("f3f2", Enum.INT64); + Field f3e1 = new Field("f3e1", Enum.STRING); FeatureSet featureSet3v1 = new FeatureSet( "f3", + "project1", 1, 100L, Arrays.asList(f3e1), @@ -116,16 +122,35 @@ public void setUp() { Arrays.asList(featureSet1v1, featureSet1v2, featureSet1v3, featureSet2v1, featureSet3v1); when(featureSetRepository.findAll()).thenReturn(featureSets); when(featureSetRepository.findAllByOrderByNameAscVersionAsc()).thenReturn(featureSets); - when(featureSetRepository.findByName("f1")).thenReturn(featureSets.subList(0, 3)); - when(featureSetRepository.findByName("f3")).thenReturn(featureSets.subList(4, 5)); - when(featureSetRepository.findFirstFeatureSetByNameOrderByVersionDesc("f1")) + + when(featureSetRepository.findFeatureSetByNameAndProject_NameAndVersion("f1", "project1", 1)) + .thenReturn(featureSets.get(0)); + when(featureSetRepository.findAllByNameLikeAndProject_NameOrderByNameAscVersionAsc( + "f1", "project1")) + .thenReturn(featureSets.subList(0, 3)); + when(featureSetRepository.findAllByNameLikeAndProject_NameOrderByNameAscVersionAsc( + "f3", "project1")) + .thenReturn(featureSets.subList(4, 5)); + when(featureSetRepository.findFirstFeatureSetByNameLikeAndProject_NameOrderByVersionDesc( + "f1", "project1")) 
.thenReturn(featureSet1v3); - when(featureSetRepository.findByNameWithWildcardOrderByNameAscVersionAsc("f1")) + when(featureSetRepository.findAllByNameLikeAndProject_NameOrderByNameAscVersionAsc( + "f1", "project1")) .thenReturn(featureSets.subList(0, 3)); - when(featureSetRepository.findByName("asd")).thenReturn(Lists.newArrayList()); - when(featureSetRepository.findByNameWithWildcardOrderByNameAscVersionAsc("f%")) + when(featureSetRepository.findAllByNameLikeAndProject_NameOrderByNameAscVersionAsc( + "asd", "project1")) + .thenReturn(Lists.newArrayList()); + when(featureSetRepository.findAllByNameLikeAndProject_NameOrderByNameAscVersionAsc( + "f%", "project1")) + .thenReturn(featureSets); + when(featureSetRepository.findAllByNameLikeAndProject_NameLikeOrderByNameAscVersionAsc( + "%", "%")) .thenReturn(featureSets); + when(projectRepository.findAllByArchivedIsFalse()) + .thenReturn(Collections.singletonList(new Project("project1"))); + when(projectRepository.findById("project1")).thenReturn(Optional.of(new Project("project1"))); + Store store1 = newDummyStore("SERVING"); Store store2 = newDummyStore("WAREHOUSE"); stores = Arrays.asList(store1, store2); @@ -133,13 +158,19 @@ public void setUp() { when(storeRepository.findById("SERVING")).thenReturn(Optional.of(store1)); when(storeRepository.findById("NOTFOUND")).thenReturn(Optional.empty()); - specService = new SpecService(featureSetRepository, storeRepository, defaultSource); + specService = + new SpecService(featureSetRepository, storeRepository, projectRepository, defaultSource); } @Test - public void shouldGetAllFeatureSetsIfNoFilterProvided() throws InvalidProtocolBufferException { + public void shouldGetAllFeatureSetsIfOnlyWildcardsProvided() { ListFeatureSetsResponse actual = - specService.listFeatureSets(Filter.newBuilder().setFeatureSetName("").build()); + specService.listFeatureSets( + Filter.newBuilder() + .setFeatureSetName("*") + .setProject("*") + .setFeatureSetVersion("*") + .build()); List list = 
new ArrayList<>(); for (FeatureSet featureSet : featureSets) { FeatureSetProto.FeatureSet toProto = featureSet.toProto(); @@ -151,10 +182,23 @@ public void shouldGetAllFeatureSetsIfNoFilterProvided() throws InvalidProtocolBu } @Test - public void shouldGetAllFeatureSetsMatchingNameIfNoVersionProvided() - throws InvalidProtocolBufferException { + public void listFeatureSetShouldFailIfFeatureSetProvidedWithoutProject() { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage( + "Invalid listFeatureSetRequest, missing arguments. Must provide project, feature set name, and version."); + specService.listFeatureSets( + Filter.newBuilder().setFeatureSetName("f1").setFeatureSetVersion("1").build()); + } + + @Test + public void shouldGetAllFeatureSetsMatchingNameIfWildcardVersionProvided() { ListFeatureSetsResponse actual = - specService.listFeatureSets(Filter.newBuilder().setFeatureSetName("f1").build()); + specService.listFeatureSets( + Filter.newBuilder() + .setProject("project1") + .setFeatureSetName("f1") + .setFeatureSetVersion("*") + .build()); List expectedFeatureSets = featureSets.stream().filter(fs -> fs.getName().equals("f1")).collect(Collectors.toList()); List list = new ArrayList<>(); @@ -168,10 +212,14 @@ public void shouldGetAllFeatureSetsMatchingNameIfNoVersionProvided() } @Test - public void shouldGetAllFeatureSetsMatchingNameWithWildcardSearch() - throws InvalidProtocolBufferException { + public void shouldGetAllFeatureSetsMatchingNameWithWildcardSearch() { ListFeatureSetsResponse actual = - specService.listFeatureSets(Filter.newBuilder().setFeatureSetName("f*").build()); + specService.listFeatureSets( + Filter.newBuilder() + .setProject("project1") + .setFeatureSetName("f*") + .setFeatureSetVersion("*") + .build()); List expectedFeatureSets = featureSets.stream() .filter(fs -> fs.getName().startsWith("f")) @@ -187,11 +235,14 @@ public void shouldGetAllFeatureSetsMatchingNameWithWildcardSearch() } @Test - public void 
shouldGetAllFeatureSetsMatchingVersionIfNoComparator() - throws InvalidProtocolBufferException { + public void shouldGetAllFeatureSetsMatchingVersionIfNoComparator() { ListFeatureSetsResponse actual = specService.listFeatureSets( - Filter.newBuilder().setFeatureSetName("f1").setFeatureSetVersion("1").build()); + Filter.newBuilder() + .setProject("project1") + .setFeatureSetName("f1") + .setFeatureSetVersion("1") + .build()); List expectedFeatureSets = featureSets.stream() .filter(fs -> fs.getName().equals("f1")) @@ -208,71 +259,74 @@ public void shouldGetAllFeatureSetsMatchingVersionIfNoComparator() } @Test - public void shouldGetAllFeatureSetsGivenVersionWithComparator() - throws InvalidProtocolBufferException { - ListFeatureSetsResponse actual = - specService.listFeatureSets( - Filter.newBuilder().setFeatureSetName("f1").setFeatureSetVersion(">1").build()); - List expectedFeatureSets = - featureSets.stream() - .filter(fs -> fs.getName().equals("f1")) - .filter(fs -> fs.getVersion() > 1) - .collect(Collectors.toList()); - List list = new ArrayList<>(); - for (FeatureSet expectedFeatureSet : expectedFeatureSets) { - FeatureSetProto.FeatureSet toProto = expectedFeatureSet.toProto(); - list.add(toProto); - } - ListFeatureSetsResponse expected = - ListFeatureSetsResponse.newBuilder().addAllFeatureSets(list).build(); - assertThat(actual, equalTo(expected)); + public void shouldThrowExceptionIfGetAllFeatureSetsGivenVersionWithComparator() { + expectedException.expect(IllegalArgumentException.class); + specService.listFeatureSets( + Filter.newBuilder() + .setProject("project1") + .setFeatureSetName("f1") + .setFeatureSetVersion(">1") + .build()); } @Test - public void shouldGetLatestFeatureSetGivenMissingVersionFilter() - throws InvalidProtocolBufferException { + public void shouldGetLatestFeatureSetGivenMissingVersionFilter() { GetFeatureSetResponse actual = - specService.getFeatureSet(GetFeatureSetRequest.newBuilder().setName("f1").build()); + 
specService.getFeatureSet( + GetFeatureSetRequest.newBuilder().setName("f1").setProject("project1").build()); FeatureSet expected = featureSets.get(2); assertThat(actual.getFeatureSet(), equalTo(expected.toProto())); } @Test - public void shouldGetSpecificFeatureSetGivenSpecificVersionFilter() - throws InvalidProtocolBufferException { - when(featureSetRepository.findFeatureSetByNameAndVersion("f1", 2)) + public void shouldGetSpecificFeatureSetGivenSpecificVersionFilter() { + when(featureSetRepository.findFeatureSetByNameAndProject_NameAndVersion("f1", "project1", 2)) .thenReturn(featureSets.get(1)); GetFeatureSetResponse actual = specService.getFeatureSet( - GetFeatureSetRequest.newBuilder().setName("f1").setVersion(2).build()); + GetFeatureSetRequest.newBuilder() + .setProject("project1") + .setName("f1") + .setVersion(2) + .build()); FeatureSet expected = featureSets.get(1); assertThat(actual.getFeatureSet(), equalTo(expected.toProto())); } @Test - public void shouldThrowExceptionGivenMissingFeatureSetName() - throws InvalidProtocolBufferException { - expectedException.expect(StatusRuntimeException.class); - expectedException.expectMessage("INVALID_ARGUMENT: No feature set name provided"); + public void shouldThrowExceptionGivenMissingFeatureSetName() { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("No feature set name provided"); specService.getFeatureSet(GetFeatureSetRequest.newBuilder().setVersion(2).build()); } @Test - public void shouldThrowExceptionGivenMissingFeatureSet() throws InvalidProtocolBufferException { - expectedException.expect(StatusRuntimeException.class); + public void shouldThrowExceptionGivenMissingFeatureSet() { + expectedException.expect(RetrievalException.class); expectedException.expectMessage( - "NOT_FOUND: Feature set with name \"f1000\" and version \"2\" could not be found."); + "Feature set with name \"f1000\" and version \"2\" could not be found."); specService.getFeatureSet( - 
GetFeatureSetRequest.newBuilder().setName("f1000").setVersion(2).build()); + GetFeatureSetRequest.newBuilder() + .setName("f1000") + .setProject("project1") + .setVersion(2) + .build()); } @Test - public void shouldThrowRetrievalExceptionGivenInvalidFeatureSetVersionComparator() - throws InvalidProtocolBufferException { - expectedException.expect(StatusRuntimeException.class); - expectedException.expectMessage("Invalid comparator '=<' provided."); + public void shouldThrowRetrievalExceptionGivenInvalidFeatureSetVersionComparator() { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage( + "Invalid listFeatureSetRequest. Version must be set to \"*\" if the project name and feature set name aren't set explicitly: \n" + + "feature_set_name: \"f1\"\n" + + "feature_set_version: \">1\"\n" + + "project: \"project1\""); specService.listFeatureSets( - Filter.newBuilder().setFeatureSetName("f1").setFeatureSetVersion("=<1").build()); + Filter.newBuilder() + .setProject("project1") + .setFeatureSetName("f1") + .setFeatureSetVersion(">1") + .build()); } @Test @@ -307,10 +361,10 @@ public void shouldThrowRetrievalExceptionIfNoStoresFoundWithName() { } @Test - public void applyFeatureSetShouldReturnFeatureSetWithLatestVersionIfFeatureSetHasNotChanged() - throws InvalidProtocolBufferException { + public void applyFeatureSetShouldReturnFeatureSetWithLatestVersionIfFeatureSetHasNotChanged() { FeatureSetSpec incomingFeatureSetSpec = featureSets.get(2).toProto().getSpec().toBuilder().clearVersion().build(); + ApplyFeatureSetResponse applyFeatureSetResponse = specService.applyFeatureSet( FeatureSetProto.FeatureSet.newBuilder().setSpec(incomingFeatureSetSpec).build()); @@ -321,54 +375,90 @@ public void applyFeatureSetShouldReturnFeatureSetWithLatestVersionIfFeatureSetHa } @Test - public void applyFeatureSetShouldApplyFeatureSetWithInitVersionIfNotExists() - throws InvalidProtocolBufferException { - 
when(featureSetRepository.findByName("f2")).thenReturn(Lists.newArrayList()); - FeatureSetSpec incomingFeatureSetSpec = - newDummyFeatureSet("f2", 1).toProto().getSpec().toBuilder().clearVersion().build(); + public void applyFeatureSetShouldApplyFeatureSetWithInitVersionIfNotExists() { + when(featureSetRepository.findAllByNameLikeAndProject_NameOrderByNameAscVersionAsc( + "f2", "project1")) + .thenReturn(Lists.newArrayList()); + + FeatureSetProto.FeatureSet incomingFeatureSet = + newDummyFeatureSet("f2", 1, "project1").toProto(); + + FeatureSetProto.FeatureSetSpec incomingFeatureSetSpec = + incomingFeatureSet.getSpec().toBuilder().clearVersion().build(); ApplyFeatureSetResponse applyFeatureSetResponse = specService.applyFeatureSet( - FeatureSetProto.FeatureSet.newBuilder().setSpec(incomingFeatureSetSpec).build()); - verify(featureSetRepository).saveAndFlush(ArgumentMatchers.any(FeatureSet.class)); - FeatureSetSpec expected = - incomingFeatureSetSpec.toBuilder().setVersion(1).setSource(defaultSource.toProto()).build(); + FeatureSetProto.FeatureSet.newBuilder().setSpec(incomingFeatureSet.getSpec()).build()); + verify(projectRepository).saveAndFlush(ArgumentMatchers.any(Project.class)); + + FeatureSetProto.FeatureSet expected = + FeatureSetProto.FeatureSet.newBuilder() + .setSpec( + incomingFeatureSetSpec + .toBuilder() + .setVersion(1) + .setSource(defaultSource.toProto()) + .build()) + .build(); assertThat(applyFeatureSetResponse.getStatus(), equalTo(Status.CREATED)); - assertThat(applyFeatureSetResponse.getFeatureSet().getSpec(), equalTo(expected)); + assertThat(applyFeatureSetResponse.getFeatureSet().getSpec(), equalTo(expected.getSpec())); + assertThat( + applyFeatureSetResponse.getFeatureSet().getSpec().getVersion(), + equalTo(expected.getSpec().getVersion())); } @Test - public void applyFeatureSetShouldIncrementFeatureSetVersionIfAlreadyExists() - throws InvalidProtocolBufferException { - FeatureSetSpec incomingFeatureSet = - featureSets - .get(2) - .toProto() 
- .getSpec() + public void applyFeatureSetShouldIncrementFeatureSetVersionIfAlreadyExists() { + FeatureSetProto.FeatureSet incomingFeatureSet = featureSets.get(2).toProto(); + incomingFeatureSet = + incomingFeatureSet + .toBuilder() + .setMeta(incomingFeatureSet.getMeta()) + .setSpec( + incomingFeatureSet + .getSpec() + .toBuilder() + .clearVersion() + .addFeatures( + FeatureSpec.newBuilder().setName("feature2").setValueType(Enum.STRING)) + .build()) + .build(); + + FeatureSetProto.FeatureSet expected = + incomingFeatureSet .toBuilder() - .clearVersion() - .addFeatures(FeatureSpec.newBuilder().setName("feature2").setValueType(Enum.STRING)) + .setMeta(incomingFeatureSet.getMeta().toBuilder().build()) + .setSpec( + incomingFeatureSet + .getSpec() + .toBuilder() + .setVersion(4) + .setSource(defaultSource.toProto()) + .build()) .build(); - FeatureSetSpec expected = - incomingFeatureSet.toBuilder().setVersion(4).setSource(defaultSource.toProto()).build(); + ApplyFeatureSetResponse applyFeatureSetResponse = - specService.applyFeatureSet( - FeatureSetProto.FeatureSet.newBuilder().setSpec(expected).build()); - verify(featureSetRepository).saveAndFlush(ArgumentMatchers.any(FeatureSet.class)); + specService.applyFeatureSet(incomingFeatureSet); + verify(projectRepository).saveAndFlush(ArgumentMatchers.any(Project.class)); assertThat(applyFeatureSetResponse.getStatus(), equalTo(Status.CREATED)); - assertThat(applyFeatureSetResponse.getFeatureSet().getSpec(), equalTo(expected)); + assertEquals( + FeatureSet.fromProto(applyFeatureSetResponse.getFeatureSet()), + FeatureSet.fromProto(expected)); + assertThat( + applyFeatureSetResponse.getFeatureSet().getSpec().getVersion(), + equalTo(expected.getSpec().getVersion())); } @Test - public void applyFeatureSetShouldNotCreateFeatureSetIfFieldsUnordered() - throws InvalidProtocolBufferException { + public void applyFeatureSetShouldNotCreateFeatureSetIfFieldsUnordered() { - Field f3f1 = new Field("f3", "f3f1", Enum.INT64); - Field f3f2 = 
new Field("f3", "f3f2", Enum.INT64); - Field f3e1 = new Field("f3", "f3e1", Enum.STRING); + Field f3f1 = new Field("f3f1", Enum.INT64); + Field f3f2 = new Field("f3f2", Enum.INT64); + Field f3e1 = new Field("f3e1", Enum.STRING); FeatureSetProto.FeatureSet incomingFeatureSet = (new FeatureSet( "f3", + "project1", 5, 100L, Arrays.asList(f3e1), @@ -377,19 +467,18 @@ public void applyFeatureSetShouldNotCreateFeatureSetIfFieldsUnordered() FeatureSetStatus.STATUS_READY)) .toProto(); - FeatureSetProto.FeatureSet expected = incomingFeatureSet; ApplyFeatureSetResponse applyFeatureSetResponse = specService.applyFeatureSet(incomingFeatureSet); assertThat(applyFeatureSetResponse.getStatus(), equalTo(Status.NO_CHANGE)); assertThat( applyFeatureSetResponse.getFeatureSet().getSpec().getMaxAge(), - equalTo(expected.getSpec().getMaxAge())); + equalTo(incomingFeatureSet.getSpec().getMaxAge())); assertThat( applyFeatureSetResponse.getFeatureSet().getSpec().getEntities(0), - equalTo(expected.getSpec().getEntities(0))); + equalTo(incomingFeatureSet.getSpec().getEntities(0))); assertThat( applyFeatureSetResponse.getFeatureSet().getSpec().getName(), - equalTo(expected.getSpec().getName())); + equalTo(incomingFeatureSet.getSpec().getName())); } @Test @@ -400,7 +489,8 @@ public void shouldUpdateStoreIfConfigChanges() throws InvalidProtocolBufferExcep .setName("SERVING") .setType(StoreType.REDIS) .setRedisConfig(RedisConfig.newBuilder()) - .addSubscriptions(Subscription.newBuilder().setName("a").setVersion(">1")) + .addSubscriptions( + Subscription.newBuilder().setProject("project1").setName("a").setVersion(">1")) .build(); UpdateStoreResponse actual = specService.updateStore(UpdateStoreRequest.newBuilder().setStore(newStore).build()); @@ -430,12 +520,20 @@ public void shouldDoNothingIfNoChange() throws InvalidProtocolBufferException { assertThat(actual, equalTo(expected)); } - private FeatureSet newDummyFeatureSet(String name, int version) { - Field feature = new Field(name, "feature", 
Enum.INT64); - Field entity = new Field(name, "entity", Enum.STRING); + @Test + public void shouldFailIfGetFeatureSetWithoutProject() { + expectedException.expect(IllegalArgumentException.class); + expectedException.expectMessage("No project provided"); + specService.getFeatureSet(GetFeatureSetRequest.newBuilder().setName("f1").build()); + } + + private FeatureSet newDummyFeatureSet(String name, int version, String project) { + Field feature = new Field("feature", Enum.INT64); + Field entity = new Field("entity", Enum.STRING); FeatureSet fs = new FeatureSet( name, + project, version, 100L, Arrays.asList(entity), @@ -451,7 +549,7 @@ private Store newDummyStore(String name) { Store store = new Store(); store.setName(name); store.setType(StoreType.REDIS.toString()); - store.setSubscriptions(""); + store.setSubscriptions("*:*:*"); store.setConfig(RedisConfig.newBuilder().setPort(6379).build().toByteArray()); return store; } diff --git a/infra/charts/feast/charts/feast-serving/values.yaml b/infra/charts/feast/charts/feast-serving/values.yaml index 857c3fcfd50..d489a48748d 100644 --- a/infra/charts/feast/charts/feast-serving/values.yaml +++ b/infra/charts/feast/charts/feast-serving/values.yaml @@ -90,8 +90,9 @@ application.yaml: # host: localhost # port: 6379 # subscriptions: -# - name: "*" -# version: ">0" +# - project: "*" +# name: "*" +# version: "*" # # store.yaml: # name: bigquery @@ -100,8 +101,9 @@ application.yaml: # project_id: PROJECT_ID # dataset_id: DATASET_ID # subscriptions: -# - name: "*" -# version: ">0" +# - project: "*" +# name: "*" +# version: "*" # springConfigMountPath is the directory path where application.yaml and # store.yaml will be mounted in the container. 
diff --git a/infra/charts/feast/values-demo.yaml b/infra/charts/feast/values-demo.yaml index 9212070eb5d..fad4bc0afb0 100644 --- a/infra/charts/feast/values-demo.yaml +++ b/infra/charts/feast/values-demo.yaml @@ -64,7 +64,8 @@ feast-serving-online: type: REDIS subscriptions: - name: "*" - version: ">0" + project: "*" + version: "*" feast-serving-batch: enabled: false diff --git a/infra/charts/feast/values.yaml b/infra/charts/feast/values.yaml index fd75a3fce6f..ebc8c802a16 100644 --- a/infra/charts/feast/values.yaml +++ b/infra/charts/feast/values.yaml @@ -124,7 +124,8 @@ feast-serving-online: port: 6379 subscriptions: - name: "*" - version: ">0" + project: "*" + version: "*" # ============================================================ # Feast Serving Batch @@ -200,4 +201,5 @@ feast-serving-batch: dataset_id: DATASET_ID subscriptions: - name: "*" - version: ">0" + project: "*" + version: "*" diff --git a/infra/docker-compose/serving/bq-store.yml b/infra/docker-compose/serving/bq-store.yml index bb912819310..cdebee3497f 100644 --- a/infra/docker-compose/serving/bq-store.yml +++ b/infra/docker-compose/serving/bq-store.yml @@ -4,5 +4,6 @@ bigquery_config: project_id: dataset_id: subscriptions: - - name: "*" - version: ">0" \ No newline at end of file +- name: "*" + project: "*" + version: "*" \ No newline at end of file diff --git a/infra/docker-compose/serving/redis-store.yml b/infra/docker-compose/serving/redis-store.yml index 600a2e2e4f0..16870be94d7 100644 --- a/infra/docker-compose/serving/redis-store.yml +++ b/infra/docker-compose/serving/redis-store.yml @@ -4,5 +4,6 @@ redis_config: host: redis port: 6379 subscriptions: - - name: "*" - version: ">0" +- name: "*" + project: "*" + version: "*" diff --git a/ingestion/example/core_specs/entity/product.json b/ingestion/example/core_specs/entity/product.json deleted file mode 100644 index c8cb1177a5d..00000000000 --- a/ingestion/example/core_specs/entity/product.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "name": 
"product", - "description": "This entity capture features for products, keys for this entity are product ids", - "tags": [] -} \ No newline at end of file diff --git a/ingestion/example/core_specs/entity/user.json b/ingestion/example/core_specs/entity/user.json deleted file mode 100644 index b1864fe4afd..00000000000 --- a/ingestion/example/core_specs/entity/user.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "name": "user", - "description": "This entity capture features for users, keys for this entity are user ids", - "tags": [] -} \ No newline at end of file diff --git a/ingestion/example/core_specs/feature/product.day.completed_orders.json b/ingestion/example/core_specs/feature/product.day.completed_orders.json deleted file mode 100644 index a4582848e02..00000000000 --- a/ingestion/example/core_specs/feature/product.day.completed_orders.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "id": "product.completed_orders", - "entity": "product", - "name": "completed_orders", - "owner": "feast@example.com", - "description": "This feature represents a product's completed orders per day", - "uri": "https://example.com/", - "valueType": "INT32", - "tags": [], - "options": {}, - "dataStores": { - "serving": { - "id": "example_serving" - }, - "warehouse": { - "id": "example_warehouse" - } - } -} \ No newline at end of file diff --git a/ingestion/example/core_specs/feature/user.none.age.json b/ingestion/example/core_specs/feature/user.none.age.json deleted file mode 100644 index 9b8f8a68cf6..00000000000 --- a/ingestion/example/core_specs/feature/user.none.age.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "id": "user.age", - "entity": "user", - "name": "age", - "owner": "feast@example.com", - "description": "This feature represents a user's age", - "uri": "https://example.com/", - "valueType": "INT32", - "tags": [], - "options": {}, - "dataStores": { - "serving": { - "id": "example_serving" - }, - "warehouse": { - "id": "example_warehouse" - } - } -} \ No newline at end of file diff --git 
a/ingestion/example/core_specs/feature/user.none.completed_orders.json b/ingestion/example/core_specs/feature/user.none.completed_orders.json deleted file mode 100644 index 0280cf74831..00000000000 --- a/ingestion/example/core_specs/feature/user.none.completed_orders.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "id": "user.completed_orders", - "entity": "user", - "name": "completed_orders", - "owner": "feast@example.com", - "description": "This feature represents a user's total completed orders", - "uri": "https://example.com/", - "valueType": "INT32", - "tags": [], - "options": {}, - "dataStores": { - "serving": { - "id": "example_serving" - }, - "warehouse": { - "id": "example_warehouse" - } - } -} diff --git a/ingestion/example/core_specs/storage/example_errors.json b/ingestion/example/core_specs/storage/example_errors.json deleted file mode 100644 index f5f0be752d8..00000000000 --- a/ingestion/example/core_specs/storage/example_errors.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "id": "example_errors", - "type": "file.json", - "options": { - "path": "output/errors/data" - } -} \ No newline at end of file diff --git a/ingestion/example/core_specs/storage/example_serving.json b/ingestion/example/core_specs/storage/example_serving.json deleted file mode 100644 index 49224898a90..00000000000 --- a/ingestion/example/core_specs/storage/example_serving.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "id": "example_serving", - "type": "file.json", - "options": { - "path": "output/serving/data" - } -} \ No newline at end of file diff --git a/ingestion/example/core_specs/storage/example_warehouse.json b/ingestion/example/core_specs/storage/example_warehouse.json deleted file mode 100644 index baa5b5a055f..00000000000 --- a/ingestion/example/core_specs/storage/example_warehouse.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "id": "example_warehouse", - "type": "file.json", - "options": { - "path": "output/warehouse/data" - } -} \ No newline at end of file diff --git 
a/ingestion/example/import_products.yaml b/ingestion/example/import_products.yaml deleted file mode 100644 index cb52580cf0d..00000000000 --- a/ingestion/example/import_products.yaml +++ /dev/null @@ -1,15 +0,0 @@ ---- -type: file -options: - format: csv - path: sample_data/daily_products.csv -entities: - - product -schema: - entityIdColumn: id - timestampColumn: timestamp - fields: - - name: id - - name: timestamp - - featureId: product.completed_orders - diff --git a/ingestion/example/import_users.yaml b/ingestion/example/import_users.yaml deleted file mode 100644 index 5e2d6e553ac..00000000000 --- a/ingestion/example/import_users.yaml +++ /dev/null @@ -1,16 +0,0 @@ ---- -type: file -options: - format: csv - path: sample_data/users.csv -entities: - - user -schema: - entityIdColumn: id - timestampValue: 2018-10-25T00:00:00.000Z - fields: - - name: id - - name: name - - featureId: user.age - - featureId: user.completed_orders - diff --git a/ingestion/example/sample_data/daily_products.csv b/ingestion/example/sample_data/daily_products.csv deleted file mode 100644 index 66939b7c190..00000000000 --- a/ingestion/example/sample_data/daily_products.csv +++ /dev/null @@ -1,5 +0,0 @@ -1,2018-10-25,7 -1,2018-10-26,6 -2,2018-10-26,1 -3,2018-10-25,15 -3,2018-10-26,13 diff --git a/ingestion/example/sample_data/users.csv b/ingestion/example/sample_data/users.csv deleted file mode 100644 index f4eda616a25..00000000000 --- a/ingestion/example/sample_data/users.csv +++ /dev/null @@ -1,3 +0,0 @@ -1,Tim,37,13 -2,Aria,28,12 -2,Zhiling,28,14 \ No newline at end of file diff --git a/ingestion/src/main/java/feast/ingestion/ImportJob.java b/ingestion/src/main/java/feast/ingestion/ImportJob.java index fb719120d40..41af5f9bb40 100644 --- a/ingestion/src/main/java/feast/ingestion/ImportJob.java +++ b/ingestion/src/main/java/feast/ingestion/ImportJob.java @@ -16,8 +16,10 @@ */ package feast.ingestion; +import static feast.ingestion.utils.SpecUtil.getFeatureSetReference; + import 
com.google.protobuf.InvalidProtocolBufferException; -import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.FeatureSetProto.FeatureSet; import feast.core.SourceProto.Source; import feast.core.StoreProto.Store; import feast.ingestion.options.ImportOptions; @@ -31,16 +33,15 @@ import feast.ingestion.utils.StoreUtil; import feast.ingestion.values.FailedElement; import feast.types.FeatureRowProto.FeatureRow; +import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.stream.Collectors; import org.apache.beam.sdk.Pipeline; import org.apache.beam.sdk.PipelineResult; import org.apache.beam.sdk.options.PipelineOptionsFactory; import org.apache.beam.sdk.options.PipelineOptionsValidator; import org.apache.beam.sdk.values.PCollectionTuple; import org.apache.beam.sdk.values.TupleTag; -import org.apache.commons.lang3.tuple.Pair; import org.slf4j.Logger; public class ImportJob { @@ -79,26 +80,25 @@ public static PipelineResult runPipeline(ImportOptions options) log.info("Starting import job with settings: \n{}", options.toString()); - List featureSetSpecs = - SpecUtil.parseFeatureSetSpecJsonList(options.getFeatureSetSpecJson()); + List featureSets = + SpecUtil.parseFeatureSetSpecJsonList(options.getFeatureSetJson()); List stores = SpecUtil.parseStoreJsonList(options.getStoreJson()); for (Store store : stores) { - List subscribedFeatureSets = - SpecUtil.getSubscribedFeatureSets(store.getSubscriptionsList(), featureSetSpecs); + List subscribedFeatureSets = + SpecUtil.getSubscribedFeatureSets(store.getSubscriptionsList(), featureSets); // Generate tags by key - Map featureSetSpecsByKey = - subscribedFeatureSets.stream() - .map( - fs -> { - String id = String.format("%s:%s", fs.getName(), fs.getVersion()); - return Pair.of(id, fs); - }) - .collect(Collectors.toMap(Pair::getLeft, Pair::getRight)); + Map featureSetsByKey = new HashMap<>(); + subscribedFeatureSets.stream() + .forEach( + fs -> { + String ref = getFeatureSetReference(fs); + 
featureSetsByKey.put(ref, fs); + }); // TODO: make the source part of the job initialisation options - Source source = subscribedFeatureSets.get(0).getSource(); + Source source = subscribedFeatureSets.get(0).getSpec().getSource(); // Step 1. Read messages from Feast Source as FeatureRow. PCollectionTuple convertedFeatureRows = @@ -110,7 +110,7 @@ public static PipelineResult runPipeline(ImportOptions options) .setFailureTag(DEADLETTER_OUT) .build()); - for (FeatureSetSpec featureSet : subscribedFeatureSets) { + for (FeatureSet featureSet : subscribedFeatureSets) { // Ensure Store has valid configuration and Feast can access it. StoreUtil.setupStore(store, featureSet); } @@ -121,7 +121,7 @@ public static PipelineResult runPipeline(ImportOptions options) .get(FEATURE_ROW_OUT) .apply( ValidateFeatureRows.newBuilder() - .setFeatureSetSpecs(featureSetSpecsByKey) + .setFeatureSets(featureSetsByKey) .setSuccessTag(FEATURE_ROW_OUT) .setFailureTag(DEADLETTER_OUT) .build()); @@ -131,10 +131,7 @@ public static PipelineResult runPipeline(ImportOptions options) .get(FEATURE_ROW_OUT) .apply( "WriteFeatureRowToStore", - WriteToStore.newBuilder() - .setFeatureSetSpecs(featureSetSpecsByKey) - .setStore(store) - .build()); + WriteToStore.newBuilder().setFeatureSets(featureSetsByKey).setStore(store).build()); // Step 4. Write FailedElements to a dead letter table in BigQuery. if (options.getDeadLetterTableSpec() != null) { diff --git a/ingestion/src/main/java/feast/ingestion/options/ImportOptions.java b/ingestion/src/main/java/feast/ingestion/options/ImportOptions.java index 21df87e4b27..b299bb47e55 100644 --- a/ingestion/src/main/java/feast/ingestion/options/ImportOptions.java +++ b/ingestion/src/main/java/feast/ingestion/options/ImportOptions.java @@ -28,16 +28,16 @@ public interface ImportOptions extends PipelineOptions, DataflowPipelineOptions, DirectOptions { @Required @Description( - "JSON string representation of the FeatureSetSpec that the import job will process." 
- + "FeatureSetSpec follows the format in feast.core.FeatureSet proto." - + "Mutliple FeatureSetSpec can be passed by specifying '--featureSetSpec={...}' multiple times" + "JSON string representation of the FeatureSet that the import job will process." + + "FeatureSet follows the format in feast.core.FeatureSet proto." + + "Mutliple FeatureSetSpec can be passed by specifying '--featureSet={...}' multiple times" + "The conversion of Proto message to JSON should follow this mapping:" + "https://developers.google.com/protocol-buffers/docs/proto3#json" + "Please minify and remove all insignificant whitespace such as newline in the JSON string" + "to prevent error when parsing the options") - List getFeatureSetSpecJson(); + List getFeatureSetJson(); - void setFeatureSetSpecJson(List featureSetSpecJson); + void setFeatureSetJson(List featureSetJson); @Required @Description( diff --git a/ingestion/src/main/java/feast/ingestion/transform/ValidateFeatureRows.java b/ingestion/src/main/java/feast/ingestion/transform/ValidateFeatureRows.java index 19b1f1f8605..5ca6a710f62 100644 --- a/ingestion/src/main/java/feast/ingestion/transform/ValidateFeatureRows.java +++ b/ingestion/src/main/java/feast/ingestion/transform/ValidateFeatureRows.java @@ -20,7 +20,7 @@ import feast.core.FeatureSetProto; import feast.ingestion.transform.fn.ValidateFeatureRowDoFn; import feast.ingestion.values.FailedElement; -import feast.ingestion.values.FeatureSetSpec; +import feast.ingestion.values.FeatureSet; import feast.types.FeatureRowProto.FeatureRow; import java.util.Map; import java.util.stream.Collectors; @@ -36,7 +36,7 @@ public abstract class ValidateFeatureRows extends PTransform, PCollectionTuple> { - public abstract Map getFeatureSetSpecs(); + public abstract Map getFeatureSets(); public abstract TupleTag getSuccessTag(); @@ -49,8 +49,7 @@ public static Builder newBuilder() { @AutoValue.Builder public abstract static class Builder { - public abstract Builder setFeatureSetSpecs( - Map 
featureSetSpec); + public abstract Builder setFeatureSets(Map featureSets); public abstract Builder setSuccessTag(TupleTag successTag); @@ -62,16 +61,16 @@ public abstract Builder setFeatureSetSpecs( @Override public PCollectionTuple expand(PCollection input) { - Map featureSetSpecs = - getFeatureSetSpecs().entrySet().stream() - .map(e -> Pair.of(e.getKey(), new FeatureSetSpec(e.getValue()))) + Map featureSets = + getFeatureSets().entrySet().stream() + .map(e -> Pair.of(e.getKey(), new FeatureSet(e.getValue()))) .collect(Collectors.toMap(Pair::getLeft, Pair::getRight)); return input.apply( "ValidateFeatureRows", ParDo.of( ValidateFeatureRowDoFn.newBuilder() - .setFeatureSetSpecs(featureSetSpecs) + .setFeatureSets(featureSets) .setSuccessTag(getSuccessTag()) .setFailureTag(getFailureTag()) .build()) diff --git a/ingestion/src/main/java/feast/ingestion/transform/WriteToStore.java b/ingestion/src/main/java/feast/ingestion/transform/WriteToStore.java index 2e3a0a5ddee..6aed943931a 100644 --- a/ingestion/src/main/java/feast/ingestion/transform/WriteToStore.java +++ b/ingestion/src/main/java/feast/ingestion/transform/WriteToStore.java @@ -19,7 +19,7 @@ import com.google.api.services.bigquery.model.TableDataInsertAllResponse.InsertErrors; import com.google.api.services.bigquery.model.TableRow; import com.google.auto.value.AutoValue; -import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.FeatureSetProto.FeatureSet; import feast.core.StoreProto.Store; import feast.core.StoreProto.Store.BigQueryConfig; import feast.core.StoreProto.Store.RedisConfig; @@ -65,7 +65,7 @@ public abstract class WriteToStore extends PTransform, P public abstract Store getStore(); - public abstract Map getFeatureSetSpecs(); + public abstract Map getFeatureSets(); public static Builder newBuilder() { return new AutoValue_WriteToStore.Builder(); @@ -76,7 +76,7 @@ public abstract static class Builder { public abstract Builder setStore(Store store); - public abstract Builder 
setFeatureSetSpecs(Map featureSetSpecs); + public abstract Builder setFeatureSets(Map featureSets); public abstract WriteToStore build(); } @@ -92,7 +92,7 @@ public PDone expand(PCollection input) { input .apply( "FeatureRowToRedisMutation", - ParDo.of(new FeatureRowToRedisMutationDoFn(getFeatureSetSpecs()))) + ParDo.of(new FeatureRowToRedisMutationDoFn(getFeatureSets()))) .apply( "WriteRedisMutationToRedis", RedisCustomIO.write(redisConfig.getHost(), redisConfig.getPort())); diff --git a/ingestion/src/main/java/feast/ingestion/transform/fn/ValidateFeatureRowDoFn.java b/ingestion/src/main/java/feast/ingestion/transform/fn/ValidateFeatureRowDoFn.java index eeb5ce67327..dfbb48fc85d 100644 --- a/ingestion/src/main/java/feast/ingestion/transform/fn/ValidateFeatureRowDoFn.java +++ b/ingestion/src/main/java/feast/ingestion/transform/fn/ValidateFeatureRowDoFn.java @@ -18,8 +18,7 @@ import com.google.auto.value.AutoValue; import feast.ingestion.values.FailedElement; -import feast.ingestion.values.FailedElement.Builder; -import feast.ingestion.values.FeatureSetSpec; +import feast.ingestion.values.FeatureSet; import feast.ingestion.values.Field; import feast.types.FeatureRowProto.FeatureRow; import feast.types.FieldProto; @@ -31,7 +30,7 @@ @AutoValue public abstract class ValidateFeatureRowDoFn extends DoFn { - public abstract Map getFeatureSetSpecs(); + public abstract Map getFeatureSets(); public abstract TupleTag getSuccessTag(); @@ -44,7 +43,7 @@ public static Builder newBuilder() { @AutoValue.Builder public abstract static class Builder { - public abstract Builder setFeatureSetSpecs(Map featureSetSpecs); + public abstract Builder setFeatureSets(Map featureSets); public abstract Builder setSuccessTag(TupleTag successTag); @@ -57,17 +56,16 @@ public abstract static class Builder { public void processElement(ProcessContext context) { String error = null; FeatureRow featureRow = context.element(); - FeatureSetSpec featureSetSpec = - 
getFeatureSetSpecs().getOrDefault(featureRow.getFeatureSet(), null); - if (featureSetSpec != null) { + FeatureSet featureSet = getFeatureSets().getOrDefault(featureRow.getFeatureSet(), null); + if (featureSet != null) { for (FieldProto.Field field : featureRow.getFieldsList()) { - Field fieldSpec = featureSetSpec.getField(field.getName()); + Field fieldSpec = featureSet.getField(field.getName()); if (fieldSpec == null) { error = String.format( "FeatureRow contains field '%s' which do not exists in FeatureSet '%s' version '%d'. Please check the FeatureRow data.", - field.getName(), featureSetSpec.getId()); + field.getName(), featureSet.getReference()); break; } // If value is set in the FeatureRow, make sure the value type matches @@ -98,9 +96,14 @@ public void processElement(ProcessContext context) { .setJobName(context.getPipelineOptions().getJobName()) .setPayload(featureRow.toString()) .setErrorMessage(error); - if (featureSetSpec != null) { - String[] split = featureSetSpec.getId().split(":"); - failedElement = failedElement.setFeatureSetName(split[0]).setFeatureSetVersion(split[1]); + if (featureSet != null) { + String[] split = featureSet.getReference().split(":"); + String[] nameSplit = split[0].split("/"); + failedElement = + failedElement + .setProjectName(nameSplit[0]) + .setFeatureSetName(nameSplit[1]) + .setFeatureSetVersion(split[1]); } context.output(getFailureTag(), failedElement.build()); } else { diff --git a/ingestion/src/main/java/feast/ingestion/transform/metrics/WriteDeadletterRowMetricsDoFn.java b/ingestion/src/main/java/feast/ingestion/transform/metrics/WriteDeadletterRowMetricsDoFn.java index 452bfb23775..687670c5cf0 100644 --- a/ingestion/src/main/java/feast/ingestion/transform/metrics/WriteDeadletterRowMetricsDoFn.java +++ b/ingestion/src/main/java/feast/ingestion/transform/metrics/WriteDeadletterRowMetricsDoFn.java @@ -33,6 +33,7 @@ public abstract class WriteDeadletterRowMetricsDoFn extends DoFn { private final String METRIC_PREFIX = 
"feast_ingestion"; private final String STORE_TAG_KEY = "feast_store"; + private final String FEATURE_SET_PROJECT_TAG_KEY = "feast_project_name"; private final String FEATURE_SET_NAME_TAG_KEY = "feast_featureSet_name"; private final String FEATURE_SET_VERSION_TAG_KEY = "feast_featureSet_version"; private final String FEATURE_TAG_KEY = "feast_feature_name"; @@ -46,10 +46,7 @@ public abstract class WriteRowMetricsDoFn extends DoFn { public abstract int getStatsdPort(); public static WriteRowMetricsDoFn create( - String newStoreName, - FeatureSetSpec newFeatureSetSpec, - String newStatsdHost, - int newStatsdPort) { + String newStoreName, String newStatsdHost, int newStatsdPort) { return newBuilder() .setStoreName(newStoreName) .setStatsdHost(newStatsdHost) @@ -88,13 +85,15 @@ public void processElement(ProcessContext c) { long eventTimestamp = com.google.protobuf.util.Timestamps.toMillis(row.getEventTimestamp()); String[] split = row.getFeatureSet().split(":"); - String featureSetName = split[0]; + String featureSetProject = split[0].split("/")[0]; + String featureSetName = split[0].split("/")[1]; String featureSetVersion = split[1]; statsd.histogram( "feature_row_lag_ms", System.currentTimeMillis() - eventTimestamp, STORE_TAG_KEY + ":" + getStoreName(), + FEATURE_SET_PROJECT_TAG_KEY + ":" + featureSetProject, FEATURE_SET_NAME_TAG_KEY + ":" + featureSetName, FEATURE_SET_VERSION_TAG_KEY + ":" + featureSetVersion, INGESTION_JOB_NAME_KEY + ":" + c.getPipelineOptions().getJobName()); @@ -103,6 +102,7 @@ public void processElement(ProcessContext c) { "feature_row_event_time_epoch_ms", eventTimestamp, STORE_TAG_KEY + ":" + getStoreName(), + FEATURE_SET_PROJECT_TAG_KEY + ":" + featureSetProject, FEATURE_SET_NAME_TAG_KEY + ":" + featureSetName, FEATURE_SET_VERSION_TAG_KEY + ":" + featureSetVersion, INGESTION_JOB_NAME_KEY + ":" + c.getPipelineOptions().getJobName()); @@ -113,6 +113,7 @@ public void processElement(ProcessContext c) { "feature_value_lag_ms", 
System.currentTimeMillis() - eventTimestamp, STORE_TAG_KEY + ":" + getStoreName(), + FEATURE_SET_PROJECT_TAG_KEY + ":" + featureSetProject, FEATURE_SET_NAME_TAG_KEY + ":" + featureSetName, FEATURE_SET_VERSION_TAG_KEY + ":" + featureSetVersion, FEATURE_TAG_KEY + ":" + field.getName(), @@ -122,6 +123,7 @@ public void processElement(ProcessContext c) { "feature_value_missing_count", 1, STORE_TAG_KEY + ":" + getStoreName(), + FEATURE_SET_PROJECT_TAG_KEY + ":" + featureSetProject, FEATURE_SET_NAME_TAG_KEY + ":" + featureSetName, FEATURE_SET_VERSION_TAG_KEY + ":" + featureSetVersion, FEATURE_TAG_KEY + ":" + field.getName(), @@ -133,6 +135,7 @@ public void processElement(ProcessContext c) { "feature_row_ingested_count", 1, STORE_TAG_KEY + ":" + getStoreName(), + FEATURE_SET_PROJECT_TAG_KEY + ":" + featureSetProject, FEATURE_SET_NAME_TAG_KEY + ":" + featureSetName, FEATURE_SET_VERSION_TAG_KEY + ":" + featureSetVersion, INGESTION_JOB_NAME_KEY + ":" + c.getPipelineOptions().getJobName()); diff --git a/ingestion/src/main/java/feast/ingestion/utils/SpecUtil.java b/ingestion/src/main/java/feast/ingestion/utils/SpecUtil.java index 132a2e93bf8..9163c5b2d6f 100644 --- a/ingestion/src/main/java/feast/ingestion/utils/SpecUtil.java +++ b/ingestion/src/main/java/feast/ingestion/utils/SpecUtil.java @@ -19,6 +19,7 @@ import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.util.JsonFormat; import feast.core.FeatureSetProto.EntitySpec; +import feast.core.FeatureSetProto.FeatureSet; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.FeatureSetProto.FeatureSpec; import feast.core.StoreProto.Store; @@ -32,12 +33,45 @@ public class SpecUtil { + public static String getFeatureSetReference(FeatureSet featureSet) { + FeatureSetSpec spec = featureSet.getSpec(); + return String.format("%s/%s:%d", spec.getProject(), spec.getName(), spec.getVersion()); + } + /** Get only feature set specs that matches the subscription */ - public static List 
getSubscribedFeatureSets( - List subscriptions, List featureSetSpecs) { - List subscribed = new ArrayList<>(); - for (FeatureSetSpec featureSet : featureSetSpecs) { + public static List getSubscribedFeatureSets( + List subscriptions, List featureSets) { + List subscribed = new ArrayList<>(); + for (FeatureSet featureSet : featureSets) { for (Subscription sub : subscriptions) { + // If configuration missing, fail + if (sub.getProject().isEmpty() || sub.getName().isEmpty() || sub.getVersion().isEmpty()) { + throw new IllegalArgumentException( + String.format("Subscription is missing arguments: %s", sub.toString())); + } + + // If all wildcards, subscribe to everything + if (sub.getProject().equals("*") + || sub.getName().equals("*") + || sub.getVersion().equals("*")) { + subscribed.add(featureSet); + break; + } + + // If project is a wildcard but name and/or version are not, the subscription is invalid + if (sub.getProject().equals("*") + && (!sub.getName().equals("*") || !sub.getVersion().equals("*"))) { + throw new IllegalArgumentException( + String.format( + "Subscription cannot have feature set name and/or version set if project is not defined: %s", + sub.toString())); + } + + // Match project name + if (!featureSet.getSpec().getProject().equals(sub.getProject())) { + continue; + } + // Convert wildcard to regex String subName = sub.getName(); if (!sub.getName().contains(".*")) { @@ -46,25 +80,25 @@ public static List getSubscribedFeatureSets( // Match feature set name to pattern Pattern pattern = Pattern.compile(subName); - if (!pattern.matcher(featureSet.getName()).matches()) { + if (!pattern.matcher(featureSet.getSpec().getName()).matches()) { continue; } - // If version is empty, match all - if (sub.getVersion().isEmpty()) { + // If version is '*', match all - if (sub.getVersion().equals("*")) { subscribed.add(featureSet); break; - } else if (sub.getVersion().startsWith(">") && sub.getVersion().length() > 1) { - // if version starts with >, match only those greater than the version number - int
lowerBoundIncl = Integer.parseInt(sub.getVersion().substring(1)); - if (featureSet.getVersion() >= lowerBoundIncl) { - subscribed.add(featureSet); - break; - } + } else if (sub.getVersion().equals("latest")) { + // if version is "latest" + throw new RuntimeException( + String.format( + "Support for latest feature set subscription has not been implemented yet: %s", + sub.toString())); + } else { // If a specific version, match that version alone int version = Integer.parseInt(sub.getVersion()); - if (featureSet.getVersion() == version) { + if (featureSet.getSpec().getVersion() == version) { subscribed.add(featureSet); break; } @@ -74,15 +108,15 @@ public static List getSubscribedFeatureSets( return subscribed; } - public static List parseFeatureSetSpecJsonList(List jsonList) + public static List parseFeatureSetSpecJsonList(List jsonList) throws InvalidProtocolBufferException { - List featureSetSpecs = new ArrayList<>(); + List featureSets = new ArrayList<>(); for (String json : jsonList) { FeatureSetSpec.Builder builder = FeatureSetSpec.newBuilder(); JsonFormat.parser().merge(json, builder); - featureSetSpecs.add(builder.build()); + featureSets.add(FeatureSet.newBuilder().setSpec(builder.build()).build()); } - return featureSetSpecs; + return featureSets; } public static List parseStoreJsonList(List jsonList) diff --git a/ingestion/src/main/java/feast/ingestion/utils/StoreUtil.java b/ingestion/src/main/java/feast/ingestion/utils/StoreUtil.java index 5ceb8bd2f96..7af98fb8f00 100644 --- a/ingestion/src/main/java/feast/ingestion/utils/StoreUtil.java +++ b/ingestion/src/main/java/feast/ingestion/utils/StoreUtil.java @@ -36,6 +36,7 @@ import com.google.cloud.bigquery.TimePartitioning.Type; import com.google.common.collect.ImmutableMap; import feast.core.FeatureSetProto.EntitySpec; +import feast.core.FeatureSetProto.FeatureSet; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.FeatureSetProto.FeatureSpec; import feast.core.StoreProto.Store; @@ -99,7 
+100,7 @@ public class StoreUtil { VALUE_TYPE_TO_STANDARD_SQL_TYPE.put(Enum.BOOL_LIST, StandardSQLTypeName.BOOL); } - public static void setupStore(Store store, FeatureSetSpec featureSetSpec) { + public static void setupStore(Store store, FeatureSet featureSet) { StoreType storeType = store.getType(); switch (storeType) { case REDIS: @@ -107,7 +108,7 @@ public static void setupStore(Store store, FeatureSetSpec featureSetSpec) { break; case BIGQUERY: StoreUtil.setupBigQuery( - featureSetSpec, + featureSet, store.getBigqueryConfig().getProjectId(), store.getBigqueryConfig().getDatasetId(), BigQueryOptions.getDefaultInstance().getService()); @@ -185,17 +186,18 @@ public static TableDefinition createBigQueryTableDefinition(FeatureSetSpec featu *

Refer to protos/feast/core/Store.proto for the derivation of the table name and schema from * a FeatureSetSpec object. * - * @param featureSetSpec FeatureSetSpec object + * @param featureSet FeatureSet object * @param bigqueryProjectId BigQuery project id * @param bigqueryDatasetId BigQuery dataset id * @param bigquery BigQuery service object */ public static void setupBigQuery( - FeatureSetSpec featureSetSpec, + FeatureSet featureSet, String bigqueryProjectId, String bigqueryDatasetId, BigQuery bigquery) { + FeatureSetSpec featureSetSpec = featureSet.getSpec(); // Ensure BigQuery dataset exists. DatasetId datasetId = DatasetId.of(bigqueryProjectId, bigqueryDatasetId); if (bigquery.getDataset(datasetId) == null) { @@ -204,7 +206,9 @@ public static void setupBigQuery( } String tableName = - String.format("%s_v%d", featureSetSpec.getName(), featureSetSpec.getVersion()) + String.format( + "%s_%s_v%d", + featureSetSpec.getProject(), featureSetSpec.getName(), featureSetSpec.getVersion()) .replaceAll("-", "_"); TableId tableId = TableId.of(bigqueryProjectId, datasetId.getDataset(), tableName); @@ -224,7 +228,7 @@ public static void setupBigQuery( tableId.getTable(), datasetId.getDataset(), bigqueryProjectId); - TableDefinition tableDefinition = createBigQueryTableDefinition(featureSetSpec); + TableDefinition tableDefinition = createBigQueryTableDefinition(featureSet.getSpec()); TableInfo tableInfo = TableInfo.of(tableId, tableDefinition); bigquery.create(tableInfo); } diff --git a/ingestion/src/main/java/feast/ingestion/values/FailedElement.java b/ingestion/src/main/java/feast/ingestion/values/FailedElement.java index a7fd162f355..9606c27d190 100644 --- a/ingestion/src/main/java/feast/ingestion/values/FailedElement.java +++ b/ingestion/src/main/java/feast/ingestion/values/FailedElement.java @@ -33,6 +33,9 @@ public abstract class FailedElement { @Nullable public abstract String getJobName(); + @Nullable + public abstract String getProjectName(); + @Nullable public 
abstract String getFeatureSetName(); @@ -59,6 +62,8 @@ public static Builder newBuilder() { public abstract static class Builder { public abstract Builder setTimestamp(Instant timestamp); + public abstract Builder setProjectName(String projectName); + public abstract Builder setFeatureSetName(String featureSetName); public abstract Builder setFeatureSetVersion(String featureSetVersion); diff --git a/ingestion/src/main/java/feast/ingestion/values/FeatureSetSpec.java b/ingestion/src/main/java/feast/ingestion/values/FeatureSet.java similarity index 76% rename from ingestion/src/main/java/feast/ingestion/values/FeatureSetSpec.java rename to ingestion/src/main/java/feast/ingestion/values/FeatureSet.java index 8c6e804a065..bf07bcec966 100644 --- a/ingestion/src/main/java/feast/ingestion/values/FeatureSetSpec.java +++ b/ingestion/src/main/java/feast/ingestion/values/FeatureSet.java @@ -16,6 +16,7 @@ */ package feast.ingestion.values; +import static feast.ingestion.utils.SpecUtil.getFeatureSetReference; import static feast.ingestion.utils.SpecUtil.getFieldsByName; import feast.core.FeatureSetProto; @@ -28,18 +29,18 @@ * *

The use for this class is mainly for validating the Fields in FeatureRow. */ -public class FeatureSetSpec implements Serializable { - private final String id; +public class FeatureSet implements Serializable { + private final String reference; private final Map fields; - public FeatureSetSpec(FeatureSetProto.FeatureSetSpec featureSetSpec) { - this.id = String.format("%s:%d", featureSetSpec.getName(), featureSetSpec.getVersion()); - this.fields = getFieldsByName(featureSetSpec); + public FeatureSet(FeatureSetProto.FeatureSet featureSet) { + this.reference = getFeatureSetReference(featureSet); + this.fields = getFieldsByName(featureSet.getSpec()); } - public String getId() { - return id; + public String getReference() { + return reference; } public Field getField(String fieldName) { diff --git a/ingestion/src/main/java/feast/store/serving/bigquery/GetTableDestination.java b/ingestion/src/main/java/feast/store/serving/bigquery/GetTableDestination.java index cf02713ca91..eb37db94498 100644 --- a/ingestion/src/main/java/feast/store/serving/bigquery/GetTableDestination.java +++ b/ingestion/src/main/java/feast/store/serving/bigquery/GetTableDestination.java @@ -36,6 +36,7 @@ public GetTableDestination(String projectId, String datasetId) { @Override public TableDestination apply(ValueInSingleWindow input) { String[] split = input.getValue().getFeatureSet().split(":"); + String[] splitName = split[0].split("/"); TimePartitioning timePartitioning = new TimePartitioning() @@ -43,7 +44,8 @@ public TableDestination apply(ValueInSingleWindow input) { .setField(FeatureRowToTableRow.getEventTimestampColumn()); return new TableDestination( - String.format("%s:%s.%s_v%s", projectId, datasetId, split[0], split[1]), + String.format( + "%s:%s.%s_%s_v%s", projectId, datasetId, splitName[0], splitName[1], split[1]), String.format("Feast table for %s", input.getValue().getFeatureSet()), timePartitioning); } diff --git 
a/ingestion/src/main/java/feast/store/serving/redis/FeatureRowToRedisMutationDoFn.java b/ingestion/src/main/java/feast/store/serving/redis/FeatureRowToRedisMutationDoFn.java index 9bc503f9870..27cca2ffb2e 100644 --- a/ingestion/src/main/java/feast/store/serving/redis/FeatureRowToRedisMutationDoFn.java +++ b/ingestion/src/main/java/feast/store/serving/redis/FeatureRowToRedisMutationDoFn.java @@ -17,7 +17,7 @@ package feast.store.serving.redis; import feast.core.FeatureSetProto.EntitySpec; -import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.FeatureSetProto.FeatureSet; import feast.storage.RedisProto.RedisKey; import feast.storage.RedisProto.RedisKey.Builder; import feast.store.serving.redis.RedisCustomIO.Method; @@ -34,16 +34,16 @@ public class FeatureRowToRedisMutationDoFn extends DoFn featureSetSpecs; + private Map featureSets; - public FeatureRowToRedisMutationDoFn(Map featureSetSpecs) { - this.featureSetSpecs = featureSetSpecs; + public FeatureRowToRedisMutationDoFn(Map featureSets) { + this.featureSets = featureSets; } private RedisKey getKey(FeatureRow featureRow) { - FeatureSetSpec featureSetSpec = featureSetSpecs.get(featureRow.getFeatureSet()); + FeatureSet featureSet = featureSets.get(featureRow.getFeatureSet()); Set entityNames = - featureSetSpec.getEntitiesList().stream() + featureSet.getSpec().getEntitiesList().stream() .map(EntitySpec::getName) .collect(Collectors.toSet()); diff --git a/ingestion/src/test/java/feast/ingestion/ImportJobTest.java b/ingestion/src/test/java/feast/ingestion/ImportJobTest.java index 4a09bee82ff..290b38dabee 100644 --- a/ingestion/src/test/java/feast/ingestion/ImportJobTest.java +++ b/ingestion/src/test/java/feast/ingestion/ImportJobTest.java @@ -20,6 +20,7 @@ import com.google.protobuf.InvalidProtocolBufferException; import com.google.protobuf.util.JsonFormat; import feast.core.FeatureSetProto.EntitySpec; +import feast.core.FeatureSetProto.FeatureSet; import feast.core.FeatureSetProto.FeatureSetSpec; import 
feast.core.FeatureSetProto.FeatureSpec; import feast.core.SourceProto.KafkaSourceConfig; @@ -113,6 +114,7 @@ public void runPipeline_ShouldWriteToRedisCorrectlyGivenValidSpecAndFeatureRow() FeatureSetSpec.newBuilder() .setName("feature_set") .setVersion(3) + .setProject("myproject") .addEntities( EntitySpec.newBuilder() .setName("entity_id_primary") @@ -143,6 +145,8 @@ public void runPipeline_ShouldWriteToRedisCorrectlyGivenValidSpecAndFeatureRow() .build()) .build(); + FeatureSet featureSet = FeatureSet.newBuilder().setSpec(spec).build(); + Store redis = Store.newBuilder() .setName(StoreType.REDIS.toString()) @@ -151,15 +155,16 @@ public void runPipeline_ShouldWriteToRedisCorrectlyGivenValidSpecAndFeatureRow() RedisConfig.newBuilder().setHost(REDIS_HOST).setPort(REDIS_PORT).build()) .addSubscriptions( Subscription.newBuilder() + .setProject(spec.getProject()) .setName(spec.getName()) .setVersion(String.valueOf(spec.getVersion())) .build()) .build(); ImportOptions options = PipelineOptionsFactory.create().as(ImportOptions.class); - options.setFeatureSetSpecJson( + options.setFeatureSetJson( Collections.singletonList( - JsonFormat.printer().omittingInsignificantWhitespace().print(spec))); + JsonFormat.printer().omittingInsignificantWhitespace().print(featureSet.getSpec()))); options.setStoreJson( Collections.singletonList( JsonFormat.printer().omittingInsignificantWhitespace().print(redis))); @@ -173,8 +178,8 @@ public void runPipeline_ShouldWriteToRedisCorrectlyGivenValidSpecAndFeatureRow() IntStream.range(0, IMPORT_JOB_SAMPLE_FEATURE_ROW_SIZE) .forEach( i -> { - FeatureRow randomRow = TestUtil.createRandomFeatureRow(spec); - RedisKey redisKey = TestUtil.createRedisKey(spec, randomRow); + FeatureRow randomRow = TestUtil.createRandomFeatureRow(featureSet); + RedisKey redisKey = TestUtil.createRedisKey(featureSet, randomRow); input.add(randomRow); expected.put(redisKey, randomRow); }); diff --git 
a/ingestion/src/test/java/feast/ingestion/transform/ValidateFeatureRowsTest.java b/ingestion/src/test/java/feast/ingestion/transform/ValidateFeatureRowsTest.java index 7f2d7176882..d129c15661f 100644 --- a/ingestion/src/test/java/feast/ingestion/transform/ValidateFeatureRowsTest.java +++ b/ingestion/src/test/java/feast/ingestion/transform/ValidateFeatureRowsTest.java @@ -19,6 +19,7 @@ import static org.junit.Assert.*; import feast.core.FeatureSetProto.EntitySpec; +import feast.core.FeatureSetProto.FeatureSet; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.FeatureSetProto.FeatureSpec; import feast.ingestion.values.FailedElement; @@ -40,6 +41,7 @@ import org.junit.Test; public class ValidateFeatureRowsTest { + @Rule public transient TestPipeline p = TestPipeline.create(); private static final TupleTag SUCCESS_TAG = new TupleTag() {}; @@ -48,55 +50,73 @@ public class ValidateFeatureRowsTest { @Test public void shouldWriteSuccessAndFailureTagsCorrectly() { - FeatureSetSpec fs1 = - FeatureSetSpec.newBuilder() - .setName("feature_set") - .setVersion(1) - .addEntities( - EntitySpec.newBuilder() - .setName("entity_id_primary") - .setValueType(Enum.INT32) - .build()) - .addEntities( - EntitySpec.newBuilder() - .setName("entity_id_secondary") - .setValueType(Enum.STRING) - .build()) - .addFeatures( - FeatureSpec.newBuilder().setName("feature_1").setValueType(Enum.STRING).build()) - .addFeatures( - FeatureSpec.newBuilder().setName("feature_2").setValueType(Enum.INT64).build()) + FeatureSet fs1 = + FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder() + .setName("feature_set") + .setVersion(1) + .setProject("myproject") + .addEntities( + EntitySpec.newBuilder() + .setName("entity_id_primary") + .setValueType(Enum.INT32) + .build()) + .addEntities( + EntitySpec.newBuilder() + .setName("entity_id_secondary") + .setValueType(Enum.STRING) + .build()) + .addFeatures( + FeatureSpec.newBuilder() + .setName("feature_1") + .setValueType(Enum.STRING) + 
.build()) + .addFeatures( + FeatureSpec.newBuilder() + .setName("feature_2") + .setValueType(Enum.INT64) + .build())) .build(); - FeatureSetSpec fs2 = - FeatureSetSpec.newBuilder() - .setName("feature_set") - .setVersion(2) - .addEntities( - EntitySpec.newBuilder() - .setName("entity_id_primary") - .setValueType(Enum.INT32) - .build()) - .addEntities( - EntitySpec.newBuilder() - .setName("entity_id_secondary") - .setValueType(Enum.STRING) - .build()) - .addFeatures( - FeatureSpec.newBuilder().setName("feature_1").setValueType(Enum.STRING).build()) - .addFeatures( - FeatureSpec.newBuilder().setName("feature_2").setValueType(Enum.INT64).build()) + FeatureSet fs2 = + FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder() + .setName("feature_set") + .setVersion(2) + .setProject("myproject") + .addEntities( + EntitySpec.newBuilder() + .setName("entity_id_primary") + .setValueType(Enum.INT32) + .build()) + .addEntities( + EntitySpec.newBuilder() + .setName("entity_id_secondary") + .setValueType(Enum.STRING) + .build()) + .addFeatures( + FeatureSpec.newBuilder() + .setName("feature_1") + .setValueType(Enum.STRING) + .build()) + .addFeatures( + FeatureSpec.newBuilder() + .setName("feature_2") + .setValueType(Enum.INT64) + .build())) .build(); - Map featureSetSpecs = new HashMap<>(); - featureSetSpecs.put("feature_set:1", fs1); - featureSetSpecs.put("feature_set:2", fs2); + Map featureSets = new HashMap<>(); + featureSets.put("myproject/feature_set:1", fs1); + featureSets.put("myproject/feature_set:2", fs2); List input = new ArrayList<>(); List expected = new ArrayList<>(); - for (FeatureSetSpec featureSetSpec : featureSetSpecs.values()) { - FeatureRow randomRow = TestUtil.createRandomFeatureRow(featureSetSpec); + for (FeatureSet featureSet : featureSets.values()) { + FeatureRow randomRow = TestUtil.createRandomFeatureRow(featureSet); input.add(randomRow); expected.add(randomRow); } @@ -110,7 +130,7 @@ public void shouldWriteSuccessAndFailureTagsCorrectly() { 
ValidateFeatureRows.newBuilder() .setFailureTag(FAILURE_TAG) .setSuccessTag(SUCCESS_TAG) - .setFeatureSetSpecs(featureSetSpecs) + .setFeatureSets(featureSets) .build()); PAssert.that(output.get(SUCCESS_TAG)).containsInAnyOrder(expected); diff --git a/ingestion/src/test/java/feast/ingestion/util/StoreUtilTest.java b/ingestion/src/test/java/feast/ingestion/util/StoreUtilTest.java index 1579cc7a6bf..4e2297e405d 100644 --- a/ingestion/src/test/java/feast/ingestion/util/StoreUtilTest.java +++ b/ingestion/src/test/java/feast/ingestion/util/StoreUtilTest.java @@ -37,6 +37,7 @@ import com.google.cloud.bigquery.Schema; import com.google.cloud.bigquery.StandardSQLTypeName; import feast.core.FeatureSetProto.EntitySpec; +import feast.core.FeatureSetProto.FeatureSet; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.core.FeatureSetProto.FeatureSpec; import feast.ingestion.utils.StoreUtil; @@ -49,16 +50,20 @@ public class StoreUtilTest { @Test public void setupBigQuery_shouldCreateTable_givenValidFeatureSetSpec() { - FeatureSetSpec featureSetSpec = - FeatureSetSpec.newBuilder() - .setName("feature_set_1") - .setVersion(1) - .addEntities(EntitySpec.newBuilder().setName("entity_1").setValueType(INT32)) - .addFeatures(FeatureSpec.newBuilder().setName("feature_1").setValueType(INT32)) - .addFeatures(FeatureSpec.newBuilder().setName("feature_2").setValueType(STRING_LIST)) + FeatureSet featureSet = + FeatureSet.newBuilder() + .setSpec( + FeatureSetSpec.newBuilder() + .setName("feature_set_1") + .setVersion(1) + .setProject("feast-project") + .addEntities(EntitySpec.newBuilder().setName("entity_1").setValueType(INT32)) + .addFeatures(FeatureSpec.newBuilder().setName("feature_1").setValueType(INT32)) + .addFeatures( + FeatureSpec.newBuilder().setName("feature_2").setValueType(STRING_LIST))) .build(); BigQuery mockedBigquery = Mockito.mock(BigQuery.class); - StoreUtil.setupBigQuery(featureSetSpec, "project-1", "dataset_1", mockedBigquery); + 
StoreUtil.setupBigQuery(featureSet, "project-1", "dataset_1", mockedBigquery); } @Test diff --git a/ingestion/src/test/java/feast/test/TestUtil.java b/ingestion/src/test/java/feast/test/TestUtil.java index d66ef4a97d9..5c16d7e9e31 100644 --- a/ingestion/src/test/java/feast/test/TestUtil.java +++ b/ingestion/src/test/java/feast/test/TestUtil.java @@ -16,9 +16,11 @@ */ package feast.test; +import static feast.ingestion.utils.SpecUtil.getFeatureSetReference; + import com.google.protobuf.ByteString; import com.google.protobuf.util.Timestamps; -import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.FeatureSetProto.FeatureSet; import feast.ingestion.transform.WriteToStore; import feast.storage.RedisProto.RedisKey; import feast.types.FeatureRowProto.FeatureRow; @@ -166,31 +168,33 @@ public static void publishFeatureRowsToKafka( /** * Create a Feature Row with random value according to the FeatureSetSpec * - *

See {@link #createRandomFeatureRow(FeatureSetSpec, int)} + *

See {@link #createRandomFeatureRow(FeatureSet, int)} */ - public static FeatureRow createRandomFeatureRow(FeatureSetSpec spec) { + public static FeatureRow createRandomFeatureRow(FeatureSet featureSet) { ThreadLocalRandom random = ThreadLocalRandom.current(); int randomStringSizeMaxSize = 12; - return createRandomFeatureRow(spec, random.nextInt(0, randomStringSizeMaxSize) + 4); + return createRandomFeatureRow(featureSet, random.nextInt(0, randomStringSizeMaxSize) + 4); } /** - * Create a Feature Row with random value according to the FeatureSetSpec. + * Create a Feature Row with random value according to the FeatureSet. * *

The Feature Row created contains fields according to the entities and features defined in - * FeatureSetSpec, matching the value type of the field, with randomized value for testing. + * FeatureSet, matching the value type of the field, with randomized value for testing. * - * @param spec {@link FeatureSetSpec} + * @param featureSet {@link FeatureSet} * @param randomStringSize number of characters for the generated random string * @return {@link FeatureRow} */ - public static FeatureRow createRandomFeatureRow(FeatureSetSpec spec, int randomStringSize) { + public static FeatureRow createRandomFeatureRow(FeatureSet featureSet, int randomStringSize) { Builder builder = FeatureRow.newBuilder() - .setFeatureSet(spec.getName() + ":" + spec.getVersion()) + .setFeatureSet(getFeatureSetReference(featureSet)) .setEventTimestamp(Timestamps.fromMillis(System.currentTimeMillis())); - spec.getEntitiesList() + featureSet + .getSpec() + .getEntitiesList() .forEach( field -> { builder.addFields( @@ -200,7 +204,9 @@ public static FeatureRow createRandomFeatureRow(FeatureSetSpec spec, int randomS .build()); }); - spec.getFeaturesList() + featureSet + .getSpec() + .getFeaturesList() .forEach( field -> { builder.addFields( @@ -284,19 +290,21 @@ public static Value createRandomValue(ValueType.Enum type, int randomStringSize) } /** - * Create {@link RedisKey} from {@link FeatureSetSpec} and {@link FeatureRow}. + * Create {@link RedisKey} from {@link FeatureSet} and {@link FeatureRow}. * *

The entities in the created {@link RedisKey} will contain the value with matching field name * in the {@link FeatureRow} * - * @param spec {@link FeatureSetSpec} - * @param row {@link FeatureSetSpec} + * @param featureSet {@link FeatureSet} + * @param row {@link FeatureSet} * @return {@link RedisKey} */ - public static RedisKey createRedisKey(FeatureSetSpec spec, FeatureRow row) { + public static RedisKey createRedisKey(FeatureSet featureSet, FeatureRow row) { RedisKey.Builder builder = - RedisKey.newBuilder().setFeatureSet(spec.getName() + ":" + spec.getVersion()); - spec.getEntitiesList() + RedisKey.newBuilder().setFeatureSet(getFeatureSetReference(featureSet)); + featureSet + .getSpec() + .getEntitiesList() .forEach( entityField -> row.getFieldsList().stream() diff --git a/ingestion/src/test/resources/import-job-specs/invalid-empty.yaml b/ingestion/src/test/resources/import-job-specs/invalid-empty.yaml deleted file mode 100644 index 7a1bbc96078..00000000000 --- a/ingestion/src/test/resources/import-job-specs/invalid-empty.yaml +++ /dev/null @@ -1,2 +0,0 @@ - - \ No newline at end of file diff --git a/ingestion/src/test/resources/import-job-specs/invalid-source-spec-1.yaml b/ingestion/src/test/resources/import-job-specs/invalid-source-spec-1.yaml deleted file mode 100644 index 26c1ff28e51..00000000000 --- a/ingestion/src/test/resources/import-job-specs/invalid-source-spec-1.yaml +++ /dev/null @@ -1,34 +0,0 @@ -sourceSpec: - type: NON_EXISTENT_TYPE - options: - bootstrapServers: localhost:9092 - topics: topic1 - -entitySpec: - name: entity1 - description: description for entity1 - -featureSpecs: -- id: entity1.feature1 - name: feature1 - valueType: INT64 - entity: entity1 -- id: entity1.feature2 - name: feature2 - valueType: DOUBLE - entity: entity1 -- id: entity1.feature3 - name: feature3 - valueType: TIMESTAMP - entity: entity1 -- id: entity1.feature4 - name: feature4 - valueType: DOUBLE - entity: entity1 - -sinkStorageSpec: - id: storage1 - type: BIGQUERY - 
options: - datasetId: dataset1 - projectId: project1 diff --git a/ingestion/src/test/resources/import-job-specs/valid-1.yaml b/ingestion/src/test/resources/import-job-specs/valid-1.yaml deleted file mode 100644 index 80eaa0c5e81..00000000000 --- a/ingestion/src/test/resources/import-job-specs/valid-1.yaml +++ /dev/null @@ -1,34 +0,0 @@ -sourceSpec: - type: KAFKA - options: - bootstrapServers: localhost:9092 - topics: topic1 - -entitySpec: - name: entity1 - description: description for entity1 - -featureSpecs: -- id: entity1.feature1 - name: feature1 - valueType: INT64 - entity: entity1 -- id: entity1.feature2 - name: feature2 - valueType: DOUBLE - entity: entity1 -- id: entity1.feature3 - name: feature3 - valueType: TIMESTAMP - entity: entity1 -- id: entity1.feature4 - name: feature4 - valueType: DOUBLE - entity: entity1 - -sinkStorageSpec: - id: storage1 - type: BIGQUERY - options: - datasetId: dataset1 - projectId: project1 diff --git a/ingestion/src/test/resources/import-job-specs/valid-2.yaml b/ingestion/src/test/resources/import-job-specs/valid-2.yaml deleted file mode 100644 index cc8762d424f..00000000000 --- a/ingestion/src/test/resources/import-job-specs/valid-2.yaml +++ /dev/null @@ -1,33 +0,0 @@ -sourceSpec: - type: KAFKA - options: - bootstrapServers: localhost:9092 - topics: topic1 - -entitySpec: - name: entity1 - description: description for entity1 - -featureSpecs: -- id: entity1.feature1 - name: feature1 - valueType: INT64 - entity: entity1 -- id: entity1.feature2 - name: feature2 - valueType: DOUBLE - entity: entity1 -- id: entity1.feature3 - name: feature3 - valueType: TIMESTAMP - entity: entity1 -- id: entity1.feature4 - name: feature4 - valueType: DOUBLE - entity: entity1 - -sinkStorageSpec: - id: storage1 - type: REDIS - options: - host: localhost diff --git a/ingestion/src/test/resources/import-specs/csv_to_store1.yaml b/ingestion/src/test/resources/import-specs/csv_to_store1.yaml deleted file mode 100644 index 57c8d4be5f9..00000000000 --- 
a/ingestion/src/test/resources/import-specs/csv_to_store1.yaml +++ /dev/null @@ -1,16 +0,0 @@ ---- -type: file -options: - format: csv - path: # to be overwritten in tests -entities: - - testEntity -schema: - entityIdColumn: id - timestampValue: 2018-09-25T00:00:00.000Z - fields: - - name: timestamp - - name: id - - featureId: testEntity.testInt32 - - featureId: testEntity.testString - diff --git a/ingestion/src/test/resources/specs/importJobSpecs.yaml b/ingestion/src/test/resources/specs/importJobSpecs.yaml deleted file mode 100644 index 6f52cb30aae..00000000000 --- a/ingestion/src/test/resources/specs/importJobSpecs.yaml +++ /dev/null @@ -1,44 +0,0 @@ -sourceSpec: {} -sinkStorageSpec: - id: TEST_SERVING - type: serving.mock - options: {} -errorsStorageSpec: - id: errors - type: errors.mock - options: {} -entitySpec: - name: testEntity - description: This is a test entity - tags: [] -featureSpecs: - - id: testEntity.testInt64 - entity: testEntity - name: testInt64 - owner: feast@example.com - description: This is test feature of long - uri: https://example.com/ - valueType: INT64 - tags: [] - options: - nonsense: "I should be safely ignored" - - id: testEntity.testInt32 - entity: testEntity - name: testInt32 - owner: feast@example.com - description: This is test feature of type integer - uri: https://example.com/ - valueType: INT32 - tags: [] - options: - nonsense: "I should be safely ignored" - - id: testEntity.testString - entity: testEntity - name: testString - owner: feast@example.com - description: This is test feature of type string - uri: https://example.com/ - valueType: STRING - tags: [] - options: - nonsense: "I should be safely ignored" diff --git a/protos/feast/core/CoreService.proto b/protos/feast/core/CoreService.proto index 9a9eaa64fdd..35b96e17895 100644 --- a/protos/feast/core/CoreService.proto +++ b/protos/feast/core/CoreService.proto @@ -58,11 +58,28 @@ service CoreService { // // If the changes are valid, core will return the given store 
configuration in response, and // start or update the necessary feature population jobs for the updated store. - rpc UpdateStore(UpdateStoreRequest) returns (UpdateStoreResponse); + rpc UpdateStore (UpdateStoreRequest) returns (UpdateStoreResponse); + + // Creates a project. Projects serve as namespaces within which resources like features will be + // created. Both feature set names as well as field names must be unique within a project. Project + // names themselves must be globally unique. + rpc CreateProject (CreateProjectRequest) returns (CreateProjectResponse); + + // Archives a project. Archived projects will continue to exist and function, but won't be visible + // through the Core API. Any existing ingestion or serving requests will continue to function, + // but will result in warning messages being logged. It is not possible to unarchive a project + // through the Core API + rpc ArchiveProject (ArchiveProjectRequest) returns (ArchiveProjectResponse); + + // Lists all projects active projects. + rpc ListProjects (ListProjectsRequest) returns (ListProjectsResponse); } // Request for a single feature set message GetFeatureSetRequest { + // Name of project the feature set belongs to (required) + string project = 3; + // Name of feature set (required). string name = 1; @@ -77,21 +94,35 @@ message GetFeatureSetResponse { // Retrieves details for all versions of a specific feature set message ListFeatureSetsRequest { + Filter filter = 1; + message Filter { - // Name of the desired feature set. Valid regex strings are allowed. + // Name of project that the feature sets belongs to. This can be one of + // - [project_name] + // - * + // If an asterisk is provided, filtering on projects will be disabled. All projects will + // be matched. It is NOT possible to provide an asterisk with a string in order to do + // pattern matching. + string project = 3; + + // Name of the desired feature set. Asterisks can be used as wildcards in the name. 
+ // Matching on names is only permitted if a specific project is defined. It is disallowed + // If the project name is set to "*" // e.g. - // - .* can be used to match all feature sets - // - my-project-.* can be used to match all features prefixed by "my-project" + // - * can be used to match all feature sets + // - my-feature-set* can be used to match all features prefixed by "my-feature-set" + // - my-feature-set-6 can be used to select a single feature set string feature_set_name = 1; - // Version of the desired feature set. Either a number or valid expression can be provided. - // e.g. - // - 1 will match version 1 exactly - // - >=1 will match all versions greater or equal to 1 - // - <10 will match all versions less than 10 + + + // Versions of the given feature sets that will be returned. + // Valid options for version: + // "latest": only the latest version is returned. + // "*": Subscribe to all versions + // [version number]: pin to a specific version. Project and feature set name must be + // explicitly defined if a specific version is pinned. 
string feature_set_version = 2; } - - Filter filter = 1; } message ListFeatureSetsResponse { @@ -133,7 +164,8 @@ message ApplyFeatureSetResponse { Status status = 2; } -message GetFeastCoreVersionRequest {} +message GetFeastCoreVersionRequest { +} message GetFeastCoreVersionResponse { string version = 1; @@ -153,4 +185,34 @@ message UpdateStoreResponse { } feast.core.Store store = 1; Status status = 2; +} + +// Request to create a project +message CreateProjectRequest { + // Name of project (required) + string name = 1; +} + +// Response for creation of a project +message CreateProjectResponse { +} + +// Request for the archival of a project +message ArchiveProjectRequest { + // Name of project to be archived + string name = 1; +} + +// Response for archival of a project +message ArchiveProjectResponse { +} + +// Request for listing of projects +message ListProjectsRequest { +} + +// Response for listing of projects +message ListProjectsResponse { + // List of project names (archived projects are filtered out) + repeated string projects = 1; } \ No newline at end of file diff --git a/protos/feast/core/FeatureSet.proto b/protos/feast/core/FeatureSet.proto index a5adf139bff..910cc375f7b 100644 --- a/protos/feast/core/FeatureSet.proto +++ b/protos/feast/core/FeatureSet.proto @@ -15,9 +15,7 @@ // syntax = "proto3"; - package feast.core; - option java_package = "feast.core"; option java_outer_classname = "FeatureSetProto"; option go_package = "github.com/gojek/feast/sdk/go/protos/feast/core"; @@ -30,16 +28,18 @@ import "google/protobuf/timestamp.proto"; message FeatureSet { // User-specified specifications of this feature set. FeatureSetSpec spec = 1; - // System-populated metadata for this feature set. FeatureSetMeta meta = 2; } message FeatureSetSpec { - // Name of the featureSet. Must be unique. + // Name of project that this feature set belongs to. + string project = 7; + + // Name of the feature set. Must be unique. string name = 1; - // FeatureSet version. 
+ // Feature set version. int32 version = 2; // List of entities contained within this featureSet. @@ -51,8 +51,8 @@ message FeatureSetSpec { // List of features contained within this featureSet. repeated FeatureSpec features = 4; - // Features in this feature set will only be retrieved if they are found - // after [time - max_age]. Missing or older feature values will be returned + // Features in this feature set will only be retrieved if they are found + // after [time - max_age]. Missing or older feature values will be returned // as nulls and indicated to end user google.protobuf.Duration max_age = 5; @@ -77,7 +77,6 @@ message FeatureSpec { feast.types.ValueType.Enum value_type = 2; } - message FeatureSetMeta { // Created timestamp of this specific feature set. google.protobuf.Timestamp created_timestamp = 1; @@ -95,4 +94,4 @@ enum FeatureSetStatus { STATUS_INVALID = 0; STATUS_PENDING = 1; STATUS_READY = 2; -} \ No newline at end of file +} diff --git a/protos/feast/core/Store.proto b/protos/feast/core/Store.proto index e1b8c581a38..bbb4ed80001 100644 --- a/protos/feast/core/Store.proto +++ b/protos/feast/core/Store.proto @@ -123,16 +123,30 @@ message Store { } message Subscription { - // Name of featureSet to subscribe to. This field supports any valid basic POSIX regex, - // e.g. customer_.* or .* - // https://www.regular-expressions.info/posix.html + // Name of project that the feature sets belongs to. This can be one of + // - [project_name] + // - * + // If an asterisk is provided, filtering on projects will be disabled. All projects will + // be matched. It is NOT possible to provide an asterisk with a string in order to do + // pattern matching. + string project = 3; + + + // Name of the desired feature set. Asterisks can be used as wildcards in the name. + // Matching on names is only permitted if a specific project is defined. It is disallowed + // If the project name is set to "*" + // e.g. 
+ // - * can be used to match all feature sets + // - my-feature-set* can be used to match all features prefixed by "my-feature-set" + // - my-feature-set-6 can be used to select a single feature set string name = 1; - // Versions of the given featureSet that will be ingested into this store. + // Versions of the given feature sets that will be returned. // Valid options for version: - // latest: only subscribe to latest version of feature set - // [version number]: pin to a specific version - // >[version number]: subscribe to all versions larger than or equal to [version number] + // "latest": only the latest version is returned. + // "*": Subscribe to all versions + // [version number]: pin to a specific version. Project and feature set name must be + // explicitly defined if a specific version is pinned. string version = 2; } diff --git a/protos/feast/serving/ServingService.proto b/protos/feast/serving/ServingService.proto index fe896253a94..5145670ec9a 100644 --- a/protos/feast/serving/ServingService.proto +++ b/protos/feast/serving/ServingService.proto @@ -62,15 +62,15 @@ message GetFeastServingInfoResponse { string job_staging_location = 10; } -message FeatureSetRequest { - // Feature set name - string name = 1; +message FeatureReference { + // Project name + string project = 1; - // Feature set version - int32 version = 2; + // Feature name + string name = 2; - // Features that should be retrieved from this feature set - repeated string feature_names = 3; + // Feature version + int32 version = 3; // The features will be retrieved if: // entity_timestamp - max_age <= event_timestamp <= entity_timestamp @@ -81,8 +81,8 @@ message FeatureSetRequest { } message GetOnlineFeaturesRequest { - // List of feature sets and their features that are being retrieved - repeated FeatureSetRequest feature_sets = 1; + // List of features that are being retrieved + repeated FeatureReference features = 4; // List of entity rows, containing entity id and timestamp data. 
// Used during retrieval of feature rows and for joining feature @@ -104,8 +104,8 @@ message GetOnlineFeaturesRequest { } message GetBatchFeaturesRequest { - // List of feature sets and their features that are being retrieved. - repeated FeatureSetRequest feature_sets = 1; + // List of features that are being retrieved + repeated FeatureReference features = 3; // Source of the entity dataset containing the timestamps and entity keys to retrieve // features for. diff --git a/sdk/go/README.md b/sdk/go/README.md new file mode 100644 index 00000000000..6084f909931 --- /dev/null +++ b/sdk/go/README.md @@ -0,0 +1,49 @@ +# Feast Golang SDK + +The Feast golang SDK currently only supports retrieval from online stores. + +## Quickstart +```{go} +import ( + "context" + feast "github.com/gojek/feast/sdk/go" +) + +func main() { + cli, err := feast.NewGrpcClient("localhost", 6565) + if err != nil { + panic(err) + } + + ctx := context.Background() + req := feast.OnlineFeaturesRequest{ + Features: []string{"my_project_1/feature1:1", "my_project_2/feature1:1", "my_project_4/feature3", "feature2:2", "feature2"}, + Entities: []feast.Row{ + {"entity1": feast.Int64Val(1), "entity2": feast.StrVal("bob")}, + {"entity1": feast.Int64Val(1), "entity2": feast.StrVal("annie")}, + {"entity1": feast.Int64Val(1), "entity2": feast.StrVal("jane")}, + }, + Project: "my_project_3", + } + + resp, err := cli.GetOnlineFeatures(ctx, &req) + if err != nil { + panic(err) + } + + // returns a list of rows (map[string]featureValue) + out := resp.Rows() +} + +``` + +If all features retrieved are of a single type, Feast provides convenience functions to retrieve your features as a vector of feature values: +```{go} +arr, err := resp.Int64Arrays( + []string{"my_project_1/feature1:1", + "my_project_2/feature1:1", + "my_project_4/feature3", + "feature2:2", + "feature2"}, // order of features + []int64{1,2,3,4,5}) // fillNa values +``` diff --git a/sdk/go/go.mod b/sdk/go/go.mod index 7c029da1095..0def759a4f2 
100644 --- a/sdk/go/go.mod +++ b/sdk/go/go.mod @@ -4,7 +4,7 @@ go 1.13 require ( github.com/golang/protobuf v1.3.2 - github.com/google/go-cmp v0.3.0 + github.com/google/go-cmp v0.3.1 github.com/opentracing/opentracing-go v1.1.0 github.com/stretchr/testify v1.4.0 // indirect go.opencensus.io v0.22.1 diff --git a/sdk/go/go.sum b/sdk/go/go.sum index 56df48673e5..04cf3d8d7f3 100644 --- a/sdk/go/go.sum +++ b/sdk/go/go.sum @@ -16,6 +16,8 @@ github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0 h1:crn/baboCvb5fXaQ0IJ1SGTsTVrWpDsCWC8EGETZijY= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1 h1:Xye71clBPdm5HgqGwUkwhbynsUJZhDbS20FvLhQ2izg= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/opentracing/opentracing-go v1.1.0 h1:pWlfV3Bxv7k65HYwkikxat0+s3pV4bsqf19k25Ur8rU= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= diff --git a/sdk/go/protos/feast/core/CoreService.pb.go b/sdk/go/protos/feast/core/CoreService.pb.go index e0af92433b7..45ad9ed79a4 100644 --- a/sdk/go/protos/feast/core/CoreService.pb.go +++ b/sdk/go/protos/feast/core/CoreService.pb.go @@ -84,6 +84,8 @@ func (UpdateStoreResponse_Status) EnumDescriptor() ([]byte, []int) { // Request for a single feature set type GetFeatureSetRequest struct { + // Name of project the feature set belongs to (required) + Project string `protobuf:"bytes,3,opt,name=project,proto3" json:"project,omitempty"` // Name of feature set (required). Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` // Version of feature set (optional). If omitted then latest feature set will be returned. 
@@ -118,6 +120,13 @@ func (m *GetFeatureSetRequest) XXX_DiscardUnknown() { var xxx_messageInfo_GetFeatureSetRequest proto.InternalMessageInfo +func (m *GetFeatureSetRequest) GetProject() string { + if m != nil { + return m.Project + } + return "" +} + func (m *GetFeatureSetRequest) GetName() string { if m != nil { return m.Name @@ -134,10 +143,10 @@ func (m *GetFeatureSetRequest) GetVersion() int32 { // Response containing a single feature set type GetFeatureSetResponse struct { - FeatureSet *FeatureSetSpec `protobuf:"bytes,1,opt,name=feature_set,json=featureSet,proto3" json:"feature_set,omitempty"` - XXX_NoUnkeyedLiteral struct{} `json:"-"` - XXX_unrecognized []byte `json:"-"` - XXX_sizecache int32 `json:"-"` + FeatureSet *FeatureSet `protobuf:"bytes,1,opt,name=feature_set,json=featureSet,proto3" json:"feature_set,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` } func (m *GetFeatureSetResponse) Reset() { *m = GetFeatureSetResponse{} } @@ -165,7 +174,7 @@ func (m *GetFeatureSetResponse) XXX_DiscardUnknown() { var xxx_messageInfo_GetFeatureSetResponse proto.InternalMessageInfo -func (m *GetFeatureSetResponse) GetFeatureSet() *FeatureSetSpec { +func (m *GetFeatureSetResponse) GetFeatureSet() *FeatureSet { if m != nil { return m.FeatureSet } @@ -213,16 +222,27 @@ func (m *ListFeatureSetsRequest) GetFilter() *ListFeatureSetsRequest_Filter { } type ListFeatureSetsRequest_Filter struct { - // Name of the desired feature set. Valid regex strings are allowed. + // Name of project that the feature sets belongs to. This can be one of + // - [project_name] + // - * + // If an asterisk is provided, filtering on projects will be disabled. All projects will + // be matched. It is NOT possible to provide an asterisk with a string in order to do + // pattern matching. + Project string `protobuf:"bytes,3,opt,name=project,proto3" json:"project,omitempty"` + // Name of the desired feature set. 
Asterisks can be used as wildcards in the name. + // Matching on names is only permitted if a specific project is defined. It is disallowed + // If the project name is set to "*" // e.g. - // - .* can be used to match all feature sets - // - my-project-.* can be used to match all features prefixed by "my-project" + // - * can be used to match all feature sets + // - my-feature-set* can be used to match all features prefixed by "my-feature-set" + // - my-feature-set-6 can be used to select a single feature set FeatureSetName string `protobuf:"bytes,1,opt,name=feature_set_name,json=featureSetName,proto3" json:"feature_set_name,omitempty"` - // Version of the desired feature set. Either a number or valid expression can be provided. - // e.g. - // - 1 will match version 1 exactly - // - >=1 will match all versions greater or equal to 1 - // - <10 will match all versions less than 10 + // Versions of the given feature sets that will be returned. + // Valid options for version: + // "latest": only the latest version is returned. + // "*": Subscribe to all versions + // [version number]: pin to a specific version. Project and feature set name must be + // explicitly defined if a specific version is pinned. 
FeatureSetVersion string `protobuf:"bytes,2,opt,name=feature_set_version,json=featureSetVersion,proto3" json:"feature_set_version,omitempty"` XXX_NoUnkeyedLiteral struct{} `json:"-"` XXX_unrecognized []byte `json:"-"` @@ -254,6 +274,13 @@ func (m *ListFeatureSetsRequest_Filter) XXX_DiscardUnknown() { var xxx_messageInfo_ListFeatureSetsRequest_Filter proto.InternalMessageInfo +func (m *ListFeatureSetsRequest_Filter) GetProject() string { + if m != nil { + return m.Project + } + return "" +} + func (m *ListFeatureSetsRequest_Filter) GetFeatureSetName() string { if m != nil { return m.FeatureSetName @@ -269,10 +296,10 @@ func (m *ListFeatureSetsRequest_Filter) GetFeatureSetVersion() string { } type ListFeatureSetsResponse struct { - FeatureSets []*FeatureSetSpec `protobuf:"bytes,1,rep,name=feature_sets,json=featureSets,proto3" json:"feature_sets,omitempty"` - XXX_NoUnkeyedLiteral struct{} `json:"-"` - XXX_unrecognized []byte `json:"-"` - XXX_sizecache int32 `json:"-"` + FeatureSets []*FeatureSet `protobuf:"bytes,1,rep,name=feature_sets,json=featureSets,proto3" json:"feature_sets,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` } func (m *ListFeatureSetsResponse) Reset() { *m = ListFeatureSetsResponse{} } @@ -300,7 +327,7 @@ func (m *ListFeatureSetsResponse) XXX_DiscardUnknown() { var xxx_messageInfo_ListFeatureSetsResponse proto.InternalMessageInfo -func (m *ListFeatureSetsResponse) GetFeatureSets() []*FeatureSetSpec { +func (m *ListFeatureSetsResponse) GetFeatureSets() []*FeatureSet { if m != nil { return m.FeatureSets } @@ -427,10 +454,10 @@ func (m *ListStoresResponse) GetStore() []*Store { type ApplyFeatureSetRequest struct { // Feature set version and source will be ignored - FeatureSet *FeatureSetSpec `protobuf:"bytes,1,opt,name=feature_set,json=featureSet,proto3" json:"feature_set,omitempty"` - XXX_NoUnkeyedLiteral struct{} `json:"-"` - XXX_unrecognized []byte `json:"-"` - XXX_sizecache 
int32 `json:"-"` + FeatureSet *FeatureSet `protobuf:"bytes,1,opt,name=feature_set,json=featureSet,proto3" json:"feature_set,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` } func (m *ApplyFeatureSetRequest) Reset() { *m = ApplyFeatureSetRequest{} } @@ -458,7 +485,7 @@ func (m *ApplyFeatureSetRequest) XXX_DiscardUnknown() { var xxx_messageInfo_ApplyFeatureSetRequest proto.InternalMessageInfo -func (m *ApplyFeatureSetRequest) GetFeatureSet() *FeatureSetSpec { +func (m *ApplyFeatureSetRequest) GetFeatureSet() *FeatureSet { if m != nil { return m.FeatureSet } @@ -467,7 +494,7 @@ func (m *ApplyFeatureSetRequest) GetFeatureSet() *FeatureSetSpec { type ApplyFeatureSetResponse struct { // Feature set response has been enriched with version and source information - FeatureSet *FeatureSetSpec `protobuf:"bytes,1,opt,name=feature_set,json=featureSet,proto3" json:"feature_set,omitempty"` + FeatureSet *FeatureSet `protobuf:"bytes,1,opt,name=feature_set,json=featureSet,proto3" json:"feature_set,omitempty"` Status ApplyFeatureSetResponse_Status `protobuf:"varint,2,opt,name=status,proto3,enum=feast.core.ApplyFeatureSetResponse_Status" json:"status,omitempty"` XXX_NoUnkeyedLiteral struct{} `json:"-"` XXX_unrecognized []byte `json:"-"` @@ -499,7 +526,7 @@ func (m *ApplyFeatureSetResponse) XXX_DiscardUnknown() { var xxx_messageInfo_ApplyFeatureSetResponse proto.InternalMessageInfo -func (m *ApplyFeatureSetResponse) GetFeatureSet() *FeatureSetSpec { +func (m *ApplyFeatureSetResponse) GetFeatureSet() *FeatureSet { if m != nil { return m.FeatureSet } @@ -669,6 +696,225 @@ func (m *UpdateStoreResponse) GetStatus() UpdateStoreResponse_Status { return UpdateStoreResponse_NO_CHANGE } +// Request to create a project +type CreateProjectRequest struct { + // Name of project (required) + Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + 
XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *CreateProjectRequest) Reset() { *m = CreateProjectRequest{} } +func (m *CreateProjectRequest) String() string { return proto.CompactTextString(m) } +func (*CreateProjectRequest) ProtoMessage() {} +func (*CreateProjectRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_d9be266444105411, []int{12} +} + +func (m *CreateProjectRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_CreateProjectRequest.Unmarshal(m, b) +} +func (m *CreateProjectRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_CreateProjectRequest.Marshal(b, m, deterministic) +} +func (m *CreateProjectRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_CreateProjectRequest.Merge(m, src) +} +func (m *CreateProjectRequest) XXX_Size() int { + return xxx_messageInfo_CreateProjectRequest.Size(m) +} +func (m *CreateProjectRequest) XXX_DiscardUnknown() { + xxx_messageInfo_CreateProjectRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_CreateProjectRequest proto.InternalMessageInfo + +func (m *CreateProjectRequest) GetName() string { + if m != nil { + return m.Name + } + return "" +} + +// Response for creation of a project +type CreateProjectResponse struct { + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *CreateProjectResponse) Reset() { *m = CreateProjectResponse{} } +func (m *CreateProjectResponse) String() string { return proto.CompactTextString(m) } +func (*CreateProjectResponse) ProtoMessage() {} +func (*CreateProjectResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_d9be266444105411, []int{13} +} + +func (m *CreateProjectResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_CreateProjectResponse.Unmarshal(m, b) +} +func (m *CreateProjectResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return 
xxx_messageInfo_CreateProjectResponse.Marshal(b, m, deterministic) +} +func (m *CreateProjectResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_CreateProjectResponse.Merge(m, src) +} +func (m *CreateProjectResponse) XXX_Size() int { + return xxx_messageInfo_CreateProjectResponse.Size(m) +} +func (m *CreateProjectResponse) XXX_DiscardUnknown() { + xxx_messageInfo_CreateProjectResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_CreateProjectResponse proto.InternalMessageInfo + +// Request for the archival of a project +type ArchiveProjectRequest struct { + // Name of project to be archived + Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ArchiveProjectRequest) Reset() { *m = ArchiveProjectRequest{} } +func (m *ArchiveProjectRequest) String() string { return proto.CompactTextString(m) } +func (*ArchiveProjectRequest) ProtoMessage() {} +func (*ArchiveProjectRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_d9be266444105411, []int{14} +} + +func (m *ArchiveProjectRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ArchiveProjectRequest.Unmarshal(m, b) +} +func (m *ArchiveProjectRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ArchiveProjectRequest.Marshal(b, m, deterministic) +} +func (m *ArchiveProjectRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_ArchiveProjectRequest.Merge(m, src) +} +func (m *ArchiveProjectRequest) XXX_Size() int { + return xxx_messageInfo_ArchiveProjectRequest.Size(m) +} +func (m *ArchiveProjectRequest) XXX_DiscardUnknown() { + xxx_messageInfo_ArchiveProjectRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_ArchiveProjectRequest proto.InternalMessageInfo + +func (m *ArchiveProjectRequest) GetName() string { + if m != nil { + return m.Name + } + return "" +} + +// Response for archival of a project 
+type ArchiveProjectResponse struct { + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ArchiveProjectResponse) Reset() { *m = ArchiveProjectResponse{} } +func (m *ArchiveProjectResponse) String() string { return proto.CompactTextString(m) } +func (*ArchiveProjectResponse) ProtoMessage() {} +func (*ArchiveProjectResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_d9be266444105411, []int{15} +} + +func (m *ArchiveProjectResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ArchiveProjectResponse.Unmarshal(m, b) +} +func (m *ArchiveProjectResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ArchiveProjectResponse.Marshal(b, m, deterministic) +} +func (m *ArchiveProjectResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_ArchiveProjectResponse.Merge(m, src) +} +func (m *ArchiveProjectResponse) XXX_Size() int { + return xxx_messageInfo_ArchiveProjectResponse.Size(m) +} +func (m *ArchiveProjectResponse) XXX_DiscardUnknown() { + xxx_messageInfo_ArchiveProjectResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_ArchiveProjectResponse proto.InternalMessageInfo + +// Request for listing of projects +type ListProjectsRequest struct { + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ListProjectsRequest) Reset() { *m = ListProjectsRequest{} } +func (m *ListProjectsRequest) String() string { return proto.CompactTextString(m) } +func (*ListProjectsRequest) ProtoMessage() {} +func (*ListProjectsRequest) Descriptor() ([]byte, []int) { + return fileDescriptor_d9be266444105411, []int{16} +} + +func (m *ListProjectsRequest) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ListProjectsRequest.Unmarshal(m, b) +} +func (m *ListProjectsRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return 
xxx_messageInfo_ListProjectsRequest.Marshal(b, m, deterministic) +} +func (m *ListProjectsRequest) XXX_Merge(src proto.Message) { + xxx_messageInfo_ListProjectsRequest.Merge(m, src) +} +func (m *ListProjectsRequest) XXX_Size() int { + return xxx_messageInfo_ListProjectsRequest.Size(m) +} +func (m *ListProjectsRequest) XXX_DiscardUnknown() { + xxx_messageInfo_ListProjectsRequest.DiscardUnknown(m) +} + +var xxx_messageInfo_ListProjectsRequest proto.InternalMessageInfo + +// Response for listing of projects +type ListProjectsResponse struct { + // List of project names (archived projects are filtered out) + Projects []string `protobuf:"bytes,1,rep,name=projects,proto3" json:"projects,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *ListProjectsResponse) Reset() { *m = ListProjectsResponse{} } +func (m *ListProjectsResponse) String() string { return proto.CompactTextString(m) } +func (*ListProjectsResponse) ProtoMessage() {} +func (*ListProjectsResponse) Descriptor() ([]byte, []int) { + return fileDescriptor_d9be266444105411, []int{17} +} + +func (m *ListProjectsResponse) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_ListProjectsResponse.Unmarshal(m, b) +} +func (m *ListProjectsResponse) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_ListProjectsResponse.Marshal(b, m, deterministic) +} +func (m *ListProjectsResponse) XXX_Merge(src proto.Message) { + xxx_messageInfo_ListProjectsResponse.Merge(m, src) +} +func (m *ListProjectsResponse) XXX_Size() int { + return xxx_messageInfo_ListProjectsResponse.Size(m) +} +func (m *ListProjectsResponse) XXX_DiscardUnknown() { + xxx_messageInfo_ListProjectsResponse.DiscardUnknown(m) +} + +var xxx_messageInfo_ListProjectsResponse proto.InternalMessageInfo + +func (m *ListProjectsResponse) GetProjects() []string { + if m != nil { + return m.Projects + } + return nil +} + func init() { 
proto.RegisterEnum("feast.core.ApplyFeatureSetResponse_Status", ApplyFeatureSetResponse_Status_name, ApplyFeatureSetResponse_Status_value) proto.RegisterEnum("feast.core.UpdateStoreResponse_Status", UpdateStoreResponse_Status_name, UpdateStoreResponse_Status_value) @@ -686,52 +932,66 @@ func init() { proto.RegisterType((*GetFeastCoreVersionResponse)(nil), "feast.core.GetFeastCoreVersionResponse") proto.RegisterType((*UpdateStoreRequest)(nil), "feast.core.UpdateStoreRequest") proto.RegisterType((*UpdateStoreResponse)(nil), "feast.core.UpdateStoreResponse") + proto.RegisterType((*CreateProjectRequest)(nil), "feast.core.CreateProjectRequest") + proto.RegisterType((*CreateProjectResponse)(nil), "feast.core.CreateProjectResponse") + proto.RegisterType((*ArchiveProjectRequest)(nil), "feast.core.ArchiveProjectRequest") + proto.RegisterType((*ArchiveProjectResponse)(nil), "feast.core.ArchiveProjectResponse") + proto.RegisterType((*ListProjectsRequest)(nil), "feast.core.ListProjectsRequest") + proto.RegisterType((*ListProjectsResponse)(nil), "feast.core.ListProjectsResponse") } func init() { proto.RegisterFile("feast/core/CoreService.proto", fileDescriptor_d9be266444105411) } var fileDescriptor_d9be266444105411 = []byte{ - // 636 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xac, 0x55, 0xdd, 0x72, 0xd2, 0x40, - 0x14, 0x36, 0xb5, 0xa5, 0xc3, 0x89, 0xad, 0xb0, 0x28, 0x65, 0x52, 0xac, 0x18, 0x3b, 0x16, 0xbd, - 0x48, 0x66, 0xf0, 0xc2, 0x0b, 0xc5, 0x19, 0xfe, 0x5a, 0x67, 0x74, 0xa0, 0xb3, 0x80, 0xe3, 0xf4, - 0x86, 0x01, 0xba, 0x20, 0xb6, 0x65, 0x63, 0x76, 0xe9, 0x8c, 0x6f, 0xe3, 0x85, 0xef, 0xe1, 0x03, - 0xf8, 0x52, 0x4e, 0xb2, 0x5b, 0xb2, 0x09, 0x21, 0x5c, 0xe8, 0x5d, 0xb2, 0xe7, 0x3b, 0xdf, 0x9e, - 0xfd, 0xce, 0x1f, 0x14, 0x27, 0x64, 0xc8, 0xb8, 0x3d, 0xa6, 0x2e, 0xb1, 0x1b, 0xd4, 0x25, 0x5d, - 0xe2, 0xde, 0xce, 0xc6, 0xc4, 0x72, 0x5c, 0xca, 0x29, 0x02, 0xdf, 0x6a, 0x79, 0x56, 0xe3, 0x50, - 0x41, 0x9e, 0x92, 0x21, 0x5f, 
0x78, 0x60, 0x2e, 0x80, 0x46, 0x5e, 0x31, 0x76, 0x39, 0x75, 0x25, - 0x81, 0xd9, 0x84, 0x47, 0x67, 0x84, 0x07, 0x70, 0x4c, 0xbe, 0x2f, 0x08, 0xe3, 0x08, 0xc1, 0xf6, - 0x7c, 0x78, 0x43, 0x0a, 0x5a, 0x49, 0x2b, 0xa7, 0xb1, 0xff, 0x8d, 0x0a, 0xb0, 0x7b, 0x4b, 0x5c, - 0x36, 0xa3, 0xf3, 0xc2, 0x56, 0x49, 0x2b, 0xef, 0xe0, 0xbb, 0x5f, 0xb3, 0x07, 0x8f, 0x23, 0x2c, - 0xcc, 0xa1, 0x73, 0x46, 0xd0, 0x5b, 0xd0, 0x27, 0xe2, 0x74, 0xc0, 0x08, 0xf7, 0xd9, 0xf4, 0x8a, - 0x61, 0x05, 0x51, 0x5b, 0x81, 0x53, 0xd7, 0x21, 0x63, 0x0c, 0x93, 0xe5, 0xbf, 0xf9, 0x5b, 0x83, - 0xfc, 0xa7, 0x19, 0x53, 0x78, 0xd9, 0x5d, 0x78, 0x35, 0x48, 0x4d, 0x66, 0xd7, 0x9c, 0xb8, 0x92, - 0xf2, 0xa5, 0x4a, 0x19, 0xef, 0x63, 0x9d, 0xfa, 0x0e, 0x58, 0x3a, 0x1a, 0x23, 0x48, 0x89, 0x13, - 0x54, 0x86, 0x8c, 0x12, 0xe4, 0x40, 0x79, 0xf7, 0x7e, 0x10, 0x4d, 0xdb, 0x53, 0xc0, 0x82, 0x9c, - 0x8a, 0x54, 0xd5, 0x48, 0xe3, 0x6c, 0x00, 0xfe, 0x2c, 0x75, 0xf9, 0x02, 0x07, 0x2b, 0xc1, 0x48, - 0x65, 0xaa, 0xf0, 0x40, 0xa1, 0x62, 0x05, 0xad, 0x74, 0x7f, 0x83, 0x34, 0x7a, 0xc0, 0xcf, 0x4c, - 0x0a, 0x59, 0x8f, 0xd9, 0x4f, 0xe5, 0x52, 0x95, 0x77, 0x11, 0x55, 0x8e, 0xa3, 0xaa, 0x84, 0xe0, - 0x51, 0x41, 0x8a, 0x4b, 0x41, 0x62, 0x92, 0x6f, 0x56, 0x01, 0xa9, 0x0c, 0xf2, 0x15, 0x27, 0xb0, - 0xc3, 0xbc, 0x13, 0x19, 0x7e, 0x56, 0xbd, 0xd0, 0x87, 0x62, 0x61, 0x37, 0xfb, 0x90, 0xaf, 0x39, - 0xce, 0xf5, 0x8f, 0xd5, 0x4a, 0xfb, 0xa7, 0x12, 0xf9, 0xa3, 0xc1, 0xc1, 0x0a, 0xef, 0x7f, 0xa8, - 0x3d, 0x54, 0x87, 0x14, 0xe3, 0x43, 0xbe, 0x60, 0x7e, 0x72, 0xf7, 0x2b, 0xaf, 0x54, 0xbf, 0x35, - 0x37, 0x5a, 0x5d, 0xdf, 0x03, 0x4b, 0x4f, 0xd3, 0x86, 0x94, 0x38, 0x41, 0x7b, 0x90, 0x6e, 0x77, - 0x06, 0x8d, 0x0f, 0xb5, 0xf6, 0x59, 0x2b, 0x73, 0x0f, 0xe9, 0xb0, 0xdb, 0xc0, 0xad, 0x5a, 0xaf, - 0xd5, 0xcc, 0x68, 0x28, 0x0d, 0x3b, 0x2d, 0x8c, 0x3b, 0x38, 0xb3, 0x65, 0x16, 0xc1, 0x10, 0x6d, - 0xc4, 0xb8, 0xd7, 0xea, 0xb2, 0x8a, 0xa4, 0x50, 0xe6, 0x1b, 0x38, 0x8c, 0xb5, 0xca, 0xe7, 0x2a, - 0xdd, 0x29, 0xf2, 0xb6, 0xec, 0xce, 0x2a, 0xa0, 0xbe, 0x73, 0x39, 
0xe4, 0x44, 0x64, 0x44, 0xea, - 0xae, 0xa4, 0x4e, 0x4b, 0x4c, 0xdd, 0x2f, 0x0d, 0x72, 0x21, 0xff, 0xd5, 0xdc, 0x27, 0x12, 0xa0, - 0xf7, 0x11, 0x2d, 0x5f, 0xa8, 0xc8, 0x18, 0xe6, 0xa8, 0x8e, 0xc7, 0x09, 0x3a, 0xf6, 0xcf, 0x9b, - 0x42, 0xc7, 0xca, 0xcf, 0x6d, 0xd0, 0x95, 0x01, 0x89, 0x26, 0x90, 0x8b, 0x91, 0x0b, 0x85, 0x2e, - 0x5f, 0xaf, 0xb6, 0x71, 0xb2, 0x11, 0x27, 0x65, 0xe8, 0xc1, 0x5e, 0x68, 0xf6, 0xa1, 0xd2, 0xaa, - 0x67, 0xb8, 0xe4, 0x8d, 0x67, 0x09, 0x08, 0xc9, 0x7a, 0x01, 0x0f, 0x23, 0x93, 0x03, 0x99, 0x9b, - 0x67, 0x9c, 0xf1, 0x3c, 0x11, 0x23, 0xb9, 0x3f, 0x02, 0x04, 0xad, 0x8c, 0x9e, 0x24, 0x0e, 0x09, - 0xe3, 0x68, 0x9d, 0x39, 0x08, 0x34, 0xd2, 0x0e, 0xe1, 0x40, 0xe3, 0xbb, 0x3e, 0x1c, 0xe8, 0xba, - 0x0e, 0x6e, 0x83, 0xae, 0x94, 0x07, 0x3a, 0x5a, 0x5b, 0x37, 0x82, 0xf3, 0xe9, 0x86, 0xba, 0xaa, - 0x77, 0x40, 0xd9, 0x97, 0xf5, 0x8c, 0x52, 0x2d, 0xe7, 0xde, 0x32, 0xbc, 0xb0, 0xa7, 0x33, 0xfe, - 0x75, 0x31, 0xb2, 0xc6, 0xf4, 0xc6, 0x9e, 0xd2, 0x6f, 0xe4, 0xca, 0x16, 0x5b, 0x93, 0x5d, 0x5e, - 0xd9, 0x53, 0x6a, 0xfb, 0x1b, 0x93, 0xd9, 0xc1, 0x26, 0x1d, 0xa5, 0xfc, 0xa3, 0xd7, 0x7f, 0x03, - 0x00, 0x00, 0xff, 0xff, 0xc7, 0xc1, 0x10, 0xf1, 0xa5, 0x07, 0x00, 0x00, + // 762 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xa4, 0x56, 0xef, 0x4e, 0x13, 0x4f, + 0x14, 0xfd, 0x2d, 0xfc, 0x28, 0xf6, 0x16, 0xb0, 0x4c, 0x69, 0x69, 0x16, 0x84, 0x3a, 0x12, 0x41, + 0x4c, 0x76, 0x93, 0xfa, 0x81, 0x98, 0x88, 0x49, 0x29, 0x05, 0x13, 0x4d, 0x29, 0x0b, 0x68, 0xc2, + 0x07, 0x49, 0x29, 0x53, 0x28, 0xff, 0xa6, 0xee, 0x4c, 0x49, 0x4c, 0x7c, 0x1a, 0xe3, 0xbb, 0xf8, + 0x0c, 0xbe, 0x8d, 0xd9, 0x9d, 0x69, 0x77, 0x66, 0xba, 0xdd, 0x1a, 0xfd, 0xd6, 0xbd, 0x73, 0xee, + 0xd9, 0xbb, 0xe7, 0xde, 0x73, 0x3b, 0xb0, 0xdc, 0x26, 0x4d, 0xc6, 0xdd, 0x16, 0xf5, 0x89, 0x5b, + 0xa5, 0x3e, 0x39, 0x22, 0xfe, 0x43, 0xa7, 0x45, 0x9c, 0xae, 0x4f, 0x39, 0x45, 0x10, 0x9e, 0x3a, + 0xc1, 0xa9, 0xbd, 0xa4, 0x20, 0xf7, 0x48, 0x93, 0xf7, 0x02, 0x30, 0x17, 0x40, 
0xbb, 0xa0, 0x1c, + 0x1e, 0x71, 0xea, 0x4b, 0x02, 0xfc, 0x19, 0x16, 0xf6, 0x09, 0x8f, 0xe0, 0x1e, 0xf9, 0xd2, 0x23, + 0x8c, 0xa3, 0x22, 0x4c, 0x77, 0x7d, 0x7a, 0x4d, 0x5a, 0xbc, 0x38, 0x59, 0xb2, 0x36, 0xd2, 0x5e, + 0xff, 0x11, 0x21, 0xf8, 0xff, 0xbe, 0x79, 0x47, 0x8a, 0x56, 0x18, 0x0e, 0x7f, 0x07, 0xe8, 0x07, + 0xe2, 0xb3, 0x0e, 0xbd, 0x2f, 0x4e, 0x94, 0xac, 0x8d, 0x29, 0xaf, 0xff, 0x88, 0x1b, 0x90, 0x37, + 0xf8, 0x59, 0x97, 0xde, 0x33, 0x82, 0xb6, 0x20, 0xd3, 0x16, 0xd1, 0x33, 0x46, 0x78, 0xc8, 0x96, + 0x29, 0x17, 0x9c, 0xe8, 0x7b, 0x1c, 0x25, 0x09, 0xda, 0x83, 0xdf, 0xf8, 0x97, 0x05, 0x85, 0x0f, + 0x1d, 0xa6, 0x70, 0xb2, 0x7e, 0xd1, 0x15, 0x48, 0xb5, 0x3b, 0xb7, 0x9c, 0xf8, 0x92, 0xee, 0x85, + 0x4a, 0x17, 0x9f, 0xe3, 0xec, 0x85, 0x09, 0x9e, 0x4c, 0xb4, 0xbf, 0x41, 0x4a, 0x44, 0x12, 0x14, + 0xd8, 0x80, 0xac, 0x52, 0xfa, 0x99, 0xa2, 0xc6, 0x5c, 0x54, 0x67, 0x3d, 0xd0, 0xc5, 0x81, 0x9c, + 0x8a, 0x54, 0x35, 0x4a, 0x7b, 0xf3, 0x11, 0xf8, 0xa3, 0x54, 0xeb, 0x18, 0x16, 0x87, 0xca, 0x94, + 0x7a, 0xbd, 0x86, 0x19, 0x85, 0x8a, 0x15, 0xad, 0xd2, 0x64, 0x82, 0x60, 0x99, 0x88, 0x9b, 0x61, + 0x0a, 0xf3, 0x01, 0x6b, 0xd8, 0xf6, 0x81, 0x56, 0x6f, 0x0c, 0xad, 0xd6, 0x4c, 0xad, 0x34, 0xb8, + 0x29, 0xd3, 0xf2, 0x40, 0xa6, 0x98, 0x71, 0xc0, 0xdb, 0x80, 0x54, 0x06, 0xf9, 0x05, 0xeb, 0x30, + 0xc5, 0x82, 0x88, 0x2c, 0x7d, 0x5e, 0x7d, 0x61, 0x08, 0xf5, 0xc4, 0x39, 0x3e, 0x84, 0x42, 0xa5, + 0xdb, 0xbd, 0xfd, 0x3a, 0x3c, 0x95, 0x7f, 0x3d, 0x34, 0x3f, 0x2d, 0x58, 0x1c, 0xe2, 0xfc, 0xc7, + 0x49, 0x44, 0x3b, 0x90, 0x62, 0xbc, 0xc9, 0x7b, 0x2c, 0x6c, 0xe8, 0x5c, 0x79, 0x53, 0xcd, 0x19, + 0xf1, 0x36, 0xe7, 0x28, 0xcc, 0xf0, 0x64, 0x26, 0x76, 0x21, 0x25, 0x22, 0x68, 0x16, 0xd2, 0xf5, + 0x83, 0xb3, 0xea, 0xbb, 0x4a, 0x7d, 0xbf, 0x96, 0xfd, 0x0f, 0x65, 0x60, 0xba, 0xea, 0xd5, 0x2a, + 0xc7, 0xb5, 0xdd, 0xac, 0x85, 0xd2, 0x30, 0x55, 0xf3, 0xbc, 0x03, 0x2f, 0x3b, 0x81, 0x97, 0xc1, + 0x16, 0x86, 0x62, 0x3c, 0x58, 0x07, 0x72, 0x72, 0xa4, 0x40, 0x78, 0x0b, 0x96, 0x62, 0x4f, 0xe5, + 0xa7, 0x2a, 0x3e, 
0x15, 0xfd, 0x1a, 0xf8, 0x74, 0x1b, 0xd0, 0x49, 0xf7, 0xa2, 0xc9, 0x89, 0xe8, + 0x84, 0xd4, 0x5b, 0x69, 0x99, 0x95, 0xd8, 0xb2, 0x1f, 0x16, 0xe4, 0xb4, 0xfc, 0xe1, 0x9e, 0x27, + 0x12, 0xa0, 0xb7, 0x86, 0x96, 0xcf, 0x55, 0x64, 0x0c, 0xb3, 0xa9, 0xe3, 0x5a, 0x82, 0x8e, 0x27, + 0x8d, 0x5d, 0xa1, 0x23, 0xde, 0x84, 0x85, 0xaa, 0x4f, 0x9a, 0x9c, 0x34, 0x84, 0x95, 0xfb, 0xdf, + 0x19, 0x37, 0xc4, 0x8b, 0x90, 0x37, 0xb0, 0xe2, 0xcd, 0xf8, 0x25, 0xe4, 0x2b, 0x7e, 0xeb, 0xaa, + 0xf3, 0xf0, 0x27, 0x2c, 0x45, 0x28, 0x98, 0x60, 0x49, 0x93, 0x87, 0x5c, 0x60, 0x12, 0x19, 0xee, + 0x1b, 0x0d, 0x97, 0x61, 0x41, 0x0f, 0x4b, 0x25, 0x6d, 0x78, 0x24, 0xf7, 0x8f, 0xf0, 0x7e, 0xda, + 0x1b, 0x3c, 0x97, 0xbf, 0xa7, 0x20, 0xa3, 0xfc, 0x37, 0xa0, 0x36, 0xe4, 0x62, 0xa6, 0x00, 0x69, + 0x9a, 0x8e, 0x1e, 0x22, 0x7b, 0x7d, 0x2c, 0x4e, 0xd6, 0x74, 0x0c, 0xb3, 0xda, 0x72, 0x47, 0xa5, + 0xe1, 0x4c, 0xdd, 0xc1, 0xf6, 0xd3, 0x04, 0x84, 0x64, 0x3d, 0x85, 0xc7, 0xc6, 0x12, 0x44, 0x78, + 0xfc, 0x22, 0xb7, 0x9f, 0x25, 0x62, 0x24, 0xf7, 0x7b, 0x80, 0x68, 0x33, 0xa1, 0x27, 0x89, 0x3b, + 0xcf, 0x5e, 0x19, 0x75, 0x1c, 0x15, 0x6a, 0xb8, 0x5c, 0x2f, 0x34, 0x7e, 0x89, 0xe9, 0x85, 0x8e, + 0x5a, 0x4a, 0x75, 0xc8, 0x28, 0x53, 0x8f, 0x56, 0x46, 0xda, 0x41, 0x70, 0xae, 0x8e, 0xb1, 0x4b, + 0xd0, 0x2a, 0x6d, 0x9a, 0xf5, 0x56, 0xc5, 0x99, 0x42, 0x6f, 0x55, 0xac, 0x15, 0xd0, 0x27, 0x98, + 0xd3, 0xa7, 0x1b, 0x69, 0x49, 0xb1, 0x36, 0xb1, 0x71, 0x12, 0x44, 0x12, 0x1f, 0xc2, 0x8c, 0xea, + 0x02, 0xb4, 0x6a, 0xb6, 0xc2, 0xb0, 0x8d, 0x5d, 0x1a, 0x0d, 0x10, 0x94, 0x3b, 0x07, 0xa0, 0x5c, + 0x96, 0x76, 0xb2, 0x8a, 0x5f, 0x1a, 0xc1, 0x4d, 0xe8, 0xd4, 0xbd, 0xec, 0xf0, 0xab, 0xde, 0xb9, + 0xd3, 0xa2, 0x77, 0xee, 0x25, 0xbd, 0x26, 0x37, 0xae, 0xb8, 0x32, 0xb1, 0x8b, 0x1b, 0xf7, 0x92, + 0xba, 0xe1, 0x75, 0x89, 0xb9, 0xd1, 0x35, 0xea, 0x3c, 0x15, 0x86, 0x5e, 0xfd, 0x0e, 0x00, 0x00, + 0xff, 0xff, 0xe2, 0x7d, 0x9e, 0xca, 0xa2, 0x09, 0x00, 0x00, } // Reference imports to suppress errors if they are not otherwise used. 
@@ -774,6 +1034,17 @@ type CoreServiceClient interface { // If the changes are valid, core will return the given store configuration in response, and // start or update the necessary feature population jobs for the updated store. UpdateStore(ctx context.Context, in *UpdateStoreRequest, opts ...grpc.CallOption) (*UpdateStoreResponse, error) + // Creates a project. Projects serve as namespaces within which resources like features will be + // created. Both feature set names as well as field names must be unique within a project. Project + // names themselves must be globally unique. + CreateProject(ctx context.Context, in *CreateProjectRequest, opts ...grpc.CallOption) (*CreateProjectResponse, error) + // Archives a project. Archived projects will continue to exist and function, but won't be visible + // through the Core API. Any existing ingestion or serving requests will continue to function, + // but will result in warning messages being logged. It is not possible to unarchive a project + // through the Core API + ArchiveProject(ctx context.Context, in *ArchiveProjectRequest, opts ...grpc.CallOption) (*ArchiveProjectResponse, error) + // Lists all projects active projects. + ListProjects(ctx context.Context, in *ListProjectsRequest, opts ...grpc.CallOption) (*ListProjectsResponse, error) } type coreServiceClient struct { @@ -838,6 +1109,33 @@ func (c *coreServiceClient) UpdateStore(ctx context.Context, in *UpdateStoreRequ return out, nil } +func (c *coreServiceClient) CreateProject(ctx context.Context, in *CreateProjectRequest, opts ...grpc.CallOption) (*CreateProjectResponse, error) { + out := new(CreateProjectResponse) + err := c.cc.Invoke(ctx, "/feast.core.CoreService/CreateProject", in, out, opts...) 
+ if err != nil { + return nil, err + } + return out, nil +} + +func (c *coreServiceClient) ArchiveProject(ctx context.Context, in *ArchiveProjectRequest, opts ...grpc.CallOption) (*ArchiveProjectResponse, error) { + out := new(ArchiveProjectResponse) + err := c.cc.Invoke(ctx, "/feast.core.CoreService/ArchiveProject", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + +func (c *coreServiceClient) ListProjects(ctx context.Context, in *ListProjectsRequest, opts ...grpc.CallOption) (*ListProjectsResponse, error) { + out := new(ListProjectsResponse) + err := c.cc.Invoke(ctx, "/feast.core.CoreService/ListProjects", in, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + // CoreServiceServer is the server API for CoreService service. type CoreServiceServer interface { // Retrieve version information about this Feast deployment @@ -868,6 +1166,17 @@ type CoreServiceServer interface { // If the changes are valid, core will return the given store configuration in response, and // start or update the necessary feature population jobs for the updated store. UpdateStore(context.Context, *UpdateStoreRequest) (*UpdateStoreResponse, error) + // Creates a project. Projects serve as namespaces within which resources like features will be + // created. Both feature set names as well as field names must be unique within a project. Project + // names themselves must be globally unique. + CreateProject(context.Context, *CreateProjectRequest) (*CreateProjectResponse, error) + // Archives a project. Archived projects will continue to exist and function, but won't be visible + // through the Core API. Any existing ingestion or serving requests will continue to function, + // but will result in warning messages being logged. It is not possible to unarchive a project + // through the Core API + ArchiveProject(context.Context, *ArchiveProjectRequest) (*ArchiveProjectResponse, error) + // Lists all projects active projects. 
+ ListProjects(context.Context, *ListProjectsRequest) (*ListProjectsResponse, error) } // UnimplementedCoreServiceServer can be embedded to have forward compatible implementations. @@ -892,6 +1201,15 @@ func (*UnimplementedCoreServiceServer) ApplyFeatureSet(ctx context.Context, req func (*UnimplementedCoreServiceServer) UpdateStore(ctx context.Context, req *UpdateStoreRequest) (*UpdateStoreResponse, error) { return nil, status.Errorf(codes.Unimplemented, "method UpdateStore not implemented") } +func (*UnimplementedCoreServiceServer) CreateProject(ctx context.Context, req *CreateProjectRequest) (*CreateProjectResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method CreateProject not implemented") +} +func (*UnimplementedCoreServiceServer) ArchiveProject(ctx context.Context, req *ArchiveProjectRequest) (*ArchiveProjectResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method ArchiveProject not implemented") +} +func (*UnimplementedCoreServiceServer) ListProjects(ctx context.Context, req *ListProjectsRequest) (*ListProjectsResponse, error) { + return nil, status.Errorf(codes.Unimplemented, "method ListProjects not implemented") +} func RegisterCoreServiceServer(s *grpc.Server, srv CoreServiceServer) { s.RegisterService(&_CoreService_serviceDesc, srv) @@ -1005,6 +1323,60 @@ func _CoreService_UpdateStore_Handler(srv interface{}, ctx context.Context, dec return interceptor(ctx, in, info, handler) } +func _CoreService_CreateProject_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(CreateProjectRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(CoreServiceServer).CreateProject(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/feast.core.CoreService/CreateProject", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return 
srv.(CoreServiceServer).CreateProject(ctx, req.(*CreateProjectRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _CoreService_ArchiveProject_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(ArchiveProjectRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(CoreServiceServer).ArchiveProject(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/feast.core.CoreService/ArchiveProject", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(CoreServiceServer).ArchiveProject(ctx, req.(*ArchiveProjectRequest)) + } + return interceptor(ctx, in, info, handler) +} + +func _CoreService_ListProjects_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(ListProjectsRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(CoreServiceServer).ListProjects(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: "/feast.core.CoreService/ListProjects", + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(CoreServiceServer).ListProjects(ctx, req.(*ListProjectsRequest)) + } + return interceptor(ctx, in, info, handler) +} + var _CoreService_serviceDesc = grpc.ServiceDesc{ ServiceName: "feast.core.CoreService", HandlerType: (*CoreServiceServer)(nil), @@ -1033,6 +1405,18 @@ var _CoreService_serviceDesc = grpc.ServiceDesc{ MethodName: "UpdateStore", Handler: _CoreService_UpdateStore_Handler, }, + { + MethodName: "CreateProject", + Handler: _CoreService_CreateProject_Handler, + }, + { + MethodName: "ArchiveProject", + Handler: _CoreService_ArchiveProject_Handler, + }, + { + MethodName: "ListProjects", + Handler: _CoreService_ListProjects_Handler, + }, 
}, Streams: []grpc.StreamDesc{}, Metadata: "feast/core/CoreService.proto", diff --git a/sdk/go/protos/feast/core/FeatureSet.pb.go b/sdk/go/protos/feast/core/FeatureSet.pb.go index 79cbcaa94b1..26d9d9c4f7e 100644 --- a/sdk/go/protos/feast/core/FeatureSet.pb.go +++ b/sdk/go/protos/feast/core/FeatureSet.pb.go @@ -8,6 +8,7 @@ import ( types "github.com/gojek/feast/sdk/go/protos/feast/types" proto "github.com/golang/protobuf/proto" duration "github.com/golang/protobuf/ptypes/duration" + timestamp "github.com/golang/protobuf/ptypes/timestamp" math "math" ) @@ -22,10 +23,89 @@ var _ = math.Inf // proto package needs to be updated. const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package +type FeatureSetStatus int32 + +const ( + FeatureSetStatus_STATUS_INVALID FeatureSetStatus = 0 + FeatureSetStatus_STATUS_PENDING FeatureSetStatus = 1 + FeatureSetStatus_STATUS_READY FeatureSetStatus = 2 +) + +var FeatureSetStatus_name = map[int32]string{ + 0: "STATUS_INVALID", + 1: "STATUS_PENDING", + 2: "STATUS_READY", +} + +var FeatureSetStatus_value = map[string]int32{ + "STATUS_INVALID": 0, + "STATUS_PENDING": 1, + "STATUS_READY": 2, +} + +func (x FeatureSetStatus) String() string { + return proto.EnumName(FeatureSetStatus_name, int32(x)) +} + +func (FeatureSetStatus) EnumDescriptor() ([]byte, []int) { + return fileDescriptor_972fbd278ac19c0c, []int{0} +} + +type FeatureSet struct { + // User-specified specifications of this feature set. + Spec *FeatureSetSpec `protobuf:"bytes,1,opt,name=spec,proto3" json:"spec,omitempty"` + // System-populated metadata for this feature set. 
+ Meta *FeatureSetMeta `protobuf:"bytes,2,opt,name=meta,proto3" json:"meta,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *FeatureSet) Reset() { *m = FeatureSet{} } +func (m *FeatureSet) String() string { return proto.CompactTextString(m) } +func (*FeatureSet) ProtoMessage() {} +func (*FeatureSet) Descriptor() ([]byte, []int) { + return fileDescriptor_972fbd278ac19c0c, []int{0} +} + +func (m *FeatureSet) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_FeatureSet.Unmarshal(m, b) +} +func (m *FeatureSet) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_FeatureSet.Marshal(b, m, deterministic) +} +func (m *FeatureSet) XXX_Merge(src proto.Message) { + xxx_messageInfo_FeatureSet.Merge(m, src) +} +func (m *FeatureSet) XXX_Size() int { + return xxx_messageInfo_FeatureSet.Size(m) +} +func (m *FeatureSet) XXX_DiscardUnknown() { + xxx_messageInfo_FeatureSet.DiscardUnknown(m) +} + +var xxx_messageInfo_FeatureSet proto.InternalMessageInfo + +func (m *FeatureSet) GetSpec() *FeatureSetSpec { + if m != nil { + return m.Spec + } + return nil +} + +func (m *FeatureSet) GetMeta() *FeatureSetMeta { + if m != nil { + return m.Meta + } + return nil +} + type FeatureSetSpec struct { - // Name of the featureSet. Must be unique. + // Name of project that this feature set belongs to. + Project string `protobuf:"bytes,7,opt,name=project,proto3" json:"project,omitempty"` + // Name of the feature set. Must be unique. Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` - // FeatureSet version. + // Feature set version. Version int32 `protobuf:"varint,2,opt,name=version,proto3" json:"version,omitempty"` // List of entities contained within this featureSet. // This allows the feature to be used during joins between feature sets. 
@@ -50,7 +130,7 @@ func (m *FeatureSetSpec) Reset() { *m = FeatureSetSpec{} } func (m *FeatureSetSpec) String() string { return proto.CompactTextString(m) } func (*FeatureSetSpec) ProtoMessage() {} func (*FeatureSetSpec) Descriptor() ([]byte, []int) { - return fileDescriptor_972fbd278ac19c0c, []int{0} + return fileDescriptor_972fbd278ac19c0c, []int{1} } func (m *FeatureSetSpec) XXX_Unmarshal(b []byte) error { @@ -71,6 +151,13 @@ func (m *FeatureSetSpec) XXX_DiscardUnknown() { var xxx_messageInfo_FeatureSetSpec proto.InternalMessageInfo +func (m *FeatureSetSpec) GetProject() string { + if m != nil { + return m.Project + } + return "" +} + func (m *FeatureSetSpec) GetName() string { if m != nil { return m.Name @@ -127,7 +214,7 @@ func (m *EntitySpec) Reset() { *m = EntitySpec{} } func (m *EntitySpec) String() string { return proto.CompactTextString(m) } func (*EntitySpec) ProtoMessage() {} func (*EntitySpec) Descriptor() ([]byte, []int) { - return fileDescriptor_972fbd278ac19c0c, []int{1} + return fileDescriptor_972fbd278ac19c0c, []int{2} } func (m *EntitySpec) XXX_Unmarshal(b []byte) error { @@ -176,7 +263,7 @@ func (m *FeatureSpec) Reset() { *m = FeatureSpec{} } func (m *FeatureSpec) String() string { return proto.CompactTextString(m) } func (*FeatureSpec) ProtoMessage() {} func (*FeatureSpec) Descriptor() ([]byte, []int) { - return fileDescriptor_972fbd278ac19c0c, []int{2} + return fileDescriptor_972fbd278ac19c0c, []int{3} } func (m *FeatureSpec) XXX_Unmarshal(b []byte) error { @@ -211,37 +298,103 @@ func (m *FeatureSpec) GetValueType() types.ValueType_Enum { return types.ValueType_INVALID } +type FeatureSetMeta struct { + // Created timestamp of this specific feature set. + CreatedTimestamp *timestamp.Timestamp `protobuf:"bytes,1,opt,name=created_timestamp,json=createdTimestamp,proto3" json:"created_timestamp,omitempty"` + // Status of the feature set. + // Used to indicate whether the feature set is ready for consumption or ingestion. 
+ // Currently supports 2 states: + // 1) STATUS_PENDING - A feature set is in pending state if Feast has not spun up the jobs + // necessary to push rows for this feature set to stores subscribing to this feature set. + // 2) STATUS_READY - Feature set is ready for consumption or ingestion + Status FeatureSetStatus `protobuf:"varint,2,opt,name=status,proto3,enum=feast.core.FeatureSetStatus" json:"status,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *FeatureSetMeta) Reset() { *m = FeatureSetMeta{} } +func (m *FeatureSetMeta) String() string { return proto.CompactTextString(m) } +func (*FeatureSetMeta) ProtoMessage() {} +func (*FeatureSetMeta) Descriptor() ([]byte, []int) { + return fileDescriptor_972fbd278ac19c0c, []int{4} +} + +func (m *FeatureSetMeta) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_FeatureSetMeta.Unmarshal(m, b) +} +func (m *FeatureSetMeta) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_FeatureSetMeta.Marshal(b, m, deterministic) +} +func (m *FeatureSetMeta) XXX_Merge(src proto.Message) { + xxx_messageInfo_FeatureSetMeta.Merge(m, src) +} +func (m *FeatureSetMeta) XXX_Size() int { + return xxx_messageInfo_FeatureSetMeta.Size(m) +} +func (m *FeatureSetMeta) XXX_DiscardUnknown() { + xxx_messageInfo_FeatureSetMeta.DiscardUnknown(m) +} + +var xxx_messageInfo_FeatureSetMeta proto.InternalMessageInfo + +func (m *FeatureSetMeta) GetCreatedTimestamp() *timestamp.Timestamp { + if m != nil { + return m.CreatedTimestamp + } + return nil +} + +func (m *FeatureSetMeta) GetStatus() FeatureSetStatus { + if m != nil { + return m.Status + } + return FeatureSetStatus_STATUS_INVALID +} + func init() { + proto.RegisterEnum("feast.core.FeatureSetStatus", FeatureSetStatus_name, FeatureSetStatus_value) + proto.RegisterType((*FeatureSet)(nil), "feast.core.FeatureSet") proto.RegisterType((*FeatureSetSpec)(nil), 
"feast.core.FeatureSetSpec") proto.RegisterType((*EntitySpec)(nil), "feast.core.EntitySpec") proto.RegisterType((*FeatureSpec)(nil), "feast.core.FeatureSpec") + proto.RegisterType((*FeatureSetMeta)(nil), "feast.core.FeatureSetMeta") } func init() { proto.RegisterFile("feast/core/FeatureSet.proto", fileDescriptor_972fbd278ac19c0c) } var fileDescriptor_972fbd278ac19c0c = []byte{ - // 357 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xac, 0x52, 0x4d, 0x6f, 0xe2, 0x30, - 0x10, 0x55, 0xf8, 0x08, 0x30, 0x48, 0xac, 0xe4, 0xc3, 0x92, 0x5d, 0xa4, 0x55, 0xc4, 0x29, 0xda, - 0x83, 0x2d, 0x85, 0x5b, 0x6f, 0x45, 0x6d, 0x8f, 0x55, 0x15, 0xaa, 0x1e, 0xaa, 0x56, 0xc8, 0x84, - 0x21, 0x4d, 0x21, 0x71, 0x14, 0x3b, 0x08, 0x7e, 0x41, 0xff, 0x76, 0x15, 0x1b, 0x37, 0x39, 0xf4, - 0xd8, 0x9b, 0xc7, 0xef, 0xcd, 0xc7, 0x7b, 0x33, 0x30, 0xdb, 0x21, 0x97, 0x8a, 0xc5, 0xa2, 0x44, - 0x76, 0x87, 0x5c, 0x55, 0x25, 0xae, 0x50, 0xd1, 0xa2, 0x14, 0x4a, 0x10, 0xd0, 0x20, 0xad, 0xc1, - 0xbf, 0x53, 0x43, 0x54, 0xe7, 0x02, 0x25, 0x7b, 0xe2, 0x87, 0x0a, 0x0d, 0xc9, 0x02, 0xba, 0xc2, - 0x4a, 0x54, 0x65, 0x6c, 0x81, 0x7f, 0x89, 0x10, 0xc9, 0x01, 0x99, 0x8e, 0x36, 0xd5, 0x8e, 0x6d, - 0xab, 0x92, 0xab, 0x54, 0xe4, 0x06, 0x9f, 0x7f, 0x74, 0x60, 0xd2, 0xb4, 0x5c, 0x15, 0x18, 0x13, - 0x02, 0xbd, 0x9c, 0x67, 0xe8, 0x39, 0xbe, 0x13, 0x8c, 0x22, 0xfd, 0x26, 0x1e, 0x0c, 0x8e, 0x58, - 0xca, 0x54, 0xe4, 0x5e, 0xc7, 0x77, 0x82, 0x7e, 0x64, 0x43, 0x12, 0xc2, 0x10, 0x73, 0x95, 0xaa, - 0x14, 0xa5, 0xd7, 0xf5, 0xbb, 0xc1, 0x38, 0xfc, 0x4d, 0x9b, 0x89, 0xe9, 0x6d, 0x8d, 0x9d, 0xeb, - 0xba, 0xd1, 0x17, 0x8f, 0x2c, 0x60, 0xb8, 0x33, 0x3d, 0xa5, 0xd7, 0xd3, 0x39, 0xd3, 0x76, 0x8e, - 0x9d, 0x47, 0x27, 0x59, 0x22, 0x09, 0x61, 0x90, 0xf1, 0xd3, 0x9a, 0x27, 0xe8, 0xf5, 0x7d, 0x27, - 0x18, 0x87, 0x7f, 0xa8, 0xd1, 0x46, 0xad, 0x36, 0x7a, 0x73, 0xd1, 0x16, 0xb9, 0x19, 0x3f, 0x5d, - 0x27, 0x48, 0xfe, 0x83, 0x2b, 0xb5, 0x1b, 0x9e, 0xab, 0x53, 0x48, 0xbb, 0x8d, 0xf1, 0x29, 0xba, - 
0x30, 0xe6, 0x2f, 0x00, 0xcd, 0xb0, 0xdf, 0x9a, 0x70, 0x05, 0x70, 0xac, 0x3d, 0x5f, 0xd7, 0xfe, - 0x6b, 0x1f, 0x26, 0xe1, 0xec, 0x52, 0x51, 0xaf, 0x84, 0xea, 0x95, 0x3c, 0x9e, 0x8b, 0x5a, 0x77, - 0x95, 0x45, 0xa3, 0xa3, 0x8d, 0xe7, 0xaf, 0x30, 0x6e, 0xc9, 0xfa, 0xe9, 0xf2, 0xcb, 0x7b, 0x68, - 0x9d, 0xc9, 0xf2, 0x57, 0xb3, 0xd1, 0x87, 0xda, 0x9b, 0x67, 0x96, 0xa4, 0xea, 0xad, 0xda, 0xd0, - 0x58, 0x64, 0x2c, 0x11, 0xef, 0xb8, 0x67, 0xe6, 0x5e, 0xe4, 0x76, 0xcf, 0x12, 0x61, 0x8e, 0x43, - 0xb2, 0xe6, 0x86, 0x36, 0xae, 0xfe, 0x5a, 0x7c, 0x06, 0x00, 0x00, 0xff, 0xff, 0xdb, 0x96, 0x15, - 0x04, 0x9a, 0x02, 0x00, 0x00, + // 510 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xac, 0x93, 0x4f, 0x6f, 0xda, 0x30, + 0x18, 0xc6, 0x07, 0xa5, 0x50, 0x5e, 0x26, 0x96, 0xf9, 0xb0, 0x66, 0xed, 0xb4, 0x21, 0x4e, 0xa8, + 0x07, 0x5b, 0x4a, 0x77, 0xda, 0x8d, 0x0a, 0x56, 0x21, 0x75, 0xa8, 0x72, 0x58, 0xa5, 0x4d, 0x9b, + 0x90, 0x09, 0x2f, 0x59, 0x5a, 0x82, 0xa3, 0xd8, 0x41, 0xe5, 0x53, 0xec, 0x33, 0xec, 0x9b, 0x4e, + 0x71, 0x12, 0x92, 0xa1, 0x6e, 0xa7, 0xdd, 0x62, 0x3f, 0x3f, 0xbf, 0x79, 0xde, 0x7f, 0x70, 0xbe, + 0x42, 0xa1, 0x34, 0xf3, 0x64, 0x8c, 0xec, 0x23, 0x0a, 0x9d, 0xc4, 0xe8, 0xa2, 0xa6, 0x51, 0x2c, + 0xb5, 0x24, 0x60, 0x44, 0x9a, 0x8a, 0x67, 0xa7, 0x19, 0xa8, 0x77, 0x11, 0x2a, 0x76, 0x27, 0xd6, + 0x09, 0x66, 0x50, 0x21, 0x98, 0x08, 0xae, 0x4c, 0x62, 0xaf, 0x10, 0xde, 0xfa, 0x52, 0xfa, 0x6b, + 0x64, 0xe6, 0xb4, 0x48, 0x56, 0x6c, 0x99, 0xc4, 0x42, 0x07, 0x72, 0x93, 0xeb, 0xef, 0x0e, 0x75, + 0x1d, 0x84, 0xa8, 0xb4, 0x08, 0xa3, 0x0c, 0xe8, 0xaf, 0x01, 0x4a, 0x4b, 0x84, 0x42, 0x43, 0x45, + 0xe8, 0xd9, 0xb5, 0x5e, 0x6d, 0xd0, 0x71, 0xce, 0x68, 0xe9, 0x8d, 0x96, 0x94, 0x1b, 0xa1, 0xc7, + 0x0d, 0x97, 0xf2, 0x21, 0x6a, 0x61, 0xd7, 0xff, 0xc5, 0x7f, 0x42, 0x2d, 0xb8, 0xe1, 0xfa, 0xbf, + 0xea, 0xd0, 0xfd, 0x33, 0x10, 0xb1, 0xa1, 0x15, 0xc5, 0xf2, 0x1e, 0x3d, 0x6d, 0xb7, 0x7a, 0xb5, + 0x41, 0x9b, 0x17, 0x47, 0x42, 0xa0, 0xb1, 0x11, 0x21, 
0x1a, 0x33, 0x6d, 0x6e, 0xbe, 0x53, 0x7a, + 0x8b, 0xb1, 0x0a, 0xe4, 0xc6, 0xfc, 0xf3, 0x98, 0x17, 0x47, 0xe2, 0xc0, 0x09, 0x6e, 0x74, 0xa0, + 0x03, 0x54, 0xf6, 0x51, 0xef, 0x68, 0xd0, 0x71, 0x5e, 0x55, 0xed, 0x8c, 0x53, 0x6d, 0x67, 0xac, + 0xef, 0x39, 0x72, 0x09, 0x27, 0xab, 0xcc, 0x8d, 0xb2, 0x1b, 0xe6, 0xcd, 0xe9, 0x53, 0x29, 0x98, + 0x47, 0x05, 0x48, 0x1c, 0x68, 0x85, 0xe2, 0x71, 0x2e, 0x7c, 0xb4, 0x8f, 0x4d, 0xda, 0xaf, 0x69, + 0x56, 0x64, 0x5a, 0x14, 0x99, 0x8e, 0xf2, 0x26, 0xf0, 0x66, 0x28, 0x1e, 0x87, 0x3e, 0x92, 0x0b, + 0x68, 0x2a, 0xd3, 0x36, 0xbb, 0x69, 0x9e, 0x90, 0xea, 0x6f, 0xb2, 0x86, 0xf2, 0x9c, 0xe8, 0x7f, + 0x03, 0x28, 0xcd, 0x3e, 0x59, 0x84, 0x0f, 0x00, 0xdb, 0x74, 0x38, 0xe6, 0xe9, 0xa0, 0x98, 0x3a, + 0x74, 0x9d, 0xf3, 0x3c, 0xa2, 0x99, 0x1d, 0x6a, 0x66, 0x67, 0xb6, 0x8b, 0xd2, 0xbc, 0x93, 0x90, + 0xb7, 0xb7, 0xc5, 0xb9, 0xff, 0x1d, 0x3a, 0x95, 0xb4, 0xfe, 0x7b, 0xf8, 0x9f, 0xb5, 0x6a, 0x83, + 0xd3, 0xce, 0x93, 0x6b, 0x78, 0xe9, 0xc5, 0x28, 0x34, 0x2e, 0xe7, 0xfb, 0xe1, 0xdb, 0x0f, 0xd8, + 0x61, 0xe5, 0x66, 0x05, 0xc1, 0xad, 0xfc, 0xd1, 0xfe, 0x86, 0xbc, 0x87, 0xa6, 0xd2, 0x42, 0x27, + 0x2a, 0xf7, 0xf4, 0xe6, 0x2f, 0xe3, 0x69, 0x18, 0x9e, 0xb3, 0x17, 0x37, 0x60, 0x1d, 0x6a, 0x84, + 0x40, 0xd7, 0x9d, 0x0d, 0x67, 0x9f, 0xdd, 0xf9, 0x64, 0x7a, 0x37, 0xbc, 0x99, 0x8c, 0xac, 0x67, + 0x95, 0xbb, 0xdb, 0xf1, 0x74, 0x34, 0x99, 0x5e, 0x5b, 0x35, 0x62, 0xc1, 0xf3, 0xfc, 0x8e, 0x8f, + 0x87, 0xa3, 0x2f, 0x56, 0xfd, 0x6a, 0x0a, 0x95, 0x7d, 0xbd, 0x7a, 0x51, 0x46, 0xbe, 0x4d, 0x33, + 0xf8, 0xca, 0xfc, 0x40, 0xff, 0x48, 0x16, 0xd4, 0x93, 0x21, 0xf3, 0xe5, 0x3d, 0x3e, 0xb0, 0x6c, + 0x71, 0xd5, 0xf2, 0x81, 0xf9, 0x32, 0xdb, 0x42, 0xc5, 0xca, 0x65, 0x5e, 0x34, 0xcd, 0xd5, 0xe5, + 0xef, 0x00, 0x00, 0x00, 0xff, 0xff, 0xb8, 0xd5, 0xf0, 0x13, 0x23, 0x04, 0x00, 0x00, } diff --git a/sdk/go/protos/feast/core/Store.pb.go b/sdk/go/protos/feast/core/Store.pb.go index 5dafb63d942..9120edcb42c 100644 --- a/sdk/go/protos/feast/core/Store.pb.go +++ 
b/sdk/go/protos/feast/core/Store.pb.go @@ -391,15 +391,27 @@ func (m *Store_CassandraConfig) GetPort() int32 { } type Store_Subscription struct { - // Name of featureSet to subscribe to. This field supports any valid basic POSIX regex, - // e.g. customer_.* or .* - // https://www.regular-expressions.info/posix.html + // Name of project that the feature sets belongs to. This can be one of + // - [project_name] + // - * + // If an asterisk is provided, filtering on projects will be disabled. All projects will + // be matched. It is NOT possible to provide an asterisk with a string in order to do + // pattern matching. + Project string `protobuf:"bytes,3,opt,name=project,proto3" json:"project,omitempty"` + // Name of the desired feature set. Asterisks can be used as wildcards in the name. + // Matching on names is only permitted if a specific project is defined. It is disallowed + // If the project name is set to "*" + // e.g. + // - * can be used to match all feature sets + // - my-feature-set* can be used to match all features prefixed by "my-feature-set" + // - my-feature-set-6 can be used to select a single feature set Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` - // Versions of the given featureSet that will be ingested into this store. + // Versions of the given feature sets that will be returned. // Valid options for version: - // latest: only subscribe to latest version of feature set - // [version number]: pin to a specific version - // >[version number]: subscribe to all versions larger than or equal to [version number] + // "latest": only the latest version is returned. + // "*": Subscribe to all versions + // [version number]: pin to a specific version. Project and feature set name must be + // explicitly defined if a specific version is pinned. 
Version string `protobuf:"bytes,2,opt,name=version,proto3" json:"version,omitempty"` XXX_NoUnkeyedLiteral struct{} `json:"-"` XXX_unrecognized []byte `json:"-"` @@ -431,6 +443,13 @@ func (m *Store_Subscription) XXX_DiscardUnknown() { var xxx_messageInfo_Store_Subscription proto.InternalMessageInfo +func (m *Store_Subscription) GetProject() string { + if m != nil { + return m.Project + } + return "" +} + func (m *Store_Subscription) GetName() string { if m != nil { return m.Name @@ -457,33 +476,34 @@ func init() { func init() { proto.RegisterFile("feast/core/Store.proto", fileDescriptor_4b177bc9ccf64875) } var fileDescriptor_4b177bc9ccf64875 = []byte{ - // 442 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x93, 0xcf, 0x6f, 0xd3, 0x30, - 0x14, 0xc7, 0x97, 0xae, 0xdd, 0x96, 0x97, 0xfe, 0x88, 0x7c, 0x40, 0x51, 0xd1, 0x50, 0xd8, 0xa9, - 0xa7, 0x58, 0x2a, 0xe2, 0x80, 0xc4, 0x81, 0xa6, 0x9d, 0x20, 0x02, 0x55, 0xcc, 0x05, 0x24, 0xb8, - 0x4c, 0xf9, 0xe1, 0x65, 0xde, 0xb4, 0x38, 0xd8, 0x2e, 0x52, 0xff, 0x3a, 0xfe, 0x35, 0x64, 0x27, - 0x69, 0x53, 0xda, 0xc3, 0x2e, 0x91, 0xfd, 0x7d, 0xdf, 0xf7, 0xc9, 0xd3, 0xf3, 0x7b, 0xf0, 0xe2, - 0x8e, 0xc6, 0x52, 0xe1, 0x94, 0x0b, 0x8a, 0x57, 0x8a, 0x0b, 0x1a, 0x94, 0x82, 0x2b, 0x8e, 0xc0, - 0xe8, 0x81, 0xd6, 0xaf, 0xfe, 0xf6, 0xa0, 0x67, 0x62, 0x08, 0x41, 0xb7, 0x88, 0x9f, 0xa8, 0x67, - 0xf9, 0xd6, 0xc4, 0x26, 0xe6, 0x8c, 0x30, 0x74, 0xd5, 0xa6, 0xa4, 0x5e, 0xc7, 0xb7, 0x26, 0xc3, - 0xe9, 0xcb, 0x60, 0x97, 0x18, 0x54, 0x40, 0xf3, 0xfd, 0xb6, 0x29, 0x29, 0x31, 0x46, 0xb4, 0x80, - 0x81, 0x5c, 0x27, 0x32, 0x15, 0xac, 0x54, 0x8c, 0x17, 0xd2, 0xeb, 0xfa, 0xa7, 0x13, 0x67, 0xfa, - 0xea, 0x48, 0x66, 0xcb, 0x46, 0xf6, 0x93, 0x50, 0x08, 0x7d, 0x41, 0x33, 0x26, 0x6f, 0x53, 0x5e, - 0xdc, 0xb1, 0xdc, 0x73, 0x7c, 0x6b, 0xe2, 0x4c, 0x2f, 0x0f, 0x21, 0x44, 0xbb, 0xe6, 0xc6, 0xf4, - 0xe9, 0x84, 0x38, 0x62, 0x77, 0x45, 0x9f, 0x61, 0x94, 0xb0, 0xfc, 0xf7, 0x9a, 0x8a, 0x4d, 0x83, - 0xe9, 0x1b, 
0x8c, 0x7f, 0x88, 0x09, 0x59, 0x7e, 0xa3, 0x8d, 0x5b, 0xd2, 0xb0, 0x49, 0xad, 0x61, - 0x4b, 0x70, 0xd3, 0x58, 0xca, 0xb8, 0xc8, 0x44, 0xdc, 0xd0, 0x06, 0x86, 0xf6, 0xfa, 0x90, 0x36, - 0x6f, 0x9c, 0x5b, 0xdc, 0x28, 0xdd, 0x97, 0xc6, 0x6f, 0xc1, 0x69, 0x95, 0xae, 0x5b, 0x7f, 0xcf, - 0xa5, 0x6a, 0x5a, 0xaf, 0xcf, 0x5a, 0x2b, 0xb9, 0x50, 0xa6, 0xf5, 0x3d, 0x62, 0xce, 0xe3, 0x25, - 0x0c, 0xf7, 0x4b, 0x45, 0x97, 0x00, 0xa5, 0xe0, 0x0f, 0x34, 0x55, 0xb7, 0x2c, 0xab, 0xf3, 0xed, - 0x5a, 0x89, 0x32, 0x1d, 0xce, 0x62, 0x15, 0x4b, 0x6a, 0xc2, 0x9d, 0x2a, 0x5c, 0x2b, 0x51, 0x36, - 0x7e, 0x07, 0xa3, 0xff, 0x8a, 0x7d, 0x76, 0x29, 0xef, 0xa1, 0xdf, 0x7e, 0xc1, 0xa3, 0xd3, 0xe3, - 0xc1, 0xf9, 0x1f, 0x2a, 0x24, 0xe3, 0x45, 0xfd, 0xeb, 0xe6, 0x7a, 0xf5, 0x01, 0xec, 0xed, 0xe4, - 0x20, 0x07, 0xce, 0xa3, 0xe5, 0x8f, 0xd9, 0x97, 0x68, 0xe1, 0x9e, 0x20, 0x1b, 0x7a, 0xe4, 0x7a, - 0x11, 0xad, 0x5c, 0x0b, 0xf5, 0xe1, 0x22, 0x8c, 0x3e, 0xde, 0x7c, 0xbf, 0x26, 0x3f, 0xdd, 0x0e, - 0x1a, 0x80, 0x3d, 0x9f, 0xad, 0x56, 0xb3, 0xe5, 0x82, 0xcc, 0xdc, 0xd3, 0xf0, 0x02, 0xce, 0xaa, - 0x77, 0x08, 0x23, 0x68, 0xcd, 0x73, 0x08, 0x86, 0xfb, 0x55, 0xcf, 0xf9, 0x2f, 0x9c, 0x33, 0x75, - 0xbf, 0x4e, 0x82, 0x94, 0x3f, 0xe1, 0x9c, 0x3f, 0xd0, 0x47, 0x5c, 0x2d, 0x84, 0xcc, 0x1e, 0x71, - 0xce, 0xb1, 0x59, 0x06, 0x89, 0x77, 0x4b, 0x92, 0x9c, 0x19, 0xe9, 0xcd, 0xbf, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xdc, 0xaf, 0xad, 0x8c, 0x39, 0x03, 0x00, 0x00, + // 450 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x93, 0x4f, 0x6f, 0xd3, 0x30, + 0x18, 0xc6, 0x97, 0xfe, 0x59, 0x97, 0x37, 0xfd, 0x13, 0xf9, 0x80, 0xa2, 0xa2, 0xa1, 0xb0, 0x53, + 0x4f, 0xb1, 0x54, 0xc4, 0x81, 0x1b, 0x4d, 0x3b, 0x41, 0x04, 0xaa, 0x98, 0x0b, 0x93, 0xe0, 0x32, + 0xa5, 0x89, 0x97, 0x79, 0xd3, 0xe2, 0x60, 0xbb, 0x48, 0xfd, 0xa8, 0x7c, 0x1b, 0x64, 0x27, 0x69, + 0x53, 0xda, 0xc3, 0x2e, 0x91, 0xfd, 0xbc, 0xcf, 0xf3, 0xcb, 0x2b, 0xdb, 0x2f, 0xbc, 0xba, 0xa7, + 0xb1, 0x54, 0x38, 0xe1, 0x82, 0xe2, 
0x95, 0xe2, 0x82, 0x06, 0x85, 0xe0, 0x8a, 0x23, 0x30, 0x7a, + 0xa0, 0xf5, 0xab, 0xbf, 0x5d, 0xe8, 0x9a, 0x1a, 0x42, 0xd0, 0xc9, 0xe3, 0x67, 0xea, 0x59, 0xbe, + 0x35, 0xb1, 0x89, 0x59, 0x23, 0x0c, 0x1d, 0xb5, 0x2d, 0xa8, 0xd7, 0xf2, 0xad, 0xc9, 0x70, 0xfa, + 0x3a, 0xd8, 0x07, 0x83, 0x12, 0x68, 0xbe, 0xdf, 0xb7, 0x05, 0x25, 0xc6, 0x88, 0x16, 0x30, 0x90, + 0x9b, 0xb5, 0x4c, 0x04, 0x2b, 0x14, 0xe3, 0xb9, 0xf4, 0x3a, 0x7e, 0x7b, 0xe2, 0x4c, 0xdf, 0x9c, + 0x48, 0x36, 0x6c, 0xe4, 0x30, 0x84, 0x42, 0xe8, 0x0b, 0x9a, 0x32, 0x79, 0x97, 0xf0, 0xfc, 0x9e, + 0x65, 0x9e, 0xe3, 0x5b, 0x13, 0x67, 0x7a, 0x79, 0x0c, 0x21, 0xda, 0x35, 0x37, 0xa6, 0xcf, 0x67, + 0xc4, 0x11, 0xfb, 0x2d, 0xfa, 0x02, 0xa3, 0x35, 0xcb, 0x7e, 0x6f, 0xa8, 0xd8, 0xd6, 0x98, 0xbe, + 0xc1, 0xf8, 0xc7, 0x98, 0x90, 0x65, 0x37, 0xda, 0xb8, 0x23, 0x0d, 0xeb, 0x68, 0x05, 0x5b, 0x82, + 0x9b, 0xc4, 0x52, 0xc6, 0x79, 0x2a, 0xe2, 0x9a, 0x36, 0x30, 0xb4, 0xb7, 0xc7, 0xb4, 0x79, 0xed, + 0xdc, 0xe1, 0x46, 0xc9, 0xa1, 0x34, 0x7e, 0x0f, 0x4e, 0xa3, 0x75, 0x7d, 0xf4, 0x0f, 0x5c, 0xaa, + 0xfa, 0xe8, 0xf5, 0x5a, 0x6b, 0x05, 0x17, 0xca, 0x1c, 0x7d, 0x97, 0x98, 0xf5, 0x78, 0x09, 0xc3, + 0xc3, 0x56, 0xd1, 0x25, 0x40, 0x21, 0xf8, 0x23, 0x4d, 0xd4, 0x1d, 0x4b, 0xab, 0xbc, 0x5d, 0x29, + 0x51, 0xaa, 0xcb, 0x69, 0xac, 0x62, 0x49, 0x4d, 0xb9, 0x55, 0x96, 0x2b, 0x25, 0x4a, 0xc7, 0x1f, + 0x60, 0xf4, 0x5f, 0xb3, 0x2f, 0x6e, 0xe5, 0x16, 0xfa, 0xcd, 0x1b, 0x44, 0x1e, 0xf4, 0xaa, 0xdf, + 0x7a, 0x6d, 0x13, 0xad, 0xb7, 0x27, 0xdf, 0x95, 0x07, 0xbd, 0x3f, 0x54, 0x48, 0xc6, 0xf3, 0xaa, + 0xa9, 0x7a, 0x7b, 0xf5, 0x11, 0xec, 0xdd, 0x9b, 0x42, 0x0e, 0xf4, 0xa2, 0xe5, 0xed, 0xec, 0x6b, + 0xb4, 0x70, 0xcf, 0x90, 0x0d, 0x5d, 0x72, 0xbd, 0x88, 0x56, 0xae, 0x85, 0xfa, 0x70, 0x11, 0x46, + 0x9f, 0x6e, 0x7e, 0x5c, 0x93, 0x9f, 0x6e, 0x0b, 0x0d, 0xc0, 0x9e, 0xcf, 0x56, 0xab, 0xd9, 0x72, + 0x41, 0x66, 0x6e, 0x3b, 0xbc, 0x80, 0xf3, 0xf2, 0x86, 0xc2, 0x08, 0x1a, 0x2f, 0x3d, 0x04, 0xc3, + 0xfd, 0xa6, 0x27, 0xe0, 0x17, 0xce, 0x98, 0x7a, 0xd8, 0xac, 0x83, 0x84, 
0x3f, 0xe3, 0x8c, 0x3f, + 0xd2, 0x27, 0x5c, 0x8e, 0x8a, 0x4c, 0x9f, 0x70, 0xc6, 0xb1, 0x19, 0x13, 0x89, 0xf7, 0xe3, 0xb3, + 0x3e, 0x37, 0xd2, 0xbb, 0x7f, 0x01, 0x00, 0x00, 0xff, 0xff, 0x56, 0xfe, 0x58, 0x14, 0x53, 0x03, + 0x00, 0x00, } diff --git a/sdk/go/protos/feast/serving/ServingService.pb.go b/sdk/go/protos/feast/serving/ServingService.pb.go index 49c730ad3ca..212e8606ce7 100644 --- a/sdk/go/protos/feast/serving/ServingService.pb.go +++ b/sdk/go/protos/feast/serving/ServingService.pb.go @@ -231,13 +231,13 @@ func (m *GetFeastServingInfoResponse) GetJobStagingLocation() string { return "" } -type FeatureSetRequest struct { - // Feature set name - Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` - // Feature set version - Version int32 `protobuf:"varint,2,opt,name=version,proto3" json:"version,omitempty"` - // Features that should be retrieved from this feature set - FeatureNames []string `protobuf:"bytes,3,rep,name=feature_names,json=featureNames,proto3" json:"feature_names,omitempty"` +type FeatureReference struct { + // Project name + Project string `protobuf:"bytes,1,opt,name=project,proto3" json:"project,omitempty"` + // Feature name + Name string `protobuf:"bytes,2,opt,name=name,proto3" json:"name,omitempty"` + // Feature version + Version int32 `protobuf:"varint,3,opt,name=version,proto3" json:"version,omitempty"` // The features will be retrieved if: // entity_timestamp - max_age <= event_timestamp <= entity_timestamp // @@ -249,53 +249,53 @@ type FeatureSetRequest struct { XXX_sizecache int32 `json:"-"` } -func (m *FeatureSetRequest) Reset() { *m = FeatureSetRequest{} } -func (m *FeatureSetRequest) String() string { return proto.CompactTextString(m) } -func (*FeatureSetRequest) ProtoMessage() {} -func (*FeatureSetRequest) Descriptor() ([]byte, []int) { +func (m *FeatureReference) Reset() { *m = FeatureReference{} } +func (m *FeatureReference) String() string { return proto.CompactTextString(m) } +func (*FeatureReference) 
ProtoMessage() {} +func (*FeatureReference) Descriptor() ([]byte, []int) { return fileDescriptor_0c1ba93cf29a8d9d, []int{2} } -func (m *FeatureSetRequest) XXX_Unmarshal(b []byte) error { - return xxx_messageInfo_FeatureSetRequest.Unmarshal(m, b) +func (m *FeatureReference) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_FeatureReference.Unmarshal(m, b) } -func (m *FeatureSetRequest) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { - return xxx_messageInfo_FeatureSetRequest.Marshal(b, m, deterministic) +func (m *FeatureReference) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_FeatureReference.Marshal(b, m, deterministic) } -func (m *FeatureSetRequest) XXX_Merge(src proto.Message) { - xxx_messageInfo_FeatureSetRequest.Merge(m, src) +func (m *FeatureReference) XXX_Merge(src proto.Message) { + xxx_messageInfo_FeatureReference.Merge(m, src) } -func (m *FeatureSetRequest) XXX_Size() int { - return xxx_messageInfo_FeatureSetRequest.Size(m) +func (m *FeatureReference) XXX_Size() int { + return xxx_messageInfo_FeatureReference.Size(m) } -func (m *FeatureSetRequest) XXX_DiscardUnknown() { - xxx_messageInfo_FeatureSetRequest.DiscardUnknown(m) +func (m *FeatureReference) XXX_DiscardUnknown() { + xxx_messageInfo_FeatureReference.DiscardUnknown(m) } -var xxx_messageInfo_FeatureSetRequest proto.InternalMessageInfo +var xxx_messageInfo_FeatureReference proto.InternalMessageInfo -func (m *FeatureSetRequest) GetName() string { +func (m *FeatureReference) GetProject() string { if m != nil { - return m.Name + return m.Project } return "" } -func (m *FeatureSetRequest) GetVersion() int32 { +func (m *FeatureReference) GetName() string { if m != nil { - return m.Version + return m.Name } - return 0 + return "" } -func (m *FeatureSetRequest) GetFeatureNames() []string { +func (m *FeatureReference) GetVersion() int32 { if m != nil { - return m.FeatureNames + return m.Version } - return nil + return 0 } -func (m *FeatureSetRequest) 
GetMaxAge() *duration.Duration { +func (m *FeatureReference) GetMaxAge() *duration.Duration { if m != nil { return m.MaxAge } @@ -303,8 +303,8 @@ func (m *FeatureSetRequest) GetMaxAge() *duration.Duration { } type GetOnlineFeaturesRequest struct { - // List of feature sets and their features that are being retrieved - FeatureSets []*FeatureSetRequest `protobuf:"bytes,1,rep,name=feature_sets,json=featureSets,proto3" json:"feature_sets,omitempty"` + // List of features that are being retrieved + Features []*FeatureReference `protobuf:"bytes,4,rep,name=features,proto3" json:"features,omitempty"` // List of entity rows, containing entity id and timestamp data. // Used during retrieval of feature rows and for joining feature // rows into a final dataset @@ -342,9 +342,9 @@ func (m *GetOnlineFeaturesRequest) XXX_DiscardUnknown() { var xxx_messageInfo_GetOnlineFeaturesRequest proto.InternalMessageInfo -func (m *GetOnlineFeaturesRequest) GetFeatureSets() []*FeatureSetRequest { +func (m *GetOnlineFeaturesRequest) GetFeatures() []*FeatureReference { if m != nil { - return m.FeatureSets + return m.Features } return nil } @@ -414,8 +414,8 @@ func (m *GetOnlineFeaturesRequest_EntityRow) GetFields() map[string]*types.Value } type GetBatchFeaturesRequest struct { - // List of feature sets and their features that are being retrieved. - FeatureSets []*FeatureSetRequest `protobuf:"bytes,1,rep,name=feature_sets,json=featureSets,proto3" json:"feature_sets,omitempty"` + // List of features that are being retrieved + Features []*FeatureReference `protobuf:"bytes,3,rep,name=features,proto3" json:"features,omitempty"` // Source of the entity dataset containing the timestamps and entity keys to retrieve // features for. 
DatasetSource *DatasetSource `protobuf:"bytes,2,opt,name=dataset_source,json=datasetSource,proto3" json:"dataset_source,omitempty"` @@ -449,9 +449,9 @@ func (m *GetBatchFeaturesRequest) XXX_DiscardUnknown() { var xxx_messageInfo_GetBatchFeaturesRequest proto.InternalMessageInfo -func (m *GetBatchFeaturesRequest) GetFeatureSets() []*FeatureSetRequest { +func (m *GetBatchFeaturesRequest) GetFeatures() []*FeatureReference { if m != nil { - return m.FeatureSets + return m.Features } return nil } @@ -870,7 +870,7 @@ func init() { proto.RegisterEnum("feast.serving.DataFormat", DataFormat_name, DataFormat_value) proto.RegisterType((*GetFeastServingInfoRequest)(nil), "feast.serving.GetFeastServingInfoRequest") proto.RegisterType((*GetFeastServingInfoResponse)(nil), "feast.serving.GetFeastServingInfoResponse") - proto.RegisterType((*FeatureSetRequest)(nil), "feast.serving.FeatureSetRequest") + proto.RegisterType((*FeatureReference)(nil), "feast.serving.FeatureReference") proto.RegisterType((*GetOnlineFeaturesRequest)(nil), "feast.serving.GetOnlineFeaturesRequest") proto.RegisterType((*GetOnlineFeaturesRequest_EntityRow)(nil), "feast.serving.GetOnlineFeaturesRequest.EntityRow") proto.RegisterMapType((map[string]*types.Value)(nil), "feast.serving.GetOnlineFeaturesRequest.EntityRow.FieldsEntry") @@ -889,77 +889,76 @@ func init() { func init() { proto.RegisterFile("feast/serving/ServingService.proto", fileDescriptor_0c1ba93cf29a8d9d) } var fileDescriptor_0c1ba93cf29a8d9d = []byte{ - // 1105 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xbc, 0x56, 0x4f, 0x73, 0xda, 0x46, - 0x14, 0x8f, 0xc0, 0xc6, 0xe1, 0x11, 0x63, 0x79, 0xed, 0xda, 0xb2, 0xe2, 0x24, 0x0c, 0xed, 0xd4, - 0x94, 0x83, 0x68, 0x49, 0x9b, 0x69, 0xd3, 0xe9, 0x4c, 0xc0, 0x08, 0x82, 0xc7, 0x11, 0x9e, 0x05, - 0x3b, 0x6d, 0x2f, 0x1a, 0x01, 0x0b, 0x96, 0x0d, 0x5a, 0x57, 0xbb, 0x38, 0xf1, 0xd7, 0xe8, 0xb1, - 0x87, 0x5e, 0x7a, 0xee, 0xb5, 0x9f, 0xa4, 0x9f, 0xa0, 0xb7, 
0x7e, 0x83, 0x1e, 0x3b, 0x5a, 0xad, - 0x30, 0xff, 0x9c, 0xd8, 0x9d, 0x69, 0x4e, 0xda, 0x7d, 0xef, 0xf7, 0xfe, 0xee, 0x7b, 0x4f, 0x0f, - 0xb2, 0x3d, 0xe2, 0x30, 0x5e, 0x60, 0xc4, 0xbf, 0x74, 0xbd, 0x7e, 0xa1, 0x19, 0x7e, 0xc5, 0xa7, - 0x43, 0x8c, 0x0b, 0x9f, 0x72, 0x8a, 0x56, 0x05, 0xc6, 0x90, 0x18, 0xfd, 0x49, 0x9f, 0xd2, 0xfe, - 0x80, 0x14, 0x04, 0xb3, 0x3d, 0xea, 0x15, 0xb8, 0x3b, 0x24, 0x8c, 0x3b, 0xc3, 0x8b, 0x10, 0xaf, - 0x3f, 0x9e, 0x05, 0x74, 0x47, 0xbe, 0xc3, 0x5d, 0xea, 0x49, 0xfe, 0x76, 0x68, 0x93, 0x5f, 0x5d, - 0x10, 0x56, 0x38, 0x71, 0x06, 0x23, 0x69, 0x28, 0xbb, 0x0b, 0x7a, 0x8d, 0xf0, 0x6a, 0xc0, 0x95, - 0x8e, 0xd4, 0xbd, 0x1e, 0xc5, 0xe4, 0xa7, 0x11, 0x61, 0x3c, 0xfb, 0xab, 0x02, 0x0f, 0x17, 0xb2, - 0xd9, 0x05, 0xf5, 0x18, 0x41, 0x1a, 0xac, 0x5c, 0x12, 0x9f, 0xb9, 0xd4, 0xd3, 0x94, 0x8c, 0x92, - 0x4b, 0xe2, 0xe8, 0x8a, 0x9e, 0xc2, 0x52, 0x60, 0x4c, 0x8b, 0x65, 0x94, 0x5c, 0xba, 0xf8, 0xc4, - 0x98, 0x8a, 0xc7, 0x98, 0x54, 0xd8, 0xba, 0xba, 0x20, 0x58, 0x80, 0xd1, 0xe7, 0xb0, 0x79, 0x46, - 0xdb, 0x36, 0xe3, 0x4e, 0xdf, 0xf5, 0xfa, 0xf6, 0x80, 0x76, 0x44, 0x0c, 0x1a, 0x08, 0xdd, 0xe8, - 0x8c, 0xb6, 0x9b, 0x21, 0xeb, 0x50, 0x72, 0xb2, 0xbf, 0x28, 0xb0, 0x5e, 0x25, 0x0e, 0x1f, 0xf9, - 0xa4, 0x49, 0xb8, 0x74, 0x1b, 0x21, 0x58, 0xf2, 0x9c, 0x21, 0x91, 0x3e, 0x89, 0xf3, 0xa4, 0xab, - 0x81, 0x4f, 0xcb, 0xd7, 0xae, 0x7e, 0x0c, 0x41, 0xb6, 0x03, 0x15, 0x76, 0x80, 0x64, 0x5a, 0x3c, - 0x13, 0xcf, 0x25, 0xf1, 0x03, 0x49, 0xb4, 0x02, 0x1a, 0x2a, 0xc2, 0xca, 0xd0, 0x79, 0x6b, 0x3b, - 0x7d, 0xa2, 0x2d, 0x65, 0x94, 0x5c, 0xaa, 0xb8, 0x63, 0x84, 0x29, 0x37, 0xa2, 0x94, 0x1b, 0x15, - 0x99, 0x72, 0x9c, 0x18, 0x3a, 0x6f, 0x4b, 0x7d, 0x92, 0xfd, 0x3b, 0x0e, 0x5a, 0x8d, 0xf0, 0x86, - 0x37, 0x70, 0x3d, 0x22, 0xbd, 0x64, 0x91, 0x8f, 0xfb, 0x10, 0x19, 0xb0, 0x19, 0xe1, 0x4c, 0x53, - 0x32, 0xf1, 0x5c, 0xaa, 0x98, 0x99, 0x4f, 0xd4, 0x74, 0x6c, 0x38, 0xd5, 0x1b, 0x93, 0x18, 0xc2, - 0x90, 0x22, 0x1e, 0x77, 0xf9, 0x95, 0xed, 0xd3, 0x37, 0x4c, 0x8b, 0x09, 0x1d, 0x5f, 0xcc, 0xe8, - 
0xb8, 0xc9, 0x05, 0xc3, 0x14, 0xa2, 0x98, 0xbe, 0xc1, 0x40, 0xa2, 0x23, 0x43, 0xdf, 0xc0, 0x0e, - 0x1d, 0xba, 0xdc, 0x16, 0x24, 0x97, 0x30, 0xdb, 0xf5, 0x6c, 0x5f, 0x3e, 0xb8, 0x16, 0xcf, 0x28, - 0xb9, 0xfb, 0x78, 0x2b, 0x00, 0x98, 0x92, 0x5f, 0xf7, 0xa2, 0x72, 0xd0, 0xff, 0x51, 0x20, 0x39, - 0x56, 0x8a, 0x4c, 0x50, 0xa5, 0x73, 0xe3, 0x6a, 0x15, 0x2f, 0x92, 0x2a, 0xea, 0x73, 0xb9, 0x6b, - 0x45, 0x08, 0xbc, 0x16, 0xca, 0x8c, 0x09, 0xe8, 0x18, 0x12, 0x3d, 0x97, 0x0c, 0xba, 0x51, 0x78, - 0xdf, 0xdd, 0x39, 0x3c, 0xa3, 0x2a, 0xe4, 0x4d, 0x8f, 0xfb, 0x57, 0x58, 0x2a, 0xd3, 0x5f, 0x41, - 0x6a, 0x82, 0x8c, 0x54, 0x88, 0x9f, 0x93, 0x2b, 0x59, 0x31, 0xc1, 0x11, 0xe5, 0x60, 0xf9, 0x32, - 0x68, 0x14, 0x51, 0x2e, 0xa9, 0x22, 0x92, 0x66, 0x45, 0x0b, 0x19, 0xa2, 0x85, 0x70, 0x08, 0x78, - 0x1e, 0xfb, 0x5a, 0xc9, 0xfe, 0xa6, 0xc0, 0x76, 0x8d, 0xf0, 0xb2, 0xc3, 0x3b, 0xa7, 0xff, 0xcb, - 0x53, 0xef, 0x43, 0xba, 0xeb, 0x70, 0x87, 0x11, 0x6e, 0x33, 0x3a, 0xf2, 0x3b, 0x91, 0x5f, 0xbb, - 0x33, 0x6a, 0x2a, 0x21, 0xa8, 0x29, 0x30, 0x78, 0xb5, 0x3b, 0x79, 0xcd, 0xfe, 0x1e, 0x83, 0x9d, - 0x05, 0xf9, 0x92, 0xdd, 0xfc, 0x1a, 0x1e, 0x88, 0xe4, 0xd8, 0x22, 0xac, 0xc8, 0xcf, 0x2f, 0xdf, - 0x9f, 0xef, 0x50, 0x3e, 0x4c, 0xb3, 0xc8, 0x0c, 0xc3, 0xa9, 0xde, 0xf5, 0x45, 0xff, 0x43, 0x91, - 0xc9, 0x0e, 0xef, 0xe8, 0xfb, 0xf1, 0x93, 0x86, 0x26, 0x5e, 0xfc, 0x17, 0x13, 0x1f, 0xe2, 0x55, - 0x5f, 0x88, 0x06, 0x9e, 0x79, 0x54, 0x99, 0xad, 0x4f, 0x20, 0x7e, 0x46, 0xdb, 0xb2, 0xa2, 0xd1, - 0x4c, 0x04, 0x07, 0xb4, 0x8d, 0x03, 0x76, 0xf6, 0x2b, 0x58, 0xad, 0x11, 0x1e, 0x5c, 0x65, 0x31, - 0xdc, 0x4e, 0xec, 0x19, 0xa4, 0x23, 0xb1, 0x3b, 0x99, 0xfb, 0x4b, 0x81, 0xf8, 0x01, 0x6d, 0xa3, - 0x34, 0xc4, 0xdc, 0xae, 0x8c, 0x3b, 0xe6, 0x76, 0x51, 0x7e, 0x6a, 0x1c, 0x6f, 0xcd, 0x8b, 0x4f, - 0x4d, 0xe1, 0x04, 0xe3, 0x0e, 0x1f, 0x31, 0xd1, 0xed, 0xe9, 0xa2, 0x36, 0x8f, 0x6e, 0x0a, 0x3e, - 0x96, 0x38, 0xb4, 0x09, 0xcb, 0xc4, 0xf7, 0xa9, 0x2f, 0x46, 0x63, 0x12, 0x87, 0x17, 0xf4, 0x10, - 0x92, 0x3d, 0x77, 0x40, 0xec, 0x91, 
0xef, 0x32, 0x6d, 0x59, 0xcc, 0xd4, 0xfb, 0x01, 0xe1, 0xd8, - 0x77, 0x19, 0x7a, 0x0e, 0xa9, 0xa0, 0x34, 0xed, 0x1e, 0xf5, 0x87, 0x0e, 0xd7, 0x12, 0xc2, 0xd2, - 0xce, 0x82, 0x5a, 0xae, 0x0a, 0x00, 0x86, 0xee, 0xf8, 0x9c, 0xfd, 0x53, 0x81, 0xd5, 0xa9, 0x32, - 0x47, 0x07, 0x90, 0x12, 0xa6, 0x64, 0x67, 0x84, 0x49, 0xda, 0x7b, 0x57, 0x67, 0x18, 0x55, 0x77, - 0x40, 0xc2, 0xe3, 0xcb, 0x7b, 0x18, 0x7a, 0xe3, 0x9b, 0x4e, 0x00, 0xae, 0x79, 0xd3, 0x41, 0x28, - 0xef, 0x0e, 0x22, 0x76, 0x87, 0x20, 0xca, 0xea, 0x6c, 0x3f, 0xe7, 0x29, 0xa8, 0xb3, 0xff, 0x45, - 0xf4, 0x18, 0xf4, 0xaa, 0x59, 0x6a, 0xb6, 0xec, 0xa6, 0x89, 0x4f, 0xea, 0x56, 0xcd, 0x6e, 0xfd, - 0x70, 0x64, 0xda, 0x75, 0xeb, 0xa4, 0x74, 0x58, 0xaf, 0xa8, 0xf7, 0xd0, 0x23, 0xd8, 0x59, 0xc0, - 0x6f, 0x58, 0x87, 0x75, 0xcb, 0x54, 0x15, 0xb4, 0x0b, 0xda, 0x02, 0x76, 0xb9, 0xd4, 0xda, 0x7f, - 0xa9, 0xc6, 0xf2, 0xcf, 0x60, 0x45, 0xbe, 0x3c, 0xda, 0x04, 0xf5, 0xa0, 0x51, 0x9e, 0xd5, 0xfe, - 0x11, 0xac, 0x8f, 0xa9, 0x95, 0xc6, 0x6b, 0xeb, 0xb0, 0x51, 0xaa, 0xa8, 0x4a, 0xfe, 0x14, 0x92, - 0xe3, 0x1a, 0x40, 0x5b, 0x80, 0x02, 0x4c, 0xb3, 0x55, 0x6a, 0x1d, 0x37, 0x27, 0x64, 0xa7, 0xe9, - 0x47, 0xa6, 0x55, 0xa9, 0x5b, 0x35, 0x55, 0x99, 0xa1, 0xe3, 0x63, 0xcb, 0x0a, 0xe8, 0x31, 0xb4, - 0x01, 0x6b, 0x13, 0xf4, 0x4a, 0xc3, 0x32, 0xd5, 0x78, 0xfe, 0x5b, 0x80, 0xeb, 0xf4, 0xa1, 0x6d, - 0xd8, 0xa8, 0x94, 0x5a, 0x25, 0xbb, 0xda, 0xc0, 0xaf, 0x4a, 0xad, 0x09, 0x5b, 0x9b, 0xa0, 0x4e, - 0x32, 0x4a, 0x27, 0xb8, 0xa1, 0x2a, 0xc5, 0x9f, 0xe3, 0x90, 0x9e, 0x5e, 0xae, 0xd0, 0x00, 0x36, - 0x16, 0xac, 0x33, 0xe8, 0xb3, 0xf9, 0xf9, 0x73, 0xc3, 0x46, 0xa4, 0xe7, 0x6f, 0x03, 0x95, 0x2d, - 0xdb, 0x83, 0xf5, 0xb9, 0x49, 0x86, 0xf6, 0x6e, 0xf9, 0xfb, 0xd2, 0x73, 0xb7, 0x1d, 0x8a, 0xa8, - 0x03, 0xea, 0xec, 0x94, 0x42, 0x9f, 0xce, 0x4b, 0x2f, 0xfa, 0x37, 0xe9, 0x7b, 0xef, 0xc5, 0x49, - 0x23, 0x26, 0x24, 0xc2, 0x89, 0x84, 0x76, 0xe7, 0x45, 0xae, 0xe7, 0x9b, 0xfe, 0xe8, 0x06, 0x6e, - 0xa8, 0xa6, 0xdc, 0x82, 0xe9, 0xd5, 0xb6, 0xbc, 0x26, 0x33, 0x57, 0x3a, 
0xaa, 0x1f, 0x05, 0xdb, - 0xc0, 0x8f, 0xc5, 0xbe, 0xcb, 0x4f, 0x47, 0x6d, 0xa3, 0x43, 0x87, 0x85, 0x3e, 0x3d, 0x23, 0xe7, - 0x05, 0xb9, 0x2f, 0x77, 0xcf, 0x0b, 0x7d, 0x1a, 0x6e, 0xb8, 0xac, 0x30, 0xb5, 0x43, 0xb7, 0x13, - 0x82, 0xfa, 0xf4, 0xdf, 0x00, 0x00, 0x00, 0xff, 0xff, 0xa8, 0xc6, 0xa7, 0xc9, 0x5b, 0x0b, 0x00, - 0x00, + // 1101 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xbc, 0x56, 0x51, 0x73, 0xda, 0xc6, + 0x13, 0x8f, 0xc0, 0xc6, 0x66, 0xf9, 0x1b, 0x2b, 0x67, 0xff, 0x6d, 0x59, 0x71, 0x12, 0x86, 0xe9, + 0xd4, 0x94, 0x07, 0xd1, 0x92, 0x36, 0xd3, 0x26, 0xd3, 0x99, 0x40, 0x10, 0x04, 0x8f, 0x03, 0x9e, + 0x03, 0x3b, 0x6d, 0x5f, 0x34, 0x02, 0x4e, 0x58, 0x36, 0xe8, 0xa8, 0xee, 0x70, 0xe2, 0x2f, 0xd1, + 0x87, 0xbe, 0x76, 0xa6, 0xdf, 0xa0, 0xaf, 0xfd, 0x24, 0xfd, 0x02, 0xed, 0xa7, 0xe8, 0x63, 0x47, + 0xa7, 0x03, 0x23, 0xc0, 0x8e, 0xdd, 0x87, 0x3e, 0xe9, 0x6e, 0xf7, 0xb7, 0xb7, 0xb7, 0xbf, 0xdb, + 0x5d, 0x2d, 0x64, 0x1d, 0x62, 0x33, 0x5e, 0x60, 0xc4, 0xbf, 0x74, 0xbd, 0x7e, 0xa1, 0x15, 0x7e, + 0xc5, 0xa7, 0x4b, 0x8c, 0x91, 0x4f, 0x39, 0x45, 0x1b, 0x02, 0x63, 0x48, 0x8c, 0xfe, 0xb4, 0x4f, + 0x69, 0x7f, 0x40, 0x0a, 0x42, 0xd9, 0x19, 0x3b, 0x05, 0xee, 0x0e, 0x09, 0xe3, 0xf6, 0x70, 0x14, + 0xe2, 0xf5, 0x27, 0xf3, 0x80, 0xde, 0xd8, 0xb7, 0xb9, 0x4b, 0x3d, 0xa9, 0xdf, 0x0d, 0x7d, 0xf2, + 0xab, 0x11, 0x61, 0x85, 0x53, 0x7b, 0x30, 0x96, 0x8e, 0xb2, 0xfb, 0xa0, 0xd7, 0x08, 0xaf, 0x06, + 0x5a, 0x79, 0x91, 0xba, 0xe7, 0x50, 0x4c, 0x7e, 0x1c, 0x13, 0xc6, 0xb3, 0xbf, 0x2a, 0xf0, 0x68, + 0xa9, 0x9a, 0x8d, 0xa8, 0xc7, 0x08, 0xd2, 0x60, 0xed, 0x92, 0xf8, 0xcc, 0xa5, 0x9e, 0xa6, 0x64, + 0x94, 0x5c, 0x12, 0x4f, 0xb6, 0xe8, 0x19, 0xac, 0x04, 0xce, 0xb4, 0x58, 0x46, 0xc9, 0xa5, 0x8b, + 0x4f, 0x8d, 0x48, 0x3c, 0xc6, 0xec, 0x81, 0xed, 0xab, 0x11, 0xc1, 0x02, 0x8c, 0x3e, 0x87, 0xed, + 0x73, 0xda, 0xb1, 0x18, 0xb7, 0xfb, 0xae, 0xd7, 0xb7, 0x06, 0xb4, 0x2b, 0x62, 0xd0, 0x40, 0x9c, + 0x8d, 0xce, 0x69, 0xa7, 0x15, 0xaa, 0x8e, 0xa4, 0x26, 
0xfb, 0x93, 0x02, 0x6a, 0x95, 0xd8, 0x7c, + 0xec, 0x13, 0x4c, 0x1c, 0xe2, 0x13, 0xaf, 0x2b, 0x6e, 0x35, 0xf2, 0xe9, 0x39, 0xe9, 0xf2, 0xc9, + 0xad, 0xe4, 0x16, 0x21, 0x58, 0xf1, 0xec, 0x61, 0x78, 0xab, 0x24, 0x16, 0xeb, 0xd9, 0x18, 0xe2, + 0x19, 0x25, 0xb7, 0x7a, 0x1d, 0x43, 0x11, 0xd6, 0x86, 0xf6, 0x07, 0xcb, 0xee, 0x13, 0x6d, 0x25, + 0xa3, 0xe4, 0x52, 0xc5, 0x3d, 0x23, 0xa4, 0xd9, 0x98, 0xd0, 0x6c, 0x54, 0x24, 0xcd, 0x38, 0x31, + 0xb4, 0x3f, 0x94, 0xfa, 0x24, 0xfb, 0x67, 0x1c, 0xb4, 0x1a, 0xe1, 0x4d, 0x6f, 0xe0, 0x7a, 0x44, + 0xde, 0x8c, 0x49, 0x3a, 0xd1, 0x4b, 0x58, 0x77, 0xa4, 0x48, 0x5b, 0xc9, 0xc4, 0x73, 0xa9, 0x65, + 0xc4, 0x44, 0x62, 0xc1, 0x53, 0x03, 0x84, 0x21, 0x45, 0x3c, 0xee, 0xf2, 0x2b, 0xcb, 0xa7, 0xef, + 0x99, 0x16, 0x13, 0xf6, 0x5f, 0xcc, 0xd9, 0xdf, 0xe4, 0xda, 0x30, 0x85, 0x29, 0xa6, 0xef, 0x31, + 0x90, 0xc9, 0x92, 0xa1, 0x6f, 0x60, 0x8f, 0x0e, 0x5d, 0x6e, 0x09, 0x91, 0x4b, 0x98, 0xe5, 0x7a, + 0x96, 0x2f, 0x1f, 0x57, 0xb0, 0xb1, 0x8e, 0x77, 0x02, 0x80, 0x29, 0xf5, 0x75, 0x6f, 0xf2, 0xf4, + 0xfa, 0xdf, 0x0a, 0x24, 0xa7, 0x87, 0x22, 0x13, 0x54, 0x79, 0xb9, 0x69, 0x66, 0x0a, 0xee, 0x53, + 0x45, 0x7d, 0x81, 0xb3, 0xf6, 0x04, 0x81, 0x37, 0x43, 0x9b, 0xa9, 0x00, 0x9d, 0x40, 0xc2, 0x71, + 0xc9, 0xa0, 0x37, 0x09, 0xef, 0xdb, 0x7b, 0x87, 0x67, 0x54, 0x85, 0xbd, 0xe9, 0x71, 0xff, 0x0a, + 0xcb, 0xc3, 0xf4, 0xb7, 0x90, 0x9a, 0x11, 0x23, 0x15, 0xe2, 0x17, 0xe4, 0x4a, 0xe6, 0x46, 0xb0, + 0x44, 0x39, 0x58, 0xbd, 0x0c, 0x8a, 0x42, 0x24, 0x46, 0xaa, 0x88, 0xa4, 0x5b, 0x51, 0x2e, 0x86, + 0x28, 0x17, 0x1c, 0x02, 0x5e, 0xc4, 0xbe, 0x56, 0xb2, 0xbf, 0x28, 0xb0, 0x5b, 0x23, 0xbc, 0x6c, + 0xf3, 0xee, 0xd9, 0x6d, 0x4f, 0x1c, 0xbf, 0xef, 0x13, 0xbf, 0x86, 0x74, 0xcf, 0xe6, 0x36, 0x23, + 0xdc, 0x62, 0x74, 0xec, 0x77, 0x27, 0xf7, 0xd9, 0x9f, 0x3b, 0xa2, 0x12, 0x82, 0x5a, 0x02, 0x83, + 0x37, 0x7a, 0xb3, 0xdb, 0xec, 0x6f, 0x31, 0xd8, 0x5b, 0xc2, 0x93, 0xac, 0xd8, 0x77, 0xf0, 0x3f, + 0x41, 0x8a, 0x25, 0xc2, 0x61, 0x9a, 0x22, 0xee, 0xf8, 0xe5, 0xc7, 0x79, 0x0e, 0xed, 0x43, 
0x7a, + 0x05, 0x23, 0x0c, 0xa7, 0x9c, 0xeb, 0x8d, 0xfe, 0xbb, 0x22, 0x49, 0x0e, 0xf7, 0xe8, 0xbb, 0xe9, + 0x53, 0x86, 0x2e, 0x5e, 0xfd, 0x1b, 0x17, 0xff, 0xc5, 0x6b, 0xbe, 0x12, 0x05, 0x3b, 0xf7, 0x98, + 0x92, 0xad, 0x4f, 0x20, 0x7e, 0x4e, 0x3b, 0x32, 0x93, 0xd1, 0x5c, 0x04, 0x87, 0xb4, 0x83, 0x03, + 0x75, 0xf6, 0x2b, 0xd8, 0xa8, 0x11, 0x1e, 0x6c, 0x65, 0x12, 0xdc, 0xcd, 0xec, 0x39, 0xa4, 0x27, + 0x66, 0xf7, 0x72, 0xf7, 0x97, 0x02, 0xf1, 0x43, 0xda, 0x41, 0x69, 0x88, 0xb9, 0x3d, 0x19, 0x77, + 0xcc, 0xed, 0xa1, 0x7c, 0xa4, 0xe5, 0xee, 0x2c, 0x9a, 0x47, 0x3a, 0x6d, 0x82, 0x71, 0x9b, 0x8f, + 0x99, 0xa8, 0xf2, 0x74, 0x51, 0x5b, 0x44, 0xb7, 0x84, 0x1e, 0x4b, 0x1c, 0xda, 0x86, 0x55, 0xe2, + 0xfb, 0xd4, 0x17, 0xad, 0x30, 0x89, 0xc3, 0x0d, 0x7a, 0x04, 0x49, 0xc7, 0x1d, 0x10, 0x6b, 0xec, + 0xbb, 0x4c, 0x5b, 0xcd, 0xc4, 0x73, 0x49, 0xbc, 0x1e, 0x08, 0x4e, 0x7c, 0x97, 0xa1, 0x17, 0x90, + 0x0a, 0x52, 0xd3, 0x72, 0xa8, 0x3f, 0xb4, 0xb9, 0x96, 0x10, 0x9e, 0xf6, 0x96, 0xe4, 0x72, 0x55, + 0x00, 0x30, 0xf4, 0xa6, 0xeb, 0xec, 0x1f, 0x0a, 0x6c, 0x44, 0xd2, 0x1c, 0x1d, 0x42, 0x4a, 0xb8, + 0x92, 0x95, 0x11, 0x92, 0x74, 0x70, 0x5b, 0x65, 0x18, 0x55, 0x77, 0x40, 0xc2, 0xe5, 0x9b, 0x07, + 0x18, 0x9c, 0xe9, 0x4e, 0x27, 0x00, 0xd7, 0xba, 0x68, 0x10, 0xca, 0xed, 0x41, 0xc4, 0xee, 0x11, + 0x44, 0x59, 0x9d, 0xaf, 0xe7, 0x3c, 0x15, 0xbf, 0xab, 0xc8, 0xbf, 0x0f, 0x3d, 0x01, 0xbd, 0x6a, + 0x96, 0x5a, 0x6d, 0xab, 0x65, 0xe2, 0xd3, 0x7a, 0xa3, 0x66, 0xb5, 0xbf, 0x3f, 0x36, 0xad, 0x7a, + 0xe3, 0xb4, 0x74, 0x54, 0xaf, 0xa8, 0x0f, 0xd0, 0x63, 0xd8, 0x5b, 0xa2, 0x6f, 0x36, 0x8e, 0xea, + 0x0d, 0x53, 0x55, 0xd0, 0x3e, 0x68, 0x4b, 0xd4, 0xe5, 0x52, 0xfb, 0xf5, 0x1b, 0x35, 0x96, 0x7f, + 0x0e, 0x6b, 0xf2, 0xe5, 0xd1, 0x36, 0xa8, 0x87, 0xcd, 0xf2, 0xfc, 0xe9, 0xff, 0x87, 0x87, 0x53, + 0x69, 0xa5, 0xf9, 0xae, 0x71, 0xd4, 0x2c, 0x55, 0x54, 0x25, 0x7f, 0x06, 0xc9, 0x69, 0x0e, 0xa0, + 0x1d, 0x40, 0x01, 0xa6, 0xd5, 0x2e, 0xb5, 0x4f, 0x5a, 0x33, 0xb6, 0x51, 0xf9, 0xb1, 0xd9, 0xa8, + 0xd4, 0x1b, 0x35, 0x55, 0x99, 
0x93, 0xe3, 0x93, 0x46, 0x23, 0x90, 0xc7, 0xd0, 0x16, 0x6c, 0xce, + 0xc8, 0x2b, 0xcd, 0x86, 0xa9, 0xc6, 0xf3, 0x2f, 0x01, 0xae, 0xe9, 0x43, 0xbb, 0xb0, 0x55, 0x29, + 0xb5, 0x4b, 0x56, 0xb5, 0x89, 0xdf, 0x96, 0xda, 0x33, 0xbe, 0xb6, 0x41, 0x9d, 0x55, 0x94, 0x4e, + 0x71, 0x53, 0x55, 0x8a, 0x3f, 0xc7, 0x21, 0x1d, 0x1d, 0xa0, 0xd0, 0x00, 0xb6, 0x96, 0x8c, 0x2c, + 0xe8, 0xb3, 0xc5, 0xfe, 0x73, 0xc3, 0xd4, 0xa3, 0xe7, 0xef, 0x02, 0x95, 0x25, 0xeb, 0xc0, 0xc3, + 0x85, 0x4e, 0x86, 0x0e, 0xee, 0xf8, 0xdb, 0xd2, 0x73, 0x77, 0x6d, 0x8a, 0xa8, 0x0b, 0xea, 0x7c, + 0x97, 0x42, 0x9f, 0x2e, 0x5a, 0x2f, 0xfb, 0x27, 0xe9, 0x07, 0x1f, 0xc5, 0x49, 0x27, 0x26, 0x24, + 0xc2, 0x8e, 0x84, 0xf6, 0x17, 0x4d, 0xae, 0xfb, 0x9b, 0xfe, 0xf8, 0x06, 0x6d, 0x78, 0x4c, 0xb9, + 0x0d, 0xd1, 0xf1, 0xb5, 0xbc, 0x29, 0x99, 0x2b, 0x1d, 0xd7, 0x8f, 0x83, 0x29, 0xe0, 0x87, 0x62, + 0xdf, 0xe5, 0x67, 0xe3, 0x8e, 0xd1, 0xa5, 0xc3, 0x42, 0x9f, 0x9e, 0x93, 0x8b, 0x82, 0x9c, 0x89, + 0x7b, 0x17, 0x85, 0x3e, 0x0d, 0xa7, 0x58, 0x56, 0x88, 0xcc, 0xc9, 0x9d, 0x84, 0x90, 0x3e, 0xfb, + 0x27, 0x00, 0x00, 0xff, 0xff, 0x36, 0xb8, 0x08, 0x14, 0x3f, 0x0b, 0x00, 0x00, } // Reference imports to suppress errors if they are not otherwise used. diff --git a/sdk/go/request.go b/sdk/go/request.go index 5ffacea2e84..9e97dffb72f 100644 --- a/sdk/go/request.go +++ b/sdk/go/request.go @@ -8,23 +8,30 @@ import ( ) var ( - ErrInvalidFeatureName = "invalid feature name %s provided, feature names must be in the format featureSet:version:featureName" + ErrInvalidFeatureName = "invalid feature ids %s provided, feature names must be in the format /:" ) // OnlineFeaturesRequest wrapper on feast.serving.GetOnlineFeaturesRequest. type OnlineFeaturesRequest struct { - - // Features is the list of features to obtain from Feast. Each feature must be given by its fully qualified ID, - // in the format featureSet:version:featureName. - Features []string + // Features is the list of features to obtain from Feast. 
Each feature can be given as + // + // : + // / + // /: + // The only required components are the feature name and project. + Features []string // Entities is the list of entity rows to retrieve features on. Each row is a map of entity name to entity value. - Entities []Row + Entities []Row + + // Project is the default project to use when looking up features. This is only used when a project is not found + // within the feature id. + Project string } // Builds the feast-specified request payload from the wrapper. func (r OnlineFeaturesRequest) buildRequest() (*serving.GetOnlineFeaturesRequest, error) { - featureSets, err := buildFeatureSets(r.Features) + features, err := buildFeatures(r.Features, r.Project) if err != nil { return nil, err } @@ -33,55 +40,62 @@ func (r OnlineFeaturesRequest) buildRequest() (*serving.GetOnlineFeaturesRequest for i := range r.Entities { entityRows[i] = &serving.GetOnlineFeaturesRequest_EntityRow{ - Fields: r.Entities[i], + Fields: r.Entities[i], } } return &serving.GetOnlineFeaturesRequest{ - FeatureSets: featureSets, - EntityRows: entityRows, + Features: features, + EntityRows: entityRows, }, nil } -// buildFeatureSets create a slice of FeatureSetRequest object from -// a slice of "feature_set:version:feature_name" string. -// -// It returns an error when "feature_set:version:feature_name" string -// has an invalid format. -func buildFeatureSets(features []string) ([]*serving.FeatureSetRequest, error) { - var requests []*serving.FeatureSetRequest - - // Map of "feature_set_name:version" to "FeatureSetRequest" pointer - // to reference existing FeatureSetRequest, if any. 
- fsNameVersionToRequest := make(map[string]*serving.FeatureSetRequest) - - for _, feature := range features { - splits := strings.Split(feature, ":") - if len(splits) != 3 { - return nil, fmt.Errorf(ErrInvalidFeatureName, feature) - } +// buildFeatures create a slice of FeatureReferences from a slice of "/:" +// It returns an error when the format is invalid +func buildFeatures(featureReferences []string, defaultProject string) ([]*serving.FeatureReference, error) { + var features []*serving.FeatureReference - featureSetName, featureSetVersionString, featureName := splits[0], splits[1], splits[2] - featureSetVersion, err := strconv.Atoi(featureSetVersionString) - if err != nil { - return nil, fmt.Errorf(ErrInvalidFeatureName, feature) + for _, featureRef := range featureReferences { + var project string + var name string + var version int + var featureSplit []string + + projectSplit := strings.Split(featureRef, "/") + + if len(projectSplit) == 2 { + project = projectSplit[0] + featureSplit = strings.Split(projectSplit[1], ":") + } else if len(projectSplit) == 1 { + project = defaultProject + featureSplit = strings.Split(projectSplit[0], ":") + } else { + return nil, fmt.Errorf(ErrInvalidFeatureName, featureRef) } - fsNameVersion := featureSetName + ":" + featureSetVersionString - if request, ok := fsNameVersionToRequest[fsNameVersion]; !ok { - request = &serving.FeatureSetRequest{ - Name: featureSetName, - Version: int32(featureSetVersion), - FeatureNames: []string{featureName}, + if len(featureSplit) == 2 { + name = featureSplit[0] + v, err := strconv.Atoi(featureSplit[1]) + if err != nil { + return nil, fmt.Errorf(ErrInvalidFeatureName, featureRef) } - fsNameVersionToRequest[fsNameVersion] = request - // Adding FeatureSetRequest in this step ensures the order of - // FeatureSetRequest in the slice follows the order of feature sets - // in the "features" argument in buildFeatureSets method. 
- requests = append(requests, request) + version = v + } else if len(featureSplit) == 1 { + name = featureSplit[0] } else { - request.FeatureNames = append(request.FeatureNames, featureName) + return nil, fmt.Errorf(ErrInvalidFeatureName, featureRef) } + + + if project == "" || name == "" || version < 0 { + return nil, fmt.Errorf(ErrInvalidFeatureName, featureRef) + } + + features = append(features, &serving.FeatureReference{ + Name: name, + Version: int32(version), + Project: project, + }) } - return requests, nil -} \ No newline at end of file + return features, nil +} diff --git a/sdk/go/request_test.go b/sdk/go/request_test.go index 3da4fd465b8..2e403f0bd3e 100644 --- a/sdk/go/request_test.go +++ b/sdk/go/request_test.go @@ -20,24 +20,40 @@ func TestGetOnlineFeaturesRequest(t *testing.T) { { name: "valid", req: OnlineFeaturesRequest{ - Features: []string{"fs:1:feature1", "fs:1:feature2", "fs:2:feature1"}, + Features: []string{"my_project_1/feature1:1", "my_project_2/feature1:1", "my_project_4/feature3", "feature2:2", "feature2"}, Entities: []Row{ {"entity1": Int64Val(1), "entity2": StrVal("bob")}, {"entity1": Int64Val(1), "entity2": StrVal("annie")}, {"entity1": Int64Val(1), "entity2": StrVal("jane")}, }, + Project: "my_project_3", }, want: &serving.GetOnlineFeaturesRequest{ - FeatureSets: []*serving.FeatureSetRequest{ + Features: []*serving.FeatureReference{ { - Name: "fs", - Version: 1, - FeatureNames: []string{"feature1", "feature2"}, + Project: "my_project_1", + Name: "feature1", + Version: 1, }, { - Name: "fs", - Version: 2, - FeatureNames: []string{"feature1"}, + Project: "my_project_2", + Name: "feature1", + Version: 1, + }, + { + Project: "my_project_4", + Name: "feature3", + Version: 0, + }, + { + Project: "my_project_3", + Name: "feature2", + Version: 2, + }, + { + Project: "my_project_3", + Name: "feature2", + Version: 0, }, }, EntityRows: []*serving.GetOnlineFeaturesRequest_EntityRow{ @@ -63,38 +79,72 @@ func TestGetOnlineFeaturesRequest(t 
*testing.T) { OmitEntitiesInResponse: false, }, wantErr: false, - err: nil, + err: nil, + }, + { + name: "valid_project_in_name", + req: OnlineFeaturesRequest{ + Features: []string{"project/feature1"}, + Entities: []Row{}, + }, + want: &serving.GetOnlineFeaturesRequest{ + Features: []*serving.FeatureReference{ + { + Project: "project", + Name: "feature1", + Version: 0, + }, + }, + EntityRows: []*serving.GetOnlineFeaturesRequest_EntityRow{ + }, + OmitEntitiesInResponse: false, + }, + wantErr: false, + err: nil, + }, + { + name: "no_project", + req: OnlineFeaturesRequest{ + Features: []string{"feature1"}, + Entities: []Row{}, + }, + wantErr: true, + err: fmt.Errorf(ErrInvalidFeatureName, "feature1"), }, { name: "invalid_feature_name/wrong_format", req: OnlineFeaturesRequest{ - Features: []string{"fs1:feature1"}, - Entities: []Row{}, + Features: []string{"fs1:3:feature1"}, + Entities: []Row{}, + Project: "my_project", }, wantErr: true, - err: fmt.Errorf(ErrInvalidFeatureName, "fs1:feature1"), + err: fmt.Errorf(ErrInvalidFeatureName, "fs1:3:feature1"), }, { name: "invalid_feature_name/invalid_version", req: OnlineFeaturesRequest{ - Features: []string{"fs:a:feature1"}, - Entities: []Row{}, + Features: []string{"project/a:feature1"}, + Entities: []Row{}, }, wantErr: true, - err: fmt.Errorf(ErrInvalidFeatureName, "fs:a:feature1"), + err: fmt.Errorf(ErrInvalidFeatureName, "project/a:feature1"), }, } for _, tc := range tt { t.Run(tc.name, func(t *testing.T) { got, err := tc.req.buildRequest() + if (err != nil) != tc.wantErr { t.Errorf("error = %v, wantErr %v", err, tc.wantErr) return } + if tc.wantErr && err.Error() != tc.err.Error() { t.Errorf("error = %v, expected err = %v", err, tc.err) return } + if !cmp.Equal(got, tc.want) { m := json.Marshaler{} gotJson, _ := m.MarshalToString(got) diff --git a/sdk/go/response_test.go b/sdk/go/response_test.go index 9975457c147..882c1695d59 100644 --- a/sdk/go/response_test.go +++ b/sdk/go/response_test.go @@ -13,14 +13,14 @@ 
RawResponse: &serving.GetOnlineFeaturesResponse{ FieldValues: []*serving.GetOnlineFeaturesResponse_FieldValues{ { Fields: map[string]*types.Value{ - "fs:1:feature1": Int64Val(1), - "fs:1:feature2": &types.Value{}, + "project1/feature1": Int64Val(1), + "project1/feature2": &types.Value{}, }, }, { Fields: map[string]*types.Value{ - "fs:1:feature1": Int64Val(2), - "fs:1:feature2": Int64Val(2), + "project1/feature1": Int64Val(2), + "project1/feature2": Int64Val(2), }, }, }, @@ -30,8 +30,8 @@ RawResponse: &serving.GetOnlineFeaturesResponse{ func TestOnlineFeaturesResponseToRow(t *testing.T) { actual := response.Rows() expected := []Row{ - {"fs:1:feature1": Int64Val(1), "fs:1:feature2": &types.Value{}}, - {"fs:1:feature1": Int64Val(2), "fs:1:feature2": Int64Val(2)}, + {"project1/feature1": Int64Val(1), "project1/feature2": &types.Value{}}, + {"project1/feature1": Int64Val(2), "project1/feature2": Int64Val(2)}, } if !cmp.Equal(actual, expected) { t.Errorf("expected: %v, got: %v", expected, actual) @@ -53,7 +53,7 @@ func TestOnlineFeaturesResponseToInt64Array(t *testing.T) { { name: "valid", args: args{ - order: []string{"fs:1:feature2", "fs:1:feature1"}, + order: []string{"project1/feature2", "project1/feature1" }, fillNa: []int64{-1, -1}, }, want: [][]int64{{-1, 1}, {2, 2}}, @@ -72,12 +72,12 @@ func TestOnlineFeaturesResponseToInt64Array(t *testing.T) { { name: "length mismatch", args: args{ - order: []string{"fs:1:feature2", "fs:1:feature3"}, + order: []string{"project1/feature2", "project1/feature3" }, fillNa: []int64{-1, -1}, }, want: nil, wantErr: true, - err: fmt.Errorf(ErrFeatureNotFound, "fs:1:feature3"), + err: fmt.Errorf(ErrFeatureNotFound, "project1/feature3"), }, } for _, tc := range tt { diff --git a/sdk/java/src/main/java/com/gojek/feast/FeastClient.java b/sdk/java/src/main/java/com/gojek/feast/FeastClient.java index 91ddd2a442c..01157306d58 100644 --- a/sdk/java/src/main/java/com/gojek/feast/FeastClient.java +++ 
b/sdk/java/src/main/java/com/gojek/feast/FeastClient.java @@ -16,7 +16,7 @@ */ package com.gojek.feast; -import feast.serving.ServingAPIProto.FeatureSetRequest; +import feast.serving.ServingAPIProto.FeatureReference; import feast.serving.ServingAPIProto.GetFeastServingInfoRequest; import feast.serving.ServingAPIProto.GetFeastServingInfoResponse; import feast.serving.ServingAPIProto.GetOnlineFeaturesRequest; @@ -59,42 +59,50 @@ public GetFeastServingInfoResponse getFeastServingInfo() { /** * Get online features from Feast. * - *

See {@link #getOnlineFeatures(List, List, boolean)} + *

See {@link #getOnlineFeatures(List, List, str)} * - * @param featureIds list of feature id to retrieve, feature id follows this format - * [feature_set_name]:[version]:[feature_name] + * @param features list of string feature references to retrieve, feature reference follows this + * format [project]/[name]:[version] * @param rows list of {@link Row} to select the entities to retrieve the features for + * @param defaultProject {@link String} Default project to find features in if not provided in + * feature reference. * @return list of {@link Row} containing features */ - public List getOnlineFeatures(List featureIds, List rows) { - return getOnlineFeatures(featureIds, rows, false); + public List getOnlineFeatures(List features, List rows, String defaultProject) { + return getOnlineFeatures(features, rows, defaultProject, false); } /** * Get online features from Feast. * - *

Example of retrieving online features for driver feature set, version 1, with features - * driver_id and driver_name + *

Example of retrieving online features for the driver project, with features driver_id and + * driver_name, both version 1 * *

{@code
    * FeastClient client = FeastClient.create("localhost", 6566);
-   * List requestedFeatureIds = Arrays.asList("driver:1:driver_id", "driver:1:driver_name");
+   * List requestedFeatureIds = Arrays.asList("driver/driver_id:1", "driver/driver_name:1");
    * List requestedRows =
    *         Arrays.asList(Row.create().set("driver_id", 123), Row.create().set("driver_id", 456));
    * List retrievedFeatures = client.getOnlineFeatures(requestedFeatureIds, requestedRows);
    * retrievedFeatures.forEach(System.out::println);
    * }
* - * @param featureIds list of feature id to retrieve, feature id follows this format - * [feature_set_name]:[version]:[feature_name] + * @param featureRefStrings list of feature refs to retrieve, feature refs follow this format + * [project]/[name]:[version] * @param rows list of {@link Row} to select the entities to retrieve the features for + * @param defaultProject {@link String} Default project to find features in if not provided in + * feature reference. * @param omitEntitiesInResponse if true, the returned {@link Row} will not contain field and * value for the entity * @return list of {@link Row} containing features */ public List getOnlineFeatures( - List featureIds, List rows, boolean omitEntitiesInResponse) { - List featureSets = RequestUtil.createFeatureSets(featureIds); + List featureRefStrings, + List rows, + String defaultProject, + boolean omitEntitiesInResponse) { + List features = + RequestUtil.createFeatureRefs(featureRefStrings, defaultProject); List entityRows = rows.stream() .map( @@ -108,7 +116,7 @@ public List getOnlineFeatures( GetOnlineFeaturesResponse response = stub.getOnlineFeatures( GetOnlineFeaturesRequest.newBuilder() - .addAllFeatureSets(featureSets) + .addAllFeatures(features) .addAllEntityRows(entityRows) .setOmitEntitiesInResponse(omitEntitiesInResponse) .build()); diff --git a/sdk/java/src/main/java/com/gojek/feast/RequestUtil.java b/sdk/java/src/main/java/com/gojek/feast/RequestUtil.java index e80b40bad9c..075c570c4e9 100644 --- a/sdk/java/src/main/java/com/gojek/feast/RequestUtil.java +++ b/sdk/java/src/main/java/com/gojek/feast/RequestUtil.java @@ -16,60 +16,76 @@ */ package com.gojek.feast; -import feast.serving.ServingAPIProto.FeatureSetRequest; +import feast.serving.ServingAPIProto.FeatureReference; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; -import org.apache.commons.lang3.tuple.ImmutablePair; -import 
org.apache.commons.lang3.tuple.Pair; @SuppressWarnings("WeakerAccess") public class RequestUtil { - public static List createFeatureSets(List featureIds) { - if (featureIds == null) { - throw new IllegalArgumentException("featureIds cannot be null"); + + public static List createFeatureRefs( + List featureRefStrings, String defaultProject) { + if (featureRefStrings == null) { + throw new IllegalArgumentException("featureRefs cannot be null"); } - // featureSetMap is a map of pair of feature set name and version -> a list of feature names - Map, List> featureSetMap = new HashMap<>(); + List featureRefs = new ArrayList<>(); + + for (String featureRefString : featureRefStrings) { + String project; + String name; + int version = 0; + String[] featureSplit; + String[] projectSplit = featureRefString.split("/"); - for (String featureId : featureIds) { - String[] parts = featureId.split(":"); - if (parts.length < 3) { + if (projectSplit.length == 2) { + project = projectSplit[0]; + featureSplit = projectSplit[1].split(":"); + } else if (projectSplit.length == 1) { + project = defaultProject; + featureSplit = projectSplit[0].split(":"); + } else { throw new IllegalArgumentException( String.format( - "Feature id '%s' has invalid format. Expected format: ::.", - featureId)); + "Feature id '%s' has invalid format. Expected format: ::.", + featureRefString)); } - String featureSetName = parts[0]; - int featureSetVersion; - try { - featureSetVersion = Integer.parseInt(parts[1]); - } catch (NumberFormatException e) { + + if (featureSplit.length == 2) { + name = featureSplit[0]; + try { + version = Integer.parseInt(featureSplit[1]); + } catch (NumberFormatException e) { + throw new IllegalArgumentException( + String.format( + "Feature id '%s' contains invalid version. 
Expected format: /:.", + featureRefString)); + } + } else if (projectSplit.length == 1) { + name = featureSplit[0]; + } else { throw new IllegalArgumentException( String.format( - "Feature id '%s' contains invalid version. Expected format: ::.", - parts[1])); + "Feature id '%s' has invalid format. Expected format: /:.", + featureRefString)); } - Pair key = new ImmutablePair<>(featureSetName, featureSetVersion); - if (!featureSetMap.containsKey(key)) { - featureSetMap.put(key, new ArrayList<>()); + if (project.isEmpty() || name.isEmpty() || version < 0) { + throw new IllegalArgumentException( + String.format( + "Feature id '%s' has invalid format. Expected format: /:.", + featureRefString)); } - String featureName = parts[2]; - featureSetMap.get(key).add(featureName); + + featureRefs.add( + FeatureReference.newBuilder() + .setName(name) + .setProject(project) + .setVersion(version) + .build()); } - return featureSetMap.entrySet().stream() - .map( - entry -> - FeatureSetRequest.newBuilder() - .setName(entry.getKey().getKey()) - .setVersion(entry.getKey().getValue()) - .addAllFeatureNames(entry.getValue()) - .build()) - .collect(Collectors.toList()); + ; + return featureRefs; } } diff --git a/sdk/java/src/test/java/com/gojek/feast/RequestUtilTest.java b/sdk/java/src/test/java/com/gojek/feast/RequestUtilTest.java index 21c8bde15ec..1c58e9435c6 100644 --- a/sdk/java/src/test/java/com/gojek/feast/RequestUtilTest.java +++ b/sdk/java/src/test/java/com/gojek/feast/RequestUtilTest.java @@ -20,7 +20,7 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import com.google.protobuf.TextFormat; -import feast.serving.ServingAPIProto.FeatureSetRequest; +import feast.serving.ServingAPIProto.FeatureReference; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; @@ -36,43 +36,57 @@ class RequestUtilTest { private static Stream provideValidFeatureIds() { return Stream.of( Arguments.of( - Collections.singletonList("driver:1:driver_id"), + 
Collections.singletonList("driver_project/driver_id:1"), Collections.singletonList( - FeatureSetRequest.newBuilder() - .setName("driver") + FeatureReference.newBuilder() + .setProject("driver_project") + .setName("driver_id") .setVersion(1) - .addFeatureNames("driver_id"))), + .build())), Arguments.of( - Arrays.asList("driver:1:driver_id", "driver:1:driver_name"), - Collections.singletonList( - FeatureSetRequest.newBuilder() - .setName("driver") + Arrays.asList("driver_project/driver_id:1", "driver_project/driver_name:1"), + Arrays.asList( + FeatureReference.newBuilder() + .setProject("driver_project") + .setName("driver_id") + .setVersion(1) + .build(), + FeatureReference.newBuilder() + .setProject("driver_project") + .setName("driver_name") .setVersion(1) - .addAllFeatureNames(Arrays.asList("driver_id", "driver_name")) .build())), Arguments.of( - Arrays.asList("driver:1:driver_id", "driver:1:driver_name", "booking:2:booking_id"), Arrays.asList( - FeatureSetRequest.newBuilder() - .setName("driver") + "driver_project/driver_id:1", + "driver_project/driver_name:1", + "booking_project/driver_name:1"), + Arrays.asList( + FeatureReference.newBuilder() + .setProject("driver_project") + .setVersion(1) + .setName("driver_id") + .build(), + FeatureReference.newBuilder() + .setProject("driver_project") .setVersion(1) - .addAllFeatureNames(Arrays.asList("driver_id", "driver_name")) + .setName("driver_name") .build(), - FeatureSetRequest.newBuilder() - .setName("booking") - .setVersion(2) - .addFeatureNames("booking_id") + FeatureReference.newBuilder() + .setProject("booking_project") + .setVersion(1) + .setName("driver_name") .build()))); } @ParameterizedTest @MethodSource("provideValidFeatureIds") void createFeatureSets_ShouldReturnFeatureSetsForValidFeatureIds( - List input, List expected) { - List actual = RequestUtil.createFeatureSets(input); + List input, List expected) { + List actual = RequestUtil.createFeatureRefs(input, "my-project"); // Order of the actual and 
expected featureSets do no not matter - actual.sort(Comparator.comparing(FeatureSetRequest::getName)); - expected.sort(Comparator.comparing(FeatureSetRequest::getName)); + actual.sort(Comparator.comparing(FeatureReference::getName)); + expected.sort(Comparator.comparing(FeatureReference::getName)); assertEquals(expected.size(), actual.size()); for (int i = 0; i < expected.size(); i++) { String expectedString = TextFormat.printer().printToString(expected.get(i)); @@ -81,23 +95,23 @@ void createFeatureSets_ShouldReturnFeatureSetsForValidFeatureIds( } } - private static Stream provideInvalidFeatureIds() { + private static Stream provideInvalidFeatureRefs() { return Stream.of( - Arguments.of(Collections.singletonList("feature_set_only")), - Arguments.of(Collections.singletonList("missing:feature_name")), - Arguments.of(Collections.singletonList("invalid:version:value")), + Arguments.of(Collections.singletonList("missing:bad_version")), Arguments.of(Collections.singletonList(""))); } @ParameterizedTest - @MethodSource("provideInvalidFeatureIds") - void createFeatureSets_ShouldThrowExceptionForInvalidFeatureIds(List input) { - assertThrows(IllegalArgumentException.class, () -> RequestUtil.createFeatureSets(input)); + @MethodSource("provideInvalidFeatureRefs") + void createFeatureSets_ShouldThrowExceptionForInvalidFeatureRefs(List input) { + assertThrows( + IllegalArgumentException.class, () -> RequestUtil.createFeatureRefs(input, "my-project")); } @ParameterizedTest @NullSource - void createFeatureSets_ShouldThrowExceptionForNullFeatureIds(List input) { - assertThrows(IllegalArgumentException.class, () -> RequestUtil.createFeatureSets(input)); + void createFeatureSets_ShouldThrowExceptionForNullFeatureRefs(List input) { + assertThrows( + IllegalArgumentException.class, () -> RequestUtil.createFeatureRefs(input, "my-project")); } } diff --git a/sdk/python/feast/__init__.py b/sdk/python/feast/__init__.py index 5f62ab496fd..adcac0cd248 100644 --- 
a/sdk/python/feast/__init__.py +++ b/sdk/python/feast/__init__.py @@ -1,4 +1,5 @@ from pkg_resources import get_distribution, DistributionNotFound + try: __version__ = get_distribution(__name__).version except DistributionNotFound: diff --git a/sdk/python/feast/client.py b/sdk/python/feast/client.py index 719022ea7a3..640e587cc9f 100644 --- a/sdk/python/feast/client.py +++ b/sdk/python/feast/client.py @@ -20,7 +20,8 @@ import time from collections import OrderedDict from math import ceil -from typing import Dict, List, Tuple, Union +from typing import Dict, List, Tuple, Union, Optional +from typing import List from urllib.parse import urlparse import fastavro @@ -36,11 +37,18 @@ ApplyFeatureSetResponse, GetFeatureSetRequest, GetFeatureSetResponse, + CreateProjectRequest, + CreateProjectResponse, + ArchiveProjectRequest, + ArchiveProjectResponse, + ListProjectsRequest, + ListProjectsResponse, ) from feast.core.CoreService_pb2_grpc import CoreServiceStub from feast.core.FeatureSet_pb2 import FeatureSetStatus from feast.feature_set import FeatureSet, Entity from feast.job import Job +from feast.serving.ServingService_pb2 import FeatureReference from feast.loaders.abstract_producer import get_producer from feast.loaders.file import export_source_to_staging_location from feast.loaders.ingest import KAFKA_CHUNK_PRODUCTION_TIMEOUT @@ -53,7 +61,6 @@ GetOnlineFeaturesResponse, DatasetSource, DataFormat, - FeatureSetRequest, FeastServingType, ) from feast.serving.ServingService_pb2_grpc import ServingServiceStub @@ -64,6 +71,7 @@ GRPC_CONNECTION_TIMEOUT_APPLY = 600 # type: int FEAST_SERVING_URL_ENV_KEY = "FEAST_SERVING_URL" # type: str FEAST_CORE_URL_ENV_KEY = "FEAST_CORE_URL" # type: str +FEAST_PROJECT_ENV_KEY = "FEAST_PROJECT" # type: str BATCH_FEATURE_REQUEST_WAIT_TIME_SECONDS = 300 CPU_COUNT = os.cpu_count() # type: int @@ -74,7 +82,7 @@ class Client: """ def __init__( - self, core_url: str = None, serving_url: str = None, verbose: bool = False + self, core_url: str = 
None, serving_url: str = None, project: str = None ): """ The Feast Client should be initialized with at least one service url @@ -82,11 +90,11 @@ def __init__( Args: core_url: Feast Core URL. Used to manage features serving_url: Feast Serving URL. Used to retrieve features - verbose: Enable verbose logging + project: Sets the active project. This field is optional. """ self._core_url = core_url self._serving_url = serving_url - self._verbose = verbose + self._project = project self.__core_channel: grpc.Channel = None self.__serving_channel: grpc.Channel = None self._core_service_stub: CoreServiceStub = None @@ -96,6 +104,9 @@ def __init__( def core_url(self) -> str: """ Retrieve Feast Core URL + + Returns: + Feast Core URL string """ if self._core_url is not None: @@ -109,8 +120,8 @@ def core_url(self, value: str): """ Set the Feast Core URL - Returns: - Feast Core URL string + Args: + value: Feast Core URL """ self._core_url = value @@ -118,6 +129,9 @@ def core_url(self, value: str): def serving_url(self) -> str: """ Retrieve Serving Core URL + + Returns: + Feast Serving URL string """ if self._serving_url is not None: return self._serving_url @@ -130,8 +144,8 @@ def serving_url(self, value: str): """ Set the Feast Serving URL - Returns: - Feast Serving URL string + Args: + value: Feast Serving URL """ self._serving_url = value @@ -214,6 +228,74 @@ def _connect_serving(self, skip_if_connected=True): else: self._serving_service_stub = ServingServiceStub(self.__serving_channel) + @property + def project(self) -> Union[str, None]: + """ + Retrieve currently active project + + Returns: + Project name + """ + if self._project is not None: + return self._project + if os.getenv(FEAST_PROJECT_ENV_KEY) is not None: + return os.getenv(FEAST_PROJECT_ENV_KEY) + return None + + def set_project(self, project: str): + """ + Set currently active Feast project + + Args: + project: Project to set as active + """ + self._project = project + + def list_projects(self) -> List[str]: + 
""" + List all active Feast projects + + Returns: + List of project names + + """ + self._connect_core() + response = self._core_service_stub.ListProjects( + ListProjectsRequest(), timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT + ) # type: ListProjectsResponse + return list(response.projects) + + def create_project(self, project): + """ + Creates a Feast project + + Args: + project: Name of project + """ + + self._connect_core() + self._core_service_stub.CreateProject( + CreateProjectRequest(name=project), timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT + ) # type: CreateProjectResponse + + def archive_project(self, project): + """ + Archives a project. Project will still continue to function for + ingestion and retrieval, but will be in a read-only state. It will + also not be visible from the Core API for management purposes. + + Args: + project: Name of project to archive + """ + + self._connect_core() + self._core_service_stub.ArchiveProject( + ArchiveProjectRequest(name=project), timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT + ) # type: ArchiveProjectResponse + + if self._project == project: + self._project = "" + def apply(self, feature_sets: Union[List[FeatureSet], FeatureSet]): """ Idempotently registers feature set(s) with Feast Core. Either a single @@ -240,15 +322,27 @@ def _apply_feature_set(self, feature_set: FeatureSet): feature_set: Feature set that will be registered """ self._connect_core() - feature_set._client = self feature_set.is_valid() + feature_set_proto = feature_set.to_proto() + if len(feature_set_proto.spec.project) == 0: + if self.project is None: + raise ValueError( + f"No project found in feature set {feature_set.name}. " + f"Please set the project within the feature set or within " + f"your Feast Client." 
+ ) + else: + feature_set_proto.spec.project = self.project # Convert the feature set to a request and send to Feast Core - apply_fs_response = self._core_service_stub.ApplyFeatureSet( - ApplyFeatureSetRequest(feature_set=feature_set.to_proto()), - timeout=GRPC_CONNECTION_TIMEOUT_APPLY, - ) # type: ApplyFeatureSetResponse + try: + apply_fs_response = self._core_service_stub.ApplyFeatureSet( + ApplyFeatureSetRequest(feature_set=feature_set_proto), + timeout=GRPC_CONNECTION_TIMEOUT_APPLY, + ) # type: ApplyFeatureSetResponse + except grpc.RpcError as e: + raise grpc.RpcError(e.details()) # Extract the returned feature set applied_fs = FeatureSet.from_proto(apply_fs_response.feature_set) @@ -266,18 +360,41 @@ def _apply_feature_set(self, feature_set: FeatureSet): # Deep copy from the returned feature set to the local feature set feature_set._update_from_feature_set(applied_fs) - def list_feature_sets(self) -> List[FeatureSet]: + def list_feature_sets( + self, project: str = None, name: str = None, version: str = None + ) -> List[FeatureSet]: """ Retrieve a list of feature sets from Feast Core + Args: + project: Filter feature sets based on project name + name: Filter feature sets based on feature set name + version: Filter feature sets based on version number + Returns: List of feature sets """ self._connect_core() + if project is None: + if self.project is not None: + project = self.project + else: + project = "*" + + if name is None: + name = "*" + + if version is None: + version = "*" + + filter = ListFeatureSetsRequest.Filter( + project=project, feature_set_name=name, feature_set_version=version + ) + # Get latest feature sets from Feast Core feature_set_protos = self._core_service_stub.ListFeatureSets( - ListFeatureSetsRequest() + ListFeatureSetsRequest(filter=filter) ) # type: ListFeatureSetsResponse # Extract feature sets and return @@ -289,13 +406,14 @@ def list_feature_sets(self) -> List[FeatureSet]: return feature_sets def get_feature_set( - self, name: str, 
version: int = None + self, name: str, version: int = None, project: str = None ) -> Union[FeatureSet, None]: """ Retrieves a feature set. If no version is specified then the latest version will be returned. Args: + project: Feast project that this feature set belongs to name: Name of feature set version: Version of feature set @@ -305,11 +423,23 @@ def get_feature_set( """ self._connect_core() + if project is None: + if self.project is not None: + project = self.project + else: + raise ValueError("No project has been configured.") + if version is None: version = 0 - get_feature_set_response = self._core_service_stub.GetFeatureSet( - GetFeatureSetRequest(name=name.strip(), version=int(version)) - ) # type: GetFeatureSetResponse + + try: + get_feature_set_response = self._core_service_stub.GetFeatureSet( + GetFeatureSetRequest( + project=project, name=name.strip(), version=int(version) + ) + ) # type: GetFeatureSetResponse + except grpc.RpcError as e: + raise grpc.RpcError(e.details()) return FeatureSet.from_proto(get_feature_set_response.feature_set) def list_entities(self) -> Dict[str, Entity]: @@ -326,22 +456,26 @@ def list_entities(self) -> Dict[str, Entity]: return entities_dict def get_batch_features( - self, feature_ids: List[str], entity_rows: Union[pd.DataFrame, str] + self, + feature_refs: List[str], + entity_rows: Union[pd.DataFrame, str], + default_project: str = None, ) -> Job: """ Retrieves historical features from a Feast Serving deployment. Args: - feature_ids (List[str]): - List of feature ids that will be returned for each entity. - Each feature id should have the following format - "feature_set_name:version:feature_name". + feature_refs (List[str]): + List of feature references that will be returned for each entity. + Each feature reference should have the following format + "project/feature:version". entity_rows (Union[pd.DataFrame, str]): Pandas dataframe containing entities and a 'datetime' column. 
Each entity in a feature set must be present as a column in this dataframe. The datetime column must contain timestamps in datetime64 format. + default_project: Default project where feature values will be found. Returns: feast.job.Job: @@ -354,27 +488,29 @@ def get_batch_features( >>> from datetime import datetime >>> >>> feast_client = Client(core_url="localhost:6565", serving_url="localhost:6566") - >>> feature_ids = ["customer:1:bookings_7d"] + >>> feature_refs = ["my_project/bookings_7d:1", "booking_14d"] >>> entity_rows = pd.DataFrame( >>> { >>> "datetime": [pd.datetime.now() for _ in range(3)], >>> "customer": [1001, 1002, 1003], >>> } >>> ) - >>> feature_retrieval_job = feast_client.get_batch_features(feature_ids, entity_rows) + >>> feature_retrieval_job = feast_client.get_batch_features( + >>> feature_refs, entity_rows, default_project="my_project") >>> df = feature_retrieval_job.to_dataframe() >>> print(df) """ self._connect_serving() - fs_request = _build_feature_set_request(feature_ids) + feature_references = _build_feature_references( + feature_refs=feature_refs, default_project=default_project + ) # Retrieve serving information to determine store type and # staging location serving_info = self._serving_service_stub.GetFeastServingInfo( - GetFeastServingInfoRequest(), - timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT + GetFeastServingInfoRequest(), timeout=GRPC_CONNECTION_TIMEOUT_DEFAULT ) # type: GetFeastServingInfoResponse if serving_info.type != FeastServingType.FEAST_SERVING_TYPE_BATCH: @@ -385,35 +521,25 @@ def get_batch_features( if isinstance(entity_rows, pd.DataFrame): # Pandas DataFrame detected - # Validate entity rows to based on entities in Feast Core - self._validate_dataframe_for_batch_retrieval( - entity_rows=entity_rows, - feature_sets_request=fs_request - ) # Remove timezone from datetime column if isinstance( - entity_rows["datetime"].dtype, - pd.core.dtypes.dtypes.DatetimeTZDtype + entity_rows["datetime"].dtype, 
pd.core.dtypes.dtypes.DatetimeTZDtype ): entity_rows["datetime"] = pd.DatetimeIndex( entity_rows["datetime"] ).tz_localize(None) elif isinstance(entity_rows, str): # String based source - if entity_rows.endswith((".avro", "*")): - # Validate Avro entity rows to based on entities in Feast Core - self._validate_avro_for_batch_retrieval( - source=entity_rows, - feature_sets_request=fs_request - ) - else: + if not entity_rows.endswith((".avro", "*")): raise Exception( f"Only .avro and wildcard paths are accepted as entity_rows" ) else: - raise Exception(f"Only pandas.DataFrame and str types are allowed" - f" as entity_rows, but got {type(entity_rows)}.") + raise Exception( + f"Only pandas.DataFrame and str types are allowed" + f" as entity_rows, but got {type(entity_rows)}." + ) # Export and upload entity row DataFrame to staging location # provided by Feast @@ -422,11 +548,10 @@ def get_batch_features( ) # type: List[str] request = GetBatchFeaturesRequest( - feature_sets=fs_request, + features=feature_references, dataset_source=DatasetSource( file_source=DatasetSource.FileSource( - file_uris=staged_files, - data_format=DataFormat.DATA_FORMAT_AVRO + file_uris=staged_files, data_format=DataFormat.DATA_FORMAT_AVRO ) ), ) @@ -435,165 +560,57 @@ def get_batch_features( response = self._serving_service_stub.GetBatchFeatures(request) return Job(response.job, self._serving_service_stub) - def _validate_dataframe_for_batch_retrieval( - self, entity_rows: pd.DataFrame, feature_sets_request - ): - """ - Validate whether an the entity rows in a DataFrame contains the correct - information for batch retrieval. - - Datetime column must be present in the DataFrame. - - Args: - entity_rows (pd.DataFrame): - Pandas DataFrame containing entities and datetime column. Each - entity in a feature set must be present as a column in this - DataFrame. - - feature_sets_request: - Feature sets that will be requested. 
- """ - - self._validate_columns( - columns=entity_rows.columns, - feature_sets_request=feature_sets_request, - datetime_field="datetime" - ) - - def _validate_avro_for_batch_retrieval( - self, source: str, feature_sets_request - ): - """ - Validate whether the entity rows in an Avro source file contains the - correct information for batch retrieval. - - Only gs:// and local files (file://) uri schemes are allowed. - - Avro file must have a column named "event_timestamp". - - No checks will be done if a GCS path is provided. - - Args: - source (str): - File path to Avro. - - feature_sets_request: - Feature sets that will be requested. - """ - p = urlparse(source) - - if p.scheme == "gs": - # GCS path provided (Risk is delegated to user) - # No validation if GCS path is provided - return - elif p.scheme == "file" or not p.scheme: - # Local file (file://) provided - file_path = os.path.abspath(os.path.join(p.netloc, p.path)) - else: - raise Exception(f"Unsupported uri scheme provided {p.scheme}, only " - f"local files (file://), and gs:// schemes are " - f"allowed") - - with open(file_path, "rb") as f: - reader = fastavro.reader(f) - schema = json.loads(reader.metadata["avro.schema"]) - columns = [x["name"] for x in schema["fields"]] - self._validate_columns( - columns=columns, - feature_sets_request=feature_sets_request, - datetime_field="event_timestamp" - ) - - def _validate_columns( - self, columns: List[str], - feature_sets_request, - datetime_field: str - ) -> None: - """ - Check if the required column contains the correct values for batch - retrieval. - - Args: - columns (List[str]): - List of columns to validate against feature_sets_request. - - feature_sets_request (): - Feature sets that will be requested. - - datetime_field (str): - Name of the datetime field that must be enforced and present as - a column in the data source. 
- - Returns: - None: - None - """ - # Ensure datetime column exists - if datetime_field not in columns: - raise ValueError( - f'Entity rows does not contain "{datetime_field}" column in ' - f'columns {columns}' - ) - - # Validate Avro columns based on feature set entities - for feature_set in feature_sets_request: - fs = self.get_feature_set( - name=feature_set.name, version=feature_set.version - ) - if fs is None: - raise ValueError( - f'Feature set "{feature_set.name}:{feature_set.version}" ' - f"could not be found" - ) - for entity_type in fs.entities: - if entity_type.name not in columns: - raise ValueError( - f'Input does not contain entity' - f' "{entity_type.name}" column in columns "{columns}"' - ) def get_online_features( self, - feature_ids: List[str], + feature_refs: List[str], entity_rows: List[GetOnlineFeaturesRequest.EntityRow], + default_project: Optional[str] = None, ) -> GetOnlineFeaturesResponse: """ Retrieves the latest online feature data from Feast Serving Args: - feature_ids: List of feature Ids in the following format - [feature_set_name]:[version]:[feature_name] + feature_refs: List of feature references in the following format + [project]/[feature_name]:[version]. Only the feature name + is a required component in the reference. example: - ["feature_set_1:6:my_feature_1", - "feature_set_1:6:my_feature_2",] + ["my_project/my_feature_1:3", + "my_project3/my_feature_4:1",] entity_rows: List of GetFeaturesRequest.EntityRow where each row contains entities. Timestamp should not be set for online retrieval. 
All entity types within a feature + default_project: This project will be used if the project name is + not provided in the feature reference Returns: Returns a list of maps where each item in the list contains the latest feature values for the provided entities """ - self._connect_serving() return self._serving_service_stub.GetOnlineFeatures( GetOnlineFeaturesRequest( - feature_sets=_build_feature_set_request(feature_ids), + features=_build_feature_references( + feature_refs=feature_refs, + default_project=( + default_project if not self.project else self.project + ), + ), entity_rows=entity_rows, ) ) # type: GetOnlineFeaturesResponse def ingest( - self, - feature_set: Union[str, FeatureSet], - source: Union[pd.DataFrame, str], - chunk_size: int = 10000, - version: int = None, - force_update: bool = False, - max_workers: int = max(CPU_COUNT - 1, 1), - disable_progress_bar: bool = False, - timeout: int = KAFKA_CHUNK_PRODUCTION_TIMEOUT + self, + feature_set: Union[str, FeatureSet], + source: Union[pd.DataFrame, str], + chunk_size: int = 10000, + version: int = None, + force_update: bool = False, + max_workers: int = max(CPU_COUNT - 1, 1), + disable_progress_bar: bool = False, + timeout: int = KAFKA_CHUNK_PRODUCTION_TIMEOUT, ) -> None: """ Loads feature data into Feast for a specific feature set. 
@@ -644,9 +661,7 @@ def ingest( raise Exception(f"Feature set name must be provided") # Read table and get row count - dir_path, dest_path = _read_table_from_source( - source, chunk_size, max_workers - ) + dir_path, dest_path = _read_table_from_source(source, chunk_size, max_workers) pq_file = pq.ParquetFile(dest_path) @@ -657,7 +672,7 @@ def ingest( feature_set.infer_fields_from_pa( table=pq_file.read_row_group(0), discard_unused_fields=True, - replace_existing_features=True + replace_existing_features=True, ) self.apply(feature_set) current_time = time.time() @@ -690,10 +705,11 @@ def ingest( # Transform and push data to Kafka if feature_set.source.source_type == "Kafka": for chunk in get_feature_row_chunks( - file=dest_path, - row_groups=list(range(pq_file.num_row_groups)), - fs=feature_set, - max_workers=max_workers): + file=dest_path, + row_groups=list(range(pq_file.num_row_groups)), + fs=feature_set, + max_workers=max_workers, + ): # Push FeatureRow one chunk at a time to kafka for serialized_row in chunk: @@ -722,39 +738,62 @@ def ingest( return None -def _build_feature_set_request( - feature_ids: List[str] -) -> List[FeatureSetRequest]: +def _build_feature_references( + feature_refs: List[str], default_project: str = None +) -> List[FeatureReference]: """ Builds a list of FeatureSet objects from feature set ids in order to retrieve feature data from Feast Serving Args: - feature_ids: List of feature ids - ("feature_set_name:version:feature_name") + feature_refs: List of feature reference strings + ("project/feature:version") + default_project: This project will be used if the project name is + not provided in the feature reference """ - feature_set_request = dict() # type: Dict[str, FeatureSetRequest] - for feature_id in feature_ids: - fid_parts = feature_id.split(":") - if len(fid_parts) == 3: - feature_set, version, feature = fid_parts + + features = [] + + for feature_ref in feature_refs: + project_split = feature_ref.split("/") + version = 0 + + if 
len(project_split) == 2: + project, feature_version = project_split + elif len(project_split) == 1: + feature_version = project_split[0] + if default_project is None: + raise ValueError( + f"No project specified in {feature_ref} and no default project provided" + ) + project = default_project + else: + raise ValueError( + f'Could not parse feature ref {feature_ref}, expecting "project/feature:version"' + ) + + feature_split = feature_version.split(":") + if len(feature_split) == 2: + name, version = feature_split + version = int(version) + elif len(feature_split) == 1: + name = feature_split[0] else: raise ValueError( - f"Could not parse feature id ${feature_id}, needs 2 colons" + f'Could not parse feature ref {feature_ref}, expecting "project/feature:version"' ) - if feature_set not in feature_set_request: - feature_set_request[feature_set] = FeatureSetRequest( - name=feature_set, version=int(version) + if len(project) == 0 or len(name) == 0 or version < 0: + raise ValueError( + f'Could not parse feature ref {feature_ref}, expecting "project/feature:version"' ) - feature_set_request[feature_set].feature_names.append(feature) - return list(feature_set_request.values()) + + features.append(FeatureReference(project=project, name=name, version=version)) + return features def _read_table_from_source( - source: Union[pd.DataFrame, str], - chunk_size: int, - max_workers: int + source: Union[pd.DataFrame, str], chunk_size: int, max_workers: int ) -> Tuple[str, str]: """ Infers a data source type (path or Pandas DataFrame) and reads it in as @@ -804,8 +843,7 @@ def _read_table_from_source( else: table = pq.read_table(file_path) else: - raise ValueError( - f"Unknown data source provided for ingestion: {source}") + raise ValueError(f"Unknown data source provided for ingestion: {source}") # Ensure that PyArrow table is initialised assert isinstance(table, pa.lib.Table) @@ -814,7 +852,7 @@ def _read_table_from_source( dir_path = tempfile.mkdtemp() tmp_table_name = 
f"{int(time.time())}.parquet" dest_path = f"{dir_path}/{tmp_table_name}" - row_group_size = min(ceil(table.num_rows/max_workers), chunk_size) + row_group_size = min(ceil(table.num_rows / max_workers), chunk_size) pq.write_table(table=table, where=dest_path, row_group_size=row_group_size) # Remove table from memory diff --git a/sdk/python/feast/core/CoreService_pb2.py b/sdk/python/feast/core/CoreService_pb2.py index 3185bece28e..858703d7f3e 100644 --- a/sdk/python/feast/core/CoreService_pb2.py +++ b/sdk/python/feast/core/CoreService_pb2.py @@ -22,7 +22,7 @@ package='feast.core', syntax='proto3', serialized_options=_b('\n\nfeast.coreB\020CoreServiceProtoZ/github.com/gojek/feast/sdk/go/protos/feast/core'), - serialized_pb=_b('\n\x1c\x66\x65\x61st/core/CoreService.proto\x12\nfeast.core\x1a\x1b\x66\x65\x61st/core/FeatureSet.proto\x1a\x16\x66\x65\x61st/core/Store.proto\"5\n\x14GetFeatureSetRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\x05\"D\n\x15GetFeatureSetResponse\x12+\n\x0b\x66\x65\x61ture_set\x18\x01 \x01(\x0b\x32\x16.feast.core.FeatureSet\"\x94\x01\n\x16ListFeatureSetsRequest\x12\x39\n\x06\x66ilter\x18\x01 \x01(\x0b\x32).feast.core.ListFeatureSetsRequest.Filter\x1a?\n\x06\x46ilter\x12\x18\n\x10\x66\x65\x61ture_set_name\x18\x01 \x01(\t\x12\x1b\n\x13\x66\x65\x61ture_set_version\x18\x02 \x01(\t\"G\n\x17ListFeatureSetsResponse\x12,\n\x0c\x66\x65\x61ture_sets\x18\x01 \x03(\x0b\x32\x16.feast.core.FeatureSet\"a\n\x11ListStoresRequest\x12\x34\n\x06\x66ilter\x18\x01 \x01(\x0b\x32$.feast.core.ListStoresRequest.Filter\x1a\x16\n\x06\x46ilter\x12\x0c\n\x04name\x18\x01 \x01(\t\"6\n\x12ListStoresResponse\x12 \n\x05store\x18\x01 \x03(\x0b\x32\x11.feast.core.Store\"E\n\x16\x41pplyFeatureSetRequest\x12+\n\x0b\x66\x65\x61ture_set\x18\x01 \x01(\x0b\x32\x16.feast.core.FeatureSet\"\xb3\x01\n\x17\x41pplyFeatureSetResponse\x12+\n\x0b\x66\x65\x61ture_set\x18\x01 \x01(\x0b\x32\x16.feast.core.FeatureSet\x12:\n\x06status\x18\x02 
\x01(\x0e\x32*.feast.core.ApplyFeatureSetResponse.Status\"/\n\x06Status\x12\r\n\tNO_CHANGE\x10\x00\x12\x0b\n\x07\x43REATED\x10\x01\x12\t\n\x05\x45RROR\x10\x02\"\x1c\n\x1aGetFeastCoreVersionRequest\".\n\x1bGetFeastCoreVersionResponse\x12\x0f\n\x07version\x18\x01 \x01(\t\"6\n\x12UpdateStoreRequest\x12 \n\x05store\x18\x01 \x01(\x0b\x32\x11.feast.core.Store\"\x95\x01\n\x13UpdateStoreResponse\x12 \n\x05store\x18\x01 \x01(\x0b\x32\x11.feast.core.Store\x12\x36\n\x06status\x18\x02 \x01(\x0e\x32&.feast.core.UpdateStoreResponse.Status\"$\n\x06Status\x12\r\n\tNO_CHANGE\x10\x00\x12\x0b\n\x07UPDATED\x10\x01\x32\xa0\x04\n\x0b\x43oreService\x12\x66\n\x13GetFeastCoreVersion\x12&.feast.core.GetFeastCoreVersionRequest\x1a\'.feast.core.GetFeastCoreVersionResponse\x12T\n\rGetFeatureSet\x12 .feast.core.GetFeatureSetRequest\x1a!.feast.core.GetFeatureSetResponse\x12Z\n\x0fListFeatureSets\x12\".feast.core.ListFeatureSetsRequest\x1a#.feast.core.ListFeatureSetsResponse\x12K\n\nListStores\x12\x1d.feast.core.ListStoresRequest\x1a\x1e.feast.core.ListStoresResponse\x12Z\n\x0f\x41pplyFeatureSet\x12\".feast.core.ApplyFeatureSetRequest\x1a#.feast.core.ApplyFeatureSetResponse\x12N\n\x0bUpdateStore\x12\x1e.feast.core.UpdateStoreRequest\x1a\x1f.feast.core.UpdateStoreResponseBO\n\nfeast.coreB\x10\x43oreServiceProtoZ/github.com/gojek/feast/sdk/go/protos/feast/coreb\x06proto3') + serialized_pb=_b('\n\x1c\x66\x65\x61st/core/CoreService.proto\x12\nfeast.core\x1a\x1b\x66\x65\x61st/core/FeatureSet.proto\x1a\x16\x66\x65\x61st/core/Store.proto\"F\n\x14GetFeatureSetRequest\x12\x0f\n\x07project\x18\x03 \x01(\t\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\x05\"D\n\x15GetFeatureSetResponse\x12+\n\x0b\x66\x65\x61ture_set\x18\x01 \x01(\x0b\x32\x16.feast.core.FeatureSet\"\xa5\x01\n\x16ListFeatureSetsRequest\x12\x39\n\x06\x66ilter\x18\x01 \x01(\x0b\x32).feast.core.ListFeatureSetsRequest.Filter\x1aP\n\x06\x46ilter\x12\x0f\n\x07project\x18\x03 
\x01(\t\x12\x18\n\x10\x66\x65\x61ture_set_name\x18\x01 \x01(\t\x12\x1b\n\x13\x66\x65\x61ture_set_version\x18\x02 \x01(\t\"G\n\x17ListFeatureSetsResponse\x12,\n\x0c\x66\x65\x61ture_sets\x18\x01 \x03(\x0b\x32\x16.feast.core.FeatureSet\"a\n\x11ListStoresRequest\x12\x34\n\x06\x66ilter\x18\x01 \x01(\x0b\x32$.feast.core.ListStoresRequest.Filter\x1a\x16\n\x06\x46ilter\x12\x0c\n\x04name\x18\x01 \x01(\t\"6\n\x12ListStoresResponse\x12 \n\x05store\x18\x01 \x03(\x0b\x32\x11.feast.core.Store\"E\n\x16\x41pplyFeatureSetRequest\x12+\n\x0b\x66\x65\x61ture_set\x18\x01 \x01(\x0b\x32\x16.feast.core.FeatureSet\"\xb3\x01\n\x17\x41pplyFeatureSetResponse\x12+\n\x0b\x66\x65\x61ture_set\x18\x01 \x01(\x0b\x32\x16.feast.core.FeatureSet\x12:\n\x06status\x18\x02 \x01(\x0e\x32*.feast.core.ApplyFeatureSetResponse.Status\"/\n\x06Status\x12\r\n\tNO_CHANGE\x10\x00\x12\x0b\n\x07\x43REATED\x10\x01\x12\t\n\x05\x45RROR\x10\x02\"\x1c\n\x1aGetFeastCoreVersionRequest\".\n\x1bGetFeastCoreVersionResponse\x12\x0f\n\x07version\x18\x01 \x01(\t\"6\n\x12UpdateStoreRequest\x12 \n\x05store\x18\x01 \x01(\x0b\x32\x11.feast.core.Store\"\x95\x01\n\x13UpdateStoreResponse\x12 \n\x05store\x18\x01 \x01(\x0b\x32\x11.feast.core.Store\x12\x36\n\x06status\x18\x02 \x01(\x0e\x32&.feast.core.UpdateStoreResponse.Status\"$\n\x06Status\x12\r\n\tNO_CHANGE\x10\x00\x12\x0b\n\x07UPDATED\x10\x01\"$\n\x14\x43reateProjectRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\"\x17\n\x15\x43reateProjectResponse\"%\n\x15\x41rchiveProjectRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\"\x18\n\x16\x41rchiveProjectResponse\"\x15\n\x13ListProjectsRequest\"(\n\x14ListProjectsResponse\x12\x10\n\x08projects\x18\x01 \x03(\t2\xa2\x06\n\x0b\x43oreService\x12\x66\n\x13GetFeastCoreVersion\x12&.feast.core.GetFeastCoreVersionRequest\x1a\'.feast.core.GetFeastCoreVersionResponse\x12T\n\rGetFeatureSet\x12 
.feast.core.GetFeatureSetRequest\x1a!.feast.core.GetFeatureSetResponse\x12Z\n\x0fListFeatureSets\x12\".feast.core.ListFeatureSetsRequest\x1a#.feast.core.ListFeatureSetsResponse\x12K\n\nListStores\x12\x1d.feast.core.ListStoresRequest\x1a\x1e.feast.core.ListStoresResponse\x12Z\n\x0f\x41pplyFeatureSet\x12\".feast.core.ApplyFeatureSetRequest\x1a#.feast.core.ApplyFeatureSetResponse\x12N\n\x0bUpdateStore\x12\x1e.feast.core.UpdateStoreRequest\x1a\x1f.feast.core.UpdateStoreResponse\x12T\n\rCreateProject\x12 .feast.core.CreateProjectRequest\x1a!.feast.core.CreateProjectResponse\x12W\n\x0e\x41rchiveProject\x12!.feast.core.ArchiveProjectRequest\x1a\".feast.core.ArchiveProjectResponse\x12Q\n\x0cListProjects\x12\x1f.feast.core.ListProjectsRequest\x1a .feast.core.ListProjectsResponseBO\n\nfeast.coreB\x10\x43oreServiceProtoZ/github.com/gojek/feast/sdk/go/protos/feast/coreb\x06proto3') , dependencies=[feast_dot_core_dot_FeatureSet__pb2.DESCRIPTOR,feast_dot_core_dot_Store__pb2.DESCRIPTOR,]) @@ -49,8 +49,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=805, - serialized_end=852, + serialized_start=839, + serialized_end=886, ) _sym_db.RegisterEnumDescriptor(_APPLYFEATURESETRESPONSE_STATUS) @@ -71,8 +71,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=1102, - serialized_end=1138, + serialized_start=1136, + serialized_end=1172, ) _sym_db.RegisterEnumDescriptor(_UPDATESTORERESPONSE_STATUS) @@ -85,14 +85,21 @@ containing_type=None, fields=[ _descriptor.FieldDescriptor( - name='name', full_name='feast.core.GetFeatureSetRequest.name', index=0, + name='project', full_name='feast.core.GetFeatureSetRequest.project', index=0, + number=3, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='name', 
full_name='feast.core.GetFeatureSetRequest.name', index=1, number=1, type=9, cpp_type=9, label=1, has_default_value=False, default_value=_b("").decode('utf-8'), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR), _descriptor.FieldDescriptor( - name='version', full_name='feast.core.GetFeatureSetRequest.version', index=1, + name='version', full_name='feast.core.GetFeatureSetRequest.version', index=2, number=2, type=5, cpp_type=1, label=1, has_default_value=False, default_value=0, message_type=None, enum_type=None, containing_type=None, @@ -111,7 +118,7 @@ oneofs=[ ], serialized_start=97, - serialized_end=150, + serialized_end=167, ) @@ -141,8 +148,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=152, - serialized_end=220, + serialized_start=169, + serialized_end=237, ) @@ -154,14 +161,21 @@ containing_type=None, fields=[ _descriptor.FieldDescriptor( - name='feature_set_name', full_name='feast.core.ListFeatureSetsRequest.Filter.feature_set_name', index=0, + name='project', full_name='feast.core.ListFeatureSetsRequest.Filter.project', index=0, + number=3, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='feature_set_name', full_name='feast.core.ListFeatureSetsRequest.Filter.feature_set_name', index=1, number=1, type=9, cpp_type=9, label=1, has_default_value=False, default_value=_b("").decode('utf-8'), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR), _descriptor.FieldDescriptor( - name='feature_set_version', full_name='feast.core.ListFeatureSetsRequest.Filter.feature_set_version', index=1, + name='feature_set_version', 
full_name='feast.core.ListFeatureSetsRequest.Filter.feature_set_version', index=2, number=2, type=9, cpp_type=9, label=1, has_default_value=False, default_value=_b("").decode('utf-8'), message_type=None, enum_type=None, containing_type=None, @@ -179,8 +193,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=308, - serialized_end=371, + serialized_start=325, + serialized_end=405, ) _LISTFEATURESETSREQUEST = _descriptor.Descriptor( @@ -209,8 +223,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=223, - serialized_end=371, + serialized_start=240, + serialized_end=405, ) @@ -240,8 +254,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=373, - serialized_end=444, + serialized_start=407, + serialized_end=478, ) @@ -271,8 +285,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=521, - serialized_end=543, + serialized_start=555, + serialized_end=577, ) _LISTSTORESREQUEST = _descriptor.Descriptor( @@ -301,8 +315,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=446, - serialized_end=543, + serialized_start=480, + serialized_end=577, ) @@ -332,8 +346,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=545, - serialized_end=599, + serialized_start=579, + serialized_end=633, ) @@ -363,8 +377,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=601, - serialized_end=670, + serialized_start=635, + serialized_end=704, ) @@ -402,8 +416,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=673, - serialized_end=852, + serialized_start=707, + serialized_end=886, ) @@ -426,8 +440,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=854, - serialized_end=882, + serialized_start=888, + serialized_end=916, ) @@ -457,8 +471,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=884, - serialized_end=930, + serialized_start=918, + serialized_end=964, ) @@ -488,8 +502,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=932, - serialized_end=986, + serialized_start=966, + serialized_end=1020, ) @@ -527,8 +541,173 @@ extension_ranges=[], 
oneofs=[ ], - serialized_start=989, - serialized_end=1138, + serialized_start=1023, + serialized_end=1172, +) + + +_CREATEPROJECTREQUEST = _descriptor.Descriptor( + name='CreateProjectRequest', + full_name='feast.core.CreateProjectRequest', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='name', full_name='feast.core.CreateProjectRequest.name', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1174, + serialized_end=1210, +) + + +_CREATEPROJECTRESPONSE = _descriptor.Descriptor( + name='CreateProjectResponse', + full_name='feast.core.CreateProjectResponse', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1212, + serialized_end=1235, +) + + +_ARCHIVEPROJECTREQUEST = _descriptor.Descriptor( + name='ArchiveProjectRequest', + full_name='feast.core.ArchiveProjectRequest', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='name', full_name='feast.core.ArchiveProjectRequest.name', index=0, + number=1, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + 
is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1237, + serialized_end=1274, +) + + +_ARCHIVEPROJECTRESPONSE = _descriptor.Descriptor( + name='ArchiveProjectResponse', + full_name='feast.core.ArchiveProjectResponse', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1276, + serialized_end=1300, +) + + +_LISTPROJECTSREQUEST = _descriptor.Descriptor( + name='ListProjectsRequest', + full_name='feast.core.ListProjectsRequest', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1302, + serialized_end=1323, +) + + +_LISTPROJECTSRESPONSE = _descriptor.Descriptor( + name='ListProjectsResponse', + full_name='feast.core.ListProjectsResponse', + filename=None, + file=DESCRIPTOR, + containing_type=None, + fields=[ + _descriptor.FieldDescriptor( + name='projects', full_name='feast.core.ListProjectsResponse.projects', index=0, + number=1, type=9, cpp_type=9, label=3, + has_default_value=False, default_value=[], + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + ], + extensions=[ + ], + nested_types=[], + enum_types=[ + ], + serialized_options=None, + is_extendable=False, + syntax='proto3', + extension_ranges=[], + oneofs=[ + ], + serialized_start=1325, + serialized_end=1365, ) _GETFEATURESETRESPONSE.fields_by_name['feature_set'].message_type = feast_dot_core_dot_FeatureSet__pb2._FEATURESET @@ -558,6 +737,12 @@ DESCRIPTOR.message_types_by_name['GetFeastCoreVersionResponse'] = _GETFEASTCOREVERSIONRESPONSE 
DESCRIPTOR.message_types_by_name['UpdateStoreRequest'] = _UPDATESTOREREQUEST DESCRIPTOR.message_types_by_name['UpdateStoreResponse'] = _UPDATESTORERESPONSE +DESCRIPTOR.message_types_by_name['CreateProjectRequest'] = _CREATEPROJECTREQUEST +DESCRIPTOR.message_types_by_name['CreateProjectResponse'] = _CREATEPROJECTRESPONSE +DESCRIPTOR.message_types_by_name['ArchiveProjectRequest'] = _ARCHIVEPROJECTREQUEST +DESCRIPTOR.message_types_by_name['ArchiveProjectResponse'] = _ARCHIVEPROJECTRESPONSE +DESCRIPTOR.message_types_by_name['ListProjectsRequest'] = _LISTPROJECTSREQUEST +DESCRIPTOR.message_types_by_name['ListProjectsResponse'] = _LISTPROJECTSRESPONSE _sym_db.RegisterFileDescriptor(DESCRIPTOR) GetFeatureSetRequest = _reflection.GeneratedProtocolMessageType('GetFeatureSetRequest', (_message.Message,), { @@ -660,6 +845,48 @@ }) _sym_db.RegisterMessage(UpdateStoreResponse) +CreateProjectRequest = _reflection.GeneratedProtocolMessageType('CreateProjectRequest', (_message.Message,), { + 'DESCRIPTOR' : _CREATEPROJECTREQUEST, + '__module__' : 'feast.core.CoreService_pb2' + # @@protoc_insertion_point(class_scope:feast.core.CreateProjectRequest) + }) +_sym_db.RegisterMessage(CreateProjectRequest) + +CreateProjectResponse = _reflection.GeneratedProtocolMessageType('CreateProjectResponse', (_message.Message,), { + 'DESCRIPTOR' : _CREATEPROJECTRESPONSE, + '__module__' : 'feast.core.CoreService_pb2' + # @@protoc_insertion_point(class_scope:feast.core.CreateProjectResponse) + }) +_sym_db.RegisterMessage(CreateProjectResponse) + +ArchiveProjectRequest = _reflection.GeneratedProtocolMessageType('ArchiveProjectRequest', (_message.Message,), { + 'DESCRIPTOR' : _ARCHIVEPROJECTREQUEST, + '__module__' : 'feast.core.CoreService_pb2' + # @@protoc_insertion_point(class_scope:feast.core.ArchiveProjectRequest) + }) +_sym_db.RegisterMessage(ArchiveProjectRequest) + +ArchiveProjectResponse = _reflection.GeneratedProtocolMessageType('ArchiveProjectResponse', (_message.Message,), { + 'DESCRIPTOR' : 
_ARCHIVEPROJECTRESPONSE, + '__module__' : 'feast.core.CoreService_pb2' + # @@protoc_insertion_point(class_scope:feast.core.ArchiveProjectResponse) + }) +_sym_db.RegisterMessage(ArchiveProjectResponse) + +ListProjectsRequest = _reflection.GeneratedProtocolMessageType('ListProjectsRequest', (_message.Message,), { + 'DESCRIPTOR' : _LISTPROJECTSREQUEST, + '__module__' : 'feast.core.CoreService_pb2' + # @@protoc_insertion_point(class_scope:feast.core.ListProjectsRequest) + }) +_sym_db.RegisterMessage(ListProjectsRequest) + +ListProjectsResponse = _reflection.GeneratedProtocolMessageType('ListProjectsResponse', (_message.Message,), { + 'DESCRIPTOR' : _LISTPROJECTSRESPONSE, + '__module__' : 'feast.core.CoreService_pb2' + # @@protoc_insertion_point(class_scope:feast.core.ListProjectsResponse) + }) +_sym_db.RegisterMessage(ListProjectsResponse) + DESCRIPTOR._options = None @@ -669,8 +896,8 @@ file=DESCRIPTOR, index=0, serialized_options=None, - serialized_start=1141, - serialized_end=1685, + serialized_start=1368, + serialized_end=2170, methods=[ _descriptor.MethodDescriptor( name='GetFeastCoreVersion', @@ -726,6 +953,33 @@ output_type=_UPDATESTORERESPONSE, serialized_options=None, ), + _descriptor.MethodDescriptor( + name='CreateProject', + full_name='feast.core.CoreService.CreateProject', + index=6, + containing_service=None, + input_type=_CREATEPROJECTREQUEST, + output_type=_CREATEPROJECTRESPONSE, + serialized_options=None, + ), + _descriptor.MethodDescriptor( + name='ArchiveProject', + full_name='feast.core.CoreService.ArchiveProject', + index=7, + containing_service=None, + input_type=_ARCHIVEPROJECTREQUEST, + output_type=_ARCHIVEPROJECTRESPONSE, + serialized_options=None, + ), + _descriptor.MethodDescriptor( + name='ListProjects', + full_name='feast.core.CoreService.ListProjects', + index=8, + containing_service=None, + input_type=_LISTPROJECTSREQUEST, + output_type=_LISTPROJECTSRESPONSE, + serialized_options=None, + ), ]) 
_sym_db.RegisterServiceDescriptor(_CORESERVICE) diff --git a/sdk/python/feast/core/CoreService_pb2.pyi b/sdk/python/feast/core/CoreService_pb2.pyi index 5cd6eaf6717..645226982ad 100644 --- a/sdk/python/feast/core/CoreService_pb2.pyi +++ b/sdk/python/feast/core/CoreService_pb2.pyi @@ -15,6 +15,7 @@ from google.protobuf.descriptor import ( from google.protobuf.internal.containers import ( RepeatedCompositeFieldContainer as google___protobuf___internal___containers___RepeatedCompositeFieldContainer, + RepeatedScalarFieldContainer as google___protobuf___internal___containers___RepeatedScalarFieldContainer, ) from google.protobuf.message import ( @@ -37,11 +38,13 @@ from typing_extensions import ( class GetFeatureSetRequest(google___protobuf___message___Message): DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... + project = ... # type: typing___Text name = ... # type: typing___Text version = ... # type: int def __init__(self, *, + project : typing___Optional[typing___Text] = None, name : typing___Optional[typing___Text] = None, version : typing___Optional[int] = None, ) -> None: ... @@ -50,9 +53,9 @@ class GetFeatureSetRequest(google___protobuf___message___Message): def MergeFrom(self, other_msg: google___protobuf___message___Message) -> None: ... def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ... if sys.version_info >= (3,): - def ClearField(self, field_name: typing_extensions___Literal[u"name",u"version"]) -> None: ... + def ClearField(self, field_name: typing_extensions___Literal[u"name",u"project",u"version"]) -> None: ... else: - def ClearField(self, field_name: typing_extensions___Literal[u"name",b"name",u"version",b"version"]) -> None: ... + def ClearField(self, field_name: typing_extensions___Literal[u"name",b"name",u"project",b"project",u"version",b"version"]) -> None: ... class GetFeatureSetResponse(google___protobuf___message___Message): DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... 
@@ -79,11 +82,13 @@ class ListFeatureSetsRequest(google___protobuf___message___Message): DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... class Filter(google___protobuf___message___Message): DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... + project = ... # type: typing___Text feature_set_name = ... # type: typing___Text feature_set_version = ... # type: typing___Text def __init__(self, *, + project : typing___Optional[typing___Text] = None, feature_set_name : typing___Optional[typing___Text] = None, feature_set_version : typing___Optional[typing___Text] = None, ) -> None: ... @@ -92,9 +97,9 @@ class ListFeatureSetsRequest(google___protobuf___message___Message): def MergeFrom(self, other_msg: google___protobuf___message___Message) -> None: ... def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ... if sys.version_info >= (3,): - def ClearField(self, field_name: typing_extensions___Literal[u"feature_set_name",u"feature_set_version"]) -> None: ... + def ClearField(self, field_name: typing_extensions___Literal[u"feature_set_name",u"feature_set_version",u"project"]) -> None: ... else: - def ClearField(self, field_name: typing_extensions___Literal[u"feature_set_name",b"feature_set_name",u"feature_set_version",b"feature_set_version"]) -> None: ... + def ClearField(self, field_name: typing_extensions___Literal[u"feature_set_name",b"feature_set_name",u"feature_set_version",b"feature_set_version",u"project",b"project"]) -> None: ... @property @@ -341,3 +346,84 @@ class UpdateStoreResponse(google___protobuf___message___Message): else: def HasField(self, field_name: typing_extensions___Literal[u"store",b"store"]) -> bool: ... def ClearField(self, field_name: typing_extensions___Literal[u"status",b"status",u"store",b"store"]) -> None: ... + +class CreateProjectRequest(google___protobuf___message___Message): + DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... + name = ... 
# type: typing___Text + + def __init__(self, + *, + name : typing___Optional[typing___Text] = None, + ) -> None: ... + @classmethod + def FromString(cls, s: bytes) -> CreateProjectRequest: ... + def MergeFrom(self, other_msg: google___protobuf___message___Message) -> None: ... + def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ... + if sys.version_info >= (3,): + def ClearField(self, field_name: typing_extensions___Literal[u"name"]) -> None: ... + else: + def ClearField(self, field_name: typing_extensions___Literal[u"name",b"name"]) -> None: ... + +class CreateProjectResponse(google___protobuf___message___Message): + DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... + + def __init__(self, + ) -> None: ... + @classmethod + def FromString(cls, s: bytes) -> CreateProjectResponse: ... + def MergeFrom(self, other_msg: google___protobuf___message___Message) -> None: ... + def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ... + +class ArchiveProjectRequest(google___protobuf___message___Message): + DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... + name = ... # type: typing___Text + + def __init__(self, + *, + name : typing___Optional[typing___Text] = None, + ) -> None: ... + @classmethod + def FromString(cls, s: bytes) -> ArchiveProjectRequest: ... + def MergeFrom(self, other_msg: google___protobuf___message___Message) -> None: ... + def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ... + if sys.version_info >= (3,): + def ClearField(self, field_name: typing_extensions___Literal[u"name"]) -> None: ... + else: + def ClearField(self, field_name: typing_extensions___Literal[u"name",b"name"]) -> None: ... + +class ArchiveProjectResponse(google___protobuf___message___Message): + DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... + + def __init__(self, + ) -> None: ... + @classmethod + def FromString(cls, s: bytes) -> ArchiveProjectResponse: ... 
+ def MergeFrom(self, other_msg: google___protobuf___message___Message) -> None: ... + def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ... + +class ListProjectsRequest(google___protobuf___message___Message): + DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... + + def __init__(self, + ) -> None: ... + @classmethod + def FromString(cls, s: bytes) -> ListProjectsRequest: ... + def MergeFrom(self, other_msg: google___protobuf___message___Message) -> None: ... + def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ... + +class ListProjectsResponse(google___protobuf___message___Message): + DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... + projects = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[typing___Text] + + def __init__(self, + *, + projects : typing___Optional[typing___Iterable[typing___Text]] = None, + ) -> None: ... + @classmethod + def FromString(cls, s: bytes) -> ListProjectsResponse: ... + def MergeFrom(self, other_msg: google___protobuf___message___Message) -> None: ... + def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ... + if sys.version_info >= (3,): + def ClearField(self, field_name: typing_extensions___Literal[u"projects"]) -> None: ... + else: + def ClearField(self, field_name: typing_extensions___Literal[u"projects",b"projects"]) -> None: ... 
diff --git a/sdk/python/feast/core/CoreService_pb2_grpc.py b/sdk/python/feast/core/CoreService_pb2_grpc.py index c4d28087791..0e17d0552a2 100644 --- a/sdk/python/feast/core/CoreService_pb2_grpc.py +++ b/sdk/python/feast/core/CoreService_pb2_grpc.py @@ -44,6 +44,21 @@ def __init__(self, channel): request_serializer=feast_dot_core_dot_CoreService__pb2.UpdateStoreRequest.SerializeToString, response_deserializer=feast_dot_core_dot_CoreService__pb2.UpdateStoreResponse.FromString, ) + self.CreateProject = channel.unary_unary( + '/feast.core.CoreService/CreateProject', + request_serializer=feast_dot_core_dot_CoreService__pb2.CreateProjectRequest.SerializeToString, + response_deserializer=feast_dot_core_dot_CoreService__pb2.CreateProjectResponse.FromString, + ) + self.ArchiveProject = channel.unary_unary( + '/feast.core.CoreService/ArchiveProject', + request_serializer=feast_dot_core_dot_CoreService__pb2.ArchiveProjectRequest.SerializeToString, + response_deserializer=feast_dot_core_dot_CoreService__pb2.ArchiveProjectResponse.FromString, + ) + self.ListProjects = channel.unary_unary( + '/feast.core.CoreService/ListProjects', + request_serializer=feast_dot_core_dot_CoreService__pb2.ListProjectsRequest.SerializeToString, + response_deserializer=feast_dot_core_dot_CoreService__pb2.ListProjectsResponse.FromString, + ) class CoreServiceServicer(object): @@ -108,6 +123,32 @@ def UpdateStore(self, request, context): context.set_details('Method not implemented!') raise NotImplementedError('Method not implemented!') + def CreateProject(self, request, context): + """Creates a project. Projects serve as namespaces within which resources like features will be + created. Both feature set names as well as field names must be unique within a project. Project + names themselves must be globally unique. 
+ """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def ArchiveProject(self, request, context): + """Archives a project. Archived projects will continue to exist and function, but won't be visible + through the Core API. Any existing ingestion or serving requests will continue to function, + but will result in warning messages being logged. It is not possible to unarchive a project + through the Core API + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + def ListProjects(self, request, context): + """Lists all projects active projects. + """ + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + def add_CoreServiceServicer_to_server(servicer, server): rpc_method_handlers = { @@ -141,6 +182,21 @@ def add_CoreServiceServicer_to_server(servicer, server): request_deserializer=feast_dot_core_dot_CoreService__pb2.UpdateStoreRequest.FromString, response_serializer=feast_dot_core_dot_CoreService__pb2.UpdateStoreResponse.SerializeToString, ), + 'CreateProject': grpc.unary_unary_rpc_method_handler( + servicer.CreateProject, + request_deserializer=feast_dot_core_dot_CoreService__pb2.CreateProjectRequest.FromString, + response_serializer=feast_dot_core_dot_CoreService__pb2.CreateProjectResponse.SerializeToString, + ), + 'ArchiveProject': grpc.unary_unary_rpc_method_handler( + servicer.ArchiveProject, + request_deserializer=feast_dot_core_dot_CoreService__pb2.ArchiveProjectRequest.FromString, + response_serializer=feast_dot_core_dot_CoreService__pb2.ArchiveProjectResponse.SerializeToString, + ), + 'ListProjects': grpc.unary_unary_rpc_method_handler( + servicer.ListProjects, + 
request_deserializer=feast_dot_core_dot_CoreService__pb2.ListProjectsRequest.FromString, + response_serializer=feast_dot_core_dot_CoreService__pb2.ListProjectsResponse.SerializeToString, + ), } generic_handler = grpc.method_handlers_generic_handler( 'feast.core.CoreService', rpc_method_handlers) diff --git a/sdk/python/feast/core/FeatureSet_pb2.py b/sdk/python/feast/core/FeatureSet_pb2.py index 0cb77f1a70b..991220ccae5 100644 --- a/sdk/python/feast/core/FeatureSet_pb2.py +++ b/sdk/python/feast/core/FeatureSet_pb2.py @@ -25,7 +25,7 @@ package='feast.core', syntax='proto3', serialized_options=_b('\n\nfeast.coreB\017FeatureSetProtoZ/github.com/gojek/feast/sdk/go/protos/feast/core'), - serialized_pb=_b('\n\x1b\x66\x65\x61st/core/FeatureSet.proto\x12\nfeast.core\x1a\x17\x66\x65\x61st/types/Value.proto\x1a\x17\x66\x65\x61st/core/Source.proto\x1a\x1egoogle/protobuf/duration.proto\x1a\x1fgoogle/protobuf/timestamp.proto\"`\n\nFeatureSet\x12(\n\x04spec\x18\x01 \x01(\x0b\x32\x1a.feast.core.FeatureSetSpec\x12(\n\x04meta\x18\x02 \x01(\x0b\x32\x1a.feast.core.FeatureSetMeta\"\xd4\x01\n\x0e\x46\x65\x61tureSetSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\x05\x12(\n\x08\x65ntities\x18\x03 \x03(\x0b\x32\x16.feast.core.EntitySpec\x12)\n\x08\x66\x65\x61tures\x18\x04 \x03(\x0b\x32\x17.feast.core.FeatureSpec\x12*\n\x07max_age\x18\x05 \x01(\x0b\x32\x19.google.protobuf.Duration\x12\"\n\x06source\x18\x06 \x01(\x0b\x32\x12.feast.core.Source\"K\n\nEntitySpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12/\n\nvalue_type\x18\x02 \x01(\x0e\x32\x1b.feast.types.ValueType.Enum\"L\n\x0b\x46\x65\x61tureSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12/\n\nvalue_type\x18\x02 \x01(\x0e\x32\x1b.feast.types.ValueType.Enum\"u\n\x0e\x46\x65\x61tureSetMeta\x12\x35\n\x11\x63reated_timestamp\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12,\n\x06status\x18\x02 
\x01(\x0e\x32\x1c.feast.core.FeatureSetStatus*L\n\x10\x46\x65\x61tureSetStatus\x12\x12\n\x0eSTATUS_INVALID\x10\x00\x12\x12\n\x0eSTATUS_PENDING\x10\x01\x12\x10\n\x0cSTATUS_READY\x10\x02\x42N\n\nfeast.coreB\x0f\x46\x65\x61tureSetProtoZ/github.com/gojek/feast/sdk/go/protos/feast/coreb\x06proto3') + serialized_pb=_b('\n\x1b\x66\x65\x61st/core/FeatureSet.proto\x12\nfeast.core\x1a\x17\x66\x65\x61st/types/Value.proto\x1a\x17\x66\x65\x61st/core/Source.proto\x1a\x1egoogle/protobuf/duration.proto\x1a\x1fgoogle/protobuf/timestamp.proto\"`\n\nFeatureSet\x12(\n\x04spec\x18\x01 \x01(\x0b\x32\x1a.feast.core.FeatureSetSpec\x12(\n\x04meta\x18\x02 \x01(\x0b\x32\x1a.feast.core.FeatureSetMeta\"\xe5\x01\n\x0e\x46\x65\x61tureSetSpec\x12\x0f\n\x07project\x18\x07 \x01(\t\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\x05\x12(\n\x08\x65ntities\x18\x03 \x03(\x0b\x32\x16.feast.core.EntitySpec\x12)\n\x08\x66\x65\x61tures\x18\x04 \x03(\x0b\x32\x17.feast.core.FeatureSpec\x12*\n\x07max_age\x18\x05 \x01(\x0b\x32\x19.google.protobuf.Duration\x12\"\n\x06source\x18\x06 \x01(\x0b\x32\x12.feast.core.Source\"K\n\nEntitySpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12/\n\nvalue_type\x18\x02 \x01(\x0e\x32\x1b.feast.types.ValueType.Enum\"L\n\x0b\x46\x65\x61tureSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12/\n\nvalue_type\x18\x02 \x01(\x0e\x32\x1b.feast.types.ValueType.Enum\"u\n\x0e\x46\x65\x61tureSetMeta\x12\x35\n\x11\x63reated_timestamp\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12,\n\x06status\x18\x02 \x01(\x0e\x32\x1c.feast.core.FeatureSetStatus*L\n\x10\x46\x65\x61tureSetStatus\x12\x12\n\x0eSTATUS_INVALID\x10\x00\x12\x12\n\x0eSTATUS_PENDING\x10\x01\x12\x10\n\x0cSTATUS_READY\x10\x02\x42N\n\nfeast.coreB\x0f\x46\x65\x61tureSetProtoZ/github.com/gojek/feast/sdk/go/protos/feast/coreb\x06proto3') , 
dependencies=[feast_dot_types_dot_Value__pb2.DESCRIPTOR,feast_dot_core_dot_Source__pb2.DESCRIPTOR,google_dot_protobuf_dot_duration__pb2.DESCRIPTOR,google_dot_protobuf_dot_timestamp__pb2.DESCRIPTOR,]) @@ -50,8 +50,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=745, - serialized_end=821, + serialized_start=762, + serialized_end=838, ) _sym_db.RegisterEnumDescriptor(_FEATURESETSTATUS) @@ -108,42 +108,49 @@ containing_type=None, fields=[ _descriptor.FieldDescriptor( - name='name', full_name='feast.core.FeatureSetSpec.name', index=0, + name='project', full_name='feast.core.FeatureSetSpec.project', index=0, + number=7, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='name', full_name='feast.core.FeatureSetSpec.name', index=1, number=1, type=9, cpp_type=9, label=1, has_default_value=False, default_value=_b("").decode('utf-8'), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR), _descriptor.FieldDescriptor( - name='version', full_name='feast.core.FeatureSetSpec.version', index=1, + name='version', full_name='feast.core.FeatureSetSpec.version', index=2, number=2, type=5, cpp_type=1, label=1, has_default_value=False, default_value=0, message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR), _descriptor.FieldDescriptor( - name='entities', full_name='feast.core.FeatureSetSpec.entities', index=2, + name='entities', full_name='feast.core.FeatureSetSpec.entities', index=3, number=3, type=11, cpp_type=10, label=3, has_default_value=False, default_value=[], message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, 
serialized_options=None, file=DESCRIPTOR), _descriptor.FieldDescriptor( - name='features', full_name='feast.core.FeatureSetSpec.features', index=3, + name='features', full_name='feast.core.FeatureSetSpec.features', index=4, number=4, type=11, cpp_type=10, label=3, has_default_value=False, default_value=[], message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR), _descriptor.FieldDescriptor( - name='max_age', full_name='feast.core.FeatureSetSpec.max_age', index=4, + name='max_age', full_name='feast.core.FeatureSetSpec.max_age', index=5, number=5, type=11, cpp_type=10, label=1, has_default_value=False, default_value=None, message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR), _descriptor.FieldDescriptor( - name='source', full_name='feast.core.FeatureSetSpec.source', index=5, + name='source', full_name='feast.core.FeatureSetSpec.source', index=6, number=6, type=11, cpp_type=10, label=1, has_default_value=False, default_value=None, message_type=None, enum_type=None, containing_type=None, @@ -162,7 +169,7 @@ oneofs=[ ], serialized_start=257, - serialized_end=469, + serialized_end=486, ) @@ -199,8 +206,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=471, - serialized_end=546, + serialized_start=488, + serialized_end=563, ) @@ -237,8 +244,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=548, - serialized_end=624, + serialized_start=565, + serialized_end=641, ) @@ -275,8 +282,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=626, - serialized_end=743, + serialized_start=643, + serialized_end=760, ) _FEATURESET.fields_by_name['spec'].message_type = _FEATURESETSPEC diff --git a/sdk/python/feast/core/FeatureSet_pb2.pyi b/sdk/python/feast/core/FeatureSet_pb2.pyi index 6fa03ed3592..c663c70c682 100644 --- a/sdk/python/feast/core/FeatureSet_pb2.pyi +++ 
b/sdk/python/feast/core/FeatureSet_pb2.pyi @@ -89,6 +89,7 @@ class FeatureSet(google___protobuf___message___Message): class FeatureSetSpec(google___protobuf___message___Message): DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... + project = ... # type: typing___Text name = ... # type: typing___Text version = ... # type: int @@ -106,6 +107,7 @@ class FeatureSetSpec(google___protobuf___message___Message): def __init__(self, *, + project : typing___Optional[typing___Text] = None, name : typing___Optional[typing___Text] = None, version : typing___Optional[int] = None, entities : typing___Optional[typing___Iterable[EntitySpec]] = None, @@ -119,10 +121,10 @@ class FeatureSetSpec(google___protobuf___message___Message): def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ... if sys.version_info >= (3,): def HasField(self, field_name: typing_extensions___Literal[u"max_age",u"source"]) -> bool: ... - def ClearField(self, field_name: typing_extensions___Literal[u"entities",u"features",u"max_age",u"name",u"source",u"version"]) -> None: ... + def ClearField(self, field_name: typing_extensions___Literal[u"entities",u"features",u"max_age",u"name",u"project",u"source",u"version"]) -> None: ... else: def HasField(self, field_name: typing_extensions___Literal[u"max_age",b"max_age",u"source",b"source"]) -> bool: ... - def ClearField(self, field_name: typing_extensions___Literal[u"entities",b"entities",u"features",b"features",u"max_age",b"max_age",u"name",b"name",u"source",b"source",u"version",b"version"]) -> None: ... + def ClearField(self, field_name: typing_extensions___Literal[u"entities",b"entities",u"features",b"features",u"max_age",b"max_age",u"name",b"name",u"project",b"project",u"source",b"source",u"version",b"version"]) -> None: ... class EntitySpec(google___protobuf___message___Message): DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... 
diff --git a/sdk/python/feast/core/Store_pb2.py b/sdk/python/feast/core/Store_pb2.py index c7f9e07d871..716a597b9a3 100644 --- a/sdk/python/feast/core/Store_pb2.py +++ b/sdk/python/feast/core/Store_pb2.py @@ -20,7 +20,7 @@ package='feast.core', syntax='proto3', serialized_options=_b('\n\nfeast.coreB\nStoreProtoZ/github.com/gojek/feast/sdk/go/protos/feast/core'), - serialized_pb=_b('\n\x16\x66\x65\x61st/core/Store.proto\x12\nfeast.core\"\xb9\x04\n\x05Store\x12\x0c\n\x04name\x18\x01 \x01(\t\x12)\n\x04type\x18\x02 \x01(\x0e\x32\x1b.feast.core.Store.StoreType\x12\x35\n\rsubscriptions\x18\x04 \x03(\x0b\x32\x1e.feast.core.Store.Subscription\x12\x35\n\x0credis_config\x18\x0b \x01(\x0b\x32\x1d.feast.core.Store.RedisConfigH\x00\x12;\n\x0f\x62igquery_config\x18\x0c \x01(\x0b\x32 .feast.core.Store.BigQueryConfigH\x00\x12=\n\x10\x63\x61ssandra_config\x18\r \x01(\x0b\x32!.feast.core.Store.CassandraConfigH\x00\x1a)\n\x0bRedisConfig\x12\x0c\n\x04host\x18\x01 \x01(\t\x12\x0c\n\x04port\x18\x02 \x01(\x05\x1a\x38\n\x0e\x42igQueryConfig\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x1a-\n\x0f\x43\x61ssandraConfig\x12\x0c\n\x04host\x18\x01 \x01(\t\x12\x0c\n\x04port\x18\x02 \x01(\x05\x1a-\n\x0cSubscription\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t\"@\n\tStoreType\x12\x0b\n\x07INVALID\x10\x00\x12\t\n\x05REDIS\x10\x01\x12\x0c\n\x08\x42IGQUERY\x10\x02\x12\r\n\tCASSANDRA\x10\x03\x42\x08\n\x06\x63onfigBI\n\nfeast.coreB\nStoreProtoZ/github.com/gojek/feast/sdk/go/protos/feast/coreb\x06proto3') + serialized_pb=_b('\n\x16\x66\x65\x61st/core/Store.proto\x12\nfeast.core\"\xca\x04\n\x05Store\x12\x0c\n\x04name\x18\x01 \x01(\t\x12)\n\x04type\x18\x02 \x01(\x0e\x32\x1b.feast.core.Store.StoreType\x12\x35\n\rsubscriptions\x18\x04 \x03(\x0b\x32\x1e.feast.core.Store.Subscription\x12\x35\n\x0credis_config\x18\x0b \x01(\x0b\x32\x1d.feast.core.Store.RedisConfigH\x00\x12;\n\x0f\x62igquery_config\x18\x0c \x01(\x0b\x32 
.feast.core.Store.BigQueryConfigH\x00\x12=\n\x10\x63\x61ssandra_config\x18\r \x01(\x0b\x32!.feast.core.Store.CassandraConfigH\x00\x1a)\n\x0bRedisConfig\x12\x0c\n\x04host\x18\x01 \x01(\t\x12\x0c\n\x04port\x18\x02 \x01(\x05\x1a\x38\n\x0e\x42igQueryConfig\x12\x12\n\nproject_id\x18\x01 \x01(\t\x12\x12\n\ndataset_id\x18\x02 \x01(\t\x1a-\n\x0f\x43\x61ssandraConfig\x12\x0c\n\x04host\x18\x01 \x01(\t\x12\x0c\n\x04port\x18\x02 \x01(\x05\x1a>\n\x0cSubscription\x12\x0f\n\x07project\x18\x03 \x01(\t\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\t\"@\n\tStoreType\x12\x0b\n\x07INVALID\x10\x00\x12\t\n\x05REDIS\x10\x01\x12\x0c\n\x08\x42IGQUERY\x10\x02\x12\r\n\tCASSANDRA\x10\x03\x42\x08\n\x06\x63onfigBI\n\nfeast.coreB\nStoreProtoZ/github.com/gojek/feast/sdk/go/protos/feast/coreb\x06proto3') ) @@ -50,8 +50,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=534, - serialized_end=598, + serialized_start=551, + serialized_end=615, ) _sym_db.RegisterEnumDescriptor(_STORE_STORETYPE) @@ -175,14 +175,21 @@ containing_type=None, fields=[ _descriptor.FieldDescriptor( - name='name', full_name='feast.core.Store.Subscription.name', index=0, + name='project', full_name='feast.core.Store.Subscription.project', index=0, + number=3, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), + message_type=None, enum_type=None, containing_type=None, + is_extension=False, extension_scope=None, + serialized_options=None, file=DESCRIPTOR), + _descriptor.FieldDescriptor( + name='name', full_name='feast.core.Store.Subscription.name', index=1, number=1, type=9, cpp_type=9, label=1, has_default_value=False, default_value=_b("").decode('utf-8'), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR), _descriptor.FieldDescriptor( - name='version', full_name='feast.core.Store.Subscription.version', index=1, + name='version', 
full_name='feast.core.Store.Subscription.version', index=2, number=2, type=9, cpp_type=9, label=1, has_default_value=False, default_value=_b("").decode('utf-8'), message_type=None, enum_type=None, containing_type=None, @@ -201,7 +208,7 @@ oneofs=[ ], serialized_start=487, - serialized_end=532, + serialized_end=549, ) _STORE = _descriptor.Descriptor( @@ -270,7 +277,7 @@ index=0, containing_type=None, fields=[]), ], serialized_start=39, - serialized_end=608, + serialized_end=625, ) _STORE_REDISCONFIG.containing_type = _STORE diff --git a/sdk/python/feast/core/Store_pb2.pyi b/sdk/python/feast/core/Store_pb2.pyi index 726a9d5443e..541bcd329bb 100644 --- a/sdk/python/feast/core/Store_pb2.pyi +++ b/sdk/python/feast/core/Store_pb2.pyi @@ -109,11 +109,13 @@ class Store(google___protobuf___message___Message): class Subscription(google___protobuf___message___Message): DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... + project = ... # type: typing___Text name = ... # type: typing___Text version = ... # type: typing___Text def __init__(self, *, + project : typing___Optional[typing___Text] = None, name : typing___Optional[typing___Text] = None, version : typing___Optional[typing___Text] = None, ) -> None: ... @@ -122,9 +124,9 @@ class Store(google___protobuf___message___Message): def MergeFrom(self, other_msg: google___protobuf___message___Message) -> None: ... def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ... if sys.version_info >= (3,): - def ClearField(self, field_name: typing_extensions___Literal[u"name",u"version"]) -> None: ... + def ClearField(self, field_name: typing_extensions___Literal[u"name",u"project",u"version"]) -> None: ... else: - def ClearField(self, field_name: typing_extensions___Literal[u"name",b"name",u"version",b"version"]) -> None: ... + def ClearField(self, field_name: typing_extensions___Literal[u"name",b"name",u"project",b"project",u"version",b"version"]) -> None: ... name = ... 
# type: typing___Text type = ... # type: Store.StoreType diff --git a/sdk/python/feast/exceptions.py b/sdk/python/feast/exceptions.py deleted file mode 100644 index ca5f20694c9..00000000000 --- a/sdk/python/feast/exceptions.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright 2019 The Feast Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -def format_grpc_exception(method: str, code: str, details: str) -> str: - """ - Consistent format for use in printing gRPC exceptions - """ - return f'{method} failed with code "{code}"\n{details}' diff --git a/sdk/python/feast/feature_set.py b/sdk/python/feast/feature_set.py index b402ef3acd5..d5576607513 100644 --- a/sdk/python/feast/feature_set.py +++ b/sdk/python/feast/feature_set.py @@ -30,7 +30,11 @@ from feast.type_map import pa_to_feast_value_type from feast.type_map import python_type_to_feast_value_type from google.protobuf import json_format +from feast.core.FeatureSet_pb2 import FeatureSetSpec as FeatureSetSpecProto +from feast.core.FeatureSet_pb2 import FeatureSetMeta as FeatureSetMetaProto +from feast.core.FeatureSet_pb2 import FeatureSet as FeatureSetProto from google.protobuf.duration_pb2 import Duration +from feast.type_map import python_type_to_feast_value_type from google.protobuf.json_format import MessageToJson from pandas.api.types import is_datetime64_ns_dtype from pyarrow.lib import TimestampType @@ -44,12 +48,14 @@ class FeatureSet: def __init__( self, name: str, + project: str = None, features: 
List[Feature] = None, entities: List[Entity] = None, source: Source = None, - max_age: Optional[Duration] = None + max_age: Optional[Duration] = None, ): self._name = name + self._project = project self._fields = OrderedDict() # type: Dict[str, Field] if features is not None: self.features = features @@ -61,7 +67,6 @@ def __init__( self._source = source self._max_age = max_age self._version = None - self._client = None self._status = None self._created_timestamp = None @@ -73,7 +78,11 @@ def __eq__(self, other): if key not in other.fields.keys() or self.fields[key] != other.fields[key]: return False - if self.name != other.name or self.max_age != other.max_age: + if ( + self.name != other.name + or self.project != other.project + or self.max_age != other.max_age + ): return False return True @@ -81,10 +90,14 @@ def __str__(self): return str(MessageToJson(self.to_proto())) def __repr__(self): - shortname = "" + self._name - if self._version: - shortname += ":" + str(self._version).strip() - return shortname + ref = "" + if self.project: + ref += self.project + "/" + if self.name: + ref += self.name + if self.version: + ref += ":" + str(self.version).strip() + return ref @property def fields(self) -> Dict[str, Field]: @@ -159,6 +172,20 @@ def name(self, name): """ self._name = name + @property + def project(self): + """ + Returns the project that this feature set belongs to + """ + return self._project + + @project.setter + def project(self, project): + """ + Sets the project that this feature set belongs to + """ + self._project = project + @property def source(self): """ @@ -410,12 +437,13 @@ def infer_fields_from_df( print(output_log) def infer_fields_from_pa( - self, table: pa.lib.Table, - entities: Optional[List[Entity]] = None, - features: Optional[List[Feature]] = None, - replace_existing_features: bool = False, - replace_existing_entities: bool = False, - discard_unused_fields: bool = False + self, + table: pa.lib.Table, + entities: Optional[List[Entity]] = 
None, + features: Optional[List[Feature]] = None, + replace_existing_features: bool = False, + replace_existing_entities: bool = False, + discard_unused_fields: bool = False, ) -> None: """ Adds fields (Features or Entities) to a feature set based on the schema @@ -514,22 +542,21 @@ def infer_fields_from_pa( # Only overwrite conflicting fields if replacement is allowed if column in new_fields: if ( - isinstance(self._fields[column], Feature) - and not replace_existing_features + isinstance(self._fields[column], Feature) + and not replace_existing_features ): continue if ( - isinstance(self._fields[column], Entity) - and not replace_existing_entities + isinstance(self._fields[column], Entity) + and not replace_existing_entities ): continue # Store this fields as a feature # TODO: (Minor) Change the parameter name from dtype to patype new_fields[column] = Feature( - name=column, - dtype=self._infer_pa_column_type(table.column(column)) + name=column, dtype=self._infer_pa_column_type(table.column(column)) ) output_log += f"{type(new_fields[column]).__name__} {new_fields[column].name} ({new_fields[column].dtype}) added from PyArrow Table.\n" @@ -598,6 +625,7 @@ def _update_from_feature_set(self, feature_set): """ self.name = feature_set.name + self.project = feature_set.project self.version = feature_set.version self.source = feature_set.source self.max_age = feature_set.max_age @@ -629,6 +657,9 @@ def is_valid(self): if feature set is invalid. 
""" + if not self.name: + raise ValueError(f"No name found in feature set.") + if len(self.entities) == 0: raise ValueError(f"No entities found in feature set {self.name}") @@ -691,7 +722,10 @@ def from_proto(cls, feature_set_proto: FeatureSetProto): None if feature_set_proto.spec.source.type == 0 else Source.from_proto(feature_set_proto.spec.source) - ) + ), + project=feature_set_proto.spec.project + if len(feature_set_proto.spec.project) == 0 + else feature_set_proto.spec.project, ) feature_set._version = feature_set_proto.spec.version feature_set._status = feature_set_proto.meta.status @@ -713,6 +747,7 @@ def to_proto(self) -> FeatureSetProto: spec = FeatureSetSpecProto( name=self.name, version=self.version, + project=self.project, max_age=self.max_age, source=self.source.to_proto() if self.source is not None else None, features=[ diff --git a/sdk/python/feast/job.py b/sdk/python/feast/job.py index 26f6181ee2d..f849f6630da 100644 --- a/sdk/python/feast/job.py +++ b/sdk/python/feast/job.py @@ -132,10 +132,7 @@ def result(self, timeout_sec: int = DEFAULT_TIMEOUT_SEC): for record in avro_reader: yield record - def to_dataframe( - self, - timeout_sec: int = DEFAULT_TIMEOUT_SEC - ) -> pd.DataFrame: + def to_dataframe(self, timeout_sec: int = DEFAULT_TIMEOUT_SEC) -> pd.DataFrame: """ Wait until a job is done to get an iterable rows of result. This method will split the response into chunked DataFrame of a specified size to @@ -157,9 +154,7 @@ def to_dataframe( return pd.DataFrame.from_records(records) def to_chunked_dataframe( - self, - max_chunk_size: int = -1, - timeout_sec: int = DEFAULT_TIMEOUT_SEC + self, max_chunk_size: int = -1, timeout_sec: int = DEFAULT_TIMEOUT_SEC ) -> pd.DataFrame: """ Wait until a job is done to get an iterable rows of result. 
This method diff --git a/sdk/python/feast/loaders/abstract_producer.py b/sdk/python/feast/loaders/abstract_producer.py index 884ae49984c..d0ddabf1e56 100644 --- a/sdk/python/feast/loaders/abstract_producer.py +++ b/sdk/python/feast/loaders/abstract_producer.py @@ -22,12 +22,7 @@ class AbstractProducer: Abstract class for Kafka producers """ - def __init__( - self, - brokers: str, - row_count: int, - disable_progress_bar: bool - ): + def __init__(self, brokers: str, row_count: int, disable_progress_bar: bool): self.brokers = brokers self.row_count = row_count self.error_count = 0 @@ -35,20 +30,15 @@ def __init__( # Progress bar will always display average rate self.pbar = tqdm( - total=row_count, - unit="rows", - smoothing=0, - disable=disable_progress_bar + total=row_count, unit="rows", smoothing=0, disable=disable_progress_bar ) def produce(self, topic: str, data: str): - message = "{} should implement a produce method".format( - self.__class__.__name__) + message = "{} should implement a produce method".format(self.__class__.__name__) raise NotImplementedError(message) def flush(self, timeout: int): - message = "{} should implement a flush method".format( - self.__class__.__name__) + message = "{} should implement a flush method".format(self.__class__.__name__) raise NotImplementedError(message) def _inc_pbar(self, meta): @@ -98,13 +88,9 @@ class ConfluentProducer(AbstractProducer): Concrete implementation of Confluent Kafka producer (confluent-kafka) """ - def __init__( - self, - brokers: str, - row_count: int, - disable_progress_bar: bool - ): + def __init__(self, brokers: str, row_count: int, disable_progress_bar: bool): from confluent_kafka import Producer + self.producer = Producer({"bootstrap.servers": brokers}) super().__init__(brokers, row_count, disable_progress_bar) @@ -122,8 +108,7 @@ def produce(self, topic: str, value: bytes) -> None: """ try: - self.producer.produce( - topic, value=value, callback=self._delivery_callback) + 
self.producer.produce(topic, value=value, callback=self._delivery_callback) # Serve delivery callback queue. # NOTE: Since produce() is an asynchronous API this poll() call # will most likely not serve the delivery callback for the @@ -173,13 +158,9 @@ class KafkaPythonProducer(AbstractProducer): Concrete implementation of Python Kafka producer (kafka-python) """ - def __init__( - self, - brokers: str, - row_count: int, - disable_progress_bar: bool - ): + def __init__(self, brokers: str, row_count: int, disable_progress_bar: bool): from kafka import KafkaProducer + self.producer = KafkaProducer(bootstrap_servers=[brokers]) super().__init__(brokers, row_count, disable_progress_bar) @@ -199,8 +180,11 @@ def produce(self, topic: str, value: bytes): KafkaTimeoutError: if unable to fetch topic metadata, or unable to obtain memory buffer prior to configured max_block_ms """ - return self.producer.send(topic, value=value).add_callback( - self._inc_pbar).add_errback(self._set_error) + return ( + self.producer.send(topic, value=value) + .add_callback(self._inc_pbar) + .add_errback(self._set_error) + ) def flush(self, timeout: Optional[int]): """ @@ -220,7 +204,7 @@ def flush(self, timeout: Optional[int]): def get_producer( - brokers: str, row_count: int, disable_progress_bar: bool + brokers: str, row_count: int, disable_progress_bar: bool ) -> Union[ConfluentProducer, KafkaPythonProducer]: """ Simple context helper function that returns a AbstractProducer object when diff --git a/sdk/python/feast/loaders/file.py b/sdk/python/feast/loaders/file.py index 108f2790dd8..bb050c07c6d 100644 --- a/sdk/python/feast/loaders/file.py +++ b/sdk/python/feast/loaders/file.py @@ -27,7 +27,7 @@ def export_source_to_staging_location( - source: Union[pd.DataFrame, str], staging_location_uri: str + source: Union[pd.DataFrame, str], staging_location_uri: str ) -> List[str]: """ Uploads a DataFrame as an Avro file to a remote staging location. 
@@ -69,38 +69,33 @@ def export_source_to_staging_location( uri_path = None # Remote gs staging location provided by serving - dir_path, file_name, source_path = export_dataframe_to_local( - source, - uri_path - ) + dir_path, file_name, source_path = export_dataframe_to_local(source, uri_path) elif urlparse(source).scheme in ["", "file"]: # Local file provided as a source dir_path = None file_name = os.path.basename(source) - source_path = os.path.abspath(os.path.join( - urlparse(source).netloc, urlparse(source).path)) + source_path = os.path.abspath( + os.path.join(urlparse(source).netloc, urlparse(source).path) + ) elif urlparse(source).scheme == "gs": # Google Cloud Storage path provided input_source_uri = urlparse(source) if "*" in source: # Wildcard path - return _get_files( - bucket=input_source_uri.hostname, - uri=input_source_uri - ) + return _get_files(bucket=input_source_uri.hostname, uri=input_source_uri) else: return [source] else: - raise Exception(f"Only string and DataFrame types are allowed as a " - f"source, {type(source)} was provided.") + raise Exception( + f"Only string and DataFrame types are allowed as a " + f"source, {type(source)} was provided." + ) # Push data to required staging location if uri.scheme == "gs": # Staging location is a Google Cloud Storage path upload_file_to_gcs( - source_path, - uri.hostname, - str(uri.path).strip("/") + "/" + file_name + source_path, uri.hostname, str(uri.path).strip("/") + "/" + file_name ) elif uri.scheme == "file": # Staging location is a file path @@ -120,8 +115,7 @@ def export_source_to_staging_location( def export_dataframe_to_local( - df: pd.DataFrame, - dir_path: Optional[str] = None + df: pd.DataFrame, dir_path: Optional[str] = None ) -> Tuple[str, str, str]: """ Exports a pandas DataFrame to the local filesystem. @@ -149,11 +143,7 @@ def export_dataframe_to_local( # Temporarily rename datetime column to event_timestamp. Ideally we would # force the schema with our avro writer instead. 
- df.columns = [ - "event_timestamp" - if col == "datetime" else col - for col in df.columns - ] + df.columns = ["event_timestamp" if col == "datetime" else col for col in df.columns] try: # Export dataset to file in local path @@ -163,9 +153,7 @@ def export_dataframe_to_local( finally: # Revert event_timestamp column to datetime df.columns = [ - "datetime" - if col == "event_timestamp" else col - for col in df.columns + "datetime" if col == "event_timestamp" else col for col in df.columns ] return dir_path, file_name, dest_path @@ -223,13 +211,14 @@ def _get_files(bucket: str, uri: ParseResult) -> List[str]: if "*" in path: regex = re.compile(path.replace("*", ".*?").strip("/")) blob_list = bucket.list_blobs( - prefix=path.strip("/").split("*")[0], - delimiter="/" + prefix=path.strip("/").split("*")[0], delimiter="/" ) # File path should not be in path (file path must be longer than path) - return [f"{uri.scheme}://{uri.hostname}/{file}" - for file in [x.name for x in blob_list] - if re.match(regex, file) and file not in path] + return [ + f"{uri.scheme}://{uri.hostname}/{file}" + for file in [x.name for x in blob_list] + if re.match(regex, file) and file not in path + ] else: raise Exception(f"{path} is not a wildcard path") diff --git a/sdk/python/feast/loaders/ingest.py b/sdk/python/feast/loaders/ingest.py index cbe80086e67..95b699d0005 100644 --- a/sdk/python/feast/loaders/ingest.py +++ b/sdk/python/feast/loaders/ingest.py @@ -7,8 +7,10 @@ import pyarrow.parquet as pq from feast.constants import DATETIME_COLUMN from feast.feature_set import FeatureSet -from feast.type_map import pa_column_to_timestamp_proto_column, \ - pa_column_to_proto_column +from feast.type_map import ( + pa_column_to_timestamp_proto_column, + pa_column_to_proto_column, +) from feast.types import Field_pb2 as FieldProto from feast.types.FeatureRow_pb2 import FeatureRow @@ -22,11 +24,7 @@ KAFKA_CHUNK_PRODUCTION_TIMEOUT = 120 # type: int -def _encode_pa_tables( - file: str, - fs: FeatureSet, 
- row_group_idx: int, -) -> List[bytes]: +def _encode_pa_tables(file: str, fs: FeatureSet, row_group_idx: int) -> List[bytes]: """ Helper function to encode a PyArrow table(s) read from parquet file(s) into FeatureRows. @@ -58,17 +56,15 @@ def _encode_pa_tables( table = pq_file.read_row_group(row_group_idx) # Add datetime column - datetime_col = pa_column_to_timestamp_proto_column( - table.column(DATETIME_COLUMN)) + datetime_col = pa_column_to_timestamp_proto_column(table.column(DATETIME_COLUMN)) # Preprocess the columns by converting all its values to Proto values proto_columns = { - field_name: pa_column_to_proto_column(field.dtype, - table.column(field_name)) + field_name: pa_column_to_proto_column(field.dtype, table.column(field_name)) for field_name, field in fs.fields.items() } - feature_set = f"{fs.name}:{fs.version}" + feature_set = f"{fs.project}/{fs.name}:{fs.version}" # List to store result feature_rows = [] @@ -80,8 +76,9 @@ def _encode_pa_tables( # Iterate through the rows for row_idx in range(table.num_rows): - feature_row = FeatureRow(event_timestamp=datetime_col[row_idx], - feature_set=feature_set) + feature_row = FeatureRow( + event_timestamp=datetime_col[row_idx], feature_set=feature_set + ) # Loop optimization declaration ext = feature_row.fields.extend @@ -96,10 +93,7 @@ def _encode_pa_tables( def get_feature_row_chunks( - file: str, - row_groups: List[int], - fs: FeatureSet, - max_workers: int + file: str, row_groups: List[int], fs: FeatureSet, max_workers: int ) -> Iterable[List[bytes]]: """ Iterator function to encode a PyArrow table read from a parquet file to diff --git a/sdk/python/feast/serving/ServingService_pb2.py b/sdk/python/feast/serving/ServingService_pb2.py index e7258f5a7d5..9d0d55f2ab4 100644 --- a/sdk/python/feast/serving/ServingService_pb2.py +++ b/sdk/python/feast/serving/ServingService_pb2.py @@ -24,7 +24,7 @@ package='feast.serving', syntax='proto3', 
serialized_options=_b('\n\rfeast.servingB\017ServingAPIProtoZ2github.com/gojek/feast/sdk/go/protos/feast/serving'), - serialized_pb=_b('\n\"feast/serving/ServingService.proto\x12\rfeast.serving\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1egoogle/protobuf/duration.proto\x1a\x17\x66\x65\x61st/types/Value.proto\"\x1c\n\x1aGetFeastServingInfoRequest\"{\n\x1bGetFeastServingInfoResponse\x12\x0f\n\x07version\x18\x01 \x01(\t\x12-\n\x04type\x18\x02 \x01(\x0e\x32\x1f.feast.serving.FeastServingType\x12\x1c\n\x14job_staging_location\x18\n \x01(\t\"u\n\x11\x46\x65\x61tureSetRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0f\n\x07version\x18\x02 \x01(\x05\x12\x15\n\rfeature_names\x18\x03 \x03(\t\x12*\n\x07max_age\x18\x04 \x01(\x0b\x32\x19.google.protobuf.Duration\"\x93\x03\n\x18GetOnlineFeaturesRequest\x12\x36\n\x0c\x66\x65\x61ture_sets\x18\x01 \x03(\x0b\x32 .feast.serving.FeatureSetRequest\x12\x46\n\x0b\x65ntity_rows\x18\x02 \x03(\x0b\x32\x31.feast.serving.GetOnlineFeaturesRequest.EntityRow\x12!\n\x19omit_entities_in_response\x18\x03 \x01(\x08\x1a\xd3\x01\n\tEntityRow\x12\x34\n\x10\x65ntity_timestamp\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12M\n\x06\x66ields\x18\x02 \x03(\x0b\x32=.feast.serving.GetOnlineFeaturesRequest.EntityRow.FieldsEntry\x1a\x41\n\x0b\x46ieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.feast.types.Value:\x02\x38\x01\"\x87\x01\n\x17GetBatchFeaturesRequest\x12\x36\n\x0c\x66\x65\x61ture_sets\x18\x01 \x03(\x0b\x32 .feast.serving.FeatureSetRequest\x12\x34\n\x0e\x64\x61taset_source\x18\x02 \x01(\x0b\x32\x1c.feast.serving.DatasetSource\"\x8c\x02\n\x19GetOnlineFeaturesResponse\x12J\n\x0c\x66ield_values\x18\x01 \x03(\x0b\x32\x34.feast.serving.GetOnlineFeaturesResponse.FieldValues\x1a\xa2\x01\n\x0b\x46ieldValues\x12P\n\x06\x66ields\x18\x01 \x03(\x0b\x32@.feast.serving.GetOnlineFeaturesResponse.FieldValues.FieldsEntry\x1a\x41\n\x0b\x46ieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 
\x01(\x0b\x32\x12.feast.types.Value:\x02\x38\x01\";\n\x18GetBatchFeaturesResponse\x12\x1f\n\x03job\x18\x01 \x01(\x0b\x32\x12.feast.serving.Job\"0\n\rGetJobRequest\x12\x1f\n\x03job\x18\x01 \x01(\x0b\x32\x12.feast.serving.Job\"1\n\x0eGetJobResponse\x12\x1f\n\x03job\x18\x01 \x01(\x0b\x32\x12.feast.serving.Job\"\xb3\x01\n\x03Job\x12\n\n\x02id\x18\x01 \x01(\t\x12$\n\x04type\x18\x02 \x01(\x0e\x32\x16.feast.serving.JobType\x12(\n\x06status\x18\x03 \x01(\x0e\x32\x18.feast.serving.JobStatus\x12\r\n\x05\x65rror\x18\x04 \x01(\t\x12\x11\n\tfile_uris\x18\x05 \x03(\t\x12.\n\x0b\x64\x61ta_format\x18\x06 \x01(\x0e\x32\x19.feast.serving.DataFormat\"\xb2\x01\n\rDatasetSource\x12>\n\x0b\x66ile_source\x18\x01 \x01(\x0b\x32\'.feast.serving.DatasetSource.FileSourceH\x00\x1aO\n\nFileSource\x12\x11\n\tfile_uris\x18\x01 \x03(\t\x12.\n\x0b\x64\x61ta_format\x18\x02 \x01(\x0e\x32\x19.feast.serving.DataFormatB\x10\n\x0e\x64\x61taset_source*o\n\x10\x46\x65\x61stServingType\x12\x1e\n\x1a\x46\x45\x41ST_SERVING_TYPE_INVALID\x10\x00\x12\x1d\n\x19\x46\x45\x41ST_SERVING_TYPE_ONLINE\x10\x01\x12\x1c\n\x18\x46\x45\x41ST_SERVING_TYPE_BATCH\x10\x02*6\n\x07JobType\x12\x14\n\x10JOB_TYPE_INVALID\x10\x00\x12\x15\n\x11JOB_TYPE_DOWNLOAD\x10\x01*h\n\tJobStatus\x12\x16\n\x12JOB_STATUS_INVALID\x10\x00\x12\x16\n\x12JOB_STATUS_PENDING\x10\x01\x12\x16\n\x12JOB_STATUS_RUNNING\x10\x02\x12\x13\n\x0fJOB_STATUS_DONE\x10\x03*;\n\nDataFormat\x12\x17\n\x13\x44\x41TA_FORMAT_INVALID\x10\x00\x12\x14\n\x10\x44\x41TA_FORMAT_AVRO\x10\x01\x32\x92\x03\n\x0eServingService\x12l\n\x13GetFeastServingInfo\x12).feast.serving.GetFeastServingInfoRequest\x1a*.feast.serving.GetFeastServingInfoResponse\x12\x66\n\x11GetOnlineFeatures\x12\'.feast.serving.GetOnlineFeaturesRequest\x1a(.feast.serving.GetOnlineFeaturesResponse\x12\x63\n\x10GetBatchFeatures\x12&.feast.serving.GetBatchFeaturesRequest\x1a\'.feast.serving.GetBatchFeaturesResponse\x12\x45\n\x06GetJob\x12\x1c.feast.serving.GetJobRequest\x1a\x1d.feast.serving.GetJobResponseBT\n\rfeast.servi
ngB\x0fServingAPIProtoZ2github.com/gojek/feast/sdk/go/protos/feast/servingb\x06proto3') + serialized_pb=_b('\n\"feast/serving/ServingService.proto\x12\rfeast.serving\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1egoogle/protobuf/duration.proto\x1a\x17\x66\x65\x61st/types/Value.proto\"\x1c\n\x1aGetFeastServingInfoRequest\"{\n\x1bGetFeastServingInfoResponse\x12\x0f\n\x07version\x18\x01 \x01(\t\x12-\n\x04type\x18\x02 \x01(\x0e\x32\x1f.feast.serving.FeastServingType\x12\x1c\n\x14job_staging_location\x18\n \x01(\t\"n\n\x10\x46\x65\x61tureReference\x12\x0f\n\x07project\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0f\n\x07version\x18\x03 \x01(\x05\x12*\n\x07max_age\x18\x04 \x01(\x0b\x32\x19.google.protobuf.Duration\"\x8e\x03\n\x18GetOnlineFeaturesRequest\x12\x31\n\x08\x66\x65\x61tures\x18\x04 \x03(\x0b\x32\x1f.feast.serving.FeatureReference\x12\x46\n\x0b\x65ntity_rows\x18\x02 \x03(\x0b\x32\x31.feast.serving.GetOnlineFeaturesRequest.EntityRow\x12!\n\x19omit_entities_in_response\x18\x03 \x01(\x08\x1a\xd3\x01\n\tEntityRow\x12\x34\n\x10\x65ntity_timestamp\x18\x01 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12M\n\x06\x66ields\x18\x02 \x03(\x0b\x32=.feast.serving.GetOnlineFeaturesRequest.EntityRow.FieldsEntry\x1a\x41\n\x0b\x46ieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 \x01(\x0b\x32\x12.feast.types.Value:\x02\x38\x01\"\x82\x01\n\x17GetBatchFeaturesRequest\x12\x31\n\x08\x66\x65\x61tures\x18\x03 \x03(\x0b\x32\x1f.feast.serving.FeatureReference\x12\x34\n\x0e\x64\x61taset_source\x18\x02 \x01(\x0b\x32\x1c.feast.serving.DatasetSource\"\x8c\x02\n\x19GetOnlineFeaturesResponse\x12J\n\x0c\x66ield_values\x18\x01 \x03(\x0b\x32\x34.feast.serving.GetOnlineFeaturesResponse.FieldValues\x1a\xa2\x01\n\x0b\x46ieldValues\x12P\n\x06\x66ields\x18\x01 \x03(\x0b\x32@.feast.serving.GetOnlineFeaturesResponse.FieldValues.FieldsEntry\x1a\x41\n\x0b\x46ieldsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12!\n\x05value\x18\x02 
\x01(\x0b\x32\x12.feast.types.Value:\x02\x38\x01\";\n\x18GetBatchFeaturesResponse\x12\x1f\n\x03job\x18\x01 \x01(\x0b\x32\x12.feast.serving.Job\"0\n\rGetJobRequest\x12\x1f\n\x03job\x18\x01 \x01(\x0b\x32\x12.feast.serving.Job\"1\n\x0eGetJobResponse\x12\x1f\n\x03job\x18\x01 \x01(\x0b\x32\x12.feast.serving.Job\"\xb3\x01\n\x03Job\x12\n\n\x02id\x18\x01 \x01(\t\x12$\n\x04type\x18\x02 \x01(\x0e\x32\x16.feast.serving.JobType\x12(\n\x06status\x18\x03 \x01(\x0e\x32\x18.feast.serving.JobStatus\x12\r\n\x05\x65rror\x18\x04 \x01(\t\x12\x11\n\tfile_uris\x18\x05 \x03(\t\x12.\n\x0b\x64\x61ta_format\x18\x06 \x01(\x0e\x32\x19.feast.serving.DataFormat\"\xb2\x01\n\rDatasetSource\x12>\n\x0b\x66ile_source\x18\x01 \x01(\x0b\x32\'.feast.serving.DatasetSource.FileSourceH\x00\x1aO\n\nFileSource\x12\x11\n\tfile_uris\x18\x01 \x03(\t\x12.\n\x0b\x64\x61ta_format\x18\x02 \x01(\x0e\x32\x19.feast.serving.DataFormatB\x10\n\x0e\x64\x61taset_source*o\n\x10\x46\x65\x61stServingType\x12\x1e\n\x1a\x46\x45\x41ST_SERVING_TYPE_INVALID\x10\x00\x12\x1d\n\x19\x46\x45\x41ST_SERVING_TYPE_ONLINE\x10\x01\x12\x1c\n\x18\x46\x45\x41ST_SERVING_TYPE_BATCH\x10\x02*6\n\x07JobType\x12\x14\n\x10JOB_TYPE_INVALID\x10\x00\x12\x15\n\x11JOB_TYPE_DOWNLOAD\x10\x01*h\n\tJobStatus\x12\x16\n\x12JOB_STATUS_INVALID\x10\x00\x12\x16\n\x12JOB_STATUS_PENDING\x10\x01\x12\x16\n\x12JOB_STATUS_RUNNING\x10\x02\x12\x13\n\x0fJOB_STATUS_DONE\x10\x03*;\n\nDataFormat\x12\x17\n\x13\x44\x41TA_FORMAT_INVALID\x10\x00\x12\x14\n\x10\x44\x41TA_FORMAT_AVRO\x10\x01\x32\x92\x03\n\x0eServingService\x12l\n\x13GetFeastServingInfo\x12).feast.serving.GetFeastServingInfoRequest\x1a*.feast.serving.GetFeastServingInfoResponse\x12\x66\n\x11GetOnlineFeatures\x12\'.feast.serving.GetOnlineFeaturesRequest\x1a(.feast.serving.GetOnlineFeaturesResponse\x12\x63\n\x10GetBatchFeatures\x12&.feast.serving.GetBatchFeaturesRequest\x1a\'.feast.serving.GetBatchFeaturesResponse\x12\x45\n\x06GetJob\x12\x1c.feast.serving.GetJobRequest\x1a\x1d.feast.serving.GetJobResponseBT\n\rfeast.servi
ngB\x0fServingAPIProtoZ2github.com/gojek/feast/sdk/go/protos/feast/servingb\x06proto3') , dependencies=[google_dot_protobuf_dot_timestamp__pb2.DESCRIPTOR,google_dot_protobuf_dot_duration__pb2.DESCRIPTOR,feast_dot_types_dot_Value__pb2.DESCRIPTOR,]) @@ -49,8 +49,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=1757, - serialized_end=1868, + serialized_start=1740, + serialized_end=1851, ) _sym_db.RegisterEnumDescriptor(_FEASTSERVINGTYPE) @@ -72,8 +72,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=1870, - serialized_end=1924, + serialized_start=1853, + serialized_end=1907, ) _sym_db.RegisterEnumDescriptor(_JOBTYPE) @@ -103,8 +103,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=1926, - serialized_end=2030, + serialized_start=1909, + serialized_end=2013, ) _sym_db.RegisterEnumDescriptor(_JOBSTATUS) @@ -126,8 +126,8 @@ ], containing_type=None, serialized_options=None, - serialized_start=2032, - serialized_end=2091, + serialized_start=2015, + serialized_end=2074, ) _sym_db.RegisterEnumDescriptor(_DATAFORMAT) @@ -215,36 +215,36 @@ ) -_FEATURESETREQUEST = _descriptor.Descriptor( - name='FeatureSetRequest', - full_name='feast.serving.FeatureSetRequest', +_FEATUREREFERENCE = _descriptor.Descriptor( + name='FeatureReference', + full_name='feast.serving.FeatureReference', filename=None, file=DESCRIPTOR, containing_type=None, fields=[ _descriptor.FieldDescriptor( - name='name', full_name='feast.serving.FeatureSetRequest.name', index=0, + name='project', full_name='feast.serving.FeatureReference.project', index=0, number=1, type=9, cpp_type=9, label=1, has_default_value=False, default_value=_b("").decode('utf-8'), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR), _descriptor.FieldDescriptor( - name='version', full_name='feast.serving.FeatureSetRequest.version', index=1, - number=2, type=5, cpp_type=1, label=1, - 
has_default_value=False, default_value=0, + name='name', full_name='feast.serving.FeatureReference.name', index=1, + number=2, type=9, cpp_type=9, label=1, + has_default_value=False, default_value=_b("").decode('utf-8'), message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR), _descriptor.FieldDescriptor( - name='feature_names', full_name='feast.serving.FeatureSetRequest.feature_names', index=2, - number=3, type=9, cpp_type=9, label=3, - has_default_value=False, default_value=[], + name='version', full_name='feast.serving.FeatureReference.version', index=2, + number=3, type=5, cpp_type=1, label=1, + has_default_value=False, default_value=0, message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, serialized_options=None, file=DESCRIPTOR), _descriptor.FieldDescriptor( - name='max_age', full_name='feast.serving.FeatureSetRequest.max_age', index=3, + name='max_age', full_name='feast.serving.FeatureReference.max_age', index=3, number=4, type=11, cpp_type=10, label=1, has_default_value=False, default_value=None, message_type=None, enum_type=None, containing_type=None, @@ -263,7 +263,7 @@ oneofs=[ ], serialized_start=298, - serialized_end=415, + serialized_end=408, ) @@ -300,8 +300,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=756, - serialized_end=821, + serialized_start=744, + serialized_end=809, ) _GETONLINEFEATURESREQUEST_ENTITYROW = _descriptor.Descriptor( @@ -337,8 +337,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=610, - serialized_end=821, + serialized_start=598, + serialized_end=809, ) _GETONLINEFEATURESREQUEST = _descriptor.Descriptor( @@ -349,8 +349,8 @@ containing_type=None, fields=[ _descriptor.FieldDescriptor( - name='feature_sets', full_name='feast.serving.GetOnlineFeaturesRequest.feature_sets', index=0, - number=1, type=11, cpp_type=10, label=3, + name='features', 
full_name='feast.serving.GetOnlineFeaturesRequest.features', index=0, + number=4, type=11, cpp_type=10, label=3, has_default_value=False, default_value=[], message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, @@ -381,8 +381,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=418, - serialized_end=821, + serialized_start=411, + serialized_end=809, ) @@ -394,8 +394,8 @@ containing_type=None, fields=[ _descriptor.FieldDescriptor( - name='feature_sets', full_name='feast.serving.GetBatchFeaturesRequest.feature_sets', index=0, - number=1, type=11, cpp_type=10, label=3, + name='features', full_name='feast.serving.GetBatchFeaturesRequest.features', index=0, + number=3, type=11, cpp_type=10, label=3, has_default_value=False, default_value=[], message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, @@ -419,8 +419,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=824, - serialized_end=959, + serialized_start=812, + serialized_end=942, ) @@ -457,8 +457,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=756, - serialized_end=821, + serialized_start=744, + serialized_end=809, ) _GETONLINEFEATURESRESPONSE_FIELDVALUES = _descriptor.Descriptor( @@ -487,8 +487,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=1068, - serialized_end=1230, + serialized_start=1051, + serialized_end=1213, ) _GETONLINEFEATURESRESPONSE = _descriptor.Descriptor( @@ -517,8 +517,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=962, - serialized_end=1230, + serialized_start=945, + serialized_end=1213, ) @@ -548,8 +548,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=1232, - serialized_end=1291, + serialized_start=1215, + serialized_end=1274, ) @@ -579,8 +579,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=1293, - serialized_end=1341, + serialized_start=1276, + serialized_end=1324, ) @@ -610,8 +610,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=1343, - 
serialized_end=1392, + serialized_start=1326, + serialized_end=1375, ) @@ -676,8 +676,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=1395, - serialized_end=1574, + serialized_start=1378, + serialized_end=1557, ) @@ -714,8 +714,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=1658, - serialized_end=1737, + serialized_start=1641, + serialized_end=1720, ) _DATASETSOURCE = _descriptor.Descriptor( @@ -747,20 +747,20 @@ name='dataset_source', full_name='feast.serving.DatasetSource.dataset_source', index=0, containing_type=None, fields=[]), ], - serialized_start=1577, - serialized_end=1755, + serialized_start=1560, + serialized_end=1738, ) _GETFEASTSERVINGINFORESPONSE.fields_by_name['type'].enum_type = _FEASTSERVINGTYPE -_FEATURESETREQUEST.fields_by_name['max_age'].message_type = google_dot_protobuf_dot_duration__pb2._DURATION +_FEATUREREFERENCE.fields_by_name['max_age'].message_type = google_dot_protobuf_dot_duration__pb2._DURATION _GETONLINEFEATURESREQUEST_ENTITYROW_FIELDSENTRY.fields_by_name['value'].message_type = feast_dot_types_dot_Value__pb2._VALUE _GETONLINEFEATURESREQUEST_ENTITYROW_FIELDSENTRY.containing_type = _GETONLINEFEATURESREQUEST_ENTITYROW _GETONLINEFEATURESREQUEST_ENTITYROW.fields_by_name['entity_timestamp'].message_type = google_dot_protobuf_dot_timestamp__pb2._TIMESTAMP _GETONLINEFEATURESREQUEST_ENTITYROW.fields_by_name['fields'].message_type = _GETONLINEFEATURESREQUEST_ENTITYROW_FIELDSENTRY _GETONLINEFEATURESREQUEST_ENTITYROW.containing_type = _GETONLINEFEATURESREQUEST -_GETONLINEFEATURESREQUEST.fields_by_name['feature_sets'].message_type = _FEATURESETREQUEST +_GETONLINEFEATURESREQUEST.fields_by_name['features'].message_type = _FEATUREREFERENCE _GETONLINEFEATURESREQUEST.fields_by_name['entity_rows'].message_type = _GETONLINEFEATURESREQUEST_ENTITYROW -_GETBATCHFEATURESREQUEST.fields_by_name['feature_sets'].message_type = _FEATURESETREQUEST +_GETBATCHFEATURESREQUEST.fields_by_name['features'].message_type = _FEATUREREFERENCE 
_GETBATCHFEATURESREQUEST.fields_by_name['dataset_source'].message_type = _DATASETSOURCE _GETONLINEFEATURESRESPONSE_FIELDVALUES_FIELDSENTRY.fields_by_name['value'].message_type = feast_dot_types_dot_Value__pb2._VALUE _GETONLINEFEATURESRESPONSE_FIELDVALUES_FIELDSENTRY.containing_type = _GETONLINEFEATURESRESPONSE_FIELDVALUES @@ -781,7 +781,7 @@ _DATASETSOURCE.fields_by_name['file_source'].containing_oneof = _DATASETSOURCE.oneofs_by_name['dataset_source'] DESCRIPTOR.message_types_by_name['GetFeastServingInfoRequest'] = _GETFEASTSERVINGINFOREQUEST DESCRIPTOR.message_types_by_name['GetFeastServingInfoResponse'] = _GETFEASTSERVINGINFORESPONSE -DESCRIPTOR.message_types_by_name['FeatureSetRequest'] = _FEATURESETREQUEST +DESCRIPTOR.message_types_by_name['FeatureReference'] = _FEATUREREFERENCE DESCRIPTOR.message_types_by_name['GetOnlineFeaturesRequest'] = _GETONLINEFEATURESREQUEST DESCRIPTOR.message_types_by_name['GetBatchFeaturesRequest'] = _GETBATCHFEATURESREQUEST DESCRIPTOR.message_types_by_name['GetOnlineFeaturesResponse'] = _GETONLINEFEATURESRESPONSE @@ -810,12 +810,12 @@ }) _sym_db.RegisterMessage(GetFeastServingInfoResponse) -FeatureSetRequest = _reflection.GeneratedProtocolMessageType('FeatureSetRequest', (_message.Message,), { - 'DESCRIPTOR' : _FEATURESETREQUEST, +FeatureReference = _reflection.GeneratedProtocolMessageType('FeatureReference', (_message.Message,), { + 'DESCRIPTOR' : _FEATUREREFERENCE, '__module__' : 'feast.serving.ServingService_pb2' - # @@protoc_insertion_point(class_scope:feast.serving.FeatureSetRequest) + # @@protoc_insertion_point(class_scope:feast.serving.FeatureReference) }) -_sym_db.RegisterMessage(FeatureSetRequest) +_sym_db.RegisterMessage(FeatureReference) GetOnlineFeaturesRequest = _reflection.GeneratedProtocolMessageType('GetOnlineFeaturesRequest', (_message.Message,), { @@ -924,8 +924,8 @@ file=DESCRIPTOR, index=0, serialized_options=None, - serialized_start=2094, - serialized_end=2496, + serialized_start=2077, + serialized_end=2479, 
methods=[ _descriptor.MethodDescriptor( name='GetFeastServingInfo', diff --git a/sdk/python/feast/serving/ServingService_pb2.pyi b/sdk/python/feast/serving/ServingService_pb2.pyi index d03fa6568fa..e10245d6c7a 100644 --- a/sdk/python/feast/serving/ServingService_pb2.pyi +++ b/sdk/python/feast/serving/ServingService_pb2.pyi @@ -147,32 +147,32 @@ class GetFeastServingInfoResponse(google___protobuf___message___Message): else: def ClearField(self, field_name: typing_extensions___Literal[u"job_staging_location",b"job_staging_location",u"type",b"type",u"version",b"version"]) -> None: ... -class FeatureSetRequest(google___protobuf___message___Message): +class FeatureReference(google___protobuf___message___Message): DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... + project = ... # type: typing___Text name = ... # type: typing___Text version = ... # type: int - feature_names = ... # type: google___protobuf___internal___containers___RepeatedScalarFieldContainer[typing___Text] @property def max_age(self) -> google___protobuf___duration_pb2___Duration: ... def __init__(self, *, + project : typing___Optional[typing___Text] = None, name : typing___Optional[typing___Text] = None, version : typing___Optional[int] = None, - feature_names : typing___Optional[typing___Iterable[typing___Text]] = None, max_age : typing___Optional[google___protobuf___duration_pb2___Duration] = None, ) -> None: ... @classmethod - def FromString(cls, s: bytes) -> FeatureSetRequest: ... + def FromString(cls, s: bytes) -> FeatureReference: ... def MergeFrom(self, other_msg: google___protobuf___message___Message) -> None: ... def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ... if sys.version_info >= (3,): def HasField(self, field_name: typing_extensions___Literal[u"max_age"]) -> bool: ... - def ClearField(self, field_name: typing_extensions___Literal[u"feature_names",u"max_age",u"name",u"version"]) -> None: ... 
+ def ClearField(self, field_name: typing_extensions___Literal[u"max_age",u"name",u"project",u"version"]) -> None: ... else: def HasField(self, field_name: typing_extensions___Literal[u"max_age",b"max_age"]) -> bool: ... - def ClearField(self, field_name: typing_extensions___Literal[u"feature_names",b"feature_names",u"max_age",b"max_age",u"name",b"name",u"version",b"version"]) -> None: ... + def ClearField(self, field_name: typing_extensions___Literal[u"max_age",b"max_age",u"name",b"name",u"project",b"project",u"version",b"version"]) -> None: ... class GetOnlineFeaturesRequest(google___protobuf___message___Message): DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... @@ -227,14 +227,14 @@ class GetOnlineFeaturesRequest(google___protobuf___message___Message): omit_entities_in_response = ... # type: bool @property - def feature_sets(self) -> google___protobuf___internal___containers___RepeatedCompositeFieldContainer[FeatureSetRequest]: ... + def features(self) -> google___protobuf___internal___containers___RepeatedCompositeFieldContainer[FeatureReference]: ... @property def entity_rows(self) -> google___protobuf___internal___containers___RepeatedCompositeFieldContainer[GetOnlineFeaturesRequest.EntityRow]: ... def __init__(self, *, - feature_sets : typing___Optional[typing___Iterable[FeatureSetRequest]] = None, + features : typing___Optional[typing___Iterable[FeatureReference]] = None, entity_rows : typing___Optional[typing___Iterable[GetOnlineFeaturesRequest.EntityRow]] = None, omit_entities_in_response : typing___Optional[bool] = None, ) -> None: ... @@ -243,22 +243,22 @@ class GetOnlineFeaturesRequest(google___protobuf___message___Message): def MergeFrom(self, other_msg: google___protobuf___message___Message) -> None: ... def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ... 
if sys.version_info >= (3,): - def ClearField(self, field_name: typing_extensions___Literal[u"entity_rows",u"feature_sets",u"omit_entities_in_response"]) -> None: ... + def ClearField(self, field_name: typing_extensions___Literal[u"entity_rows",u"features",u"omit_entities_in_response"]) -> None: ... else: - def ClearField(self, field_name: typing_extensions___Literal[u"entity_rows",b"entity_rows",u"feature_sets",b"feature_sets",u"omit_entities_in_response",b"omit_entities_in_response"]) -> None: ... + def ClearField(self, field_name: typing_extensions___Literal[u"entity_rows",b"entity_rows",u"features",b"features",u"omit_entities_in_response",b"omit_entities_in_response"]) -> None: ... class GetBatchFeaturesRequest(google___protobuf___message___Message): DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... @property - def feature_sets(self) -> google___protobuf___internal___containers___RepeatedCompositeFieldContainer[FeatureSetRequest]: ... + def features(self) -> google___protobuf___internal___containers___RepeatedCompositeFieldContainer[FeatureReference]: ... @property def dataset_source(self) -> DatasetSource: ... def __init__(self, *, - feature_sets : typing___Optional[typing___Iterable[FeatureSetRequest]] = None, + features : typing___Optional[typing___Iterable[FeatureReference]] = None, dataset_source : typing___Optional[DatasetSource] = None, ) -> None: ... @classmethod @@ -267,10 +267,10 @@ class GetBatchFeaturesRequest(google___protobuf___message___Message): def CopyFrom(self, other_msg: google___protobuf___message___Message) -> None: ... if sys.version_info >= (3,): def HasField(self, field_name: typing_extensions___Literal[u"dataset_source"]) -> bool: ... - def ClearField(self, field_name: typing_extensions___Literal[u"dataset_source",u"feature_sets"]) -> None: ... + def ClearField(self, field_name: typing_extensions___Literal[u"dataset_source",u"features"]) -> None: ... 
else: def HasField(self, field_name: typing_extensions___Literal[u"dataset_source",b"dataset_source"]) -> bool: ... - def ClearField(self, field_name: typing_extensions___Literal[u"dataset_source",b"dataset_source",u"feature_sets",b"feature_sets"]) -> None: ... + def ClearField(self, field_name: typing_extensions___Literal[u"dataset_source",b"dataset_source",u"features",b"features"]) -> None: ... class GetOnlineFeaturesResponse(google___protobuf___message___Message): DESCRIPTOR: google___protobuf___descriptor___Descriptor = ... diff --git a/sdk/python/feast/type_map.py b/sdk/python/feast/type_map.py index ca13c2573bc..af019c3fdbd 100644 --- a/sdk/python/feast/type_map.py +++ b/sdk/python/feast/type_map.py @@ -19,10 +19,7 @@ import pandas as pd import pyarrow as pa from feast.constants import DATETIME_COLUMN -from feast.types import ( - FeatureRow_pb2 as FeatureRowProto, - Field_pb2 as FieldProto, -) +from feast.types import FeatureRow_pb2 as FeatureRowProto, Field_pb2 as FieldProto from feast.types.Value_pb2 import ( Value as ProtoValue, ValueType as ProtoValueType, @@ -168,7 +165,7 @@ def convert_series_to_proto_values(row: pd.Series): def convert_dict_to_proto_values( - row: dict, df_datetime_dtype: pd.DataFrame.dtypes, feature_set + row: dict, df_datetime_dtype: pd.DataFrame.dtypes, feature_set ) -> FeatureRowProto.FeatureRow: """ Encode a dictionary describing a feature row into a FeatureRows object. 
@@ -186,7 +183,11 @@ def convert_dict_to_proto_values( event_timestamp=_pd_datetime_to_timestamp_proto( df_datetime_dtype, row[DATETIME_COLUMN] ), - feature_set=feature_set.name + ":" + str(feature_set.version), + feature_set=feature_set.project + + "/" + + feature_set.name + + ":" + + str(feature_set.version), ) for field_name, field in feature_set.fields.items(): @@ -219,14 +220,12 @@ def _pd_datetime_to_timestamp_proto(dtype, value) -> Timestamp: # If timestamp does not contain timezone, we assume it is of local # timezone and adjust it to UTC local_timezone = datetime.now(timezone.utc).astimezone().tzinfo - value = value.tz_localize(local_timezone).tz_convert("UTC").tz_localize( - None) + value = value.tz_localize(local_timezone).tz_convert("UTC").tz_localize(None) return Timestamp(seconds=int(value.timestamp())) if dtype.__str__() == "datetime64[ns, UTC]": return Timestamp(seconds=int(value.timestamp())) else: - return Timestamp( - seconds=np.datetime64(value).astype("int64") // 1000000) + return Timestamp(seconds=np.datetime64(value).astype("int64") // 1000000) def _type_err(item, dtype): @@ -355,6 +354,7 @@ def _python_value_to_proto_value(feast_value_type, value) -> ProtoValue: raise Exception(f"Unsupported data type: ${str(type(value))}") + def pa_to_feast_value_attr(pa_type: object): """ Returns the equivalent Feast ValueType string for the given pa.lib type. 
@@ -424,9 +424,7 @@ def pa_to_value_type(pa_type: object): return type_map[pa_type.__str__()] -def pa_to_feast_value_type( - value: object -) -> ValueType: +def pa_to_feast_value_type(value: object) -> ValueType: type_map = { "timestamp[ms]": ValueType.INT64, "int32": ValueType.INT32, @@ -447,46 +445,45 @@ def pa_to_feast_value_type( return type_map[value.type.__str__()] -def pa_column_to_timestamp_proto_column( - column: pa.lib.ChunkedArray -) -> Timestamp: +def pa_column_to_timestamp_proto_column(column: pa.lib.ChunkedArray) -> Timestamp: if not isinstance(column.type, TimestampType): raise Exception("Only TimestampType columns are allowed") proto_column = [] for val in column: timestamp = Timestamp() - timestamp.FromMicroseconds( - micros=int(val.as_py().timestamp() * 1_000_000)) + timestamp.FromMicroseconds(micros=int(val.as_py().timestamp() * 1_000_000)) proto_column.append(timestamp) return proto_column def pa_column_to_proto_column( - feast_value_type, - column: pa.lib.ChunkedArray + feast_value_type, column: pa.lib.ChunkedArray ) -> List[ProtoValue]: - type_map = {ValueType.INT32: "int32_val", - ValueType.INT64: "int64_val", - ValueType.FLOAT: "float_val", - ValueType.DOUBLE: "double_val", - ValueType.STRING: "string_val", - ValueType.BYTES: "bytes_val", - ValueType.BOOL: "bool_val", - ValueType.BOOL_LIST: {"bool_list_val": BoolList}, - ValueType.BYTES_LIST: {"bytes_list_val": BytesList}, - ValueType.STRING_LIST: {"string_list_val": StringList}, - ValueType.FLOAT_LIST: {"float_list_val": FloatList}, - ValueType.DOUBLE_LIST: {"double_list_val": DoubleList}, - ValueType.INT32_LIST: {"int32_list_val": Int32List}, - ValueType.INT64_LIST: {"int64_list_val": Int64List}, } + type_map = { + ValueType.INT32: "int32_val", + ValueType.INT64: "int64_val", + ValueType.FLOAT: "float_val", + ValueType.DOUBLE: "double_val", + ValueType.STRING: "string_val", + ValueType.BYTES: "bytes_val", + ValueType.BOOL: "bool_val", + ValueType.BOOL_LIST: {"bool_list_val": BoolList}, + 
ValueType.BYTES_LIST: {"bytes_list_val": BytesList}, + ValueType.STRING_LIST: {"string_list_val": StringList}, + ValueType.FLOAT_LIST: {"float_list_val": FloatList}, + ValueType.DOUBLE_LIST: {"double_list_val": DoubleList}, + ValueType.INT32_LIST: {"int32_list_val": Int32List}, + ValueType.INT64_LIST: {"int64_list_val": Int64List}, + } value = type_map[feast_value_type] # Process list types if type(value) == dict: list_param_name = list(value.keys())[0] - return [ProtoValue( - **{list_param_name: value[list_param_name](val=x.as_py())}) - for x in column] + return [ + ProtoValue(**{list_param_name: value[list_param_name](val=x.as_py())}) + for x in column + ] else: return [ProtoValue(**{value: x.as_py()}) for x in column] diff --git a/sdk/python/feast/types/__init__.py b/sdk/python/feast/types/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/sdk/python/setup.py b/sdk/python/setup.py index 9ac7225e80e..075ce500ac8 100644 --- a/sdk/python/setup.py +++ b/sdk/python/setup.py @@ -44,7 +44,7 @@ "pyarrow>=0.15.1", "numpy", "google", - "confluent_kafka" + "confluent_kafka", ] # README file from Feast repo root directory diff --git a/sdk/python/tests/fake_kafka.py b/sdk/python/tests/fake_kafka.py deleted file mode 100644 index c511ed1d278..00000000000 --- a/sdk/python/tests/fake_kafka.py +++ /dev/null @@ -1,21 +0,0 @@ -import queue -from typing import Dict - - -class FakeKafka: - def __init__(self): - self._messages = dict() # type: Dict[str, queue.Queue] - - def send(self, topic, message): - if topic not in self._messages: - self._messages[topic] = queue.Queue() - self._messages[topic].queue.append(message) - - def get(self, topic: str): - message = None - if self._messages[topic]: - message = self._messages[topic].get(block=False) - return message - - def flush(self, timeout): - return True diff --git a/sdk/python/tests/feast_core_server.py b/sdk/python/tests/feast_core_server.py index 61688f65044..7cc837a4f2d 100644 --- 
a/sdk/python/tests/feast_core_server.py +++ b/sdk/python/tests/feast_core_server.py @@ -42,11 +42,13 @@ def ListFeatureSets(self, request: ListFeatureSetsRequest, context): for fs in list(self._feature_sets.values()) if ( not request.filter.feature_set_name - or fs.name == request.filter.feature_set_name + or request.filter.feature_set_name == "*" + or fs.spec.name == request.filter.feature_set_name ) and ( not request.filter.feature_set_version - or str(fs.version) == request.filter.feature_set_version + or str(fs.spec.version) == request.filter.feature_set_version + or request.filter.feature_set_version == "*" ) ] @@ -54,7 +56,6 @@ def ListFeatureSets(self, request: ListFeatureSetsRequest, context): def ApplyFeatureSet(self, request: ApplyFeatureSetRequest, context): feature_set = request.feature_set - if feature_set.spec.version is None: feature_set.spec.version = 1 else: @@ -70,7 +71,9 @@ def ApplyFeatureSet(self, request: ApplyFeatureSetRequest, context): status=FeatureSetStatus.STATUS_READY, created_timestamp=Timestamp(seconds=10), ) - applied_feature_set = FeatureSetProto(spec=feature_set.spec, meta=feature_set_meta) + applied_feature_set = FeatureSetProto( + spec=feature_set.spec, meta=feature_set_meta + ) self._feature_sets[feature_set.spec.name] = applied_feature_set _logger.info( diff --git a/sdk/python/tests/feast_serving_server.py b/sdk/python/tests/feast_serving_server.py index 6c202b7d8ca..eb46bde1215 100644 --- a/sdk/python/tests/feast_serving_server.py +++ b/sdk/python/tests/feast_serving_server.py @@ -3,25 +3,16 @@ import logging import grpc -import threading import feast.serving.ServingService_pb2_grpc as Serving from feast.serving.ServingService_pb2 import ( - GetBatchFeaturesResponse, GetOnlineFeaturesRequest, GetOnlineFeaturesResponse, GetFeastServingInfoResponse, ) -import fake_kafka from typing import Dict -import sqlite3 from feast.core.CoreService_pb2_grpc import CoreServiceStub -from feast.core.CoreService_pb2 import ( - 
ListFeatureSetsResponse, - ListStoresRequest, - ListStoresResponse, -) +from feast.core.CoreService_pb2 import ListFeatureSetsResponse from feast.core import FeatureSet_pb2 as FeatureSetProto -import stores from feast.types import ( FeatureRow_pb2 as FeatureRowProto, Field_pb2 as FieldProto, @@ -33,19 +24,13 @@ class ServingServicer(Serving.ServingServiceServicer): - def __init__(self, kafka: fake_kafka = None, core_url: str = None): - if kafka and core_url: + def __init__(self, core_url: str = None): + if core_url: self.__core_channel = None self.__connect_core(core_url) self._feature_sets = ( dict() ) # type: Dict[str, FeatureSetProto.FeatureSetSpec] - self._kafka = kafka - self._store = stores.SQLiteDatabase() - - thread = threading.Thread(target=self.__consume, args=()) - thread.daemon = True - thread.start() def __connect_core(self, core_url: str): if not core_url: @@ -74,24 +59,6 @@ def __get_feature_sets_from_core(self): for feature_set in list(feature_sets.feature_sets): self._feature_sets[feature_set.name] = feature_set - def __consume(self): - """ - Consume message in the background from Fake Kafka - """ - while True: - self.__get_feature_sets_from_core() - self.__register_feature_sets_with_store() - for feature_set in list(self._feature_sets.values()): - message = self._kafka.get(feature_set.source.kafka_source_config.topic) - if message is None: - break - self._store.upsert_feature_row(feature_set, message) - time.sleep(1) - - def __register_feature_sets_with_store(self): - for feature_set in list(self._feature_sets.values()): - self._store.register_feature_set(feature_set) - def GetFeastServingVersion(self, request, context): return GetFeastServingInfoResponse(version="0.3.2") diff --git a/sdk/python/tests/stores.py b/sdk/python/tests/stores.py deleted file mode 100644 index e511cb1db19..00000000000 --- a/sdk/python/tests/stores.py +++ /dev/null @@ -1,98 +0,0 @@ -from feast.types import FeatureRow_pb2 as FeatureRowProto -from feast.core import 
FeatureSet_pb2 as FeatureSetProto -import sqlite3 -from typing import Dict, List -from feast.entity import Entity -from feast.value_type import ValueType -from feast.feature_set import FeatureSet, Feature - -from feast.types import ( - FeatureRow_pb2 as FeatureRowProto, - Field_pb2 as FieldProto, - Value_pb2 as ValueProto, -) -from google.protobuf.timestamp_pb2 import Timestamp - - -class Database: - pass - - -class SQLiteDatabase(Database): - def __init__(self): - self._conn = sqlite3.connect(":memory:") - self._c = self._conn.cursor() - - def register_feature_set(self, feature_set: FeatureSetProto.FeatureSetSpec): - query = build_sqlite_create_table_query(feature_set) - print(query) - self._c.execute(query) - self._c.execute("SELECT name FROM sqlite_master WHERE type='table';") - - available_table = self._c.fetchall() - print(available_table) - - def upsert_feature_row( - self, - feature_set: FeatureSetProto.FeatureSetSpec, - feature_row: FeatureRowProto.FeatureRow, - ): - values = (feature_row.event_timestamp,) - for entity in list(feature_set.entities): - values = values + (get_feature_row_value_by_name(feature_row, entity.name),) - values = values + (feature_row.SerializeToString(),) - self._c.execute(build_sqlite_insert_feature_row_query(feature_set), values) - - -def build_sqlite_create_table_query(feature_set: FeatureSetProto.FeatureSetSpec): - query = ( - """ - CREATE TABLE IF NOT EXISTS {} ( - {} - PRIMARY KEY ({}) - ); - """ - ).format( - get_table_name(feature_set), - " ".join([column + " text NOT NULL," for column in get_columns(feature_set)]), - ", ".join( - get_columns(feature_set)[1:] - ), # exclude event_timestamp column for online stores - ) - # Hyphens become three underscores - query = query.replace("-", "___") - return query - - -def build_sqlite_insert_feature_row_query(feature_set: FeatureSetProto.FeatureSetSpec): - return """ - INSERT OR REPLACE INTO {} ({}) - VALUES(?,?,?,?,?,?) 
- """.format( - get_table_name(feature_set), ",".join(get_columns(feature_set)) - ) - - -def get_columns(feature_set: FeatureSetProto.FeatureSetSpec) -> List[str]: - return ( - ["event_timestamp"] - + [field.name for field in list(feature_set.entities)] - + ["value"] - ) - - -def get_feature_row_value_by_name(feature_row, name): - values = [field.value for field in list(feature_row.fields) if field.name == name] - if len(values) != 1: - raise Exception( - "Invalid number of features with name {} in feature row {}".format( - name, feature_row.name - ) - ) - return values[0] - - -def get_table_name(feature_set: FeatureSetProto.FeatureSetSpec) -> str: - if not feature_set.name and not feature_set.version: - raise ValueError("Feature set name or version is missing") - return (feature_set.name + "_" + str(feature_set.version)).replace("-", "___") diff --git a/sdk/python/tests/test_client.py b/sdk/python/tests/test_client.py index c5a98485fef..123cbe47fd6 100644 --- a/sdk/python/tests/test_client.py +++ b/sdk/python/tests/test_client.py @@ -27,19 +27,17 @@ from feast.feature_set import Feature from feast.source import KafkaSource from feast.core.FeatureSet_pb2 import ( - FeatureSetSpec, - FeatureSpec, - EntitySpec, - FeatureSetMeta, - FeatureSetStatus, + FeatureSetSpec as FeatureSetSpecProto, + FeatureSpec as FeatureSpecProto, + EntitySpec as EntitySpecProto, + FeatureSetMeta as FeatureSetMetaProto, + FeatureSetStatus as FeatureSetStatusProto, + FeatureSet as FeatureSetProto, ) -from feast.core.FeatureSet_pb2 import FeatureSet as FeatureSetProto from feast.core.Source_pb2 import SourceType, KafkaSourceConfig, Source from feast.core.CoreService_pb2 import ( GetFeastCoreVersionResponse, - ListFeatureSetsResponse, GetFeatureSetResponse, - GetFeatureSetRequest, ) from feast.serving.ServingService_pb2 import ( GetFeastServingInfoResponse, @@ -134,7 +132,7 @@ def test_get_online_features(self, mock_client, mocker): fields = dict() for feature_num in range(1, 10): - 
fields["feature_set_1:1:feature_" + str(feature_num)] = ValueProto.Value( + fields[f"my_project/feature_{str(feature_num)}:1"] = ValueProto.Value( int64_val=feature_num ) field_values = GetOnlineFeaturesResponse.FieldValues(fields=fields) @@ -157,23 +155,22 @@ def test_get_online_features(self, mock_client, mocker): response = mock_client.get_online_features( entity_rows=entity_rows, - feature_ids=[ - "feature_set_1:1:feature_1", - "feature_set_1:1:feature_2", - "feature_set_1:1:feature_3", - "feature_set_1:1:feature_4", - "feature_set_1:1:feature_5", - "feature_set_1:1:feature_6", - "feature_set_1:1:feature_7", - "feature_set_1:1:feature_8", - "feature_set_1:1:feature_9", + feature_refs=[ + "my_project/feature_1:1", + "my_project/feature_2:1", + "my_project/feature_3:1", + "my_project/feature_4:1", + "my_project/feature_5:1", + "my_project/feature_6:1", + "my_project/feature_7:1", + "my_project/feature_8:1", + "my_project/feature_9:1", ], ) # type: GetOnlineFeaturesResponse assert ( - response.field_values[0].fields["feature_set_1:1:feature_1"].int64_val == 1 - and response.field_values[0].fields["feature_set_1:1:feature_9"].int64_val - == 9 + response.field_values[0].fields["my_project/feature_1:1"].int64_val == 1 + and response.field_values[0].fields["my_project/feature_9:1"].int64_val == 9 ) def test_get_feature_set(self, mock_client, mocker): @@ -186,22 +183,22 @@ def test_get_feature_set(self, mock_client, mocker): "GetFeatureSet", return_value=GetFeatureSetResponse( feature_set=FeatureSetProto( - spec=FeatureSetSpec( + spec=FeatureSetSpecProto( name="my_feature_set", version=2, max_age=Duration(seconds=3600), features=[ - FeatureSpec( + FeatureSpecProto( name="my_feature_1", value_type=ValueProto.ValueType.FLOAT, ), - FeatureSpec( + FeatureSpecProto( name="my_feature_2", value_type=ValueProto.ValueType.FLOAT, ), ], entities=[ - EntitySpec( + EntitySpecProto( name="my_entity_1", value_type=ValueProto.ValueType.INT64, ) @@ -212,11 +209,12 @@ def 
test_get_feature_set(self, mock_client, mocker): bootstrap_servers="localhost:9092", topic="topic" ), ), - ) + ), + meta=FeatureSetMetaProto(), ) ), ) - + mock_client.set_project("my_project") feature_set = mock_client.get_feature_set("my_feature_set", version=2) assert ( @@ -242,30 +240,31 @@ def test_get_batch_features(self, mock_client, mocker): "GetFeatureSet", return_value=GetFeatureSetResponse( feature_set=FeatureSetProto( - spec=FeatureSetSpec( + spec=FeatureSetSpecProto( name="customer_fs", version=1, + project="my_project", entities=[ - EntitySpec( + EntitySpecProto( name="customer", value_type=ValueProto.ValueType.INT64 ), - EntitySpec( + EntitySpecProto( name="transaction", value_type=ValueProto.ValueType.INT64, ), ], features=[ - FeatureSpec( + FeatureSpecProto( name="customer_feature_1", value_type=ValueProto.ValueType.FLOAT, ), - FeatureSpec( + FeatureSpecProto( name="customer_feature_2", value_type=ValueProto.ValueType.STRING, ), ], ), - meta=FeatureSetMeta(status=FeatureSetStatus.STATUS_READY), + meta=FeatureSetMetaProto(status=FeatureSetStatusProto.STATUS_READY), ) ), ) @@ -275,8 +274,8 @@ def test_get_batch_features(self, mock_client, mocker): "datetime": [datetime.utcnow() for _ in range(3)], "customer": [1001, 1002, 1003], "transaction": [1001, 1002, 1003], - "customer_fs:1:customer_feature_1": [1001, 1002, 1003], - "customer_fs:1:customer_feature_2": [1001, 1002, 1003], + "my_project/customer_feature_1:1": [1001, 1002, 1003], + "my_project/customer_feature_2:1": [1001, 1002, 1003], } ) @@ -320,6 +319,7 @@ def test_get_batch_features(self, mock_client, mocker): ), ) + mock_client.set_project("project1") response = mock_client.get_batch_features( entity_rows=pd.DataFrame( { @@ -330,9 +330,9 @@ def test_get_batch_features(self, mock_client, mocker): "transaction": [1001, 1002, 1003], } ), - feature_ids=[ - "customer_fs:1:customer_feature_1", - "customer_fs:1:customer_feature_2", + feature_refs=[ + "my_project/customer_feature_1:1", + 
"my_project/customer_feature_2:1", ], ) # type: Job @@ -341,15 +341,17 @@ def test_get_batch_features(self, mock_client, mocker): actual_dataframe = response.to_dataframe() assert actual_dataframe[ - ["customer_fs:1:customer_feature_1", "customer_fs:1:customer_feature_2"] + ["my_project/customer_feature_1:1", "my_project/customer_feature_2:1"] ].equals( expected_dataframe[ - ["customer_fs:1:customer_feature_1", "customer_fs:1:customer_feature_2"] + ["my_project/customer_feature_1:1", "my_project/customer_feature_2:1"] ] ) def test_apply_feature_set_success(self, client): + client.set_project("project1") + # Create Feature Sets fs1 = FeatureSet("my-feature-set-1") fs1.add(Feature(name="fs1-my-feature-1", dtype=ValueType.INT64)) @@ -378,7 +380,7 @@ def test_apply_feature_set_success(self, client): @pytest.mark.parametrize("dataframe", [dataframes.GOOD]) def test_feature_set_ingest_success(self, dataframe, client, mocker): - + client.set_project("project1") driver_fs = FeatureSet( "driver-feature-set", source=KafkaSource(brokers="kafka:9092", topic="test") ) @@ -390,7 +392,7 @@ def test_feature_set_ingest_success(self, dataframe, client, mocker): # Register with Feast core client.apply(driver_fs) driver_fs = driver_fs.to_proto() - driver_fs.meta.status = FeatureSetStatus.STATUS_READY + driver_fs.meta.status = FeatureSetStatusProto.STATUS_READY mocker.patch.object( client._core_service_stub, @@ -408,6 +410,7 @@ def test_feature_set_ingest_fail_if_pending( self, dataframe, exception, client, mocker ): with pytest.raises(exception): + client.set_project("project1") driver_fs = FeatureSet( "driver-feature-set", source=KafkaSource(brokers="kafka:9092", topic="test"), @@ -420,7 +423,7 @@ def test_feature_set_ingest_fail_if_pending( # Register with Feast core client.apply(driver_fs) driver_fs = driver_fs.to_proto() - driver_fs.meta.status = FeatureSetStatus.STATUS_PENDING + driver_fs.meta.status = FeatureSetStatusProto.STATUS_PENDING mocker.patch.object( 
client._core_service_stub, @@ -459,6 +462,8 @@ def test_feature_set_ingest_failure(self, client, dataframe, exception): @pytest.mark.parametrize("dataframe", [dataframes.ALL_TYPES]) def test_feature_set_types_success(self, client, dataframe, mocker): + client.set_project("project1") + all_types_fs = FeatureSet( name="all_types", entities=[Entity(name="user_id", dtype=ValueType.INT64)], diff --git a/sdk/python/tests/test_stores.py b/sdk/python/tests/test_stores.py deleted file mode 100644 index 330f272dacd..00000000000 --- a/sdk/python/tests/test_stores.py +++ /dev/null @@ -1,60 +0,0 @@ -# Copyright 2019 The Feast Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import pytest -import stores -from feast.feature_set import FeatureSet -from feast.feature import Feature -from feast.entity import Entity -from feast.value_type import ValueType -from feast.types import ( - FeatureRow_pb2 as FeatureRowProto, - Field_pb2 as FieldProto, - Value_pb2 as ValueProto, -) -from google.protobuf.timestamp_pb2 import Timestamp - - -class TestStores: - @pytest.fixture(scope="module") - def sqlite_store(self): - return stores.SQLiteDatabase() - - def test_register_feature_set(self, sqlite_store): - fs = FeatureSet("my-feature-set") - fs.add(Feature(name="my-feature-1", dtype=ValueType.INT64)) - fs.add(Feature(name="my-feature-2", dtype=ValueType.INT64)) - fs.add(Entity(name="my-entity-1", dtype=ValueType.INT64)) - fs._version = 1 - feature_set_spec_proto = fs.to_proto().spec - - sqlite_store.register_feature_set(feature_set_spec_proto) - feature_row = FeatureRowProto.FeatureRow( - feature_set="feature_set_1", - event_timestamp=Timestamp(), - fields=[ - FieldProto.Field( - name="feature_1", value=ValueProto.Value(float_val=1.2) - ), - FieldProto.Field( - name="feature_2", value=ValueProto.Value(float_val=1.2) - ), - FieldProto.Field( - name="feature_3", value=ValueProto.Value(float_val=1.2) - ), - ], - ) - # sqlite_store.upsert_feature_row(feature_set_proto, feature_row) - assert True diff --git a/serving/sample_redis_config.yml b/serving/sample_redis_config.yml index d6008365e0f..b3461649a1d 100644 --- a/serving/sample_redis_config.yml +++ b/serving/sample_redis_config.yml @@ -5,4 +5,5 @@ redis_config: port: 6379 subscriptions: - name: "*" - version: ">0" + project: "*" + version: "*" diff --git a/serving/src/main/java/feast/serving/configuration/JobServiceConfig.java b/serving/src/main/java/feast/serving/configuration/JobServiceConfig.java index 6e02c3f383f..2afbdaf90d1 100644 --- a/serving/src/main/java/feast/serving/configuration/JobServiceConfig.java +++ b/serving/src/main/java/feast/serving/configuration/JobServiceConfig.java @@ 
-19,10 +19,10 @@ import feast.core.StoreProto.Store; import feast.core.StoreProto.Store.RedisConfig; import feast.core.StoreProto.Store.StoreType; -import feast.serving.service.CachedSpecService; import feast.serving.service.JobService; import feast.serving.service.NoopJobService; import feast.serving.service.RedisBackedJobService; +import feast.serving.specs.CachedSpecService; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import redis.clients.jedis.Jedis; diff --git a/serving/src/main/java/feast/serving/configuration/ServingServiceConfig.java b/serving/src/main/java/feast/serving/configuration/ServingServiceConfig.java index 08b9655e3e1..9380ded4c4e 100644 --- a/serving/src/main/java/feast/serving/configuration/ServingServiceConfig.java +++ b/serving/src/main/java/feast/serving/configuration/ServingServiceConfig.java @@ -29,11 +29,11 @@ import feast.serving.FeastProperties; import feast.serving.FeastProperties.JobProperties; import feast.serving.service.BigQueryServingService; -import feast.serving.service.CachedSpecService; import feast.serving.service.JobService; import feast.serving.service.NoopJobService; import feast.serving.service.RedisServingService; import feast.serving.service.ServingService; +import feast.serving.specs.CachedSpecService; import io.opentracing.Tracer; import java.util.Map; import org.slf4j.Logger; diff --git a/serving/src/main/java/feast/serving/configuration/SpecServiceConfig.java b/serving/src/main/java/feast/serving/configuration/SpecServiceConfig.java index a18e844dcbb..3c91c2765aa 100644 --- a/serving/src/main/java/feast/serving/configuration/SpecServiceConfig.java +++ b/serving/src/main/java/feast/serving/configuration/SpecServiceConfig.java @@ -17,8 +17,8 @@ package feast.serving.configuration; import feast.serving.FeastProperties; -import feast.serving.service.CachedSpecService; -import feast.serving.service.CoreSpecService; +import 
feast.serving.specs.CachedSpecService; +import feast.serving.specs.CoreSpecService; import java.nio.file.Path; import java.nio.file.Paths; import java.util.concurrent.Executors; diff --git a/serving/src/main/java/feast/serving/controller/HealthServiceController.java b/serving/src/main/java/feast/serving/controller/HealthServiceController.java index b3b5dc7de40..53728544656 100644 --- a/serving/src/main/java/feast/serving/controller/HealthServiceController.java +++ b/serving/src/main/java/feast/serving/controller/HealthServiceController.java @@ -18,8 +18,8 @@ import feast.core.StoreProto.Store; import feast.serving.ServingAPIProto.GetFeastServingInfoRequest; -import feast.serving.service.CachedSpecService; import feast.serving.service.ServingService; +import feast.serving.specs.CachedSpecService; import io.grpc.health.v1.HealthGrpc.HealthImplBase; import io.grpc.health.v1.HealthProto.HealthCheckRequest; import io.grpc.health.v1.HealthProto.HealthCheckResponse; diff --git a/serving/src/main/java/feast/serving/service/BigQueryServingService.java b/serving/src/main/java/feast/serving/service/BigQueryServingService.java index d1658bde54d..53d071b57d1 100644 --- a/serving/src/main/java/feast/serving/service/BigQueryServingService.java +++ b/serving/src/main/java/feast/serving/service/BigQueryServingService.java @@ -18,7 +18,6 @@ import static feast.serving.store.bigquery.QueryTemplater.createEntityTableUUIDQuery; import static feast.serving.store.bigquery.QueryTemplater.generateFullTableName; -import static feast.serving.util.Metrics.requestCount; import static feast.serving.util.Metrics.requestLatency; import com.google.cloud.bigquery.BigQuery; @@ -32,8 +31,8 @@ import com.google.cloud.bigquery.Schema; import com.google.cloud.bigquery.Table; import com.google.cloud.bigquery.TableId; +import com.google.cloud.bigquery.TableInfo; import com.google.cloud.storage.Storage; -import feast.core.FeatureSetProto.FeatureSetSpec; import feast.serving.ServingAPIProto; import 
feast.serving.ServingAPIProto.DataFormat; import feast.serving.ServingAPIProto.DatasetSource; @@ -48,6 +47,8 @@ import feast.serving.ServingAPIProto.GetOnlineFeaturesResponse; import feast.serving.ServingAPIProto.JobStatus; import feast.serving.ServingAPIProto.JobType; +import feast.serving.specs.CachedSpecService; +import feast.serving.specs.FeatureSetRequest; import feast.serving.store.bigquery.BatchRetrievalQueryRunnable; import feast.serving.store.bigquery.QueryTemplater; import feast.serving.store.bigquery.model.FeatureSetInfo; @@ -56,10 +57,12 @@ import java.util.Optional; import java.util.UUID; import java.util.stream.Collectors; +import org.joda.time.Duration; import org.slf4j.Logger; public class BigQueryServingService implements ServingService { + public static final long TEMP_TABLE_EXPIRY_DURATION_MS = Duration.standardDays(1).getMillis(); private static final Logger log = org.slf4j.LoggerFactory.getLogger(BigQueryServingService.class); private final BigQuery bigquery; @@ -107,21 +110,8 @@ public GetOnlineFeaturesResponse getOnlineFeatures(GetOnlineFeaturesRequest getF @Override public GetBatchFeaturesResponse getBatchFeatures(GetBatchFeaturesRequest getFeaturesRequest) { long startTime = System.currentTimeMillis(); - List featureSetSpecs = - getFeaturesRequest.getFeatureSetsList().stream() - .map( - featureSet -> { - requestCount.labels(featureSet.getName()).inc(); - return specService.getFeatureSet(featureSet.getName(), featureSet.getVersion()); - }) - .collect(Collectors.toList()); - - if (getFeaturesRequest.getFeatureSetsList().size() != featureSetSpecs.size()) { - throw Status.INVALID_ARGUMENT - .withDescription( - "Some of the feature sets requested do not exist in Feast. 
Please check your request payload.") - .asRuntimeException(); - } + List featureSetRequests = + specService.getFeatureSets(getFeaturesRequest.getFeaturesList()); Table entityTable; String entityTableName; @@ -143,8 +133,7 @@ public GetBatchFeaturesResponse getBatchFeatures(GetBatchFeaturesRequest getFeat .filter(name -> !name.equals("event_timestamp")) .collect(Collectors.toList()); - List featureSetInfos = - QueryTemplater.getFeatureSetInfos(featureSetSpecs, getFeaturesRequest.getFeatureSetsList()); + List featureSetInfos = QueryTemplater.getFeatureSetInfos(featureSetRequests); String feastJobId = UUID.randomUUID().toString(); ServingAPIProto.Job feastJob = @@ -170,7 +159,9 @@ public GetBatchFeaturesResponse getBatchFeatures(GetBatchFeaturesRequest getFeat .build()) .start(); - requestLatency.labels("getBatchFeatures").observe(System.currentTimeMillis() - startTime); + requestLatency + .labels("getBatchFeatures") + .observe((System.currentTimeMillis() - startTime) / 1000); return GetBatchFeaturesResponse.newBuilder().setJob(feastJob).build(); } @@ -191,15 +182,17 @@ private Table loadEntities(DatasetSource datasetSource) { switch (datasetSource.getDatasetSourceCase()) { case FILE_SOURCE: try { - String tableName = generateTemporaryTableName(); - log.info("Loading entity dataset to table {}.{}.{}", projectId, datasetId, tableName); - TableId tableId = TableId.of(projectId, datasetId, tableName); - // Currently only avro supported + // Currently only AVRO format is supported + if (datasetSource.getFileSource().getDataFormat() != DataFormat.DATA_FORMAT_AVRO) { throw Status.INVALID_ARGUMENT - .withDescription("Invalid file format, only avro supported") + .withDescription("Invalid file format, only AVRO is supported.") .asRuntimeException(); } + + TableId tableId = TableId.of(projectId, datasetId, createTempTableName()); + log.info("Loading entity rows to: {}.{}.{}", projectId, datasetId, tableId.getTable()); + LoadJobConfiguration loadJobConfiguration = 
LoadJobConfiguration.of( tableId, datasetSource.getFileSource().getFileUrisList(), FormatOptions.avro()); @@ -207,6 +200,15 @@ private Table loadEntities(DatasetSource datasetSource) { loadJobConfiguration.toBuilder().setUseAvroLogicalTypes(true).build(); Job job = bigquery.create(JobInfo.of(loadJobConfiguration)); job.waitFor(); + + TableInfo expiry = + bigquery + .getTable(tableId) + .toBuilder() + .setExpirationTime(System.currentTimeMillis() + TEMP_TABLE_EXPIRY_DURATION_MS) + .build(); + bigquery.update(expiry); + loadedEntityTable = bigquery.getTable(tableId); if (!loadedEntityTable.exists()) { throw new RuntimeException( @@ -216,7 +218,7 @@ private Table loadEntities(DatasetSource datasetSource) { } catch (Exception e) { log.error("Exception has occurred in loadEntities method: ", e); throw Status.INTERNAL - .withDescription("Failed to load entity dataset into store") + .withDescription("Failed to load entity dataset into store: " + e.toString()) .withCause(e) .asRuntimeException(); } @@ -228,20 +230,23 @@ private Table loadEntities(DatasetSource datasetSource) { } } - private String generateTemporaryTableName() { - String source = String.format("feastserving%d", System.currentTimeMillis()); - String guid = UUID.nameUUIDFromBytes(source.getBytes()).toString(); - String suffix = guid.substring(0, Math.min(guid.length(), 10)).replaceAll("-", ""); - return String.format("temp_%s", suffix); - } - private TableId generateUUIDs(Table loadedEntityTable) { try { String uuidQuery = createEntityTableUUIDQuery(generateFullTableName(loadedEntityTable.getTableId())); - QueryJobConfiguration queryJobConfig = QueryJobConfiguration.newBuilder(uuidQuery).build(); + QueryJobConfiguration queryJobConfig = + QueryJobConfiguration.newBuilder(uuidQuery) + .setDestinationTable(TableId.of(projectId, datasetId, createTempTableName())) + .build(); Job queryJob = bigquery.create(JobInfo.of(queryJobConfig)); queryJob.waitFor(); + TableInfo expiry = + bigquery + 
.getTable(queryJobConfig.getDestinationTable()) + .toBuilder() + .setExpirationTime(System.currentTimeMillis() + TEMP_TABLE_EXPIRY_DURATION_MS) + .build(); + bigquery.update(expiry); queryJobConfig = queryJob.getConfiguration(); return queryJobConfig.getDestinationTable(); } catch (InterruptedException | BigQueryException e) { @@ -251,4 +256,8 @@ private TableId generateUUIDs(Table loadedEntityTable) { .asRuntimeException(); } } + + public static String createTempTableName() { + return "_" + UUID.randomUUID().toString().replace("-", ""); + } } diff --git a/serving/src/main/java/feast/serving/service/CachedSpecService.java b/serving/src/main/java/feast/serving/service/CachedSpecService.java deleted file mode 100644 index edf2da37a01..00000000000 --- a/serving/src/main/java/feast/serving/service/CachedSpecService.java +++ /dev/null @@ -1,199 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright 2018-2019 The Feast Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package feast.serving.service; - -import static feast.serving.util.mappers.YamlToProtoMapper.yamlToStoreProto; - -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.CacheLoader.InvalidCacheLoadException; -import com.google.common.cache.LoadingCache; -import feast.core.CoreServiceProto.ListFeatureSetsRequest; -import feast.core.CoreServiceProto.ListFeatureSetsRequest.Filter; -import feast.core.CoreServiceProto.ListFeatureSetsResponse; -import feast.core.CoreServiceProto.UpdateStoreRequest; -import feast.core.CoreServiceProto.UpdateStoreResponse; -import feast.core.FeatureSetProto.FeatureSet; -import feast.core.FeatureSetProto.FeatureSetSpec; -import feast.core.StoreProto.Store; -import feast.core.StoreProto.Store.Subscription; -import feast.serving.exception.SpecRetrievalException; -import io.grpc.StatusRuntimeException; -import io.prometheus.client.Gauge; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ExecutionException; -import org.slf4j.Logger; - -/** In-memory cache of specs. 
*/ -public class CachedSpecService { - - private static final int MAX_SPEC_COUNT = 1000; - private static final Logger log = org.slf4j.LoggerFactory.getLogger(CachedSpecService.class); - - private final CoreSpecService coreService; - private final Path configPath; - - private final CacheLoader featureSetSpecCacheLoader; - private final LoadingCache featureSetSpecCache; - private Store store; - - private static Gauge featureSetsCount = - Gauge.build() - .name("feature_set_count") - .subsystem("feast_serving") - .help("number of feature sets served by this instance") - .register(); - private static Gauge cacheLastUpdated = - Gauge.build() - .name("cache_last_updated") - .subsystem("feast_serving") - .help("epoch time of the last time the cache was updated") - .register(); - - public CachedSpecService(CoreSpecService coreService, Path configPath) { - this.configPath = configPath; - this.coreService = coreService; - this.store = updateStore(readConfig(configPath)); - - Map featureSetSpecs = getFeatureSetSpecMap(); - featureSetSpecCacheLoader = CacheLoader.from((String key) -> featureSetSpecs.get(key)); - featureSetSpecCache = - CacheBuilder.newBuilder().maximumSize(MAX_SPEC_COUNT).build(featureSetSpecCacheLoader); - } - - /** - * Get the current store configuration. - * - * @return StoreProto.Store store configuration for this serving instance - */ - public Store getStore() { - return this.store; - } - - /** - * Get a single FeatureSetSpec matching the given name and version. 
- * - * @param name of the featureSet - * @param version to retrieve - * @return FeatureSetSpec of the matching FeatureSet - */ - public FeatureSetSpec getFeatureSet(String name, int version) { - String id = String.format("%s:%d", name, version); - try { - return featureSetSpecCache.get(id); - } catch (InvalidCacheLoadException e) { - // if not found, try to retrieve from core - ListFeatureSetsRequest request = - ListFeatureSetsRequest.newBuilder() - .setFilter( - Filter.newBuilder() - .setFeatureSetName(name) - .setFeatureSetVersion(String.valueOf(version))) - .build(); - ListFeatureSetsResponse featureSets = coreService.listFeatureSets(request); - if (featureSets.getFeatureSetsList().size() == 0) { - throw new SpecRetrievalException( - String.format( - "Unable to retrieve featureSet with id %s from core, featureSet does not exist", - id)); - } - return featureSets.getFeatureSets(0).getSpec(); - } catch (ExecutionException e) { - throw new SpecRetrievalException( - String.format("Unable to retrieve featureSet with id %s", id), e); - } - } - - /** - * Reload the store configuration from the given config path, then retrieve the necessary specs - * from core to preload the cache. 
- */ - public void populateCache() { - this.store = updateStore(readConfig(configPath)); - Map featureSetSpecMap = getFeatureSetSpecMap(); - featureSetSpecCache.putAll(featureSetSpecMap); - - featureSetsCount.set(featureSetSpecCache.size()); - cacheLastUpdated.set(System.currentTimeMillis()); - } - - public void scheduledPopulateCache() { - try { - populateCache(); - } catch (Exception e) { - log.warn("Error updating store configuration and specs: {}", e.getMessage()); - } - } - - private Map getFeatureSetSpecMap() { - HashMap featureSetSpecs = new HashMap<>(); - - for (Subscription subscription : this.store.getSubscriptionsList()) { - try { - ListFeatureSetsResponse featureSetsResponse = - coreService.listFeatureSets( - ListFeatureSetsRequest.newBuilder() - .setFilter( - ListFeatureSetsRequest.Filter.newBuilder() - .setFeatureSetName(subscription.getName()) - .setFeatureSetVersion(subscription.getVersion())) - .build()); - - for (FeatureSet featureSet : featureSetsResponse.getFeatureSetsList()) { - FeatureSetSpec featureSetSpec = featureSet.getSpec(); - featureSetSpecs.put( - String.format("%s:%s", featureSetSpec.getName(), featureSetSpec.getVersion()), - featureSetSpec); - } - } catch (StatusRuntimeException e) { - throw new RuntimeException( - String.format("Unable to retrieve specs matching subscription %s", subscription), e); - } - } - return featureSetSpecs; - } - - private Store readConfig(Path path) { - try { - List fileContents = Files.readAllLines(path); - String yaml = fileContents.stream().reduce("", (l1, l2) -> l1 + "\n" + l2); - log.info("loaded store config at {}: \n{}", path.toString(), yaml); - return yamlToStoreProto(yaml); - } catch (IOException e) { - throw new RuntimeException( - String.format("Unable to read store config at %s", path.toAbsolutePath()), e); - } - } - - private Store updateStore(Store store) { - UpdateStoreRequest request = UpdateStoreRequest.newBuilder().setStore(store).build(); - try { - UpdateStoreResponse updateStoreResponse 
= coreService.updateStore(request); - if (!updateStoreResponse.getStore().equals(store)) { - throw new RuntimeException("Core store config not matching current store config"); - } - return updateStoreResponse.getStore(); - } catch (Exception e) { - throw new RuntimeException("Unable to update store configuration", e); - } - } -} diff --git a/serving/src/main/java/feast/serving/service/RedisServingService.java b/serving/src/main/java/feast/serving/service/RedisServingService.java index 7c0d65dc42c..48fc485214d 100644 --- a/serving/src/main/java/feast/serving/service/RedisServingService.java +++ b/serving/src/main/java/feast/serving/service/RedisServingService.java @@ -20,6 +20,8 @@ import static feast.serving.util.Metrics.requestCount; import static feast.serving.util.Metrics.requestLatency; import static feast.serving.util.Metrics.staleKeyCount; +import static feast.serving.util.RefUtil.generateFeatureSetStringRef; +import static feast.serving.util.RefUtil.generateFeatureStringRef; import com.google.common.collect.Maps; import com.google.protobuf.AbstractMessageLite; @@ -28,7 +30,7 @@ import feast.core.FeatureSetProto.EntitySpec; import feast.core.FeatureSetProto.FeatureSetSpec; import feast.serving.ServingAPIProto.FeastServingType; -import feast.serving.ServingAPIProto.FeatureSetRequest; +import feast.serving.ServingAPIProto.FeatureReference; import feast.serving.ServingAPIProto.GetBatchFeaturesRequest; import feast.serving.ServingAPIProto.GetBatchFeaturesResponse; import feast.serving.ServingAPIProto.GetFeastServingInfoRequest; @@ -39,6 +41,9 @@ import feast.serving.ServingAPIProto.GetOnlineFeaturesRequest.EntityRow; import feast.serving.ServingAPIProto.GetOnlineFeaturesResponse; import feast.serving.ServingAPIProto.GetOnlineFeaturesResponse.FieldValues; +import feast.serving.specs.CachedSpecService; +import feast.serving.specs.FeatureSetRequest; +import feast.serving.util.RefUtil; import feast.storage.RedisProto.RedisKey; import 
feast.types.FeatureRowProto.FeatureRow; import feast.types.FieldProto.Field; @@ -86,26 +91,18 @@ public GetOnlineFeaturesResponse getOnlineFeatures(GetOnlineFeaturesRequest requ List entityRows = request.getEntityRowsList(); Map> featureValuesMap = entityRows.stream() - .collect(Collectors.toMap(er -> er, er -> Maps.newHashMap(er.getFieldsMap()))); - - List featureSetRequests = request.getFeatureSetsList(); + .collect(Collectors.toMap(row -> row, row -> Maps.newHashMap(row.getFieldsMap()))); + List featureSetRequests = + specService.getFeatureSets(request.getFeaturesList()); for (FeatureSetRequest featureSetRequest : featureSetRequests) { - FeatureSetSpec featureSetSpec = - specService.getFeatureSet(featureSetRequest.getName(), featureSetRequest.getVersion()); - List featureSetEntityNames = - featureSetSpec.getEntitiesList().stream() + featureSetRequest.getSpec().getEntitiesList().stream() .map(EntitySpec::getName) .collect(Collectors.toList()); - Duration defaultMaxAge = featureSetSpec.getMaxAge(); - if (featureSetRequest.getMaxAge().equals(Duration.getDefaultInstance())) { - featureSetRequest = featureSetRequest.toBuilder().setMaxAge(defaultMaxAge).build(); - } - List redisKeys = - getRedisKeys(featureSetEntityNames, entityRows, featureSetRequest); + getRedisKeys(featureSetEntityNames, entityRows, featureSetRequest.getSpec()); try { sendAndProcessMultiGet(redisKeys, entityRows, featureValuesMap, featureSetRequest); @@ -118,9 +115,11 @@ public GetOnlineFeaturesResponse getOnlineFeatures(GetOnlineFeaturesRequest requ } List fieldValues = featureValuesMap.values().stream() - .map(m -> FieldValues.newBuilder().putAllFields(m).build()) + .map(valueMap -> FieldValues.newBuilder().putAllFields(valueMap).build()) .collect(Collectors.toList()); - requestLatency.labels("getOnlineFeatures").observe(System.currentTimeMillis() - startTime); + requestLatency + .labels("getOnlineFeatures") + .observe((System.currentTimeMillis() - startTime) / 1000); return 
getOnlineFeaturesResponseBuilder.addAllFieldValues(fieldValues).build(); } } @@ -140,19 +139,18 @@ public GetJobResponse getJob(GetJobRequest getJobRequest) { * * @param featureSetEntityNames entity names that actually belong to the featureSet * @param entityRows entity values to retrieve for - * @param featureSetRequest details of the requested featureSet + * @param featureSetSpec featureSetSpec of the features to retrieve * @return list of RedisKeys */ private List getRedisKeys( List featureSetEntityNames, List entityRows, - FeatureSetRequest featureSetRequest) { + FeatureSetSpec featureSetSpec) { try (Scope scope = tracer.buildSpan("Redis-makeRedisKeys").startActive(true)) { - String featureSetId = - String.format("%s:%s", featureSetRequest.getName(), featureSetRequest.getVersion()); + String featureSetRef = generateFeatureSetStringRef(featureSetSpec); List redisKeys = entityRows.stream() - .map(row -> makeRedisKey(featureSetId, featureSetEntityNames, row)) + .map(row -> makeRedisKey(featureSetRef, featureSetEntityNames, row)) .collect(Collectors.toList()); return redisKeys; } @@ -170,6 +168,7 @@ private RedisKey makeRedisKey( String featureSet, List featureSetEntityNames, EntityRow entityRow) { RedisKey.Builder builder = RedisKey.newBuilder().setFeatureSet(featureSet); Map fieldsMap = entityRow.getFieldsMap(); + featureSetEntityNames.sort(String::compareTo); for (int i = 0; i < featureSetEntityNames.size(); i++) { String entityName = featureSetEntityNames.get(i); @@ -198,14 +197,14 @@ private void sendAndProcessMultiGet( List jedisResps = sendMultiGet(redisKeys); long startTime = System.currentTimeMillis(); try (Scope scope = tracer.buildSpan("Redis-processResponse").startActive(true)) { - String featureSetId = - String.format("%s:%d", featureSetRequest.getName(), featureSetRequest.getVersion()); + FeatureSetSpec spec = featureSetRequest.getSpec(); Map nullValues = - featureSetRequest.getFeatureNamesList().stream() + 
featureSetRequest.getFeatureReferences().stream() .collect( Collectors.toMap( - name -> featureSetId + ":" + name, name -> Value.newBuilder().build())); + RefUtil::generateFeatureStringRef, + featureReference -> Value.newBuilder().build())); for (int i = 0; i < jedisResps.size(); i++) { EntityRow entityRow = entityRows.get(i); @@ -213,7 +212,16 @@ private void sendAndProcessMultiGet( byte[] jedisResponse = jedisResps.get(i); if (jedisResponse == null) { - missingKeyCount.labels(featureSetRequest.getName()).inc(); + featureSetRequest + .getFeatureReferences() + .parallelStream() + .forEach( + request -> + missingKeyCount + .labels( + spec.getProject(), + String.format("%s:%d", request.getName(), request.getVersion())) + .inc()); featureValues.putAll(nullValues); continue; } @@ -222,24 +230,55 @@ private void sendAndProcessMultiGet( boolean stale = isStale(featureSetRequest, entityRow, featureRow); if (stale) { - staleKeyCount.labels(featureSetRequest.getName()).inc(); + featureSetRequest + .getFeatureReferences() + .parallelStream() + .forEach( + request -> + staleKeyCount + .labels( + spec.getProject(), + String.format("%s:%d", request.getName(), request.getVersion())) + .inc()); featureValues.putAll(nullValues); continue; } - requestCount.labels(featureSetRequest.getName()).inc(); + featureSetRequest + .getFeatureReferences() + .parallelStream() + .forEach( + request -> + requestCount + .labels( + spec.getProject(), + String.format("%s:%d", request.getName(), request.getVersion())) + .inc()); + + Map featureNames = + featureSetRequest.getFeatureReferences().stream() + .collect( + Collectors.toMap( + FeatureReference::getName, featureReference -> featureReference)); featureRow.getFieldsList().stream() - .filter(f -> featureSetRequest.getFeatureNamesList().contains(f.getName())) - .forEach(f -> featureValues.put(featureSetId + ":" + f.getName(), f.getValue())); + .filter(field -> featureNames.keySet().contains(field.getName())) + .forEach( + field -> { + 
FeatureReference ref = featureNames.get(field.getName()); + String id = generateFeatureStringRef(ref); + featureValues.put(id, field.getValue()); + }); } } finally { - requestLatency.labels("processResponse").observe(System.currentTimeMillis() - startTime); + requestLatency + .labels("processResponse") + .observe((System.currentTimeMillis() - startTime) / 1000); } } private boolean isStale( FeatureSetRequest featureSetRequest, EntityRow entityRow, FeatureRow featureRow) { - if (featureSetRequest.getMaxAge().equals(Duration.getDefaultInstance())) { + if (featureSetRequest.getSpec().getMaxAge().equals(Duration.getDefaultInstance())) { return false; } long givenTimestamp = entityRow.getEntityTimestamp().getSeconds(); @@ -247,7 +286,7 @@ private boolean isStale( givenTimestamp = System.currentTimeMillis() / 1000; } long timeDifference = givenTimestamp - featureRow.getEventTimestamp().getSeconds(); - return timeDifference > featureSetRequest.getMaxAge().getSeconds(); + return timeDifference > featureSetRequest.getSpec().getMaxAge().getSeconds(); } /** @@ -272,7 +311,9 @@ private List sendMultiGet(List keys) { .withCause(e) .asRuntimeException(); } finally { - requestLatency.labels("sendMultiGet").observe(System.currentTimeMillis() - startTime); + requestLatency + .labels("sendMultiGet") + .observe((System.currentTimeMillis() - startTime) / 1000); } } } diff --git a/serving/src/main/java/feast/serving/specs/CachedSpecService.java b/serving/src/main/java/feast/serving/specs/CachedSpecService.java new file mode 100644 index 00000000000..040a870ffe1 --- /dev/null +++ b/serving/src/main/java/feast/serving/specs/CachedSpecService.java @@ -0,0 +1,262 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2019 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.serving.specs; + +import static feast.serving.util.RefUtil.generateFeatureSetStringRef; +import static feast.serving.util.RefUtil.generateFeatureStringRef; +import static feast.serving.util.mappers.YamlToProtoMapper.yamlToStoreProto; +import static java.util.Comparator.comparingInt; +import static java.util.stream.Collectors.groupingBy; + +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; +import feast.core.CoreServiceProto.ListFeatureSetsRequest; +import feast.core.CoreServiceProto.ListFeatureSetsResponse; +import feast.core.CoreServiceProto.UpdateStoreRequest; +import feast.core.CoreServiceProto.UpdateStoreResponse; +import feast.core.FeatureSetProto.FeatureSet; +import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.FeatureSetProto.FeatureSpec; +import feast.core.StoreProto.Store; +import feast.core.StoreProto.Store.Subscription; +import feast.serving.ServingAPIProto.FeatureReference; +import feast.serving.exception.SpecRetrievalException; +import io.grpc.StatusRuntimeException; +import io.prometheus.client.Gauge; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; +import java.util.stream.Collectors; +import org.apache.commons.lang3.tuple.Pair; +import org.apache.commons.lang3.tuple.Triple; 
+import org.slf4j.Logger; + +/** In-memory cache of specs. */ +public class CachedSpecService { + + private static final int MAX_SPEC_COUNT = 1000; + private static final Logger log = org.slf4j.LoggerFactory.getLogger(CachedSpecService.class); + + private final CoreSpecService coreService; + private final Path configPath; + + private final Map featureToFeatureSetMapping; + + private final CacheLoader featureSetCacheLoader; + private final LoadingCache featureSetCache; + private Store store; + + private static Gauge featureSetsCount = + Gauge.build() + .name("feature_set_count") + .subsystem("feast_serving") + .help("number of feature sets served by this instance") + .register(); + private static Gauge cacheLastUpdated = + Gauge.build() + .name("cache_last_updated") + .subsystem("feast_serving") + .help("epoch time of the last time the cache was updated") + .register(); + + public CachedSpecService(CoreSpecService coreService, Path configPath) { + this.configPath = configPath; + this.coreService = coreService; + this.store = updateStore(readConfig(configPath)); + + Map featureSets = getFeatureSetMap(); + featureToFeatureSetMapping = + new ConcurrentHashMap<>(getFeatureToFeatureSetMapping(featureSets)); + featureSetCacheLoader = CacheLoader.from(featureSets::get); + featureSetCache = + CacheBuilder.newBuilder().maximumSize(MAX_SPEC_COUNT).build(featureSetCacheLoader); + } + + /** + * Get the current store configuration. + * + * @return StoreProto.Store store configuration for this serving instance + */ + public Store getStore() { + return this.store; + } + + /** + * Get FeatureSetSpecs for the given features. 
+ * + * @return FeatureSetRequest containing the specs, and their respective feature references + */ + public List getFeatureSets(List featureReferences) { + List featureSetRequests = new ArrayList<>(); + featureReferences.stream() + .map( + featureReference -> { + String featureSet = + featureToFeatureSetMapping.getOrDefault( + generateFeatureStringRef(featureReference), ""); + if (featureSet.isEmpty()) { + throw new SpecRetrievalException( + String.format("Unable to retrieve feature %s", featureReference)); + } + return Pair.of(featureSet, featureReference); + }) + .collect(groupingBy(Pair::getLeft)) + .forEach( + (fsName, featureRefs) -> { + try { + FeatureSetSpec featureSetSpec = featureSetCache.get(fsName); + List requestedFeatures = + featureRefs.stream().map(Pair::getRight).collect(Collectors.toList()); + FeatureSetRequest featureSetRequest = + FeatureSetRequest.newBuilder() + .setSpec(featureSetSpec) + .addAllFeatureReferences(requestedFeatures) + .build(); + featureSetRequests.add(featureSetRequest); + } catch (ExecutionException e) { + throw new SpecRetrievalException( + String.format("Unable to retrieve featureSet with id %s", fsName), e); + } + }); + return featureSetRequests; + } + + /** + * Reload the store configuration from the given config path, then retrieve the necessary specs + * from core to preload the cache. 
+ */ + public void populateCache() { + this.store = updateStore(readConfig(configPath)); + Map featureSetMap = getFeatureSetMap(); + featureSetCache.putAll(featureSetMap); + featureToFeatureSetMapping.putAll(getFeatureToFeatureSetMapping(featureSetMap)); + + featureSetsCount.set(featureSetCache.size()); + cacheLastUpdated.set(System.currentTimeMillis()); + } + + public void scheduledPopulateCache() { + try { + populateCache(); + } catch (Exception e) { + log.warn("Error updating store configuration and specs: {}", e.getMessage()); + } + } + + private Map getFeatureSetMap() { + HashMap featureSets = new HashMap<>(); + + for (Subscription subscription : this.store.getSubscriptionsList()) { + try { + ListFeatureSetsResponse featureSetsResponse = + coreService.listFeatureSets( + ListFeatureSetsRequest.newBuilder() + .setFilter( + ListFeatureSetsRequest.Filter.newBuilder() + .setProject(subscription.getProject()) + .setFeatureSetName(subscription.getName()) + .setFeatureSetVersion(subscription.getVersion())) + .build()); + + for (FeatureSet featureSet : featureSetsResponse.getFeatureSetsList()) { + FeatureSetSpec spec = featureSet.getSpec(); + featureSets.put(generateFeatureSetStringRef(spec), spec); + } + } catch (StatusRuntimeException e) { + throw new RuntimeException( + String.format("Unable to retrieve specs matching subscription %s", subscription), e); + } + } + return featureSets; + } + + private Map getFeatureToFeatureSetMapping( + Map featureSets) { + HashMap mapping = new HashMap<>(); + + featureSets.values().stream() + .collect( + groupingBy( + featureSet -> + Triple.of( + featureSet.getProject(), featureSet.getName(), featureSet.getVersion()))) + .forEach( + (group, groupedFeatureSets) -> { + groupedFeatureSets = + groupedFeatureSets.stream() + .sorted(comparingInt(FeatureSetSpec::getVersion)) + .collect(Collectors.toList()); + for (int i = 0; i < groupedFeatureSets.size(); i++) { + FeatureSetSpec featureSetSpec = groupedFeatureSets.get(i); + for 
(FeatureSpec featureSpec : featureSetSpec.getFeaturesList()) { + FeatureReference featureRef = + FeatureReference.newBuilder() + .setProject(featureSetSpec.getProject()) + .setName(featureSpec.getName()) + .setVersion(featureSetSpec.getVersion()) + .build(); + mapping.put( + generateFeatureStringRef(featureRef), + generateFeatureSetStringRef(featureSetSpec)); + if (i == groupedFeatureSets.size() - 1) { + featureRef = + FeatureReference.newBuilder() + .setProject(featureSetSpec.getProject()) + .setName(featureSpec.getName()) + .build(); + mapping.put( + generateFeatureStringRef(featureRef), + generateFeatureSetStringRef(featureSetSpec)); + } + } + } + }); + return mapping; + } + + private Store readConfig(Path path) { + try { + List fileContents = Files.readAllLines(path); + String yaml = fileContents.stream().reduce("", (l1, l2) -> l1 + "\n" + l2); + log.info("loaded store config at {}: \n{}", path.toString(), yaml); + return yamlToStoreProto(yaml); + } catch (IOException e) { + throw new RuntimeException( + String.format("Unable to read store config at %s", path.toAbsolutePath()), e); + } + } + + private Store updateStore(Store store) { + UpdateStoreRequest request = UpdateStoreRequest.newBuilder().setStore(store).build(); + try { + UpdateStoreResponse updateStoreResponse = coreService.updateStore(request); + if (!updateStoreResponse.getStore().equals(store)) { + throw new RuntimeException("Core store config not matching current store config"); + } + return updateStoreResponse.getStore(); + } catch (Exception e) { + throw new RuntimeException("Unable to update store configuration", e); + } + } +} diff --git a/serving/src/main/java/feast/serving/service/CoreSpecService.java b/serving/src/main/java/feast/serving/specs/CoreSpecService.java similarity index 85% rename from serving/src/main/java/feast/serving/service/CoreSpecService.java rename to serving/src/main/java/feast/serving/specs/CoreSpecService.java index 438492d387d..2f5cef342e0 100644 --- 
a/serving/src/main/java/feast/serving/service/CoreSpecService.java +++ b/serving/src/main/java/feast/serving/specs/CoreSpecService.java @@ -14,9 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package feast.serving.service; +package feast.serving.specs; import feast.core.CoreServiceGrpc; +import feast.core.CoreServiceProto.GetFeatureSetRequest; +import feast.core.CoreServiceProto.GetFeatureSetResponse; import feast.core.CoreServiceProto.ListFeatureSetsRequest; import feast.core.CoreServiceProto.ListFeatureSetsResponse; import feast.core.CoreServiceProto.UpdateStoreRequest; @@ -37,6 +39,10 @@ public CoreSpecService(String feastCoreHost, int feastCorePort) { blockingStub = CoreServiceGrpc.newBlockingStub(channel); } + public GetFeatureSetResponse getFeatureSet(GetFeatureSetRequest getFeatureSetRequest) { + return blockingStub.getFeatureSet(getFeatureSetRequest); + } + public ListFeatureSetsResponse listFeatureSets(ListFeatureSetsRequest ListFeatureSetsRequest) { return blockingStub.listFeatureSets(ListFeatureSetsRequest); } diff --git a/serving/src/main/java/feast/serving/specs/FeatureSetRequest.java b/serving/src/main/java/feast/serving/specs/FeatureSetRequest.java new file mode 100644 index 00000000000..904630659d7 --- /dev/null +++ b/serving/src/main/java/feast/serving/specs/FeatureSetRequest.java @@ -0,0 +1,53 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2019 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.serving.specs; + +import com.google.auto.value.AutoValue; +import com.google.common.collect.ImmutableSet; +import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.serving.ServingAPIProto.FeatureReference; +import java.util.List; + +@AutoValue +public abstract class FeatureSetRequest { + public abstract FeatureSetSpec getSpec(); + + public abstract ImmutableSet getFeatureReferences(); + + public static Builder newBuilder() { + return new AutoValue_FeatureSetRequest.Builder(); + } + + @AutoValue.Builder + public abstract static class Builder { + public abstract Builder setSpec(FeatureSetSpec spec); + + abstract ImmutableSet.Builder featureReferencesBuilder(); + + public Builder addAllFeatureReferences(List featureReferenceList) { + featureReferencesBuilder().addAll(featureReferenceList); + return this; + } + + public Builder addFeatureReference(FeatureReference featureReference) { + featureReferencesBuilder().add(featureReference); + return this; + } + + public abstract FeatureSetRequest build(); + } +} diff --git a/serving/src/main/java/feast/serving/store/bigquery/BatchRetrievalQueryRunnable.java b/serving/src/main/java/feast/serving/store/bigquery/BatchRetrievalQueryRunnable.java index e16f5060c2a..d437294dfc3 100644 --- a/serving/src/main/java/feast/serving/store/bigquery/BatchRetrievalQueryRunnable.java +++ b/serving/src/main/java/feast/serving/store/bigquery/BatchRetrievalQueryRunnable.java @@ -16,6 +16,8 @@ */ package feast.serving.store.bigquery; +import static feast.serving.service.BigQueryServingService.TEMP_TABLE_EXPIRY_DURATION_MS; +import static feast.serving.service.BigQueryServingService.createTempTableName; import static feast.serving.store.bigquery.QueryTemplater.createTimestampLimitQuery; import com.google.auto.value.AutoValue; @@ -27,6 +29,8 @@ import com.google.cloud.bigquery.Job; import 
com.google.cloud.bigquery.JobInfo; import com.google.cloud.bigquery.QueryJobConfiguration; +import com.google.cloud.bigquery.TableId; +import com.google.cloud.bigquery.TableInfo; import com.google.cloud.bigquery.TableResult; import com.google.cloud.storage.Blob; import com.google.cloud.storage.Storage; @@ -179,10 +183,13 @@ Job runBatchQuery(List featureSetQueries) for (int i = 0; i < featureSetQueries.size(); i++) { QueryJobConfiguration queryJobConfig = - QueryJobConfiguration.newBuilder(featureSetQueries.get(i)).build(); + QueryJobConfiguration.newBuilder(featureSetQueries.get(i)) + .setDestinationTable(TableId.of(projectId(), datasetId(), createTempTableName())) + .build(); Job subqueryJob = bigquery().create(JobInfo.of(queryJobConfig)); executorCompletionService.submit( SubqueryCallable.builder() + .setBigquery(bigquery()) .setFeatureSetInfo(featureSetInfos().get(i)) .setSubqueryJob(subqueryJob) .build()); @@ -214,10 +221,21 @@ Job runBatchQuery(List featureSetQueries) String joinQuery = QueryTemplater.createJoinQuery( featureSetInfos, entityTableColumnNames(), entityTableName()); - QueryJobConfiguration queryJobConfig = QueryJobConfiguration.newBuilder(joinQuery).build(); + QueryJobConfiguration queryJobConfig = + QueryJobConfiguration.newBuilder(joinQuery) + .setDestinationTable(TableId.of(projectId(), datasetId(), createTempTableName())) + .build(); queryJob = bigquery().create(JobInfo.of(queryJobConfig)); queryJob.waitFor(); + TableInfo expiry = + bigquery() + .getTable(queryJobConfig.getDestinationTable()) + .toBuilder() + .setExpirationTime(System.currentTimeMillis() + TEMP_TABLE_EXPIRY_DURATION_MS) + .build(); + bigquery().update(expiry); + return queryJob; } @@ -248,10 +266,18 @@ private FieldValueList getTimestampLimits(String entityTableName) { QueryJobConfiguration getTimestampLimitsQuery = QueryJobConfiguration.newBuilder(createTimestampLimitQuery(entityTableName)) .setDefaultDataset(DatasetId.of(projectId(), datasetId())) + 
.setDestinationTable(TableId.of(projectId(), datasetId(), createTempTableName())) .build(); try { Job job = bigquery().create(JobInfo.of(getTimestampLimitsQuery)); TableResult getTimestampLimitsQueryResult = job.waitFor().getQueryResults(); + TableInfo expiry = + bigquery() + .getTable(getTimestampLimitsQuery.getDestinationTable()) + .toBuilder() + .setExpirationTime(System.currentTimeMillis() + TEMP_TABLE_EXPIRY_DURATION_MS) + .build(); + bigquery().update(expiry); FieldValueList result = null; for (FieldValueList fields : getTimestampLimitsQueryResult.getValues()) { result = fields; diff --git a/serving/src/main/java/feast/serving/store/bigquery/QueryTemplater.java b/serving/src/main/java/feast/serving/store/bigquery/QueryTemplater.java index d0e6db67486..e3f1138db89 100644 --- a/serving/src/main/java/feast/serving/store/bigquery/QueryTemplater.java +++ b/serving/src/main/java/feast/serving/store/bigquery/QueryTemplater.java @@ -22,7 +22,8 @@ import com.mitchellbosecke.pebble.template.PebbleTemplate; import feast.core.FeatureSetProto.EntitySpec; import feast.core.FeatureSetProto.FeatureSetSpec; -import feast.serving.ServingAPIProto.FeatureSetRequest; +import feast.serving.ServingAPIProto.FeatureReference; +import feast.serving.specs.FeatureSetRequest; import feast.serving.store.bigquery.model.FeatureSetInfo; import java.io.IOException; import java.io.StringWriter; @@ -66,36 +67,30 @@ public static String createEntityTableUUIDQuery(String leftTableName) { * Generate the information necessary for the sql templating for point in time correctness join to * the entity dataset for each feature set requested. 
* - * @param featureSetSpecs List of feature set specs requested - * @param featureSetRequests List of feature set requests from the batch retrieval request + * @param featureSetRequests List of feature sets requested * @return List of FeatureSetInfos */ - public static List getFeatureSetInfos( - List featureSetSpecs, List featureSetRequests) + public static List getFeatureSetInfos(List featureSetRequests) throws IllegalArgumentException { - if (featureSetRequests.size() != featureSetSpecs.size()) { - throw new IllegalArgumentException( - "Number of feature sets not matching number of feature set requests"); - } - List featureSetInfos = new ArrayList<>(); - - for (int i = 0; i < featureSetRequests.size(); i++) { - FeatureSetSpec spec = featureSetSpecs.get(i); - FeatureSetRequest request = featureSetRequests.get(i); - Duration maxAge = getMaxAge(request, spec); + for (FeatureSetRequest featureSetRequest : featureSetRequests) { + FeatureSetSpec spec = featureSetRequest.getSpec(); + Duration maxAge = spec.getMaxAge(); List fsEntities = spec.getEntitiesList().stream().map(EntitySpec::getName).collect(Collectors.toList()); - String id = String.format("%s:%s", spec.getName(), spec.getVersion()); + List features = + featureSetRequest.getFeatureReferences().stream() + .map(FeatureReference::getName) + .collect(Collectors.toList()); featureSetInfos.add( new FeatureSetInfo( - id, + spec.getProject(), spec.getName(), spec.getVersion(), maxAge.getSeconds(), fsEntities, - request.getFeatureNamesList(), + features, "")); } return featureSetInfos; @@ -159,13 +154,6 @@ public static String createJoinQuery( return writer.toString(); } - private static Duration getMaxAge(FeatureSetRequest featureSet, FeatureSetSpec featureSetSpec) { - if (featureSet.getMaxAge() == Duration.getDefaultInstance()) { - return featureSetSpec.getMaxAge(); - } - return featureSet.getMaxAge(); - } - public static String generateFullTableName(TableId tableId) { return String.format( "%s.%s.%s", 
tableId.getProject(), tableId.getDataset(), tableId.getTable()); diff --git a/serving/src/main/java/feast/serving/store/bigquery/SubqueryCallable.java b/serving/src/main/java/feast/serving/store/bigquery/SubqueryCallable.java index 3c28194e7a3..e0b8f457986 100644 --- a/serving/src/main/java/feast/serving/store/bigquery/SubqueryCallable.java +++ b/serving/src/main/java/feast/serving/store/bigquery/SubqueryCallable.java @@ -16,13 +16,16 @@ */ package feast.serving.store.bigquery; +import static feast.serving.service.BigQueryServingService.TEMP_TABLE_EXPIRY_DURATION_MS; import static feast.serving.store.bigquery.QueryTemplater.generateFullTableName; import com.google.auto.value.AutoValue; +import com.google.cloud.bigquery.BigQuery; import com.google.cloud.bigquery.BigQueryException; import com.google.cloud.bigquery.Job; import com.google.cloud.bigquery.QueryJobConfiguration; import com.google.cloud.bigquery.TableId; +import com.google.cloud.bigquery.TableInfo; import feast.serving.store.bigquery.model.FeatureSetInfo; import java.util.concurrent.Callable; @@ -33,6 +36,8 @@ @AutoValue public abstract class SubqueryCallable implements Callable { + public abstract BigQuery bigquery(); + public abstract FeatureSetInfo featureSetInfo(); public abstract Job subqueryJob(); @@ -44,6 +49,8 @@ public static Builder builder() { @AutoValue.Builder public abstract static class Builder { + public abstract Builder setBigquery(BigQuery bigquery); + public abstract Builder setFeatureSetInfo(FeatureSetInfo featureSetInfo); public abstract Builder setSubqueryJob(Job subqueryJob); @@ -57,6 +64,15 @@ public FeatureSetInfo call() throws BigQueryException, InterruptedException { subqueryJob().waitFor(); subqueryConfig = subqueryJob().getConfiguration(); TableId destinationTable = subqueryConfig.getDestinationTable(); + + TableInfo expiry = + bigquery() + .getTable(destinationTable) + .toBuilder() + .setExpirationTime(System.currentTimeMillis() + TEMP_TABLE_EXPIRY_DURATION_MS) + .build(); + 
bigquery().update(expiry); + String fullTablePath = generateFullTableName(destinationTable); return new FeatureSetInfo(featureSetInfo(), fullTablePath); diff --git a/serving/src/main/java/feast/serving/store/bigquery/model/FeatureSetInfo.java b/serving/src/main/java/feast/serving/store/bigquery/model/FeatureSetInfo.java index ddda1bf6a78..77c80ead0ea 100644 --- a/serving/src/main/java/feast/serving/store/bigquery/model/FeatureSetInfo.java +++ b/serving/src/main/java/feast/serving/store/bigquery/model/FeatureSetInfo.java @@ -20,7 +20,7 @@ public class FeatureSetInfo { - private final String id; + private final String project; private final String name; private final int version; private final long maxAge; @@ -29,14 +29,14 @@ public class FeatureSetInfo { private final String table; public FeatureSetInfo( - String id, + String project, String name, int version, long maxAge, List entities, List features, String table) { - this.id = id; + this.project = project; this.name = name; this.version = version; this.maxAge = maxAge; @@ -47,7 +47,7 @@ public FeatureSetInfo( public FeatureSetInfo(FeatureSetInfo featureSetInfo, String table) { - this.id = featureSetInfo.getId(); + this.project = featureSetInfo.getProject(); this.name = featureSetInfo.getName(); this.version = featureSetInfo.getVersion(); this.maxAge = featureSetInfo.getMaxAge(); @@ -56,8 +56,8 @@ public FeatureSetInfo(FeatureSetInfo featureSetInfo, String table) { this.table = table; } - public String getId() { - return id; + public String getProject() { + return project; } public String getName() { diff --git a/serving/src/main/java/feast/serving/util/Metrics.java b/serving/src/main/java/feast/serving/util/Metrics.java index ffd6d1a0d68..99f6353e742 100644 --- a/serving/src/main/java/feast/serving/util/Metrics.java +++ b/serving/src/main/java/feast/serving/util/Metrics.java @@ -23,10 +23,10 @@ public class Metrics { public static final Histogram requestLatency = Histogram.build() - .buckets(2, 4, 6, 8, 10, 15, 
20, 25, 30, 35, 50) + .buckets(0.001, 0.002, 0.004, 0.006, 0.008, 0.01, 0.015, 0.02, 0.025, 0.03, 0.035, 0.05) .name("request_latency_ms") .subsystem("feast_serving") - .help("Request latency in milliseconds.") + .help("Request latency in seconds.") .labelNames("method") .register(); @@ -35,7 +35,7 @@ public class Metrics { .name("request_feature_count") .subsystem("feast_serving") .help("number of feature rows requested") - .labelNames("feature_set_name") + .labelNames("project", "feature_name") .register(); public static final Counter missingKeyCount = @@ -43,7 +43,7 @@ public class Metrics { .name("missing_feature_count") .subsystem("feast_serving") .help("number requested feature rows that were not found") - .labelNames("feature_set_name") + .labelNames("project", "feature_name") .register(); public static final Counter staleKeyCount = @@ -51,6 +51,6 @@ public class Metrics { .name("stale_feature_count") .subsystem("feast_serving") .help("number requested feature rows that were stale") - .labelNames("feature_set_name") + .labelNames("project", "feature_name") .register(); } diff --git a/serving/src/main/java/feast/serving/util/RefUtil.java b/serving/src/main/java/feast/serving/util/RefUtil.java new file mode 100644 index 00000000000..74de3e65620 --- /dev/null +++ b/serving/src/main/java/feast/serving/util/RefUtil.java @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright 2018-2019 The Feast Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package feast.serving.util; + +import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.serving.ServingAPIProto.FeatureReference; + +public class RefUtil { + public static String generateFeatureStringRef(FeatureReference featureReference) { + String ref = String.format("%s/%s", featureReference.getProject(), featureReference.getName()); + if (featureReference.getVersion() > 0) { + return ref + String.format(":%d", featureReference.getVersion()); + } + return ref; + } + + public static String generateFeatureSetStringRef(FeatureSetSpec featureSetSpec) { + String ref = String.format("%s/%s", featureSetSpec.getProject(), featureSetSpec.getName()); + if (featureSetSpec.getVersion() > 0) { + return ref + String.format(":%d", featureSetSpec.getVersion()); + } + return ref; + } +} diff --git a/serving/src/main/java/feast/serving/util/RequestHelper.java b/serving/src/main/java/feast/serving/util/RequestHelper.java index 4127b6afef8..e6e8e8629a1 100644 --- a/serving/src/main/java/feast/serving/util/RequestHelper.java +++ b/serving/src/main/java/feast/serving/util/RequestHelper.java @@ -16,9 +16,12 @@ */ package feast.serving.util; +import feast.serving.ServingAPIProto.FeatureReference; import feast.serving.ServingAPIProto.GetBatchFeaturesRequest; import feast.serving.ServingAPIProto.GetOnlineFeaturesRequest; import io.grpc.Status; +import java.util.Set; +import java.util.stream.Collectors; public class RequestHelper { @@ -43,5 +46,15 @@ public static void validateBatchRequest(GetBatchFeaturesRequest getFeaturesReque .withDescription("Dataset source must be provided: only file source supported") .asRuntimeException(); } + + Set uniqueFeatureNames = + getFeaturesRequest.getFeaturesList().stream() + .map(FeatureReference::getName) + .collect(Collectors.toSet()); + if (uniqueFeatureNames.size() != getFeaturesRequest.getFeaturesList().size()) { + throw 
Status.INVALID_ARGUMENT + .withDescription("Feature names must be unique within the request") + .asRuntimeException(); + } } } diff --git a/serving/src/main/resources/templates/join_featuresets.sql b/serving/src/main/resources/templates/join_featuresets.sql index f913a63dbd4..e57b0c10314 100644 --- a/serving/src/main/resources/templates/join_featuresets.sql +++ b/serving/src/main/resources/templates/join_featuresets.sql @@ -5,7 +5,7 @@ LEFT JOIN ( SELECT uuid, {% for featureName in featureSet.features %} - {{ featureSet.name }}_v{{ featureSet.version }}_{{ featureName }}{% if loop.last %}{% else %}, {% endif %} + {{ featureSet.project }}_{{ featureName }}_v{{ featureSet.version }}{% if loop.last %}{% else %}, {% endif %} {% endfor %} FROM `{{ featureSet.table }}` ) USING (uuid) @@ -15,7 +15,7 @@ LEFT JOIN ( {{ entities | join(', ') }} {% for featureSet in featureSets %} {% for featureName in featureSet.features %} - ,{{ featureSet.name }}_v{{ featureSet.version }}_{{ featureName }} + ,{{ featureSet.project }}_{{ featureName }}_v{{ featureSet.version }} as {{ featureName }} {% endfor %} {% endfor %} FROM joined \ No newline at end of file diff --git a/serving/src/main/resources/templates/single_featureset_pit_join.sql b/serving/src/main/resources/templates/single_featureset_pit_join.sql index 38fb67db42f..c83735660f8 100644 --- a/serving/src/main/resources/templates/single_featureset_pit_join.sql +++ b/serving/src/main/resources/templates/single_featureset_pit_join.sql @@ -1,7 +1,7 @@ WITH union_features AS (SELECT uuid, event_timestamp, - NULL as {{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp, + NULL as {{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp, NULL as created_timestamp, {{ featureSet.entities | join(', ')}}, true AS is_entity_table @@ -10,18 +10,18 @@ UNION ALL SELECT NULL as uuid, event_timestamp, - event_timestamp as {{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp, + 
event_timestamp as {{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp, created_timestamp, {{ featureSet.entities | join(', ')}}, false AS is_entity_table -FROM `{{projectId}}.{{datasetId}}.{{ featureSet.name }}_v{{ featureSet.version }}` WHERE event_timestamp <= '{{maxTimestamp}}' AND event_timestamp >= Timestamp_sub(TIMESTAMP '{{ minTimestamp }}', interval {{ featureSet.maxAge }} second) +FROM `{{projectId}}.{{datasetId}}.{{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}` WHERE event_timestamp <= '{{maxTimestamp}}' AND event_timestamp >= Timestamp_sub(TIMESTAMP '{{ minTimestamp }}', interval {{ featureSet.maxAge }} second) ) SELECT uuid, event_timestamp, {{ featureSet.entities | join(', ')}}, {% for featureName in featureSet.features %} - IF(event_timestamp >= {{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp AND Timestamp_sub(event_timestamp, interval {{ featureSet.maxAge }} second) < {{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp, {{ featureSet.name }}_v{{ featureSet.version }}_{{ featureName }}, NULL) as {{ featureSet.name }}_v{{ featureSet.version }}_{{ featureName }}{% if loop.last %}{% else %}, {% endif %} + IF(event_timestamp >= {{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp AND Timestamp_sub(event_timestamp, interval {{ featureSet.maxAge }} second) < {{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp, {{ featureSet.project }}_{{ featureName }}_v{{ featureSet.version }}, NULL) as {{ featureSet.project }}_{{ featureName }}_v{{ featureSet.version }}{% if loop.last %}{% else %}, {% endif %} {% endfor %} FROM ( SELECT @@ -29,19 +29,19 @@ SELECT event_timestamp, {{ featureSet.entities | join(', ')}}, FIRST_VALUE(created_timestamp IGNORE NULLS) over w AS created_timestamp, - FIRST_VALUE({{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp IGNORE NULLS) over w AS {{ 
featureSet.name }}_v{{ featureSet.version }}_feature_timestamp, + FIRST_VALUE({{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp IGNORE NULLS) over w AS {{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp, is_entity_table FROM union_features WINDOW w AS (PARTITION BY {{ featureSet.entities | join(', ') }} ORDER BY event_timestamp DESC, is_entity_table DESC, created_timestamp DESC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) ) LEFT JOIN ( SELECT - event_timestamp as {{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp, + event_timestamp as {{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp, created_timestamp, {{ featureSet.entities | join(', ')}}, {% for featureName in featureSet.features %} - {{ featureName }} as {{ featureSet.name }}_v{{ featureSet.version }}_{{ featureName }}{% if loop.last %}{% else %}, {% endif %} + {{ featureName }} as {{ featureSet.project }}_{{ featureName }}_v{{ featureSet.version }}{% if loop.last %}{% else %}, {% endif %} {% endfor %} -FROM `{{projectId}}.{{datasetId}}.{{ featureSet.name }}_v{{ featureSet.version }}` WHERE event_timestamp <= '{{maxTimestamp}}' AND event_timestamp >= Timestamp_sub(TIMESTAMP '{{ minTimestamp }}', interval {{ featureSet.maxAge }} second) -) USING ({{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp, created_timestamp, {{ featureSet.entities | join(', ')}}) +FROM `{{projectId}}.{{datasetId}}.{{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}` WHERE event_timestamp <= '{{maxTimestamp}}' AND event_timestamp >= Timestamp_sub(TIMESTAMP '{{ minTimestamp }}', interval {{ featureSet.maxAge }} second) +) USING ({{ featureSet.project }}_{{ featureSet.name }}_v{{ featureSet.version }}_feature_timestamp, created_timestamp, {{ featureSet.entities | join(', ')}}) WHERE is_entity_table \ No newline at end of file diff --git 
a/serving/src/test/java/feast/serving/controller/ServingServiceGRpcControllerTest.java b/serving/src/test/java/feast/serving/controller/ServingServiceGRpcControllerTest.java index 6dfc54ec2b1..f2c51bc7dde 100644 --- a/serving/src/test/java/feast/serving/controller/ServingServiceGRpcControllerTest.java +++ b/serving/src/test/java/feast/serving/controller/ServingServiceGRpcControllerTest.java @@ -18,10 +18,9 @@ import static org.mockito.MockitoAnnotations.initMocks; -import com.google.common.collect.Lists; import com.google.protobuf.Timestamp; import feast.serving.FeastProperties; -import feast.serving.ServingAPIProto.FeatureSetRequest; +import feast.serving.ServingAPIProto.FeatureReference; import feast.serving.ServingAPIProto.GetOnlineFeaturesRequest; import feast.serving.ServingAPIProto.GetOnlineFeaturesRequest.EntityRow; import feast.serving.ServingAPIProto.GetOnlineFeaturesResponse; @@ -52,11 +51,17 @@ public void setUp() { validRequest = GetOnlineFeaturesRequest.newBuilder() - .addFeatureSets( - FeatureSetRequest.newBuilder() - .setName("featureSet") + .addFeatures( + FeatureReference.newBuilder() + .setName("feature1") .setVersion(1) - .addAllFeatureNames(Lists.newArrayList("feature1", "feature2")) + .setProject("project") + .build()) + .addFeatures( + FeatureReference.newBuilder() + .setName("feature2") + .setVersion(1) + .setProject("project") .build()) .addEntityRows( EntityRow.newBuilder() diff --git a/serving/src/test/java/feast/serving/service/CachedSpecServiceTest.java b/serving/src/test/java/feast/serving/service/CachedSpecServiceTest.java index 5bd2038f2be..abeb44bd731 100644 --- a/serving/src/test/java/feast/serving/service/CachedSpecServiceTest.java +++ b/serving/src/test/java/feast/serving/service/CachedSpecServiceTest.java @@ -17,7 +17,8 @@ package feast.serving.service; import static org.hamcrest.CoreMatchers.equalTo; -import static org.junit.Assert.*; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static 
org.junit.Assert.assertThat; import static org.mockito.Mockito.when; import static org.mockito.MockitoAnnotations.initMocks; @@ -28,14 +29,20 @@ import feast.core.CoreServiceProto.UpdateStoreResponse; import feast.core.FeatureSetProto; import feast.core.FeatureSetProto.FeatureSetSpec; +import feast.core.FeatureSetProto.FeatureSpec; import feast.core.StoreProto.Store; import feast.core.StoreProto.Store.RedisConfig; import feast.core.StoreProto.Store.StoreType; import feast.core.StoreProto.Store.Subscription; +import feast.serving.ServingAPIProto.FeatureReference; +import feast.serving.specs.CachedSpecService; +import feast.serving.specs.CoreSpecService; +import feast.serving.specs.FeatureSetRequest; import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; +import java.util.Collections; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -70,10 +77,12 @@ public void setUp() throws IOException { + " host: localhost\n" + " port: 6379\n" + "subscriptions:\n" - + "- name: fs1\n" - + " version: \">0\"\n" - + "- name: fs2\n" - + " version: \">0\""; + + "- project: project\n" + + " name: fs1\n" + + " version: \"*\"\n" + + "- project: project\n" + + " name: fs2\n" + + " version: \"*\""; BufferedWriter writer = new BufferedWriter(new FileWriter(configFile)); writer.write(yamlString); writer.close(); @@ -83,17 +92,49 @@ public void setUp() throws IOException { .setName("SERVING") .setType(StoreType.REDIS) .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379)) - .addSubscriptions(Subscription.newBuilder().setName("fs1").setVersion(">0").build()) - .addSubscriptions(Subscription.newBuilder().setName("fs2").setVersion(">0").build()) + .addSubscriptions( + Subscription.newBuilder() + .setProject("project") + .setName("fs1") + .setVersion("*") + .build()) + .addSubscriptions( + Subscription.newBuilder() + .setProject("project") + .setName("fs2") + .setVersion("*") + .build()) 
.build(); when(coreService.updateStore(UpdateStoreRequest.newBuilder().setStore(store).build())) .thenReturn(UpdateStoreResponse.newBuilder().setStore(store).build()); featureSetSpecs = new LinkedHashMap<>(); - featureSetSpecs.put("fs1:1", FeatureSetSpec.newBuilder().setName("fs1").setVersion(1).build()); - featureSetSpecs.put("fs1:2", FeatureSetSpec.newBuilder().setName("fs1").setVersion(2).build()); - featureSetSpecs.put("fs2:1", FeatureSetSpec.newBuilder().setName("fs2").setVersion(1).build()); + featureSetSpecs.put( + "fs1:1", + FeatureSetSpec.newBuilder() + .setProject("project") + .setName("fs1") + .setVersion(1) + .addFeatures(FeatureSpec.newBuilder().setName("feature")) + .build()); + featureSetSpecs.put( + "fs1:2", + FeatureSetSpec.newBuilder() + .setProject("project") + .setName("fs1") + .setVersion(2) + .addFeatures(FeatureSpec.newBuilder().setName("feature")) + .addFeatures(FeatureSpec.newBuilder().setName("feature2")) + .build()); + featureSetSpecs.put( + "fs2:1", + FeatureSetSpec.newBuilder() + .setProject("project") + .setName("fs2") + .setVersion(1) + .addFeatures(FeatureSpec.newBuilder().setName("feature3")) + .build()); List fs1FeatureSets = Lists.newArrayList( @@ -106,8 +147,9 @@ public void setUp() throws IOException { ListFeatureSetsRequest.newBuilder() .setFilter( ListFeatureSetsRequest.Filter.newBuilder() + .setProject("project") .setFeatureSetName("fs1") - .setFeatureSetVersion(">0") + .setFeatureSetVersion("*") .build()) .build())) .thenReturn(ListFeatureSetsResponse.newBuilder().addAllFeatureSets(fs1FeatureSets).build()); @@ -115,8 +157,9 @@ public void setUp() throws IOException { ListFeatureSetsRequest.newBuilder() .setFilter( ListFeatureSetsRequest.Filter.newBuilder() + .setProject("project") .setFeatureSetName("fs2") - .setFeatureSetVersion(">0") + .setFeatureSetVersion("*") .build()) .build())) .thenReturn(ListFeatureSetsResponse.newBuilder().addAllFeatureSets(fs2FeatureSets).build()); @@ -139,8 +182,108 @@ public void 
shouldPopulateAndReturnStore() { @Test public void shouldPopulateAndReturnFeatureSets() { cachedSpecService.populateCache(); - assertThat(cachedSpecService.getFeatureSet("fs1", 1), equalTo(featureSetSpecs.get("fs1:1"))); - assertThat(cachedSpecService.getFeatureSet("fs1", 2), equalTo(featureSetSpecs.get("fs1:2"))); - assertThat(cachedSpecService.getFeatureSet("fs2", 1), equalTo(featureSetSpecs.get("fs2:1"))); + FeatureReference frv1 = + FeatureReference.newBuilder() + .setProject("project") + .setName("feature") + .setVersion(1) + .build(); + FeatureReference frv2 = + FeatureReference.newBuilder() + .setProject("project") + .setName("feature") + .setVersion(2) + .build(); + + assertThat( + cachedSpecService.getFeatureSets(Collections.singletonList(frv1)), + equalTo( + Lists.newArrayList( + FeatureSetRequest.newBuilder() + .addFeatureReference(frv1) + .setSpec(featureSetSpecs.get("fs1:1")) + .build()))); + assertThat( + cachedSpecService.getFeatureSets(Collections.singletonList(frv2)), + equalTo( + Lists.newArrayList( + FeatureSetRequest.newBuilder() + .addFeatureReference(frv2) + .setSpec(featureSetSpecs.get("fs1:2")) + .build()))); + } + + @Test + public void shouldPopulateAndReturnLatestFeatureSetIfVersionsNotSupplied() { + cachedSpecService.populateCache(); + FeatureReference frv1 = + FeatureReference.newBuilder().setProject("project").setName("feature").build(); + + assertThat( + cachedSpecService.getFeatureSets(Collections.singletonList(frv1)), + equalTo( + Lists.newArrayList( + FeatureSetRequest.newBuilder() + .addFeatureReference(frv1) + .setSpec(featureSetSpecs.get("fs1:2")) + .build()))); + } + + @Test + public void shouldPopulateAndReturnFeatureSetsGivenFeaturesFromDifferentFeatureSets() { + cachedSpecService.populateCache(); + FeatureReference frv1 = + FeatureReference.newBuilder() + .setProject("project") + .setName("feature") + .setVersion(1) + .build(); + FeatureReference fr3 = + FeatureReference.newBuilder() + .setProject("project") + 
.setName("feature3") + .setVersion(1) + .build(); + + assertThat( + cachedSpecService.getFeatureSets(Lists.newArrayList(frv1, fr3)), + containsInAnyOrder( + Lists.newArrayList( + FeatureSetRequest.newBuilder() + .addFeatureReference(frv1) + .setSpec(featureSetSpecs.get("fs1:1")) + .build(), + FeatureSetRequest.newBuilder() + .addFeatureReference(fr3) + .setSpec(featureSetSpecs.get("fs2:1")) + .build()) + .toArray())); + } + + @Test + public void shouldPopulateAndReturnFeatureSetGivenFeaturesFromSameFeatureSet() { + cachedSpecService.populateCache(); + FeatureReference fr1 = + FeatureReference.newBuilder() + .setProject("project") + .setName("feature") + .setVersion(2) + .build(); + FeatureReference fr2 = + FeatureReference.newBuilder() + .setProject("project") + .setName("feature2") + .setVersion(2) + .build(); + + assertThat( + cachedSpecService.getFeatureSets(Lists.newArrayList(fr1, fr2)), + equalTo( + Lists.newArrayList( + FeatureSetRequest.newBuilder() + .addFeatureReference(fr1) + .addFeatureReference(fr2) + .setSpec(featureSetSpecs.get("fs1:2")) + .build()))); } } diff --git a/serving/src/test/java/feast/serving/service/RedisServingServiceTest.java b/serving/src/test/java/feast/serving/service/RedisServingServiceTest.java index 890699db6d1..042107e1177 100644 --- a/serving/src/test/java/feast/serving/service/RedisServingServiceTest.java +++ b/serving/src/test/java/feast/serving/service/RedisServingServiceTest.java @@ -27,17 +27,20 @@ import com.google.protobuf.Timestamp; import feast.core.FeatureSetProto.EntitySpec; import feast.core.FeatureSetProto.FeatureSetSpec; -import feast.serving.ServingAPIProto.FeatureSetRequest; +import feast.serving.ServingAPIProto.FeatureReference; import feast.serving.ServingAPIProto.GetOnlineFeaturesRequest; import feast.serving.ServingAPIProto.GetOnlineFeaturesRequest.EntityRow; import feast.serving.ServingAPIProto.GetOnlineFeaturesResponse; import feast.serving.ServingAPIProto.GetOnlineFeaturesResponse.FieldValues; +import 
feast.serving.specs.CachedSpecService; +import feast.serving.specs.FeatureSetRequest; import feast.storage.RedisProto.RedisKey; import feast.types.FeatureRowProto.FeatureRow; import feast.types.FieldProto.Field; import feast.types.ValueProto.Value; import io.opentracing.Tracer; import io.opentracing.Tracer.SpanBuilder; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -70,14 +73,14 @@ public void setUp() { redisKeyList = Lists.newArrayList( RedisKey.newBuilder() - .setFeatureSet("featureSet:1") + .setFeatureSet("project/featureSet:1") .addAllEntities( Lists.newArrayList( Field.newBuilder().setName("entity1").setValue(intValue(1)).build(), Field.newBuilder().setName("entity2").setValue(strValue("a")).build())) .build(), RedisKey.newBuilder() - .setFeatureSet("featureSet:1") + .setFeatureSet("project/featureSet:1") .addAllEntities( Lists.newArrayList( Field.newBuilder().setName("entity1").setValue(intValue(2)).build(), @@ -93,11 +96,17 @@ public void setUp() { public void shouldReturnResponseWithValuesIfKeysPresent() { GetOnlineFeaturesRequest request = GetOnlineFeaturesRequest.newBuilder() - .addFeatureSets( - FeatureSetRequest.newBuilder() - .setName("featureSet") + .addFeatures( + FeatureReference.newBuilder() + .setName("feature1") .setVersion(1) - .addAllFeatureNames(Lists.newArrayList("feature1", "feature2")) + .setProject("project") + .build()) + .addFeatures( + FeatureReference.newBuilder() + .setName("feature2") + .setVersion(1) + .setProject("project") .build()) .addEntityRows( EntityRow.newBuilder() @@ -134,9 +143,16 @@ public void shouldReturnResponseWithValuesIfKeysPresent() { .setFeatureSet("featureSet:1") .build()); + FeatureSetRequest featureSetRequest = + FeatureSetRequest.newBuilder() + .addAllFeatureReferences(request.getFeaturesList()) + .setSpec(getFeatureSetSpec()) + .build(); + List featureRowBytes = 
featureRows.stream().map(AbstractMessageLite::toByteArray).collect(Collectors.toList()); - when(specService.getFeatureSet("featureSet", 1)).thenReturn(getFeatureSetSpec()); + when(specService.getFeatureSets(request.getFeaturesList())) + .thenReturn(Collections.singletonList(featureSetRequest)); when(jedisPool.getResource()).thenReturn(jedis); when(jedis.mget(redisKeyList)).thenReturn(featureRowBytes); when(tracer.buildSpan(ArgumentMatchers.any())).thenReturn(Mockito.mock(SpanBuilder.class)); @@ -147,14 +163,14 @@ public void shouldReturnResponseWithValuesIfKeysPresent() { FieldValues.newBuilder() .putFields("entity1", intValue(1)) .putFields("entity2", strValue("a")) - .putFields("featureSet:1:feature1", intValue(1)) - .putFields("featureSet:1:feature2", intValue(1))) + .putFields("project/feature1:1", intValue(1)) + .putFields("project/feature2:1", intValue(1))) .addFieldValues( FieldValues.newBuilder() .putFields("entity1", intValue(2)) .putFields("entity2", strValue("b")) - .putFields("featureSet:1:feature1", intValue(2)) - .putFields("featureSet:1:feature2", intValue(2))) + .putFields("project/feature1:1", intValue(2)) + .putFields("project/feature2:1", intValue(2))) .build(); GetOnlineFeaturesResponse actual = redisServingService.getOnlineFeatures(request); assertThat( @@ -165,11 +181,17 @@ public void shouldReturnResponseWithValuesIfKeysPresent() { public void shouldReturnResponseWithValuesWhenFeatureSetSpecHasUnspecifiedMaxAge() { GetOnlineFeaturesRequest request = GetOnlineFeaturesRequest.newBuilder() - .addFeatureSets( - FeatureSetRequest.newBuilder() - .setName("featureSet") + .addFeatures( + FeatureReference.newBuilder() + .setName("feature1") + .setVersion(1) + .setProject("project") + .build()) + .addFeatures( + FeatureReference.newBuilder() + .setName("feature2") .setVersion(1) - .addAllFeatureNames(Lists.newArrayList("feature1", "feature2")) + .setProject("project") .build()) .addEntityRows( EntityRow.newBuilder() @@ -206,9 +228,16 @@ public void 
shouldReturnResponseWithValuesWhenFeatureSetSpecHasUnspecifiedMaxAge .setFeatureSet("featureSet:1") .build()); + FeatureSetRequest featureSetRequest = + FeatureSetRequest.newBuilder() + .addAllFeatureReferences(request.getFeaturesList()) + .setSpec(getFeatureSetSpecWithNoMaxAge()) + .build(); + List featureRowBytes = featureRows.stream().map(AbstractMessageLite::toByteArray).collect(Collectors.toList()); - when(specService.getFeatureSet("featureSet", 1)).thenReturn(getFeatureSetSpecWithNoMaxAge()); + when(specService.getFeatureSets(request.getFeaturesList())) + .thenReturn(Collections.singletonList(featureSetRequest)); when(jedisPool.getResource()).thenReturn(jedis); when(jedis.mget(redisKeyList)).thenReturn(featureRowBytes); when(tracer.buildSpan(ArgumentMatchers.any())).thenReturn(Mockito.mock(SpanBuilder.class)); @@ -219,14 +248,14 @@ public void shouldReturnResponseWithValuesWhenFeatureSetSpecHasUnspecifiedMaxAge FieldValues.newBuilder() .putFields("entity1", intValue(1)) .putFields("entity2", strValue("a")) - .putFields("featureSet:1:feature1", intValue(1)) - .putFields("featureSet:1:feature2", intValue(1))) + .putFields("project/feature1:1", intValue(1)) + .putFields("project/feature2:1", intValue(1))) .addFieldValues( FieldValues.newBuilder() .putFields("entity1", intValue(2)) .putFields("entity2", strValue("b")) - .putFields("featureSet:1:feature1", intValue(2)) - .putFields("featureSet:1:feature2", intValue(2))) + .putFields("project/feature1:1", intValue(2)) + .putFields("project/feature2:1", intValue(2))) .build(); GetOnlineFeaturesResponse actual = redisServingService.getOnlineFeatures(request); assertThat( @@ -234,16 +263,17 @@ public void shouldReturnResponseWithValuesWhenFeatureSetSpecHasUnspecifiedMaxAge } @Test - public void shouldReturnResponseWithUnsetValuesIfKeysNotPresent() { - // some keys not present, should have empty values + public void shouldReturnKeysWithoutVersionifNotProvided() { GetOnlineFeaturesRequest request = 
GetOnlineFeaturesRequest.newBuilder() - .addFeatureSets( - FeatureSetRequest.newBuilder() - .setName("featureSet") + .addFeatures( + FeatureReference.newBuilder() + .setName("feature1") .setVersion(1) - .addAllFeatureNames(Lists.newArrayList("feature1", "feature2")) + .setProject("project") .build()) + .addFeatures( + FeatureReference.newBuilder().setName("feature2").setProject("project").build()) .addEntityRows( EntityRow.newBuilder() .setEntityTimestamp(Timestamp.newBuilder().setSeconds(100)) @@ -269,18 +299,26 @@ public void shouldReturnResponseWithUnsetValuesIfKeysNotPresent() { .setFeatureSet("featureSet:1") .build(), FeatureRow.newBuilder() - .setEventTimestamp(Timestamp.newBuilder()) + .setEventTimestamp(Timestamp.newBuilder().setSeconds(100)) .addAllFields( Lists.newArrayList( Field.newBuilder().setName("entity1").setValue(intValue(2)).build(), Field.newBuilder().setName("entity2").setValue(strValue("b")).build(), - Field.newBuilder().setName("feature1").build(), - Field.newBuilder().setName("feature2").build())) + Field.newBuilder().setName("feature1").setValue(intValue(2)).build(), + Field.newBuilder().setName("feature2").setValue(intValue(2)).build())) .setFeatureSet("featureSet:1") .build()); - List featureRowBytes = Lists.newArrayList(featureRows.get(0).toByteArray(), null); - when(specService.getFeatureSet("featureSet", 1)).thenReturn(getFeatureSetSpec()); + FeatureSetRequest featureSetRequest = + FeatureSetRequest.newBuilder() + .addAllFeatureReferences(request.getFeaturesList()) + .setSpec(getFeatureSetSpec()) + .build(); + + List featureRowBytes = + featureRows.stream().map(AbstractMessageLite::toByteArray).collect(Collectors.toList()); + when(specService.getFeatureSets(request.getFeaturesList())) + .thenReturn(Collections.singletonList(featureSetRequest)); when(jedisPool.getResource()).thenReturn(jedis); when(jedis.mget(redisKeyList)).thenReturn(featureRowBytes); 
when(tracer.buildSpan(ArgumentMatchers.any())).thenReturn(Mockito.mock(SpanBuilder.class)); @@ -291,14 +329,14 @@ public void shouldReturnResponseWithUnsetValuesIfKeysNotPresent() { FieldValues.newBuilder() .putFields("entity1", intValue(1)) .putFields("entity2", strValue("a")) - .putFields("featureSet:1:feature1", intValue(1)) - .putFields("featureSet:1:feature2", intValue(1))) + .putFields("project/feature1:1", intValue(1)) + .putFields("project/feature2", intValue(1))) .addFieldValues( FieldValues.newBuilder() .putFields("entity1", intValue(2)) .putFields("entity2", strValue("b")) - .putFields("featureSet:1:feature1", Value.newBuilder().build()) - .putFields("featureSet:1:feature2", Value.newBuilder().build())) + .putFields("project/feature1:1", intValue(2)) + .putFields("project/feature2", intValue(2))) .build(); GetOnlineFeaturesResponse actual = redisServingService.getOnlineFeatures(request); assertThat( @@ -306,16 +344,21 @@ public void shouldReturnResponseWithUnsetValuesIfKeysNotPresent() { } @Test - public void shouldReturnResponseWithUnsetValuesIfMaxAgeIsExceeded() { - // keys present, but too stale comp. 
to maxAge set in request + public void shouldReturnResponseWithUnsetValuesIfKeysNotPresent() { + // some keys not present, should have empty values GetOnlineFeaturesRequest request = GetOnlineFeaturesRequest.newBuilder() - .addFeatureSets( - FeatureSetRequest.newBuilder() - .setName("featureSet") + .addFeatures( + FeatureReference.newBuilder() + .setName("feature1") + .setVersion(1) + .setProject("project") + .build()) + .addFeatures( + FeatureReference.newBuilder() + .setName("feature2") .setVersion(1) - .setMaxAge(Duration.newBuilder().setSeconds(10)) - .addAllFeatureNames(Lists.newArrayList("feature1", "feature2")) + .setProject("project") .build()) .addEntityRows( EntityRow.newBuilder() @@ -342,20 +385,25 @@ public void shouldReturnResponseWithUnsetValuesIfMaxAgeIsExceeded() { .setFeatureSet("featureSet:1") .build(), FeatureRow.newBuilder() - .setEventTimestamp( - Timestamp.newBuilder().setSeconds(50)) // this value should be nulled + .setEventTimestamp(Timestamp.newBuilder()) .addAllFields( Lists.newArrayList( Field.newBuilder().setName("entity1").setValue(intValue(2)).build(), Field.newBuilder().setName("entity2").setValue(strValue("b")).build(), - Field.newBuilder().setName("feature1").setValue(intValue(2)).build(), - Field.newBuilder().setName("feature2").setValue(intValue(2)).build())) + Field.newBuilder().setName("feature1").build(), + Field.newBuilder().setName("feature2").build())) .setFeatureSet("featureSet:1") .build()); - List featureRowBytes = - featureRows.stream().map(AbstractMessageLite::toByteArray).collect(Collectors.toList()); - when(specService.getFeatureSet("featureSet", 1)).thenReturn(getFeatureSetSpec()); + FeatureSetRequest featureSetRequest = + FeatureSetRequest.newBuilder() + .addAllFeatureReferences(request.getFeaturesList()) + .setSpec(getFeatureSetSpec()) + .build(); + + List featureRowBytes = Lists.newArrayList(featureRows.get(0).toByteArray(), null); + when(specService.getFeatureSets(request.getFeaturesList())) + 
.thenReturn(Collections.singletonList(featureSetRequest)); when(jedisPool.getResource()).thenReturn(jedis); when(jedis.mget(redisKeyList)).thenReturn(featureRowBytes); when(tracer.buildSpan(ArgumentMatchers.any())).thenReturn(Mockito.mock(SpanBuilder.class)); @@ -366,14 +414,14 @@ public void shouldReturnResponseWithUnsetValuesIfMaxAgeIsExceeded() { FieldValues.newBuilder() .putFields("entity1", intValue(1)) .putFields("entity2", strValue("a")) - .putFields("featureSet:1:feature1", intValue(1)) - .putFields("featureSet:1:feature2", intValue(1))) + .putFields("project/feature1:1", intValue(1)) + .putFields("project/feature2:1", intValue(1))) .addFieldValues( FieldValues.newBuilder() .putFields("entity1", intValue(2)) .putFields("entity2", strValue("b")) - .putFields("featureSet:1:feature1", Value.newBuilder().build()) - .putFields("featureSet:1:feature2", Value.newBuilder().build())) + .putFields("project/feature1:1", Value.newBuilder().build()) + .putFields("project/feature2:1", Value.newBuilder().build())) .build(); GetOnlineFeaturesResponse actual = redisServingService.getOnlineFeatures(request); assertThat( @@ -381,15 +429,21 @@ public void shouldReturnResponseWithUnsetValuesIfMaxAgeIsExceeded() { } @Test - public void shouldReturnResponseWithUnsetValuesIfDefaultMaxAgeIsExceeded() { - // keys present, but too stale comp. to maxAge set in featureSetSpec + public void shouldReturnResponseWithUnsetValuesIfMaxAgeIsExceeded() { + // keys present, but too stale comp. 
to maxAge GetOnlineFeaturesRequest request = GetOnlineFeaturesRequest.newBuilder() - .addFeatureSets( - FeatureSetRequest.newBuilder() - .setName("featureSet") + .addFeatures( + FeatureReference.newBuilder() + .setName("feature1") + .setVersion(1) + .setProject("project") + .build()) + .addFeatures( + FeatureReference.newBuilder() + .setName("feature2") .setVersion(1) - .addAllFeatureNames(Lists.newArrayList("feature1", "feature2")) + .setProject("project") .build()) .addEntityRows( EntityRow.newBuilder() @@ -417,7 +471,7 @@ public void shouldReturnResponseWithUnsetValuesIfDefaultMaxAgeIsExceeded() { .build(), FeatureRow.newBuilder() .setEventTimestamp( - Timestamp.newBuilder().setSeconds(0)) // this value should be nulled + Timestamp.newBuilder().setSeconds(50)) // this value should be nulled .addAllFields( Lists.newArrayList( Field.newBuilder().setName("entity1").setValue(intValue(2)).build(), @@ -427,9 +481,18 @@ public void shouldReturnResponseWithUnsetValuesIfDefaultMaxAgeIsExceeded() { .setFeatureSet("featureSet:1") .build()); + FeatureSetSpec spec = + getFeatureSetSpec().toBuilder().setMaxAge(Duration.newBuilder().setSeconds(1)).build(); + FeatureSetRequest featureSetRequest = + FeatureSetRequest.newBuilder() + .addAllFeatureReferences(request.getFeaturesList()) + .setSpec(spec) + .build(); + List featureRowBytes = featureRows.stream().map(AbstractMessageLite::toByteArray).collect(Collectors.toList()); - when(specService.getFeatureSet("featureSet", 1)).thenReturn(getFeatureSetSpec()); + when(specService.getFeatureSets(request.getFeaturesList())) + .thenReturn(Collections.singletonList(featureSetRequest)); when(jedisPool.getResource()).thenReturn(jedis); when(jedis.mget(redisKeyList)).thenReturn(featureRowBytes); when(tracer.buildSpan(ArgumentMatchers.any())).thenReturn(Mockito.mock(SpanBuilder.class)); @@ -440,14 +503,14 @@ public void shouldReturnResponseWithUnsetValuesIfDefaultMaxAgeIsExceeded() { FieldValues.newBuilder() .putFields("entity1", intValue(1)) 
.putFields("entity2", strValue("a")) - .putFields("featureSet:1:feature1", intValue(1)) - .putFields("featureSet:1:feature2", intValue(1))) + .putFields("project/feature1:1", intValue(1)) + .putFields("project/feature2:1", intValue(1))) .addFieldValues( FieldValues.newBuilder() .putFields("entity1", intValue(2)) .putFields("entity2", strValue("b")) - .putFields("featureSet:1:feature1", Value.newBuilder().build()) - .putFields("featureSet:1:feature2", Value.newBuilder().build())) + .putFields("project/feature1:1", Value.newBuilder().build()) + .putFields("project/feature2:1", Value.newBuilder().build())) .build(); GetOnlineFeaturesResponse actual = redisServingService.getOnlineFeatures(request); assertThat( @@ -459,11 +522,11 @@ public void shouldFilterOutUndesiredRows() { // requested rows less than the rows available in the featureset GetOnlineFeaturesRequest request = GetOnlineFeaturesRequest.newBuilder() - .addFeatureSets( - FeatureSetRequest.newBuilder() - .setName("featureSet") + .addFeatures( + FeatureReference.newBuilder() + .setName("feature1") .setVersion(1) - .addAllFeatureNames(Lists.newArrayList("feature1")) + .setProject("project") .build()) .addEntityRows( EntityRow.newBuilder() @@ -500,9 +563,16 @@ public void shouldFilterOutUndesiredRows() { .setFeatureSet("featureSet:1") .build()); + FeatureSetRequest featureSetRequest = + FeatureSetRequest.newBuilder() + .addAllFeatureReferences(request.getFeaturesList()) + .setSpec(getFeatureSetSpec()) + .build(); + List featureRowBytes = featureRows.stream().map(AbstractMessageLite::toByteArray).collect(Collectors.toList()); - when(specService.getFeatureSet("featureSet", 1)).thenReturn(getFeatureSetSpec()); + when(specService.getFeatureSets(request.getFeaturesList())) + .thenReturn(Collections.singletonList(featureSetRequest)); when(jedisPool.getResource()).thenReturn(jedis); when(jedis.mget(redisKeyList)).thenReturn(featureRowBytes); 
when(tracer.buildSpan(ArgumentMatchers.any())).thenReturn(Mockito.mock(SpanBuilder.class)); @@ -513,12 +583,12 @@ public void shouldFilterOutUndesiredRows() { FieldValues.newBuilder() .putFields("entity1", intValue(1)) .putFields("entity2", strValue("a")) - .putFields("featureSet:1:feature1", intValue(1))) + .putFields("project/feature1:1", intValue(1))) .addFieldValues( FieldValues.newBuilder() .putFields("entity1", intValue(2)) .putFields("entity2", strValue("b")) - .putFields("featureSet:1:feature1", intValue(2))) + .putFields("project/feature1:1", intValue(2))) .build(); GetOnlineFeaturesResponse actual = redisServingService.getOnlineFeatures(request); assertThat( @@ -541,6 +611,9 @@ private Value strValue(String val) { private FeatureSetSpec getFeatureSetSpec() { return FeatureSetSpec.newBuilder() + .setProject("project") + .setName("featureSet") + .setVersion(1) .addEntities(EntitySpec.newBuilder().setName("entity1")) .addEntities(EntitySpec.newBuilder().setName("entity2")) .setMaxAge(Duration.newBuilder().setSeconds(30)) // default @@ -549,6 +622,9 @@ private FeatureSetSpec getFeatureSetSpec() { private FeatureSetSpec getFeatureSetSpecWithNoMaxAge() { return FeatureSetSpec.newBuilder() + .setProject("project") + .setName("featureSet") + .setVersion(1) .addEntities(EntitySpec.newBuilder().setName("entity1")) .addEntities(EntitySpec.newBuilder().setName("entity2")) .setMaxAge(Duration.newBuilder().setSeconds(0).setNanos(0).build()) diff --git a/serving/src/test/java/feast/serving/util/mappers/YamlToProtoMapperTest.java b/serving/src/test/java/feast/serving/util/mappers/YamlToProtoMapperTest.java index 9437aa03334..6f95f5307b2 100644 --- a/serving/src/test/java/feast/serving/util/mappers/YamlToProtoMapperTest.java +++ b/serving/src/test/java/feast/serving/util/mappers/YamlToProtoMapperTest.java @@ -37,15 +37,17 @@ public void shouldConvertYamlToProto() throws IOException { + " host: localhost\n" + " port: 6379\n" + "subscriptions:\n" - + "- name: \"*\"\n" - + " 
version: \">0\"\n"; + + "- project: \"*\"\n" + + " name: \"*\"\n" + + " version: \"*\"\n"; Store store = YamlToProtoMapper.yamlToStoreProto(yaml); Store expected = Store.newBuilder() .setName("test") .setType(StoreType.REDIS) .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379)) - .addSubscriptions(Subscription.newBuilder().setName("*").setVersion(">0")) + .addSubscriptions( + Subscription.newBuilder().setProject("*").setName("*").setVersion("*")) .build(); assertThat(store, equalTo(expected)); } diff --git a/tests/e2e/all_types_parquet/all_types_parquet.yaml b/tests/e2e/all_types_parquet/all_types_parquet.yaml index cf5ea702357..2043b6b473d 100644 --- a/tests/e2e/all_types_parquet/all_types_parquet.yaml +++ b/tests/e2e/all_types_parquet/all_types_parquet.yaml @@ -5,28 +5,28 @@ spec: - name: customer_id valueType: INT64 features: - - name: int32_feature + - name: int32_feature_parquet valueType: INT64 - - name: int64_feature + - name: int64_feature_parquet valueType: INT64 - - name: float_feature + - name: float_feature_parquet valueType: DOUBLE - - name: double_feature + - name: double_feature_parquet valueType: DOUBLE - - name: string_feature + - name: string_feature_parquet valueType: STRING - - name: bytes_feature + - name: bytes_feature_parquet valueType: BYTES - - name: int32_list_feature + - name: int32_list_feature_parquet valueType: INT64_LIST - - name: int64_list_feature + - name: int64_list_feature_parquet valueType: INT64_LIST - - name: float_list_feature + - name: float_list_feature_parquet valueType: DOUBLE_LIST - - name: double_list_feature + - name: double_list_feature_parquet valueType: DOUBLE_LIST - - name: string_list_feature + - name: string_list_feature_parquet valueType: STRING_LIST - - name: bytes_list_feature + - name: bytes_list_feature_parquet valueType: BYTES_LIST maxAge: 0s diff --git a/tests/e2e/basic-ingest-redis-serving.py b/tests/e2e/basic-ingest-redis-serving.py index f674363f36b..1aeccfa5a3a 100644 --- 
a/tests/e2e/basic-ingest-redis-serving.py +++ b/tests/e2e/basic-ingest-redis-serving.py @@ -20,9 +20,10 @@ import tempfile import os from feast.feature import Feature +import uuid FLOAT_TOLERANCE = 0.00001 - +PROJECT_NAME = 'basic_' + uuid.uuid4().hex.upper()[0:6] @pytest.fixture(scope='module') def core_url(pytestconfig): @@ -44,6 +45,8 @@ def allow_dirty(pytestconfig): def client(core_url, serving_url, allow_dirty): # Get client for core and serving client = Client(core_url=core_url, serving_url=serving_url) + client.create_project(PROJECT_NAME) + client.set_project(PROJECT_NAME) # Ensure Feast core is active, but empty if not allow_dirty: @@ -76,13 +79,11 @@ def test_basic_register_feature_set_success(client): # Load feature set from file cust_trans_fs_expected = FeatureSet.from_yaml("basic/cust_trans_fs.yaml") + client.set_project(PROJECT_NAME) + # Register feature set client.apply(cust_trans_fs_expected) - # Feast Core needs some time to fully commit the FeatureSet applied - # when there is no existing job yet for the Featureset - time.sleep(15) - cust_trans_fs_actual = client.get_feature_set(name="customer_transactions") assert cust_trans_fs_actual == cust_trans_fs_expected @@ -99,10 +100,13 @@ def test_basic_register_feature_set_success(client): @pytest.mark.timeout(300) @pytest.mark.run(order=11) def test_basic_ingest_success(client, basic_dataframe): + client.set_project(PROJECT_NAME) + cust_trans_fs = client.get_feature_set(name="customer_transactions") # Ingest customer transaction data client.ingest(cust_trans_fs, basic_dataframe) + time.sleep(5) @pytest.mark.timeout(45) @@ -112,6 +116,8 @@ def test_basic_retrieve_online_success(client, basic_dataframe): while True: time.sleep(1) + client.set_project(PROJECT_NAME) + response = client.get_online_features( entity_rows=[ GetOnlineFeaturesRequest.EntityRow( @@ -122,9 +128,9 @@ def test_basic_retrieve_online_success(client, basic_dataframe): } ) ], - feature_ids=[ - 
"customer_transactions:1:daily_transactions", - "customer_transactions:1:total_transactions", + feature_refs=[ + "daily_transactions", + "total_transactions", ], ) # type: GetOnlineFeaturesResponse @@ -133,7 +139,7 @@ def test_basic_retrieve_online_success(client, basic_dataframe): returned_daily_transactions = float( response.field_values[0] - .fields["customer_transactions:1:daily_transactions"] + .fields[PROJECT_NAME + "/daily_transactions"] .float_val ) sent_daily_transactions = float( @@ -216,6 +222,7 @@ def test_all_types_register_feature_set_success(client): Feature(name="bytes_feature", dtype=ValueType.BYTES), Feature(name="bool_feature", dtype=ValueType.BOOL), Feature(name="double_feature", dtype=ValueType.DOUBLE), + Feature(name="double_list_feature", dtype=ValueType.DOUBLE_LIST), Feature(name="float_list_feature", dtype=ValueType.FLOAT_LIST), Feature(name="int64_list_feature", dtype=ValueType.INT64_LIST), Feature(name="int32_list_feature", dtype=ValueType.INT32_LIST), @@ -256,7 +263,7 @@ def test_all_types_ingest_success(client, all_types_dataframe): client.ingest(all_types_fs, all_types_dataframe) -@pytest.mark.timeout(15) +@pytest.mark.timeout(45) @pytest.mark.run(order=22) def test_all_types_retrieve_online_success(client, all_types_dataframe): # Poll serving for feature values until the correct values are returned @@ -270,29 +277,30 @@ def test_all_types_retrieve_online_success(client, all_types_dataframe): int64_val=all_types_dataframe.iloc[0]["user_id"])} ) ], - feature_ids=[ - "all_types:1:float_feature", - "all_types:1:int64_feature", - "all_types:1:int32_feature", - "all_types:1:string_feature", - "all_types:1:bytes_feature", - "all_types:1:bool_feature", - "all_types:1:double_feature", - "all_types:1:float_list_feature", - "all_types:1:int64_list_feature", - "all_types:1:int32_list_feature", - "all_types:1:string_list_feature", - "all_types:1:bytes_list_feature", - "all_types:1:double_list_feature", + feature_refs=[ + "float_feature", + 
"int64_feature", + "int32_feature", + "string_feature", + "bytes_feature", + "bool_feature", + "double_feature", + "float_list_feature", + "int64_list_feature", + "int32_list_feature", + "string_list_feature", + "bytes_list_feature", + "double_list_feature", ], ) # type: GetOnlineFeaturesResponse if response is None: continue + returned_float_list = ( response.field_values[0] - .fields["all_types:1:float_list_feature"] + .fields[PROJECT_NAME+"/float_list_feature"] .float_list_val.val ) @@ -315,8 +323,8 @@ def large_volume_dataframe(): range(ROW_COUNT) ], "customer_id": [offset + inc for inc in range(ROW_COUNT)], - "daily_transactions": [np.random.rand() for _ in range(ROW_COUNT)], - "total_transactions": [256 for _ in range(ROW_COUNT)], + "daily_transactions_large": [np.random.rand() for _ in range(ROW_COUNT)], + "total_transactions_large": [256 for _ in range(ROW_COUNT)], } ) return customer_data @@ -376,9 +384,9 @@ def test_large_volume_retrieve_online_success(client, large_volume_dataframe): } ) ], - feature_ids=[ - "customer_transactions_large:1:daily_transactions", - "customer_transactions_large:1:total_transactions", + feature_refs=[ + "daily_transactions_large", + "total_transactions_large", ], ) # type: GetOnlineFeaturesResponse @@ -387,11 +395,11 @@ def test_large_volume_retrieve_online_success(client, large_volume_dataframe): returned_daily_transactions = float( response.field_values[0] - .fields["customer_transactions_large:1:daily_transactions"] + .fields[PROJECT_NAME + "/daily_transactions_large"] .float_val ) sent_daily_transactions = float( - large_volume_dataframe.iloc[0]["daily_transactions"]) + large_volume_dataframe.iloc[0]["daily_transactions_large"]) if math.isclose( sent_daily_transactions, @@ -410,42 +418,42 @@ def all_types_parquet_file(): "datetime": [datetime.utcnow() for _ in range(COUNT)], "customer_id": [np.int32(random.randint(0, 10000)) for _ in range(COUNT)], - "int32_feature": [np.int32(random.randint(0, 10000)) for _ in + 
"int32_feature_parquet": [np.int32(random.randint(0, 10000)) for _ in range(COUNT)], - "int64_feature": [np.int64(random.randint(0, 10000)) for _ in + "int64_feature_parquet": [np.int64(random.randint(0, 10000)) for _ in range(COUNT)], - "float_feature": [np.float(random.random()) for _ in range(COUNT)], - "double_feature": [np.float64(random.random()) for _ in + "float_feature_parquet": [np.float(random.random()) for _ in range(COUNT)], + "double_feature_parquet": [np.float64(random.random()) for _ in range(COUNT)], - "string_feature": ["one" + str(random.random()) for _ in + "string_feature_parquet": ["one" + str(random.random()) for _ in range(COUNT)], - "bytes_feature": [b"one" for _ in range(COUNT)], - "int32_list_feature": [ + "bytes_feature_parquet": [b"one" for _ in range(COUNT)], + "int32_list_feature_parquet": [ np.array([1, 2, 3, random.randint(0, 10000)], dtype=np.int32) for _ in range(COUNT) ], - "int64_list_feature": [ + "int64_list_feature_parquet": [ np.array([1, random.randint(0, 10000), 3, 4], dtype=np.int64) for _ in range(COUNT) ], - "float_list_feature": [ + "float_list_feature_parquet": [ np.array([1.1, 1.2, 1.3, random.random()], dtype=np.float32) for _ in range(COUNT) ], - "double_list_feature": [ + "double_list_feature_parquet": [ np.array([1.1, 1.2, 1.3, random.random()], dtype=np.float64) for _ in range(COUNT) ], - "string_list_feature": [ + "string_list_feature_parquet": [ np.array(["one", "two" + str(random.random()), "three"]) for _ in range(COUNT) ], - "bytes_list_feature": [ + "bytes_list_feature_parquet": [ np.array([b"one", b"two", b"three"]) for _ in range(COUNT) ], } diff --git a/tests/e2e/bq-batch-retrieval.py b/tests/e2e/bq-batch-retrieval.py index 3458eb4740b..8616dd37a92 100644 --- a/tests/e2e/bq-batch-retrieval.py +++ b/tests/e2e/bq-batch-retrieval.py @@ -4,6 +4,7 @@ from datetime import timedelta from urllib.parse import urlparse +import uuid import numpy as np import pandas as pd import pytest @@ -17,6 +18,10 @@ from 
google.protobuf.duration_pb2 import Duration from pandavro import to_avro +pd.set_option('display.max_columns', None) + +PROJECT_NAME = 'batch_' + uuid.uuid4().hex.upper()[0:6] + @pytest.fixture(scope="module") def core_url(pytestconfig): @@ -42,6 +47,8 @@ def gcs_path(pytestconfig): def client(core_url, serving_url, allow_dirty): # Get client for core and serving client = Client(core_url=core_url, serving_url=serving_url) + client.create_project(PROJECT_NAME) + client.set_project(PROJECT_NAME) # Ensure Feast core is active, but empty if not allow_dirty: @@ -51,16 +58,68 @@ def client(core_url, serving_url, allow_dirty): return client +@pytest.mark.first +def test_apply_all_featuresets(client): + client.set_project(PROJECT_NAME) -def test_get_batch_features_with_file(client): file_fs1 = FeatureSet( - "file_feature_set", - features=[Feature("feature_value", ValueType.STRING)], - entities=[Entity("entity_id", ValueType.INT64)], + "file_feature_set", + features=[Feature("feature_value1", ValueType.STRING)], + entities=[Entity("entity_id", ValueType.INT64)], + max_age=Duration(seconds=100), + ) + client.apply(file_fs1) + + gcs_fs1 = FeatureSet( + "gcs_feature_set", + features=[Feature("feature_value2", ValueType.STRING)], + entities=[Entity("entity_id", ValueType.INT64)], + max_age=Duration(seconds=100), + ) + client.apply(gcs_fs1) + + proc_time_fs = FeatureSet( + "processing_time", + features=[Feature("feature_value3", ValueType.STRING)], + entities=[Entity("entity_id", ValueType.INT64)], + max_age=Duration(seconds=100), + ) + client.apply(proc_time_fs) + + add_cols_fs = FeatureSet( + "additional_columns", + features=[Feature("feature_value4", ValueType.STRING)], + entities=[Entity("entity_id", ValueType.INT64)], + max_age=Duration(seconds=100), + ) + client.apply(add_cols_fs) + + historical_fs = FeatureSet( + "historical", + features=[Feature("feature_value5", ValueType.STRING)], + entities=[Entity("entity_id", ValueType.INT64)], + max_age=Duration(seconds=100), + ) 
+ client.apply(historical_fs) + + fs1 = FeatureSet( + "feature_set_1", + features=[Feature("feature_value6", ValueType.STRING)], + entities=[Entity("entity_id", ValueType.INT64)], + max_age=Duration(seconds=100), + ) + + fs2 = FeatureSet( + "feature_set_2", + features=[Feature("other_feature_value7", ValueType.INT64)], + entities=[Entity("other_entity_id", ValueType.INT64)], max_age=Duration(seconds=100), ) + client.apply(fs1) + client.apply(fs2) - client.apply(file_fs1) + +def test_get_batch_features_with_file(client): file_fs1 = client.get_feature_set(name="file_feature_set", version=1) N_ROWS = 10 @@ -69,7 +128,7 @@ def test_get_batch_features_with_file(client): { "datetime": [time_offset] * N_ROWS, "entity_id": [i for i in range(N_ROWS)], - "feature_value": [f"{i}" for i in range(N_ROWS)], + "feature_value1": [f"{i}" for i in range(N_ROWS)], } ) client.ingest(file_fs1, features_1_df) @@ -77,27 +136,20 @@ def test_get_batch_features_with_file(client): # Rename column (datetime -> event_timestamp) features_1_df = features_1_df.rename(columns={"datetime": "event_timestamp"}) - to_avro(df=features_1_df, file_path_or_buffer="file_feature_set.avro") + to_avro(df=features_1_df[["event_timestamp", "entity_id"]], file_path_or_buffer="file_feature_set.avro") + time.sleep(15) feature_retrieval_job = client.get_batch_features( - entity_rows="file://file_feature_set.avro", feature_ids=["file_feature_set:1:feature_value"] + entity_rows="file://file_feature_set.avro", feature_refs=[f"{PROJECT_NAME}/feature_value1:1"] ) output = feature_retrieval_job.to_dataframe() print(output.head()) - assert output["entity_id"].to_list() == [int(i) for i in output["file_feature_set_v1_feature_value"].to_list()] + assert output["entity_id"].to_list() == [int(i) for i in output["feature_value1"].to_list()] def test_get_batch_features_with_gs_path(client, gcs_path): - gcs_fs1 = FeatureSet( - "gcs_feature_set", - features=[Feature("feature_value", ValueType.STRING)], - 
entities=[Entity("entity_id", ValueType.INT64)], - max_age=Duration(seconds=100), - ) - - client.apply(gcs_fs1) gcs_fs1 = client.get_feature_set(name="gcs_feature_set", version=1) N_ROWS = 10 @@ -106,7 +158,7 @@ def test_get_batch_features_with_gs_path(client, gcs_path): { "datetime": [time_offset] * N_ROWS, "entity_id": [i for i in range(N_ROWS)], - "feature_value": [f"{i}" for i in range(N_ROWS)], + "feature_value2": [f"{i}" for i in range(N_ROWS)], } ) client.ingest(gcs_fs1, features_1_df) @@ -116,7 +168,7 @@ def test_get_batch_features_with_gs_path(client, gcs_path): # Output file to local file_name = "gcs_feature_set.avro" - to_avro(df=features_1_df, file_path_or_buffer=file_name) + to_avro(df=features_1_df[["event_timestamp", "entity_id"]], file_path_or_buffer=file_name) uri = urlparse(gcs_path) bucket = uri.hostname @@ -129,26 +181,19 @@ def test_get_batch_features_with_gs_path(client, gcs_path): blob = bucket.blob(remote_path) blob.upload_from_filename(file_name) + time.sleep(15) feature_retrieval_job = client.get_batch_features( entity_rows=f"{gcs_path}{ts}/*", - feature_ids=["gcs_feature_set:1:feature_value"] + feature_refs=[f"{PROJECT_NAME}/feature_value2:1"] ) output = feature_retrieval_job.to_dataframe() print(output.head()) - assert output["entity_id"].to_list() == [int(i) for i in output["gcs_feature_set_v1_feature_value"].to_list()] + assert output["entity_id"].to_list() == [int(i) for i in output["feature_value2"].to_list()] def test_order_by_creation_time(client): - proc_time_fs = FeatureSet( - "processing_time", - features=[Feature("feature_value", ValueType.STRING)], - entities=[Entity("entity_id", ValueType.INT64)], - max_age=Duration(seconds=100), - ) - client.apply(proc_time_fs) - time.sleep(10) proc_time_fs = client.get_feature_set(name="processing_time", version=1) time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) @@ -157,43 +202,35 @@ def test_order_by_creation_time(client): { "datetime": [time_offset] * N_ROWS, "entity_id": [i for 
i in range(N_ROWS)], - "feature_value": ["WRONG"] * N_ROWS, + "feature_value3": ["WRONG"] * N_ROWS, } ) correct_df = pd.DataFrame( { "datetime": [time_offset] * N_ROWS, "entity_id": [i for i in range(N_ROWS)], - "feature_value": ["CORRECT"] * N_ROWS, + "feature_value3": ["CORRECT"] * N_ROWS, } ) client.ingest(proc_time_fs, incorrect_df) - time.sleep(10) + time.sleep(15) client.ingest(proc_time_fs, correct_df) feature_retrieval_job = client.get_batch_features( - entity_rows=incorrect_df[["datetime", "entity_id"]], feature_ids=["processing_time:1:feature_value"] + entity_rows=incorrect_df[["datetime", "entity_id"]], feature_refs=[f"{PROJECT_NAME}/feature_value3:1"] ) output = feature_retrieval_job.to_dataframe() print(output.head()) - assert output["processing_time_v1_feature_value"].to_list() == ["CORRECT"] * N_ROWS + assert output["feature_value3"].to_list() == ["CORRECT"] * N_ROWS def test_additional_columns_in_entity_table(client): - add_cols_fs = FeatureSet( - "additional_columns", - features=[Feature("feature_value", ValueType.STRING)], - entities=[Entity("entity_id", ValueType.INT64)], - max_age=Duration(seconds=100), - ) - client.apply(add_cols_fs) - time.sleep(10) add_cols_fs = client.get_feature_set(name="additional_columns", version=1) N_ROWS = 10 time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) features_df = pd.DataFrame( - {"datetime": [time_offset] * N_ROWS, "entity_id": [i for i in range(N_ROWS)], "feature_value": ["abc"] * N_ROWS} + {"datetime": [time_offset] * N_ROWS, "entity_id": [i for i in range(N_ROWS)], "feature_value4": ["abc"] * N_ROWS} ) client.ingest(add_cols_fs, features_df) @@ -205,26 +242,20 @@ def test_additional_columns_in_entity_table(client): "additional_float_col": [random.random() for i in range(N_ROWS)], } ) + + time.sleep(15) feature_retrieval_job = client.get_batch_features( - entity_rows=entity_df, feature_ids=["additional_columns:1:feature_value"] + entity_rows=entity_df, 
feature_refs=[f"{PROJECT_NAME}/feature_value4:1"] ) - output = feature_retrieval_job.to_dataframe() - print(output.head()) + output = feature_retrieval_job.to_dataframe().sort_values(by=["entity_id"]) + print(output.head(10)) assert np.allclose(output["additional_float_col"], entity_df["additional_float_col"]) assert output["additional_string_col"].to_list() == entity_df["additional_string_col"].to_list() - assert output["additional_columns_v1_feature_value"].to_list() == features_df["feature_value"].to_list() + assert output["feature_value4"].to_list() == features_df["feature_value4"].to_list() def test_point_in_time_correctness_join(client): - historical_fs = FeatureSet( - "historical", - features=[Feature("feature_value", ValueType.STRING)], - entities=[Entity("entity_id", ValueType.INT64)], - max_age=Duration(seconds=100), - ) - client.apply(historical_fs) - time.sleep(10) historical_fs = client.get_feature_set(name="historical", version=1) time_offset = datetime.utcnow().replace(tzinfo=pytz.utc) @@ -238,7 +269,7 @@ def test_point_in_time_correctness_join(client): ] * N_EXAMPLES, "entity_id": [i for i in range(N_EXAMPLES) for _ in range(3)], - "feature_value": ["WRONG", "WRONG", "CORRECT"] * N_EXAMPLES, + "feature_value5": ["WRONG", "WRONG", "CORRECT"] * N_EXAMPLES, } ) entity_df = pd.DataFrame( @@ -247,32 +278,16 @@ def test_point_in_time_correctness_join(client): client.ingest(historical_fs, historical_df) - feature_retrieval_job = client.get_batch_features(entity_rows=entity_df, feature_ids=["historical:1:feature_value"]) + time.sleep(15) + feature_retrieval_job = client.get_batch_features(entity_rows=entity_df, feature_refs=[f"{PROJECT_NAME}/feature_value5"]) output = feature_retrieval_job.to_dataframe() print(output.head()) - assert output["historical_v1_feature_value"].to_list() == ["CORRECT"] * N_EXAMPLES + assert output["feature_value5"].to_list() == ["CORRECT"] * N_EXAMPLES def test_multiple_featureset_joins(client): - fs1 = FeatureSet( - 
"feature_set_1", - features=[Feature("feature_value", ValueType.STRING)], - entities=[Entity("entity_id", ValueType.INT64)], - max_age=Duration(seconds=100), - ) - - fs2 = FeatureSet( - "feature_set_2", - features=[Feature("other_feature_value", ValueType.INT64)], - entities=[Entity("other_entity_id", ValueType.INT64)], - max_age=Duration(seconds=100), - ) - - client.apply(fs1) fs1 = client.get_feature_set(name="feature_set_1", version=1) - - client.apply(fs2) fs2 = client.get_feature_set(name="feature_set_2", version=1) N_ROWS = 10 @@ -281,7 +296,7 @@ def test_multiple_featureset_joins(client): { "datetime": [time_offset] * N_ROWS, "entity_id": [i for i in range(N_ROWS)], - "feature_value": [f"{i}" for i in range(N_ROWS)], + "feature_value6": [f"{i}" for i in range(N_ROWS)], } ) client.ingest(fs1, features_1_df) @@ -290,7 +305,7 @@ def test_multiple_featureset_joins(client): { "datetime": [time_offset] * N_ROWS, "other_entity_id": [i for i in range(N_ROWS)], - "other_feature_value": [i for i in range(N_ROWS)], + "other_feature_value7": [i for i in range(N_ROWS)], } ) client.ingest(fs2, features_2_df) @@ -302,11 +317,13 @@ def test_multiple_featureset_joins(client): "other_entity_id": [N_ROWS - 1 - i for i in range(N_ROWS)], } ) + + time.sleep(15) feature_retrieval_job = client.get_batch_features( - entity_rows=entity_df, feature_ids=["feature_set_1:1:feature_value", "feature_set_2:1:other_feature_value"] + entity_rows=entity_df, feature_refs=[f"{PROJECT_NAME}/feature_value6:1", f"{PROJECT_NAME}/other_feature_value7:1"] ) output = feature_retrieval_job.to_dataframe() print(output.head()) - assert output["entity_id"].to_list() == [int(i) for i in output["feature_set_1_v1_feature_value"].to_list()] - assert output["other_entity_id"].to_list() == output["feature_set_2_v1_other_feature_value"].to_list() + assert output["entity_id"].to_list() == [int(i) for i in output["feature_value6"].to_list()] + assert output["other_entity_id"].to_list() == 
output["other_feature_value7"].to_list() diff --git a/tests/e2e/large_volume/cust_trans_large_fs.yaml b/tests/e2e/large_volume/cust_trans_large_fs.yaml index 54bf4cac28e..7f361513927 100644 --- a/tests/e2e/large_volume/cust_trans_large_fs.yaml +++ b/tests/e2e/large_volume/cust_trans_large_fs.yaml @@ -5,8 +5,8 @@ spec: - name: customer_id valueType: INT64 features: - - name: daily_transactions + - name: daily_transactions_large valueType: FLOAT - - name: total_transactions + - name: total_transactions_large valueType: FLOAT maxAge: 3600s From 9324b7bc84de5a0f2d928660a4d540f3d6bfba96 Mon Sep 17 00:00:00 2001 From: David Heryanto Date: Fri, 27 Dec 2019 18:06:23 +0800 Subject: [PATCH 17/18] Use fixed 'dev' revision for test-e2e-batch (#395) --- .prow/scripts/test-end-to-end-batch.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.prow/scripts/test-end-to-end-batch.sh b/.prow/scripts/test-end-to-end-batch.sh index ac282a0c33a..ee7885b009b 100755 --- a/.prow/scripts/test-end-to-end-batch.sh +++ b/.prow/scripts/test-end-to-end-batch.sh @@ -3,6 +3,8 @@ set -e set -o pipefail +export REVISION=dev + if ! cat /etc/*release | grep -q stretch; then echo ${BASH_SOURCE} only supports Debian stretch. echo Please change your operating system to use this script. 
@@ -90,7 +92,7 @@ Building jars for Feast --output-dir /root/ # Build jars for Feast -mvn --quiet --batch-mode --define skipTests=true clean package +mvn --quiet --batch-mode --define skipTests=true --define revision=$REVISION clean package echo " ============================================================ @@ -142,7 +144,7 @@ management: enabled: false EOF -nohup java -jar core/target/feast-core-0.3.2-SNAPSHOT.jar \ +nohup java -jar core/target/feast-core-$REVISION.jar \ --spring.config.location=file:///tmp/core.application.yml \ &> /var/log/feast-core.log & sleep 35 @@ -196,7 +198,7 @@ spring: web-environment: false EOF -nohup java -jar serving/target/feast-serving-0.3.2-SNAPSHOT.jar \ +nohup java -jar serving/target/feast-serving-$REVISION.jar \ --spring.config.location=file:///tmp/serving.warehouse.application.yml \ &> /var/log/feast-serving-warehouse.log & sleep 15 From 00d5c5423580811ddd6d0f25562833ea5b74f3b7 Mon Sep 17 00:00:00 2001 From: Willem Pienaar Date: Sat, 28 Dec 2019 05:24:52 +0000 Subject: [PATCH 18/18] GitBook: [master] one page and one asset modified --- docs/SUMMARY.md | 1 + docs/docs/.gitbook/assets/basic-architecture-diagram.svg | 1 + 2 files changed, 2 insertions(+) create mode 100644 docs/docs/.gitbook/assets/basic-architecture-diagram.svg diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index 6eeec90fc41..964dfaf34b7 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -13,4 +13,5 @@ ## Reference * [Python SDK](https://api.docs.feast.dev/python/) +* [Go SDK](https://godoc.org/github.com/gojek/feast/sdk/go) diff --git a/docs/docs/.gitbook/assets/basic-architecture-diagram.svg b/docs/docs/.gitbook/assets/basic-architecture-diagram.svg new file mode 100644 index 00000000000..b707f490461 --- /dev/null +++ b/docs/docs/.gitbook/assets/basic-architecture-diagram.svg @@ -0,0 +1 @@ + \ No newline at end of file