Skip to content

Commit de4313b

Browse files
authored
Add historical retrieval via job service (feast-dev#1107)
* historical retrieval via job service

  Signed-off-by: Oleg Avdeev <oleg.v.avdeev@gmail.com>

* rename things to address comments

  Signed-off-by: Oleg Avdeev <oleg.v.avdeev@gmail.com>

* add test

  Signed-off-by: Oleg Avdeev <oleg.v.avdeev@gmail.com>
1 parent 24de261 commit de4313b

12 files changed

Lines changed: 389 additions & 82 deletions

File tree

protos/feast/core/JobService.proto

Lines changed: 21 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ option java_package = "feast.proto.core";
2323

2424
import "google/protobuf/timestamp.proto";
2525
import "feast/core/DataSource.proto";
26-
import "feast/serving/ServingService.proto";
26+
2727

2828
service JobService {
2929
// Start job to ingest data from offline store into online store
@@ -38,8 +38,8 @@ service JobService {
3838
// List all types of jobs
3939
rpc ListJobs (ListJobsRequest) returns (ListJobsResponse);
4040

41-
// Stop a single job
42-
rpc StopJob (StopJobRequest) returns (StopJobResponse);
41+
// Cancel a single job
42+
rpc CancelJob (CancelJobRequest) returns (CancelJobResponse);
4343

4444
// Get details of a single job
4545
rpc GetJob (GetJobRequest) returns (GetJobResponse);
@@ -48,9 +48,9 @@ service JobService {
4848

4949
enum JobType {
5050
INVALID_JOB = 0;
51-
OFFLINE_TO_ONLINE_JOB = 1;
52-
STREAM_TO_ONLINE_JOB = 2;
53-
EXPORT_JOB = 4;
51+
BATCH_INGESTION_JOB = 1;
52+
STREAM_INGESTION_JOB = 2;
53+
RETRIEVAL_JOB = 4;
5454
}
5555

5656
enum JobStatus {
@@ -68,42 +68,26 @@ enum JobStatus {
6868
message Job {
6969
// Identifier of the Job
7070
string id = 1;
71-
// External Identifier of the Job assigned by the Spark executor
72-
string external_id = 2;
7371
// Type of the Job
74-
JobType type = 3;
72+
JobType type = 2;
7573
// Current job status
76-
JobStatus status = 4;
77-
// Timestamp on when the job was created
78-
google.protobuf.Timestamp created_timestamp = 5;
79-
// Timestamp on when the job has stopped.
80-
google.protobuf.Timestamp stop_timestamp = 6;
81-
82-
message ExportJobMeta {
83-
// Glob of the exported files that should be retrieved to reconstruct
84-
// the dataframe with retrieved features.
85-
repeated string file_glob = 1;
86-
// The Historical Features request that triggered this export job
87-
GetHistoricalFeaturesRequest request = 2;
74+
JobStatus status = 3;
75+
76+
message RetrievalJobMeta {
77+
string output_location = 4;
8878
}
8979

9080
message OfflineToOnlineMeta {
91-
// Reference to the Feature Table being populated by this job
92-
string project = 1;
93-
string table_name = 2;
9481
}
9582

9683
message StreamToOnlineMeta {
97-
// Reference to the Feature Table being populated by this job
98-
string project = 1;
99-
string table_name = 2;
10084
}
10185

10286
// JobType specific metadata on the job
10387
oneof meta {
104-
ExportJobMeta export = 7;
105-
OfflineToOnlineMeta offline_to_online = 8;
106-
StreamToOnlineMeta stream_to_online = 9;
88+
RetrievalJobMeta retrieval = 5;
89+
OfflineToOnlineMeta batch_ingestion = 6;
90+
StreamToOnlineMeta stream_ingestion = 7;
10791
}
10892
}
10993

@@ -127,13 +111,13 @@ message StartOfflineToOnlineIngestionJobResponse {
127111

128112
message GetHistoricalFeaturesRequest {
129113
// List of features that are being retrieved
130-
repeated feast.serving.FeatureReferenceV2 features = 1;
114+
repeated string feature_refs = 1;
131115

132116
// Batch DataSource that can be used to obtain entity values for historical retrieval.
133117
// For each entity value, a feature value will be retrieved for that value/timestamp
134118
// Only 'BATCH_*' source types are supported.
135119
// Currently only BATCH_FILE source type is supported.
136-
DataSource entities_source = 2;
120+
DataSource entity_source = 2;
137121

138122
// Optional field to specify project name override. If specified, uses the
139123
// given project for retrieval. Overrides the projects specified in
@@ -143,12 +127,13 @@ message GetHistoricalFeaturesRequest {
143127
// Specifies the path in a bucket to write the exported feature data files
144128
// Export to AWS S3 - s3://path/to/features
145129
// Export to GCP GCS - gs://path/to/features
146-
string destination_path = 4;
130+
string output_location = 4;
147131
}
148132

149133
message GetHistoricalFeaturesResponse {
150134
// Export Job with ID assigned by Feast
151135
string id = 1;
136+
string output_file_uri = 2;
152137
}
153138

154139
message StartStreamToOnlineIngestionJobRequest {
@@ -163,13 +148,7 @@ message StartStreamToOnlineIngestionJobResponse {
163148
}
164149

165150
message ListJobsRequest {
166-
Filter filter = 1;
167-
message Filter {
168-
// Filter jobs by job type
169-
JobType type = 1;
170-
// Filter jobs by current job status
171-
JobStatus status = 2;
172-
}
151+
bool include_terminated = 1;
173152
}
174153

175154
message ListJobsResponse {
@@ -184,14 +163,8 @@ message GetJobResponse {
184163
Job job = 1;
185164
}
186165

187-
message RestartJobRequest {
188-
string job_id = 1;
189-
}
190-
191-
message RestartJobResponse {}
192-
193-
message StopJobRequest{
166+
message CancelJobRequest{
194167
string job_id = 1;
195168
}
196169

197-
message StopJobResponse {}
170+
message CancelJobResponse {}

sdk/python/feast/cli.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -471,8 +471,16 @@ def list_jobs():
471471
help="Path to entity df in CSV format. It is assumed to have event_timestamp column and a header.",
472472
required=True,
473473
)
474+
@click.option(
475+
"--entity-df-dtype",
476+
"-d",
477+
help="Dtypes for entity df, in JSON format",
478+
required=False,
479+
)
474480
@click.option("--destination", "-d", help="Destination", default="")
475-
def get_historical_features(features: str, entity_df_path: str, destination: str):
481+
def get_historical_features(
482+
features: str, entity_df_path: str, entity_df_dtype: str, destination: str
483+
):
476484
"""
477485
Get historical features
478486
"""
@@ -481,7 +489,14 @@ def get_historical_features(features: str, entity_df_path: str, destination: str
481489
client = Client()
482490

483491
# TODO: clean this up
484-
entity_df = pandas.read_csv(entity_df_path, sep=None, engine="python",)
492+
493+
if entity_df_dtype:
494+
dtype = json.loads(entity_df_dtype)
495+
entity_df = pandas.read_csv(
496+
entity_df_path, sep=None, engine="python", dtype=dtype
497+
)
498+
else:
499+
entity_df = pandas.read_csv(entity_df_path, sep=None, engine="python")
485500

486501
entity_df["event_timestamp"] = pandas.to_datetime(entity_df["event_timestamp"])
487502

sdk/python/feast/client.py

Lines changed: 51 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
ListProjectsResponse,
6767
)
6868
from feast.core.CoreService_pb2_grpc import CoreServiceStub
69+
from feast.core.JobService_pb2 import GetHistoricalFeaturesRequest
6970
from feast.core.JobService_pb2_grpc import JobServiceStub
7071
from feast.data_format import ParquetFormat
7172
from feast.data_source import BigQuerySource, FileSource
@@ -86,12 +87,15 @@
8687
from feast.online_response import OnlineResponse, _infer_online_entity_rows
8788
from feast.pyspark.abc import RetrievalJob, SparkJob
8889
from feast.pyspark.launcher import (
90+
get_job_by_id,
91+
list_jobs,
8992
stage_dataframe,
9093
start_historical_feature_retrieval_job,
9194
start_historical_feature_retrieval_spark_session,
9295
start_offline_to_online_ingestion,
9396
start_stream_to_online_ingestion,
9497
)
98+
from feast.remote_job import RemoteRetrievalJob
9599
from feast.serving.ServingService_pb2 import (
96100
GetFeastServingInfoRequest,
97101
GetOnlineFeaturesRequestV2,
@@ -183,6 +187,10 @@ def _serving_service(self):
183187
self._serving_service_stub = ServingServiceStub(channel)
184188
return self._serving_service_stub
185189

190+
@property
191+
def _use_job_service(self) -> bool:
192+
return self._config.exists(CONFIG_JOB_SERVICE_URL_KEY)
193+
186194
@property
187195
def _job_service(self):
188196
"""
@@ -204,6 +212,12 @@ def _job_service(self):
204212
self._job_service_service_stub = JobServiceStub(channel)
205213
return self._job_service_service_stub
206214

215+
def _extra_grpc_params(self) -> Dict[str, Any]:
216+
return dict(
217+
timeout=self._config.getint(CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY),
218+
metadata=self._get_grpc_metadata(),
219+
)
220+
207221
@property
208222
def core_url(self) -> str:
209223
"""
@@ -854,6 +868,7 @@ def get_historical_features(
854868
feature_refs: List[str],
855869
entity_source: Union[pd.DataFrame, FileSource, BigQuerySource],
856870
project: str = None,
871+
output_location: str = None,
857872
) -> RetrievalJob:
858873
"""
859874
Launch a historical feature retrieval job.
@@ -894,10 +909,12 @@ def get_historical_features(
894909
feature_tables = self._get_feature_tables_from_feature_refs(
895910
feature_refs, project
896911
)
897-
output_location = os.path.join(
898-
self._config.get(CONFIG_SPARK_HISTORICAL_FEATURE_OUTPUT_LOCATION),
899-
str(uuid.uuid4()),
900-
)
912+
913+
if output_location is None:
914+
output_location = os.path.join(
915+
self._config.get(CONFIG_SPARK_HISTORICAL_FEATURE_OUTPUT_LOCATION),
916+
str(uuid.uuid4()),
917+
)
901918
output_format = self._config.get(CONFIG_SPARK_HISTORICAL_FEATURE_OUTPUT_FORMAT)
902919

903920
if isinstance(entity_source, pd.DataFrame):
@@ -920,13 +937,30 @@ def get_historical_features(
920937
"event_timestamp", ParquetFormat(), entity_staging_uri.geturl(),
921938
)
922939

923-
return start_historical_feature_retrieval_job(
924-
self,
925-
entity_source,
926-
feature_tables,
927-
output_format,
928-
os.path.join(output_location, str(uuid.uuid4())),
929-
)
940+
if self._use_job_service:
941+
response = self._job_service.GetHistoricalFeatures(
942+
GetHistoricalFeaturesRequest(
943+
feature_refs=feature_refs,
944+
entity_source=entity_source.to_proto(),
945+
project=project,
946+
output_location=output_location,
947+
),
948+
**self._extra_grpc_params(),
949+
)
950+
return RemoteRetrievalJob(
951+
self._job_service,
952+
self._extra_grpc_params,
953+
response.id,
954+
output_file_uri=response.output_file_uri,
955+
)
956+
else:
957+
return start_historical_feature_retrieval_job(
958+
self,
959+
entity_source,
960+
feature_tables,
961+
output_format,
962+
os.path.join(output_location, str(uuid.uuid4())),
963+
)
930964

931965
def get_historical_features_df(
932966
self,
@@ -1009,6 +1043,12 @@ def start_stream_to_online_ingestion(
10091043
) -> SparkJob:
10101044
return start_stream_to_online_ingestion(feature_table, extra_jars or [], self)
10111045

1046+
def list_jobs(self, include_terminated: bool) -> List[SparkJob]:
1047+
return list_jobs(include_terminated, self)
1048+
1049+
def get_job_by_id(self, job_id: str) -> SparkJob:
1050+
return get_job_by_id(job_id, self)
1051+
10121052
def stage_dataframe(
10131053
self, df: pd.DataFrame, event_timestamp_column: str,
10141054
) -> FileSource:

sdk/python/feast/constants.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ class AuthProvider(Enum):
116116
# Path to certificate(s) to secure connection to Feast Serving
117117
CONFIG_SERVING_SERVER_SSL_CERT_KEY: "",
118118
# Default connection timeout to Feast Serving and Feast Core (in seconds)
119-
CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY: "3",
119+
CONFIG_GRPC_CONNECTION_TIMEOUT_DEFAULT_KEY: "10",
120120
# Default gRPC connection timeout when sending an ApplyFeatureSet command to
121121
# Feast Core (in seconds)
122122
CONFIG_GRPC_CONNECTION_TIMEOUT_APPLY_KEY: "600",
@@ -133,4 +133,8 @@ class AuthProvider(Enum):
133133
CONFIG_REDIS_SSL: "False",
134134
CONFIG_SPARK_HISTORICAL_FEATURE_OUTPUT_FORMAT: "parquet",
135135
CONFIG_SPARK_EXTRA_OPTIONS: "",
136+
# Enable or disable TLS/SSL to Feast Service
137+
CONFIG_JOB_SERVICE_ENABLE_SSL_KEY: "False",
138+
# Path to certificate(s) to secure connection to Feast Job Service
139+
CONFIG_JOB_SERVICE_SERVER_SSL_CERT_KEY: "",
136140
}

0 commit comments

Comments (0)