Skip to content

Commit 2871ba6

Browse files
authored
Deprecate whitelist for non streaming jobs (#143)
Signed-off-by: Khor Shu Heng <khor.heng@gojek.com>
Co-authored-by: Khor Shu Heng <khor.heng@gojek.com>
1 parent 899574a commit 2871ba6

File tree

8 files changed, with 4 additions and 298 deletions.
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
apiVersion: v1
22
description: Feast Extension for running Ingestion on Spark
33
name: feast-spark
4-
version: 0.2.24
4+
version: 0.2.29

infra/charts/feast-spark/charts/feast-jobservice/templates/_ingress.yaml

Lines changed: 0 additions & 68 deletions
This file was deleted.

infra/charts/feast-spark/charts/feast-jobservice/templates/deployment.yaml

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ spec:
4040
{{- toYaml . | nindent 8 }}
4141
{{- end }}
4242

43-
{{- if or .Values.secrets .Values.sparkOperator.enabled .Values.configMaps .Values.whitelist.enabled }}
43+
{{- if or .Values.secrets .Values.sparkOperator.enabled .Values.configMaps }}
4444
volumes:
4545
{{- end }}
4646
{{- range $secret := .Values.secrets }}
@@ -58,18 +58,13 @@ spec:
5858
configMap:
5959
name: {{ template "feast-jobservice.fullname" . }}-spark-template
6060
{{- end }}
61-
{{- if .Values.whitelist.enabled }}
62-
- name: {{ template "feast-jobservice.fullname" . }}-whitelist
63-
configMap:
64-
name: {{ template "feast-jobservice.fullname" . }}-whitelist
65-
{{- end }}
6661

6762
containers:
6863
- name: {{ .Chart.Name }}
6964
image: {{ .Values.image.repository }}:{{ .Values.image.tag }}
7065
imagePullPolicy: {{ .Values.image.pullPolicy }}
7166

72-
{{- if or .Values.secrets .Values.sparkOperator.enabled .Values.configMaps .Values.whitelist.enabled }}
67+
{{- if or .Values.secrets .Values.sparkOperator.enabled .Values.configMaps }}
7368
volumeMounts:
7469
{{- end }}
7570
{{- range $secret := .Values.secrets }}
@@ -86,10 +81,6 @@ spec:
8681
- name: {{ template "feast-jobservice.fullname" . }}-spark-template
8782
mountPath: "/etc/configs"
8883
{{- end }}
89-
{{- if .Values.whitelist.enabled }}
90-
- name: {{ template "feast-jobservice.fullname" . }}-whitelist
91-
mountPath: "/etc/whitelist"
92-
{{- end }}
9384

9485
env:
9586
{{- if .Values.sparkOperator.enabled }}
@@ -102,10 +93,6 @@ spec:
10293
- name: FEAST_SPARK_K8S_HISTORICAL_RETRIEVAL_TEMPLATE_PATH
10394
value: /etc/configs/historicalJobTemplate.yaml
10495
{{- end }}
105-
{{- if .Values.whitelist.enabled }}
106-
- name: FEAST_WHITELISTED_FEATURE_TABLES_PATH
107-
value: /etc/whitelist/whitelist.txt
108-
{{- end }}
10996
{{- range $key, $value := .Values.envOverrides }}
11097
- name: {{ printf "%s" $key | replace "." "_" | upper | quote }}
11198
{{- if eq (kindOf $value) "map" }}

infra/charts/feast-spark/charts/feast-jobservice/templates/ingress.yaml

Lines changed: 0 additions & 7 deletions
This file was deleted.

infra/charts/feast-spark/charts/feast-jobservice/templates/whitelist.yaml

Lines changed: 0 additions & 18 deletions
This file was deleted.

infra/charts/feast-spark/charts/feast-jobservice/values.yaml

Lines changed: 0 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -81,48 +81,6 @@ service:
8181
# service.grpc.nodePort -- Port number that each cluster node will listen to
8282
nodePort:
8383

84-
ingress:
85-
grpc:
86-
# ingress.grpc.enabled -- Flag to create an ingress resource for the service
87-
enabled: false
88-
# ingress.grpc.class -- Which ingress controller to use
89-
class: nginx
90-
# ingress.grpc.hosts -- List of hostnames to match when routing requests
91-
hosts: []
92-
# ingress.grpc.annotations -- Extra annotations for the ingress
93-
annotations: {}
94-
https:
95-
# ingress.grpc.https.enabled -- Flag to enable HTTPS
96-
enabled: true
97-
# ingress.grpc.https.secretNames -- Map of hostname to TLS secret name
98-
secretNames: {}
99-
# ingress.grpc.whitelist -- Allowed client IP source ranges
100-
whitelist: ""
101-
auth:
102-
# ingress.grpc.auth.enabled -- Flag to enable auth
103-
enabled: false
104-
http:
105-
# ingress.http.enabled -- Flag to create an ingress resource for the service
106-
enabled: false
107-
# ingress.http.class -- Which ingress controller to use
108-
class: nginx
109-
# ingress.http.hosts -- List of hostnames to match when routing requests
110-
hosts: []
111-
# ingress.http.annotations -- Extra annotations for the ingress
112-
annotations: {}
113-
https:
114-
# ingress.http.https.enabled -- Flag to enable HTTPS
115-
enabled: true
116-
# ingress.http.https.secretNames -- Map of hostname to TLS secret name
117-
secretNames: {}
118-
# ingress.http.whitelist -- Allowed client IP source ranges
119-
whitelist: ""
120-
auth:
121-
# ingress.http.auth.enabled -- Flag to enable auth
122-
enabled: false
123-
# ingress.http.auth.authUrl -- URL to an existing authentication service
124-
authUrl: http://auth-server.auth-ns.svc.cluster.local/auth
125-
12684
# resources -- CPU/memory [resource requests/limit](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#resource-requests-and-limits-of-pod-and-container)
12785
resources: {}
12886

@@ -140,10 +98,3 @@ secrets: []
14098

14199
# configMaps -- Arbitrary config maps to be mounted on the job service pod, on /etc/configs/<config name>
142100
configMaps: []
143-
144-
# whitelist -- If enabled, only <project>:<feature table> in the whitelist can be ingested
145-
whitelist:
146-
# whitelist.enabled -- Flag to create and mount whitelist as configmap
147-
enabled: false
148-
# whitelist.featureTables -- Whitelisted feature tables, in the form of <project>:<feature table>
149-
featureTables: []

python/feast_spark/job_service.py

Lines changed: 1 addition & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -41,17 +41,12 @@
4141
)
4242
from feast_spark.constants import ConfigOptions as opt
4343
from feast_spark.lock_manager import JobOperation, JobOperationLock
44-
from feast_spark.metrics import (
45-
job_schedule_count,
46-
job_submission_count,
47-
job_whitelist_failure_count,
48-
)
44+
from feast_spark.metrics import job_schedule_count, job_submission_count
4945
from feast_spark.pyspark.abc import (
5046
BatchIngestionJob,
5147
RetrievalJob,
5248
SparkJob,
5349
SparkJobStatus,
54-
SparkJobType,
5550
StreamIngestionJob,
5651
)
5752
from feast_spark.pyspark.launcher import (
@@ -114,7 +109,6 @@ def _job_to_proto(spark_job: SparkJob) -> JobProto:
114109
class JobServiceServicer(JobService_pb2_grpc.JobServiceServicer):
115110
def __init__(self, client: Client):
116111
self.client = client
117-
self._whitelisted_project_feature_table_pairs_cached: List[Tuple[str, str]] = []
118112

119113
@property
120114
def _whitelisted_projects(self) -> Optional[List[str]]:
@@ -123,81 +117,20 @@ def _whitelisted_projects(self) -> Optional[List[str]]:
123117
return whitelisted_projects.split(",")
124118
return None
125119

126-
@property
127-
def _whitelisted_project_feature_table_pairs(
128-
self,
129-
) -> Optional[List[Tuple[str, str]]]:
130-
if self._whitelisted_project_feature_table_pairs_cached:
131-
return self._whitelisted_project_feature_table_pairs_cached
132-
133-
if self.client.config.exists(opt.WHITELISTED_FEATURE_TABLES_PATH):
134-
_whitelisted_feature_tables = self.client.config.get(
135-
opt.WHITELISTED_FEATURE_TABLES_PATH
136-
)
137-
with open(str(_whitelisted_feature_tables), "r") as whitelist:
138-
whitelist.seek(0)
139-
whitelisted_feature_tables = [
140-
(line.strip().split(":")[0], line.strip().split(":")[-1])
141-
for line in whitelist.readlines()
142-
]
143-
self._whitelisted_project_feature_table_pairs_cached = (
144-
whitelisted_feature_tables
145-
)
146-
return whitelisted_feature_tables
147-
return None
148-
149-
@property
150-
def _whitelisted_job_types(self) -> Optional[List[str]]:
151-
if self.client.config.exists(opt.WHITELISTED_JOB_TYPES):
152-
whitelisted_job_types = self.client.config.get(opt.WHITELISTED_JOB_TYPES)
153-
return whitelisted_job_types.split(",")
154-
return None
155-
156120
def is_whitelisted(self, project: str):
157121
# Whitelisted projects not specified, allow all projects
158122
if not self._whitelisted_projects:
159123
return True
160124
return project in self._whitelisted_projects
161125

162-
def is_feature_table_whitelisted(self, project: str, feature_table: str):
163-
if not self._whitelisted_project_feature_table_pairs:
164-
return True
165-
return (project, feature_table) in self._whitelisted_project_feature_table_pairs
166-
167-
def is_job_type_whitelisted(self, job_type: SparkJobType):
168-
if not self._whitelisted_job_types:
169-
return True
170-
return job_type.name in self._whitelisted_job_types
171-
172126
def StartOfflineToOnlineIngestionJob(
173127
self, request: StartOfflineToOnlineIngestionJobRequest, context
174128
):
175129
"""Start job to ingest data from offline store into online store"""
176-
if not self.is_job_type_whitelisted(SparkJobType.BATCH_INGESTION):
177-
raise ValueError(
178-
"This job service is not configured to accept batch ingestion"
179-
)
180-
181130
job_submission_count.labels(
182131
"batch_ingestion", request.project, request.table_name
183132
).inc()
184133

185-
if not self.is_whitelisted(request.project):
186-
job_whitelist_failure_count.labels(
187-
request.project, request.table_name
188-
).inc()
189-
raise ValueError(
190-
f"Project {request.project} is not whitelisted. Please contact your Feast administrator to whitelist it."
191-
)
192-
193-
if not self.is_feature_table_whitelisted(request.project, request.table_name):
194-
job_whitelist_failure_count.labels(
195-
request.project, request.table_name
196-
).inc()
197-
raise ValueError(
198-
f"Project {request.project}:{request.table_name} is not whitelisted. Please contact your Feast administrator to whitelist it."
199-
)
200-
201134
feature_table = self.client.feature_store.get_feature_table(
202135
request.table_name, request.project
203136
)
@@ -223,11 +156,6 @@ def ScheduleOfflineToOnlineIngestionJob(
223156
self, request: ScheduleOfflineToOnlineIngestionJobRequest, context
224157
):
225158
"""Schedule job to ingest data from offline store into online store periodically"""
226-
if not self.is_job_type_whitelisted(SparkJobType.SCHEDULED_BATCH_INGESTION):
227-
raise ValueError(
228-
"This job service is not configured to schedule batch ingestion"
229-
)
230-
231159
job_schedule_count.labels(request.project, request.table_name).inc()
232160
feature_table = self.client.feature_store.get_feature_table(
233161
request.table_name, request.project
@@ -245,10 +173,6 @@ def ScheduleOfflineToOnlineIngestionJob(
245173
def UnscheduleOfflineToOnlineIngestionJob(
246174
self, request: UnscheduleOfflineToOnlineIngestionJobRequest, context
247175
):
248-
if not self.is_job_type_whitelisted(SparkJobType.SCHEDULED_BATCH_INGESTION):
249-
raise ValueError(
250-
"This job service is not configured to unschedule ingestion job"
251-
)
252176
feature_table = self.client.feature_store.get_feature_table(
253177
request.table_name, request.project
254178
)
@@ -259,18 +183,8 @@ def UnscheduleOfflineToOnlineIngestionJob(
259183

260184
def GetHistoricalFeatures(self, request: GetHistoricalFeaturesRequest, context):
261185
"""Produce a training dataset, return a job id that will provide a file reference"""
262-
if not self.is_job_type_whitelisted(SparkJobType.HISTORICAL_RETRIEVAL):
263-
raise ValueError(
264-
"This job service is not configured to accept historical retrieval job"
265-
)
266-
267186
job_submission_count.labels("historical_retrieval", request.project, "").inc()
268187

269-
if not self.is_whitelisted(request.project):
270-
raise ValueError(
271-
f"Project {request.project} is not whitelisted. Please contact your Feast administrator to whitelist it."
272-
)
273-
274188
job = start_historical_feature_retrieval_job(
275189
client=self.client,
276190
project=request.project,
@@ -297,11 +211,6 @@ def StartStreamToOnlineIngestionJob(
297211
self, request: StartStreamToOnlineIngestionJobRequest, context
298212
):
299213
"""Start job to ingest data from stream into online store"""
300-
if not self.is_job_type_whitelisted(SparkJobType.STREAM_INGESTION):
301-
raise ValueError(
302-
"This job service is not configured to start streaming job"
303-
)
304-
305214
job_submission_count.labels(
306215
"streaming", request.project, request.table_name
307216
).inc()
@@ -356,11 +265,6 @@ def StartStreamToOnlineIngestionJob(
356265
def ListJobs(self, request, context):
357266
"""List all types of jobs"""
358267

359-
if not self.is_whitelisted(request.project):
360-
raise ValueError(
361-
f"Project {request.project} is not whitelisted. Please contact your Feast administrator to whitelist it."
362-
)
363-
364268
jobs = list_jobs(
365269
include_terminated=request.include_terminated,
366270
project=request.project,

0 commit comments

Comments
 (0)