from typing import Optional

from feast.constants import ConfigMeta


class ConfigOptions(metaclass=ConfigMeta):
    #: Default Feast Job Service URL
    JOB_SERVICE_URL: Optional[str] = None

    #: Enable or disable TLS/SSL to Feast Job Service
    JOB_SERVICE_ENABLE_SSL: str = "False"

    #: Path to certificate(s) to secure connection to Feast Job Service
    JOB_SERVICE_SERVER_SSL_CERT: str = ""

    #: Enable or disable control loop for Feast Job Service
    JOB_SERVICE_ENABLE_CONTROL_LOOP: str = "False"

    #: If set to True, the control loop will try to restart failed streaming jobs
    JOB_SERVICE_RETRY_FAILED_JOBS: str = "False"

    #: Pause in seconds between starting new jobs in the control loop
    JOB_SERVICE_PAUSE_BETWEEN_JOBS: str = "5"

    #: Port on which the Prometheus metrics server will run
    JOB_SERVICE_PROMETHEUS_METRIC_PORT: int = 8080
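
    # A sketch of configuring the job service (assuming feast 0.9-style
    # configuration, where each option can also be supplied as a
    # FEAST_-prefixed environment variable; the values are hypothetical):
    #
    #     import os
    #     os.environ["FEAST_JOB_SERVICE_URL"] = "localhost:6568"
    #     os.environ["FEAST_JOB_SERVICE_ENABLE_CONTROL_LOOP"] = "True"
    #     os.environ["FEAST_JOB_SERVICE_RETRY_FAILED_JOBS"] = "True"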

    #: Endpoint URL for S3 storage_client
    S3_ENDPOINT_URL: Optional[str] = None

    #: Spark Job launcher. The choice of storage (for staging locations, the
    #: ingestion jar, dead letters, etc.) is tied to the choice of SPARK_LAUNCHER.
    #:
    #: Options: "standalone", "dataproc", "emr"
    SPARK_LAUNCHER: Optional[str] = None

    #: Staging location for Feast Spark ingestion jobs. The supported storage
    #: schemes depend on the choice of SPARK_LAUNCHER.
    #:
    #: E.g. gs://some-bucket/output/, s3://some-bucket/output/, file:///data/subfolder/
    SPARK_STAGING_LOCATION: Optional[str] = None

    #: Feast Spark ingestion jar file. The supported URI schemes depend on the
    #: choice of SPARK_LAUNCHER.
    #:
    #: E.g. "dataproc" (http and gs), "emr" (http and s3), "standalone" (http and file)
    SPARK_INGESTION_JAR: str = "https://storage.googleapis.com/feast-jobs/spark/ingestion/feast-ingestion-spark-develop.jar"

    #: Spark resource manager master URL
    SPARK_STANDALONE_MASTER: str = "local[*]"

    #: Directory where Spark is installed
    SPARK_HOME: Optional[str] = None
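
    # A minimal standalone-launcher sketch (hypothetical values, under the
    # same FEAST_ environment variable assumption as above):
    #
    #     import os
    #     os.environ["FEAST_SPARK_LAUNCHER"] = "standalone"
    #     os.environ["FEAST_SPARK_STANDALONE_MASTER"] = "local[*]"
    #     os.environ["FEAST_SPARK_STAGING_LOCATION"] = "file:///tmp/feast-staging/"
    #     os.environ["FEAST_SPARK_HOME"] = "/opt/spark"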

    #: The project ID where the materialized view of a BigQuerySource will be
    #: created. By default, the same project as the view is used.
    SPARK_BQ_MATERIALIZATION_PROJECT: Optional[str] = None

    #: The dataset ID where the materialized view of a BigQuerySource will be
    #: created. By default, the same dataset as the view is used.
    SPARK_BQ_MATERIALIZATION_DATASET: Optional[str] = None

    #: Dataproc cluster to run Feast Spark jobs in
    DATAPROC_CLUSTER_NAME: Optional[str] = None

    #: Project of the Dataproc cluster
    DATAPROC_PROJECT: Optional[str] = None

    #: Region of the Dataproc cluster
    DATAPROC_REGION: Optional[str] = None

    #: Number of executor instances for the Dataproc cluster
    DATAPROC_EXECUTOR_INSTANCES: str = "2"

    #: Number of executor cores for the Dataproc cluster
    DATAPROC_EXECUTOR_CORES: str = "2"

    #: Amount of executor memory for the Dataproc cluster
    DATAPROC_EXECUTOR_MEMORY: str = "2g"
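
    # Likewise, a Dataproc-launcher sketch (hypothetical values):
    #
    #     os.environ["FEAST_SPARK_LAUNCHER"] = "dataproc"
    #     os.environ["FEAST_DATAPROC_CLUSTER_NAME"] = "feast-jobs"
    #     os.environ["FEAST_DATAPROC_PROJECT"] = "my-gcp-project"
    #     os.environ["FEAST_DATAPROC_REGION"] = "us-central1"
    #     os.environ["FEAST_SPARK_STAGING_LOCATION"] = "gs://some-bucket/output/"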

    #: Namespace to use for Spark jobs launched via the k8s Spark operator
    SPARK_K8S_NAMESPACE: str = "default"

    #: Whether the k8s Spark operator is expected to run in the same cluster as Feast
    SPARK_K8S_USE_INCLUSTER_CONFIG: str = "True"

    #: SparkApplication resource template
    SPARK_K8S_JOB_TEMPLATE_PATH: Optional[str] = None

    #: SparkApplication resource template for batch ingestion jobs
    SPARK_K8S_BATCH_INGESTION_TEMPLATE_PATH: Optional[str] = ""

    #: SparkApplication resource template for stream ingestion jobs
    SPARK_K8S_STREAM_INGESTION_TEMPLATE_PATH: Optional[str] = ""

    #: SparkApplication resource template for historical retrieval jobs
    SPARK_K8S_HISTORICAL_RETRIEVAL_TEMPLATE_PATH: Optional[str] = ""
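
    # Pointing the k8s launcher at a custom SparkApplication template, a
    # sketch under the same FEAST_ env var assumption (hypothetical path):
    #
    #     os.environ["FEAST_SPARK_K8S_JOB_TEMPLATE_PATH"] = "/etc/feast/job_template.yaml"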

    #: Host of the Redis instance that stores Spark ingestion job metrics
    SPARK_METRICS_REDIS_HOST: Optional[str] = None

    #: Port of the Redis instance that stores Spark ingestion job metrics
    SPARK_METRICS_REDIS_PORT: Optional[str] = None

    #: Host of the Redis instance that stores job management locks
    LOCK_MGR_REDIS_HOST: Optional[str] = None

    #: Port of the Redis instance that stores job management locks
    LOCK_MGR_REDIS_PORT: Optional[str] = None

    #: TTL for job management locks
    LOCK_EXPIRY: Optional[str] = "60"

    #: File format of the historical feature retrieval output
    HISTORICAL_FEATURE_OUTPUT_FORMAT: str = "parquet"

    #: File location of the historical feature retrieval output
    HISTORICAL_FEATURE_OUTPUT_LOCATION: Optional[str] = None

    #: Default Redis host
    REDIS_HOST: Optional[str] = ""

    #: Default Redis port
    REDIS_PORT: Optional[str] = ""

    #: Redis credentials
    REDIS_PASSWORD: Optional[str] = ""

    #: Enable or disable TLS/SSL to Redis
    REDIS_SSL: Optional[str] = "False"
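
    # Example online-store and historical-output settings (hypothetical
    # values, same FEAST_ env var assumption as above):
    #
    #     os.environ["FEAST_REDIS_HOST"] = "redis.example.com"
    #     os.environ["FEAST_REDIS_PORT"] = "6379"
    #     os.environ["FEAST_HISTORICAL_FEATURE_OUTPUT_LOCATION"] = "s3://some-bucket/output/"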

    #: BigTable Project ID
    BIGTABLE_PROJECT: Optional[str] = ""

    #: BigTable Instance ID
    BIGTABLE_INSTANCE: Optional[str] = ""

    #: Cassandra host. Can be a comma-separated string
    CASSANDRA_HOST: Optional[str] = ""

    #: Cassandra port
    CASSANDRA_PORT: Optional[str] = ""

    #: Enable or disable StatsD
    STATSD_ENABLED: str = "False"

    #: Default StatsD host
    STATSD_HOST: Optional[str] = None

    #: Default StatsD port
    STATSD_PORT: Optional[str] = None

    #: Ingestion job dead-letter destination. The choice of storage depends on
    #: the choice of SPARK_LAUNCHER.
    #:
    #: E.g. gs://some-bucket/output/, s3://some-bucket/output/, file:///data/subfolder/
    DEADLETTER_PATH: str = ""

    #: Ingestion job checkpoint location. Same format as DEADLETTER_PATH
    CHECKPOINT_PATH: str = ""

    #: ProtoRegistry address (currently only Stencil Server is supported as a registry)
    #: https://github.com/odpf/stencil
    STENCIL_URL: str = ""

    #: Bearer token used for authentication with the Stencil Server
    STENCIL_TOKEN: str = ""

    #: If set to true, rows that do not pass custom validation (see feast.contrib.validation)
    #: will not be saved to online storage
    INGESTION_DROP_INVALID_ROWS: str = "False"

    #: EMR cluster to run Feast Spark jobs in
    EMR_CLUSTER_ID: Optional[str] = None

    #: Region of the EMR cluster
    EMR_REGION: Optional[str] = None

    #: Template path of the EMR cluster
    EMR_CLUSTER_TEMPLATE_PATH: Optional[str] = None

    #: Log path of the EMR cluster
    EMR_LOG_LOCATION: Optional[str] = None

    #: Whitelisted Feast job types
    WHITELISTED_JOB_TYPES: Optional[str] = None

    #: Whitelisted Feast projects
    WHITELISTED_PROJECTS: Optional[str] = None

    #: File path to a whitelist containing all the feature tables allowed for ingestion.
    #: Each line in the file should be in the format <project>:<feature table>
    WHITELISTED_FEATURE_TABLES_PATH: Optional[str] = None
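
    # An example whitelist file, one feature table per line in the
    # <project>:<feature table> format (the names are hypothetical):
    #
    #     my_project:driver_statistics
    #     my_project:customer_transactions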

    #: If set, the streaming ingestion job will consume incoming rows periodically,
    #: at the configured interval (in seconds), rather than continuously.
    #: This may help to control the number of write requests to storage
    SPARK_STREAMING_TRIGGERING_INTERVAL: Optional[str] = None

    #: GCP project of the BigQuery dataset used to stage the entities during historical
    #: feature retrieval. If not set, the GCP project of the feature table batch source
    #: will be used instead.
    BQ_STAGING_PROJECT: Optional[str] = None

    #: BigQuery dataset used to stage the entities during historical feature retrieval.
    #: If not set, the BigQuery dataset of the batch source will be used
    #: instead.
    BQ_STAGING_DATASET: Optional[str] = None

    def defaults(self):
        """Return a mapping of option name to default value, covering every
        option collected by ConfigMeta whose default is not None."""
        return {
            k: getattr(self, k)
            for k in self.__config_keys__
            if getattr(self, k) is not None
        }
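

if __name__ == "__main__":
    # A minimal usage sketch: print the non-None defaults that ConfigMeta
    # collected into __config_keys__ (see defaults() above).
    for key, value in ConfigOptions().defaults().items():
        print(f"{key}={value}")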