Skip to content

Commit eee8af8

Browse files
authored
Bundle python dependencies for spark images (#137)
Signed-off-by: Khor Shu Heng <khor.heng@gojek.com> Co-authored-by: Khor Shu Heng <khor.heng@gojek.com>
1 parent ae6309e commit eee8af8

File tree

3 files changed

+10
-7
lines changed

3 files changed

+10
-7
lines changed

infra/docker/spark/Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@ RUN echo 'spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible
3636
RUN echo 'spark.executor.extraJavaOptions="-Dcom.google.cloud.spark.bigquery.repackaged.io.netty.tryReflectionSetAccessible=true"' >> $SPARK_HOME/conf/spark-defaults.conf
3737

3838
# python dependencies
39-
RUN pip3 install -U pip wheel
40-
RUN pip3 install pandas pyarrow==2.0.0 'numpy<1.20.0'
39+
RUN pip3 install pandas==1.3.5 great-expectations==0.13.2 pyarrow==2.0.0 Jinja2==3.0.3 datadog==0.44.0 'numpy<1.20.0'
4140

4241
# For logging to /dev/termination-log
4342
RUN mkdir -p /dev
4443

44+
4545
ENTRYPOINT [ "/opt/entrypoint.sh" ]

infra/docker/spark/dev.Dockerfile

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,10 @@ RUN mkdir -p /opt/spark/conf
2121
RUN echo 'spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true"' >> $SPARK_HOME/conf/spark-defaults.conf
2222
RUN echo 'spark.driver.extraJavaOptions="-Dcom.google.cloud.spark.bigquery.repackaged.io.netty.tryReflectionSetAccessible=true"' >> $SPARK_HOME/conf/spark-defaults.conf
2323
RUN echo 'spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true"' >> $SPARK_HOME/conf/spark-defaults.conf
24-
RUN echo 'spark.executor.extraJavaOptions="-Dcom.google.cloud.spark.bigquery.repackaged.io.netty.tryReflectionSetAccessible=true"' >> $SPARK_HOME/conf/spark-defaults.conf
24+
RUN echo 'spark.executor.extraJavaOptions="-Dcom.google.cloud.spark.bigquery.repackaged.io.netty.tryReflectionSetAccessible=true"' >> $SPARK_HOME/conf/spark-defaults.conf
2525

2626
# python dependencies
27-
RUN pip3 install -U pip wheel
28-
RUN pip3 install pandas pyarrow==2.0.0 'numpy<1.20.0'
27+
RUN pip3 install pandas==1.3.5 great-expectations==0.13.2 pyarrow==2.0.0 Jinja2==3.0.3 datadog==0.44.0 'numpy<1.20.0'
2928

3029
# For logging to /dev/termination-log
3130
RUN mkdir -p /dev

tests/e2e/test_validation.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,9 @@ def test_validation_with_ge(
5252
expectations = ge_ds.get_expectation_suite()
5353

5454
udf = create_validation_udf("testUDF", expectations, feature_table)
55-
apply_validation(feast_client, feature_table, udf, validation_window_secs=1)
55+
apply_validation(
56+
feast_client, feature_table, udf, validation_window_secs=1, include_py_libs=""
57+
)
5658

5759
job = start_job(feast_spark_client, feature_table, pytestconfig)
5860

@@ -123,7 +125,9 @@ def test_validation_reports_metrics(
123125
expectations = ge_ds.get_expectation_suite()
124126

125127
udf = create_validation_udf("testUDF", expectations, feature_table)
126-
apply_validation(feast_client, feature_table, udf, validation_window_secs=10)
128+
apply_validation(
129+
feast_client, feature_table, udf, validation_window_secs=10, include_py_libs=""
130+
)
127131

128132
job = start_job(feast_spark_client, feature_table, pytestconfig)
129133

0 commit comments

Comments (0)