
Commit 5a36ad6

remove irrelevant code
1 parent d0e451f · commit 5a36ad6

File tree

1 file changed: +1 -51 lines changed


composer/workflows/dataproc/example_dataproc.py

Lines changed: 1 addition & 51 deletions
@@ -25,26 +25,15 @@
 from airflow import models
 from airflow.providers.google.cloud.operators.dataproc import (
     DataprocCreateClusterOperator,
-    DataprocCreateWorkflowTemplateOperator,
     DataprocDeleteClusterOperator,
-    DataprocInstantiateWorkflowTemplateOperator,
     DataprocSubmitJobOperator,
-    DataprocUpdateClusterOperator,
 )
 from airflow.providers.google.cloud.sensors.dataproc import DataprocJobSensor
 from airflow.utils.dates import days_ago
 
 PROJECT_ID = os.environ.get("GCP_PROJECT_ID", "leah-playground")
 CLUSTER_NAME = os.environ.get("GCP_DATAPROC_CLUSTER_NAME", "cluster-0c23")
 REGION = os.environ.get("GCP_LOCATION", "us-central1")
-# ZONE = os.environ.get("GCP_REGION", "europe-west1-b")
-# BUCKET = os.environ.get("GCP_DATAPROC_BUCKET", "dataproc-system-tests")
-# OUTPUT_FOLDER = "wordcount"
-# OUTPUT_PATH = f"gs://{BUCKET}/{OUTPUT_FOLDER}/"
-# PYSPARK_MAIN = os.environ.get("PYSPARK_MAIN", "hello_world.py")
-# PYSPARK_URI = f"gs://{BUCKET}/{PYSPARK_MAIN}"
-# SPARKR_MAIN = os.environ.get("SPARKR_MAIN", "hello_world.R")
-# SPARKR_URI = f"gs://{BUCKET}/{SPARKR_MAIN}"
 
 # Cluster definition
 # [START how_to_cloud_dataproc_create_cluster]
@@ -64,24 +53,6 @@
 
 # [END how_to_cloud_dataproc_create_cluster]
 
-# # Update options
-# # [START how_to_cloud_dataproc_updatemask_cluster_operator]
-# CLUSTER_UPDATE = {
-#     "config": {"worker_config": {"num_instances": 3}, "secondary_worker_config": {"num_instances": 3}}
-# }
-# UPDATE_MASK = {
-#     "paths": ["config.worker_config.num_instances", "config.secondary_worker_config.num_instances"]
-# }
-# # [END how_to_cloud_dataproc_updatemask_cluster_operator]
-
-# TIMEOUT = {"seconds": 1 * 24 * 60 * 60}
-
-# [START how_to_cloud_dataproc_pyspark_config]
-# PYSPARK_JOB = {
-#     "reference": {"project_id": PROJECT_ID},
-#     "placement": {"cluster_name": CLUSTER_NAME},
-#     "pyspark_job": {"main_python_file_uri": PYSPARK_URI},
-# }
 PYSPARK_JOB = {
     "reference": {"project_id": "leah-playground"},
     "placement": {"cluster_name": CLUSTER_NAME},
@@ -101,18 +72,6 @@
 )
 # # [END how_to_cloud_dataproc_create_cluster_operator]
 
-# # [START how_to_cloud_dataproc_update_cluster_operator]
-# scale_cluster = DataprocUpdateClusterOperator(
-#     task_id="scale_cluster",
-#     cluster_name=CLUSTER_NAME,
-#     cluster=CLUSTER_UPDATE,
-#     update_mask=UPDATE_MASK,
-#     graceful_decommission_timeout=TIMEOUT,
-#     project_id=PROJECT_ID,
-#     location=REGION,
-# )
-# # [END how_to_cloud_dataproc_update_cluster_operator]
-
 
 # [START how_to_cloud_dataproc_submit_job_to_cluster_operator]
 pyspark_task = DataprocSubmitJobOperator(
@@ -127,13 +86,4 @@
 )
 # # [END how_to_cloud_dataproc_delete_cluster_operator]
 create_cluster >> pyspark_task >> delete_cluster
-# create_cluster >> scale_cluster
-# scale_cluster >> create_workflow_template >> trigger_workflow >> delete_cluster
-# scale_cluster >> hive_task >> delete_cluster
-# scale_cluster >> pig_task >> delete_cluster
-# scale_cluster >> spark_sql_task >> delete_cluster
-# scale_cluster >> spark_task >> delete_cluster
-# scale_cluster >> spark_task_async >> spark_task_async_sensor >> delete_cluster
-# scale_cluster >> pyspark_task >> delete_cluster
-# scale_cluster >> sparkr_task >> delete_cluster
-# scale_cluster >> hadoop_task >> delete_cluster
+
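
For orientation, below is a minimal sketch of what example_dataproc.py looks like after this commit. Only the imports, the environment-variable defaults, PYSPARK_JOB, and the final create_cluster >> pyspark_task >> delete_cluster chain come from the visible context lines; CLUSTER_CONFIG, the DAG declaration, and the full operator arguments sit in collapsed hunks, so those parts are assumptions modeled on the Dataproc operator signatures of this provider era (the deleted scale_cluster code passed location=REGION, so that spelling is used for the submit task). It is a sketch, not the actual file.

import os

from airflow import models
from airflow.providers.google.cloud.operators.dataproc import (
    DataprocCreateClusterOperator,
    DataprocDeleteClusterOperator,
    DataprocSubmitJobOperator,
)
from airflow.utils.dates import days_ago

PROJECT_ID = os.environ.get("GCP_PROJECT_ID", "leah-playground")
CLUSTER_NAME = os.environ.get("GCP_DATAPROC_CLUSTER_NAME", "cluster-0c23")
REGION = os.environ.get("GCP_LOCATION", "us-central1")

# Assumed cluster shape: the real CLUSTER_CONFIG is in a collapsed hunk.
CLUSTER_CONFIG = {
    "master_config": {"num_instances": 1, "machine_type_uri": "n1-standard-4"},
    "worker_config": {"num_instances": 2, "machine_type_uri": "n1-standard-4"},
}

PYSPARK_JOB = {
    "reference": {"project_id": "leah-playground"},
    "placement": {"cluster_name": CLUSTER_NAME},
    # The real main_python_file_uri is also collapsed; placeholder value.
    "pyspark_job": {"main_python_file_uri": "gs://your-bucket/hello_world.py"},
}

with models.DAG(
    "example_dataproc",
    start_date=days_ago(1),
    schedule_interval=None,
) as dag:
    create_cluster = DataprocCreateClusterOperator(
        task_id="create_cluster",
        project_id=PROJECT_ID,
        cluster_config=CLUSTER_CONFIG,
        region=REGION,
        cluster_name=CLUSTER_NAME,
    )

    pyspark_task = DataprocSubmitJobOperator(
        task_id="pyspark_task",
        job=PYSPARK_JOB,
        location=REGION,
        project_id=PROJECT_ID,
    )

    delete_cluster = DataprocDeleteClusterOperator(
        task_id="delete_cluster",
        project_id=PROJECT_ID,
        cluster_name=CLUSTER_NAME,
        region=REGION,
    )

    # The one task chain left after the cleanup.
    create_cluster >> pyspark_task >> delete_cluster

Two loose ends the sketch makes visible, both grounded in the diff itself: PYSPARK_JOB hardcodes "leah-playground" instead of reusing PROJECT_ID, and the DataprocJobSensor import is now unused, since the only task that referenced a sensor (the commented-out spark_task_async_sensor chain) is deleted in this commit.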
