
Commit e782373

fix(data-science-onramp): Change file names, remove extra dependencies for model training (GoogleCloudPlatform#5770)

* name changes, changes to model code
* changing file names
* updating to vertex
* fixes
* fixes and removing noxfile

1 parent: d376b3c

16 files changed: 67 additions and 75 deletions

data-science-onramp/ai-platform/modules/setup.py (1 addition, 1 deletion)

@@ -23,7 +23,7 @@
     include_package_data=True,
     description="Tutorial Package",
     install_requires=[
-        "gcsfs==0.8.0"
+        "gcsfs"
     ]
 )
 # [END ai_platform_setup]
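
Unpinning gcsfs lets pip resolve whatever version is compatible with the prebuilt training image instead of forcing 0.8.0. If some reproducibility is still wanted, a lower-bound constraint is a common middle ground; the sketch below is illustrative only (the package name "trainer" is a placeholder, not from this commit):

    from setuptools import find_packages, setup

    setup(
        name="trainer",  # hypothetical package name
        packages=find_packages(),
        include_package_data=True,
        description="Tutorial Package",
        # A floor avoids known-broken old releases while still letting pip
        # pick a version compatible with the training image.
        install_requires=["gcsfs>=0.8.0"],
    )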

data-science-onramp/ai-platform/modules/trainer/sklearn_model/task.py (2 additions, 2 deletions)

@@ -47,10 +47,10 @@ def get_args() -> argparse.Namespace:
         help="Regularization strength, default=0 (Standard Regression)",
     )
     parser.add_argument(
-        "--model_dir",
+        "--model-dir",
         type=str,
         help="Output directory for the model.",
-        default=os.environ["AIP_MODEL_DIR"],
+        default=os.getenv("AIP_MODEL_DIR"),
     )
     return parser.parse_args()
 # [END ai_platform_sklearn_task_args]
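
The switch from os.environ["AIP_MODEL_DIR"] to os.getenv("AIP_MODEL_DIR") is more than style: the default is evaluated when add_argument() runs, so os.environ raises KeyError whenever the variable is unset (for example, when running the trainer locally outside Vertex AI), even if --model-dir is passed explicitly. os.getenv returns None instead and lets the caller decide. A minimal sketch of the difference:

    import os

    # os.environ[...] raises KeyError if the variable is unset, which would
    # crash get_args() before the command line is even consulted.
    try:
        model_dir = os.environ["AIP_MODEL_DIR"]
    except KeyError:
        model_dir = None

    # os.getenv(...) expresses the same intent in one line:
    model_dir = os.getenv("AIP_MODEL_DIR")                 # None when absent
    model_dir = os.getenv("AIP_MODEL_DIR", "/tmp/model")   # or with a fallback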

data-science-onramp/ai-platform/modules/trainer/tfkeras_model/task.py (2 additions, 2 deletions)

@@ -57,10 +57,10 @@ def get_args() -> argparse.Namespace:
         default="INFO",
     )
     parser.add_argument(
-        "--model_dir",
+        "--model-dir",
         type=str,
         help="Output directory for the model.",
-        default=os.environ["AIP_MODEL_DIR"],
+        default=os.getenv("AIP_MODEL_DIR"),
     )
     return parser.parse_args()
 # [END ai_platform_tfkeras_task_args]
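
Renaming the flag from --model_dir to --model-dir follows the usual CLI convention of hyphenated option names, and it costs nothing in the calling code: argparse converts hyphens in long option names to underscores when building the namespace attribute. A small sketch:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--model-dir", type=str, default=None)

    args = parser.parse_args(["--model-dir", "gs://bucket/model"])
    # The hyphen becomes an underscore in the attribute name.
    print(args.model_dir)  # gs://bucket/model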

data-science-onramp/ai-platform/noxfile_config.py (1 addition, 1 deletion)

@@ -22,7 +22,7 @@

 TEST_CONFIG_OVERRIDE = {
     # You can opt out from the test for specific Python versions.
-    'ignored_versions': ["2.7"],
+    "ignored_versions": ["2.7"],
     # Old samples are opted out of enforcing Python type hints
     # All new samples should feature them
     "enforce_type_hints": True,

data-science-onramp/ai-platform/sklearn_test.py (24 additions, 26 deletions)

@@ -35,13 +35,13 @@
 REGION = "us-central1"
 MODEL_NAME = f"sklearn-test-{uuid.uuid4()}"
 JOB_ID = f"sklearn_{str(uuid.uuid4())[:7]}"
-DEPLOY_IMAGE = "gcr.io/cloud-aiplatform/training/scikit-learn-cpu.0-23:latest"
+DEPLOY_IMAGE = "us-docker.pkg.dev/vertex-ai/training/scikit-learn-cpu.0-23:latest"

 TERMINAL_STATES = [
     aip.JobState.JOB_STATE_SUCCEEDED,
     aip.JobState.JOB_STATE_FAILED,
     aip.JobState.JOB_STATE_CANCELLING,
-    aip.JobState.JOB_STATE_CANCELLED
+    aip.JobState.JOB_STATE_CANCELLED,
 ]

@@ -53,21 +53,21 @@ def shared_state() -> dict:

 @pytest.fixture(autouse=True)
 def setup_teardown(
-    shared_state: dict
+    shared_state: dict,
 ) -> Tuple[storage.bucket.Bucket, aip.JobServiceClient]:
     storage_client = storage.Client()
     bucket = storage_client.create_bucket(STAGING_BUCKET, location=REGION)
-    bucket.blob(f"{INPUT_DIR}/{TRAIN_DATA}").upload_from_filename(TRAIN_DATA, timeout=600)
+    bucket.blob(f"{INPUT_DIR}/{TRAIN_DATA}").upload_from_filename(
+        TRAIN_DATA, timeout=600
+    )

     with tarfile.open(TRAINER_TAR, mode="x:gz") as tar:
         tar.add(f"{TRAINER_DIR}/")

     bucket.blob(TRAINER_TAR).upload_from_filename(TRAINER_TAR)

     aip_job_client = aip.JobServiceClient(
-        client_options={
-            "api_endpoint": f"{REGION}-aiplatform.googleapis.com"
-        }
+        client_options={"api_endpoint": f"{REGION}-aiplatform.googleapis.com"}
     )

     yield bucket, aip_job_client

@@ -85,40 +85,38 @@ def setup_teardown(
 @pytest.mark.timeout(1800)
 def test_sklearn(
     setup_teardown: Tuple[storage.bucket.Bucket, aip.JobServiceClient],
-    shared_state: dict
+    shared_state: dict,
 ) -> None:
     bucket, aip_job_client = setup_teardown

     custom_job = {
         "display_name": JOB_ID,
         "job_spec": {
             "base_output_directory": {"output_uri_prefix": f"gs://{STAGING_BUCKET}"},
-            "worker_pool_specs": [{
-                "replica_count": 1,
-                "machine_spec": {
-                    "machine_type": "n1-standard-4",
-                },
-                "python_package_spec": {
-                    "executor_image_uri": DEPLOY_IMAGE,
-                    "package_uris": [f"gs://{STAGING_BUCKET}/{TRAINER_TAR}"],
-                    "python_module": "trainer.sklearn_model.task",
-                    "args": [
-                        f"--input-path={TRAIN_DATA_PATH}"
-                    ]
+            "worker_pool_specs": [
+                {
+                    "replica_count": 1,
+                    "machine_spec": {
+                        "machine_type": "n1-standard-4",
+                    },
+                    "python_package_spec": {
+                        "executor_image_uri": DEPLOY_IMAGE,
+                        "package_uris": [f"gs://{STAGING_BUCKET}/{TRAINER_TAR}"],
+                        "python_module": "trainer.sklearn_model.task",
+                        "args": [f"--input-path={TRAIN_DATA_PATH}"],
+                    },
                 }
-            }]
-        }
+            ],
+        },
     }

     parent = f"projects/{PROJECT_ID}/locations/{REGION}"
-    response = aip_job_client.create_custom_job(
-        parent=parent, custom_job=custom_job
-    )
+    response = aip_job_client.create_custom_job(parent=parent, custom_job=custom_job)
     resource_name = response.name
     shared_state["model_name"] = resource_name

     # Subject to change with LRO availability
-    while (response.state not in TERMINAL_STATES):
+    while response.state not in TERMINAL_STATES:
         time.sleep(60)
         response = aip_job_client.get_custom_job(name=resource_name)
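
Both tests follow the same create-and-poll pattern against the Vertex AI JobService: create the CustomJob, remember its resource name, then re-fetch the job until it reaches a terminal state. A minimal standalone sketch of that pattern, assuming google-cloud-aiplatform is installed (the helper name and poll interval are illustrative, not from this commit):

    import time

    from google.cloud import aiplatform_v1 as aip

    TERMINAL_STATES = {
        aip.JobState.JOB_STATE_SUCCEEDED,
        aip.JobState.JOB_STATE_FAILED,
        aip.JobState.JOB_STATE_CANCELLING,
        aip.JobState.JOB_STATE_CANCELLED,
    }

    def wait_for_custom_job(
        client: aip.JobServiceClient, resource_name: str, poll_seconds: int = 60
    ) -> aip.CustomJob:
        """Re-fetch the job until it reaches a terminal state, then return it."""
        job = client.get_custom_job(name=resource_name)
        while job.state not in TERMINAL_STATES:
            time.sleep(poll_seconds)
            job = client.get_custom_job(name=resource_name)
        return job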

data-science-onramp/ai-platform/tfkeras_test.py (24 additions, 26 deletions)

@@ -35,13 +35,13 @@
 REGION = "us-central1"
 MODEL_NAME = f"tfkeras-test-{uuid.uuid4()}"
 JOB_ID = f"tfkeras_{str(uuid.uuid4())[:7]}"
-DEPLOY_IMAGE = "gcr.io/cloud-aiplatform/training/tf-cpu.2-3:latest"
+DEPLOY_IMAGE = "us-docker.pkg.dev/vertex-ai/training/tf-cpu.2-3:latest"

 TERMINAL_STATES = [
     aip.JobState.JOB_STATE_SUCCEEDED,
     aip.JobState.JOB_STATE_FAILED,
     aip.JobState.JOB_STATE_CANCELLING,
-    aip.JobState.JOB_STATE_CANCELLED
+    aip.JobState.JOB_STATE_CANCELLED,
 ]

@@ -53,22 +53,22 @@ def shared_state() -> dict:

 @pytest.fixture(autouse=True)
 def setup_teardown(
-    shared_state: dict
+    shared_state: dict,
 ) -> Tuple[storage.bucket.Bucket, aip.JobServiceClient]:

     storage_client = storage.Client()
     bucket = storage_client.create_bucket(STAGING_BUCKET, location=REGION)
-    bucket.blob(f"{INPUT_DIR}/{TRAIN_DATA}").upload_from_filename(TRAIN_DATA, timeout=600)
+    bucket.blob(f"{INPUT_DIR}/{TRAIN_DATA}").upload_from_filename(
+        TRAIN_DATA, timeout=600
+    )

     with tarfile.open(TRAINER_TAR, mode="x:gz") as tar:
         tar.add(f"{TRAINER_DIR}/")

     bucket.blob(TRAINER_TAR).upload_from_filename(TRAINER_TAR)

     aip_job_client = aip.JobServiceClient(
-        client_options={
-            "api_endpoint": f"{REGION}-aiplatform.googleapis.com"
-        }
+        client_options={"api_endpoint": f"{REGION}-aiplatform.googleapis.com"}
     )

     yield bucket, aip_job_client

@@ -86,40 +86,38 @@ def setup_teardown(
 @pytest.mark.timeout(1800)
 def test_tfkeras(
     setup_teardown: Tuple[storage.bucket.Bucket, aip.JobServiceClient],
-    shared_state: dict
+    shared_state: dict,
 ) -> None:
     bucket, aip_job_client = setup_teardown

     custom_job = {
         "display_name": JOB_ID,
         "job_spec": {
             "base_output_directory": {"output_uri_prefix": f"gs://{STAGING_BUCKET}"},
-            "worker_pool_specs": [{
-                "replica_count": 1,
-                "machine_spec": {
-                    "machine_type": "n1-standard-4",
-                },
-                "python_package_spec": {
-                    "executor_image_uri": DEPLOY_IMAGE,
-                    "package_uris": [f"gs://{STAGING_BUCKET}/{TRAINER_TAR}"],
-                    "python_module": "trainer.tfkeras_model.task",
-                    "args": [
-                        f"--input-path={TRAIN_DATA_PATH}"
-                    ]
+            "worker_pool_specs": [
+                {
+                    "replica_count": 1,
+                    "machine_spec": {
+                        "machine_type": "n1-standard-4",
+                    },
+                    "python_package_spec": {
+                        "executor_image_uri": DEPLOY_IMAGE,
+                        "package_uris": [f"gs://{STAGING_BUCKET}/{TRAINER_TAR}"],
+                        "python_module": "trainer.tfkeras_model.task",
+                        "args": [f"--input-path={TRAIN_DATA_PATH}"],
+                    },
                 }
-            }]
-        }
+            ],
+        },
     }

     parent = f"projects/{PROJECT_ID}/locations/{REGION}"
-    response = aip_job_client.create_custom_job(
-        parent=parent, custom_job=custom_job
-    )
+    response = aip_job_client.create_custom_job(parent=parent, custom_job=custom_job)
     resource_name = response.name
     shared_state["model_name"] = resource_name

     # Subject to change with LRO availability
-    while (response.state not in TERMINAL_STATES):
+    while response.state not in TERMINAL_STATES:
         time.sleep(60)
         response = aip_job_client.get_custom_job(name=resource_name)
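
The fixtures stage the trainer for both jobs the same way: the source tree is packed into a gzipped tarball and uploaded alongside the training data, so python_package_spec.package_uris can point at it. A minimal sketch of that step, assuming a local trainer/ directory and an existing staging bucket (both names here are illustrative):

    import tarfile

    from google.cloud import storage

    TRAINER_TAR = "trainer.tar.gz"

    # mode="x:gz" creates a new gzipped archive and raises FileExistsError
    # if a stale archive is left over from an earlier run.
    with tarfile.open(TRAINER_TAR, mode="x:gz") as tar:
        tar.add("trainer/")

    # Stage the package where the custom job's package_uris can find it.
    bucket = storage.Client().bucket("my-staging-bucket")  # illustrative name
    bucket.blob(TRAINER_TAR).upload_from_filename(TRAINER_TAR)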

2 files renamed without changes.

data-science-onramp/data-ingestion/setup_test.py renamed to data-science-onramp/data-ingestion/ingestion_test.py (5 additions, 6 deletions)

@@ -31,8 +31,8 @@
 TEST_ID = uuid.uuid4()

 # Google Cloud Storage constants
-BUCKET_NAME = f"setup-test-{TEST_ID}"
-BUCKET_BLOB = "setup.py"
+BUCKET_NAME = f"ingestion-test-{TEST_ID}"
+BUCKET_BLOB = "ingestion.py"

 BQ_DATASET = f"setup-test-{TEST_ID}".replace("-", "_")
 BQ_CITIBIKE_TABLE = "RAW_DATA"

@@ -42,9 +42,9 @@
 ]

 # Dataproc constants
-DATAPROC_CLUSTER = f"setup-test-{TEST_ID}"
+DATAPROC_CLUSTER = f"ingestion-test-{TEST_ID}"
 CLUSTER_REGION = "us-central1"
-CLUSTER_IMAGE = "1.5.4-debian10"
+CLUSTER_IMAGE = "2.0-debian10"
 CLUSTER_CONFIG = {  # Dataproc cluster configuration
     "project_id": PROJECT_ID,
     "cluster_name": DATAPROC_CLUSTER,

@@ -58,7 +58,6 @@
         "worker_config": {"num_instances": 2, "machine_type_uri": "n1-standard-4"},
         "software_config": {
             "image_version": CLUSTER_IMAGE,
-            "optional_components": [5],
         },
     },
 }

@@ -103,7 +102,7 @@ def setup_and_teardown_bucket():

     # Upload file
     blob = bucket.blob(BUCKET_BLOB)
-    blob.upload_from_filename("setup.py")
+    blob.upload_from_filename("ingestion.py")

     yield
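
Beyond the setup-test to ingestion-test renames, the cluster definition moves from a Dataproc 1.5 image to 2.0 and drops "optional_components": [5] (value 5 appears to correspond to ANACONDA in the Dataproc v1 Component enum; 2.0 images ship a modern Python environment by default, so the component is no longer needed). For context, a minimal sketch of how such a config is typically handed to the Dataproc API; the project ID and cluster name below are illustrative, not from this commit:

    from google.cloud import dataproc_v1

    REGION = "us-central1"

    # Cluster operations must go through the regional endpoint.
    cluster_client = dataproc_v1.ClusterControllerClient(
        client_options={"api_endpoint": f"{REGION}-dataproc.googleapis.com:443"}
    )

    # create_cluster returns a long-running operation; result() blocks until
    # the cluster is ready (or the operation fails).
    operation = cluster_client.create_cluster(
        request={
            "project_id": "my-project",  # illustrative project ID
            "region": REGION,
            "cluster": {
                "project_id": "my-project",
                "cluster_name": "ingestion-test-cluster",  # illustrative name
                "config": {
                    "software_config": {"image_version": "2.0-debian10"},
                },
            },
        }
    )
    cluster = operation.result()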

File renamed without changes.
