Skip to content

Commit b96b935

Browse files
Stepan Rasputnym-strzelczykgcf-owl-bot[bot]
authored
feat: create batch with local ssd (GoogleCloudPlatform#11895)
* feat: create batch with local ssd * adding tests * correct naming and logging * fix: according to comments * Update batch/create/create_with_ssd.py Co-authored-by: Maciej Strzelczyk <strzelczyk@google.com> * fix: remove redundant line after accepting suggestion * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Maciej Strzelczyk <strzelczyk@google.com> Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 36a3bb0 commit b96b935

File tree

2 files changed

+124
-1
lines changed

2 files changed

+124
-1
lines changed

batch/create/create_with_ssd.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# [START batch_create_local_ssd_job]
16+
from google.cloud import batch_v1
17+
18+
19+
def create_local_ssd_job(
    project_id: str,
    region: str,
    job_name: str,
    ssd_name: str,
    ssd_size_gb: int = 375,
) -> batch_v1.Job:
    """
    Create a sample Batch Job that runs a simple script on Compute Engine
    VMs with a local SSD attached and mounted.

    Note: data stored on a local SSD is not guaranteed to persist.
    More details here: https://cloud.google.com/compute/docs/disks/local-ssd#data_persistence

    Args:
        project_id: project ID or project number of the Cloud project you want to use.
        region: name of the region you want to use to run the job. Regions that are
            available for Batch are listed on: https://cloud.google.com/batch/docs/get-started#locations
        job_name: the name of the job that will be created.
            It needs to be unique for each project and region pair.
        ssd_name: name of the local SSD to be mounted for your Job. It is used both as
            the device name of the attached disk and as the last component of the
            mount path (/mnt/disks/<ssd_name>).
        ssd_size_gb: total size of all the local SSDs in GB. Each local SSD is 375 GB,
            so this value must be a positive multiple of 375
            (for example, 750 for 2 local SSDs). Defaults to a single local SSD.

    Returns:
        A job object representing the job created.

    Raises:
        ValueError: if ssd_size_gb is not a positive multiple of 375.
    """
    # Validate before any client is created or request is sent. An explicit
    # exception is used instead of `assert`, which is stripped when Python
    # runs with the -O flag.
    single_local_ssd_gb = 375
    if ssd_size_gb <= 0 or ssd_size_gb % single_local_ssd_gb != 0:
        raise ValueError(
            f"ssd_size_gb must be a positive multiple of {single_local_ssd_gb}, "
            f"got {ssd_size_gb}"
        )

    client = batch_v1.BatchServiceClient()

    # Define what will be done as part of the job.
    task = batch_v1.TaskSpec()
    runnable = batch_v1.Runnable()
    runnable.script = batch_v1.Runnable.Script()
    runnable.script.text = "echo Hello world! This is task ${BATCH_TASK_INDEX}. This job has a total of ${BATCH_TASK_COUNT} tasks."
    task.runnables = [runnable]
    task.max_retry_count = 2
    task.max_run_duration = "3600s"

    # Mount the local SSD inside the task environment. The device name must
    # match the device name of the disk attached in the instance policy below.
    volume = batch_v1.Volume()
    volume.device_name = ssd_name
    volume.mount_path = f"/mnt/disks/{ssd_name}"
    task.volumes = [volume]

    # Tasks are grouped inside a job using TaskGroups.
    # Currently, it's possible to have only one task group.
    group = batch_v1.TaskGroup()
    group.task_count = 4
    group.task_spec = task

    disk = batch_v1.AllocationPolicy.Disk()
    disk.type_ = "local-ssd"
    # The size of all the local SSDs in GB. Each local SSD is 375 GB,
    # so this value must be a multiple of 375 GB.
    # For example, for 2 local SSDs, set this value to 750 GB.
    disk.size_gb = ssd_size_gb

    # Policies are used to define what kind of virtual machines the tasks will run on.
    # The allowed number of local SSDs depends on the machine type for your job's VMs.
    # In this case, we tell the system to use the "n1-standard-1" machine type,
    # which requires local SSDs to be attached manually.
    # Read more about local disks here: https://cloud.google.com/compute/docs/disks/local-ssd#lssd_disk_options
    policy = batch_v1.AllocationPolicy.InstancePolicy()
    policy.machine_type = "n1-standard-1"

    attached_disk = batch_v1.AllocationPolicy.AttachedDisk()
    attached_disk.new_disk = disk
    attached_disk.device_name = ssd_name
    policy.disks = [attached_disk]

    instances = batch_v1.AllocationPolicy.InstancePolicyOrTemplate()
    instances.policy = policy

    allocation_policy = batch_v1.AllocationPolicy()
    allocation_policy.instances = [instances]

    job = batch_v1.Job()
    job.task_groups = [group]
    job.allocation_policy = allocation_policy
    job.labels = {"env": "testing", "type": "script"}
    # We use Cloud Logging as it's an out of the box available option
    job.logs_policy = batch_v1.LogsPolicy()
    job.logs_policy.destination = batch_v1.LogsPolicy.Destination.CLOUD_LOGGING

    create_request = batch_v1.CreateJobRequest()
    create_request.job = job
    create_request.job_id = job_name
    # The job's parent is the region in which the job will run
    create_request.parent = f"projects/{project_id}/locations/{region}"

    return client.create_job(create_request)
102+
103+
104+
# [END batch_create_local_ssd_job]
105+
106+
if __name__ == "__main__":
    import google.auth

    # google.auth.default() returns a (credentials, project_id) pair; only the
    # project ID is needed to run the sample.
    _, PROJECT = google.auth.default()
    REGION = "europe-west4"
    print(create_local_ssd_job(PROJECT, REGION, "ssd-job-batch", "local-ssd-0"))

batch/tests/test_basics.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from ..create.create_with_gpu_no_mounting import create_gpu_job
2929
from ..create.create_with_script_no_mounting import create_script_job
3030
from ..create.create_with_service_account import create_with_custom_service_account_job
31+
from ..create.create_with_ssd import create_local_ssd_job
3132

3233
from ..delete.delete_job import delete_job
3334
from ..get.get_job import get_job
@@ -39,7 +40,6 @@
3940
PROJECT = google.auth.default()[1]
4041
REGION = "europe-central2"
4142
ZONE = "europe-central2-b"
42-
4343
TIMEOUT = 600 # 10 minutes
4444

4545
WAIT_STATES = {
@@ -66,6 +66,11 @@ def service_account() -> str:
6666
return f"{project_number}-compute@developer.gserviceaccount.com"
6767

6868

69+
@pytest.fixture
def disk_name() -> str:
    """Return a unique device name for the local SSD used by a test.

    The name is "test-ssd-" followed by the first 10 hex characters of a
    random UUID, so each test invocation gets its own name.
    """
    return f"test-ssd-{uuid.uuid4().hex[:10]}"
72+
73+
6974
def _test_body(test_job: batch_v1.Job, additional_test: Callable = None, region=REGION):
7075
start_time = time.time()
7176
try:
@@ -144,3 +149,9 @@ def test_service_account_job(job_name, service_account):
144149
_test_body(
145150
job, additional_test=lambda: _check_service_account(job, service_account)
146151
)
152+
153+
154+
@flaky(max_runs=3, min_passes=1)
def test_ssd_job(job_name: str, disk_name: str, capsys: "pytest.CaptureFixture[str]"):
    """Create a local-SSD job and run the shared test body with a log check."""
    ssd_job = create_local_ssd_job(PROJECT, REGION, job_name, disk_name)

    def verify_logs():
        # Passed to _test_body as the additional check for this job.
        return _check_logs(ssd_job, capsys)

    _test_body(ssd_job, additional_test=verify_logs)

0 commit comments

Comments
 (0)