@@ -1515,7 +1515,6 @@ def from_local_script(
15151515 staging_bucket = staging_bucket ,
15161516 )
15171517
1518- @base .optional_sync ()
15191518 def run (
15201519 self ,
15211520 service_account : Optional [str ] = None ,
@@ -1537,7 +1536,8 @@ def run(
15371536 Optional. The full name of the Compute Engine network to which the job
15381537 should be peered. For example, projects/12345/global/networks/myVPC.
15391538 Private services access must already be configured for the network.
1540- If left unspecified, the job is not peered with any network.
1539+ If left unspecified, the network set in aiplatform.init will be used.
1540+ If no network was set there either, the job is not peered with any network.
15411541 timeout (int):
15421542 The maximum job running time in seconds. The default is 7 days.
15431543 restart_job_on_worker_restart (bool):
@@ -1570,7 +1570,73 @@ def run(
15701570 create_request_timeout (float):
15711571 Optional. The timeout for the create request in seconds.
15721572 """
1573+ network = network or initializer .global_config .network
1574+
1575+ self ._run (
1576+ service_account = service_account ,
1577+ network = network ,
1578+ timeout = timeout ,
1579+ restart_job_on_worker_restart = restart_job_on_worker_restart ,
1580+ enable_web_access = enable_web_access ,
1581+ tensorboard = tensorboard ,
1582+ sync = sync ,
1583+ create_request_timeout = create_request_timeout ,
1584+ )
1585+
1586+ @base .optional_sync ()
1587+ def _run (
1588+ self ,
1589+ service_account : Optional [str ] = None ,
1590+ network : Optional [str ] = None ,
1591+ timeout : Optional [int ] = None ,
1592+ restart_job_on_worker_restart : bool = False ,
1593+ enable_web_access : bool = False ,
1594+ tensorboard : Optional [str ] = None ,
1595+ sync : bool = True ,
1596+ create_request_timeout : Optional [float ] = None ,
1597+ ) -> None :
1598+ """Helper method to ensure network synchronization and to run the configured CustomJob.
1599+
1600+ Args:
1601+ service_account (str):
1602+ Optional. Specifies the service account for workload run-as account.
1603+ Users submitting jobs must have act-as permission on this run-as account.
1604+ network (str):
1605+ Optional. The full name of the Compute Engine network to which the job
1606+ should be peered. For example, projects/12345/global/networks/myVPC.
1607+ Private services access must already be configured for the network.
1608+ timeout (int):
1609+ The maximum job running time in seconds. The default is 7 days.
1610+ restart_job_on_worker_restart (bool):
1611+ Restarts the entire CustomJob if a worker
1612+ gets restarted. This feature can be used by
1613+ distributed training jobs that are not resilient
1614+ to workers leaving and joining a job.
1615+ enable_web_access (bool):
1616+ Whether you want Vertex AI to enable interactive shell access
1617+ to training containers.
1618+ https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell
1619+ tensorboard (str):
1620+ Optional. The name of a Vertex AI
1621+ [Tensorboard][google.cloud.aiplatform.v1beta1.Tensorboard]
1622+ resource to which this CustomJob will upload Tensorboard
1623+ logs. Format:
1624+ ``projects/{project}/locations/{location}/tensorboards/{tensorboard}``
15731625
1626+ The training script should write Tensorboard logs to the following Vertex AI environment
1627+ variable:
1628+
1629+ AIP_TENSORBOARD_LOG_DIR
1630+
1631+ `service_account` is required with provided `tensorboard`.
1632+ For more information on configuring your service account please visit:
1633+ https://cloud.google.com/vertex-ai/docs/experiments/tensorboard-training
1634+ sync (bool):
1635+ Whether to execute this method synchronously. If False, this method
1636+ will unblock and it will be executed in a concurrent Future.
1637+ create_request_timeout (float):
1638+ Optional. The timeout for the create request in seconds.
1639+ """
15741640 if service_account :
15751641 self ._gca_resource .job_spec .service_account = service_account
15761642
@@ -1907,7 +1973,6 @@ def _log_web_access_uris(self):
19071973 )
19081974 self ._logged_web_access_uris .add (uri )
19091975
1910- @base .optional_sync ()
19111976 def run (
19121977 self ,
19131978 service_account : Optional [str ] = None ,
@@ -1929,7 +1994,8 @@ def run(
19291994 Optional. The full name of the Compute Engine network to which the job
19301995 should be peered. For example, projects/12345/global/networks/myVPC.
19311996 Private services access must already be configured for the network.
1932- If left unspecified, the job is not peered with any network.
1997+ If left unspecified, the network set in aiplatform.init will be used.
1998+ If no network was set there either, the job is not peered with any network.
19331999 timeout (int):
19342000 Optional. The maximum job running time in seconds. The default is 7 days.
19352001 restart_job_on_worker_restart (bool):
@@ -1962,7 +2028,73 @@ def run(
19622028 create_request_timeout (float):
19632029 Optional. The timeout for the create request in seconds.
19642030 """
2031+ network = network or initializer .global_config .network
2032+
2033+ self ._run (
2034+ service_account = service_account ,
2035+ network = network ,
2036+ timeout = timeout ,
2037+ restart_job_on_worker_restart = restart_job_on_worker_restart ,
2038+ enable_web_access = enable_web_access ,
2039+ tensorboard = tensorboard ,
2040+ sync = sync ,
2041+ create_request_timeout = create_request_timeout ,
2042+ )
2043+
2044+ @base .optional_sync ()
2045+ def _run (
2046+ self ,
2047+ service_account : Optional [str ] = None ,
2048+ network : Optional [str ] = None ,
2049+ timeout : Optional [int ] = None , # seconds
2050+ restart_job_on_worker_restart : bool = False ,
2051+ enable_web_access : bool = False ,
2052+ tensorboard : Optional [str ] = None ,
2053+ sync : bool = True ,
2054+ create_request_timeout : Optional [float ] = None ,
2055+ ) -> None :
2056+ """Helper method to ensure network synchronization and to run the configured HyperparameterTuningJob.
2057+
2058+ Args:
2059+ service_account (str):
2060+ Optional. Specifies the service account for workload run-as account.
2061+ Users submitting jobs must have act-as permission on this run-as account.
2062+ network (str):
2063+ Optional. The full name of the Compute Engine network to which the job
2064+ should be peered. For example, projects/12345/global/networks/myVPC.
2065+ Private services access must already be configured for the network.
2066+ timeout (int):
2067+ Optional. The maximum job running time in seconds. The default is 7 days.
2068+ restart_job_on_worker_restart (bool):
2069+ Restarts the entire CustomJob if a worker
2070+ gets restarted. This feature can be used by
2071+ distributed training jobs that are not resilient
2072+ to workers leaving and joining a job.
2073+ enable_web_access (bool):
2074+ Whether you want Vertex AI to enable interactive shell access
2075+ to training containers.
2076+ https://cloud.google.com/vertex-ai/docs/training/monitor-debug-interactive-shell
2077+ tensorboard (str):
2078+ Optional. The name of a Vertex AI
2079+ [Tensorboard][google.cloud.aiplatform.v1beta1.Tensorboard]
2080+ resource to which this CustomJob will upload Tensorboard
2081+ logs. Format:
2082+ ``projects/{project}/locations/{location}/tensorboards/{tensorboard}``
19652083
2084+ The training script should write Tensorboard logs to the following Vertex AI environment
2085+ variable:
2086+
2087+ AIP_TENSORBOARD_LOG_DIR
2088+
2089+ `service_account` is required with provided `tensorboard`.
2090+ For more information on configuring your service account please visit:
2091+ https://cloud.google.com/vertex-ai/docs/experiments/tensorboard-training
2092+ sync (bool):
2093+ Whether to execute this method synchronously. If False, this method
2094+ will unblock and it will be executed in a concurrent Future.
2095+ create_request_timeout (float):
2096+ Optional. The timeout for the create request in seconds.
2097+ """
19662098 if service_account :
19672099 self ._gca_resource .trial_job_spec .service_account = service_account
19682100
0 commit comments