Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions generative_ai/list_tuned_code_generation_models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# [START aiplatform_sdk_list_tuned_code_generation_models]

from typing import Sequence

import vertexai
from vertexai.preview.language_models import CodeGenerationModel


def list_tuned_code_generation_models(
    project_id: str,
    location: str,
) -> Sequence[str]:
    """List the tuned models derived from the code-bison@001 base model.

    Args:
        project_id: GCP Project ID, used to initialize vertexai
        location: GCP Region, used to initialize vertexai

    Returns:
        The tuned model names reported by ``list_tuned_model_names()``.
        The same value is also printed to stdout.
    """
    vertexai.init(project=project_id, location=location)
    model = CodeGenerationModel.from_pretrained("code-bison@001")
    tuned_model_names = model.list_tuned_model_names()
    print(tuned_model_names)
    # [END aiplatform_sdk_list_tuned_code_generation_models]
    # NOTE(review): the return sits after the END region tag, presumably so
    # callers such as the test can inspect the names while the published
    # snippet ends at the print.
    return tuned_model_names


if __name__ == "__main__":
    import os

    # Both parameters are required; a bare call raises TypeError. Use the
    # same project environment variable and region as the accompanying test.
    list_tuned_code_generation_models(
        project_id=os.environ["GOOGLE_CLOUD_PROJECT"],
        location="us-central1",
    )
42 changes: 42 additions & 0 deletions generative_ai/list_tuned_code_generation_models_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import backoff
from google.api_core.exceptions import ResourceExhausted
from google.cloud import aiplatform

import list_tuned_code_generation_models


# Project the sample runs against; os.getenv yields None when
# GOOGLE_CLOUD_PROJECT is not set.
_PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
# Region passed as the sample's ``location`` argument.
_LOCATION = "us-central1"


@backoff.on_exception(backoff.expo, ResourceExhausted, max_time=10)
def test_list_tuned_code_generation_models() -> None:
    """Check that no listed tuned model carries the text-model fixture name.

    Every name returned by the sample is looked up in the model registry and
    the display name of its version "1" is compared against the fixture
    label; the test expects zero matches.
    """
    fixture_label = (
        "Vertex LLM Test Fixture "
        "(list_tuned_models_test.py::test_list_tuned_models)"
    )
    names = list_tuned_code_generation_models.list_tuned_code_generation_models(
        _PROJECT_ID,
        _LOCATION,
    )
    match_count = sum(
        1
        for name in names
        if fixture_label
        in aiplatform.models.ModelRegistry(model=name)
        .get_version_info("1")
        .model_display_name
    )
    assert match_count == 0
2 changes: 1 addition & 1 deletion generative_ai/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pandas==1.3.5; python_version == '3.7'
pandas==2.0.1; python_version > '3.7'
google-cloud-aiplatform[pipelines]==1.28.1
google-cloud-aiplatform[pipelines]==1.29.0
google-auth==2.17.3
70 changes: 70 additions & 0 deletions generative_ai/tune_code_generation_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# [START aiplatform_sdk_tune_code_generation_model]
from __future__ import annotations


from google.auth import default
import pandas as pd
import vertexai
from vertexai.preview.language_models import CodeGenerationModel

# Obtain credentials via google.auth.default() once, when the module is
# imported, requesting the cloud-platform scope. The second element of the
# returned pair is discarded; the credentials are passed to vertexai.init().
credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"])


def tune_code_generation_model(
    project_id: str,
    location: str,
    training_data: pd.DataFrame | str,
    train_steps: int = 300,
) -> CodeGenerationModel:
    """Tune a new model, based on a prompt-response data.

    "training_data" can be either the GCS URI of a file formatted in JSONL format
    (for example: training_data=f'gs://{bucket}/{filename}.jsonl'), or a pandas
    DataFrame. Each training example should be JSONL record with two keys, for
    example:
      {
        "input_text": <input prompt>,
        "output_text": <associated output>
      },
    or the pandas DataFrame should contain two columns:
      ['input_text', 'output_text']
    with rows for each training example.

    Args:
      project_id: GCP Project ID, used to initialize vertexai
      location: GCP Region, used to initialize vertexai
      training_data: GCS URI of jsonl file or pandas dataframe of training data
      train_steps: Number of training steps to use when tuning the model.

    Returns:
      The code-bison@001 model object on which ``tune_model`` was called.
    """
    vertexai.init(project=project_id, location=location, credentials=credentials)
    model = CodeGenerationModel.from_pretrained("code-bison@001")

    model.tune_model(
        training_data=training_data,
        # Optional:
        train_steps=train_steps,
        # The tuning job location is fixed to europe-west4, while the tuned
        # model location follows the caller's ``location``.
        tuning_job_location="europe-west4",
        tuned_model_location=location,
    )

    print(model._job.status)
    # [END aiplatform_sdk_tune_code_generation_model]
    # NOTE(review): the return sits after the END region tag, presumably so
    # the test can inspect the tuning job while the published snippet ends at
    # the print.
    return model


if __name__ == "__main__":
    import os
    import sys

    # project_id, location and training_data are all required; a bare call
    # raises TypeError. Take the training data URI from the command line and
    # the project from the same environment variable the test uses.
    if len(sys.argv) != 2:
        sys.exit(f"usage: {sys.argv[0]} gs://BUCKET/TRAINING_DATA.jsonl")

    tune_code_generation_model(
        project_id=os.environ["GOOGLE_CLOUD_PROJECT"],
        location="us-central1",
        training_data=sys.argv[1],
    )
107 changes: 107 additions & 0 deletions generative_ai/tune_code_generation_model_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import uuid

from google.cloud import aiplatform
from google.cloud import storage
from google.cloud.aiplatform.compat.types import pipeline_state
import pytest
from vertexai.preview.language_models import TextGenerationModel

import tune_code_generation_model

# Project the sample runs against; os.getenv yields None when
# GOOGLE_CLOUD_PROJECT is not set.
_PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
# Region passed as the sample's ``location`` argument.
_LOCATION = "us-central1"
# Bucket that receives the temporary training file. Indexing os.environ
# raises KeyError at import time when CLOUD_STORAGE_BUCKET is not set.
_BUCKET = os.environ["CLOUD_STORAGE_BUCKET"]


def get_model_display_name(tuned_model: TextGenerationModel) -> str:
    """Read "model_display_name" from the tuning pipeline's runtime parameters.

    Follows ``tuned_model._job._job`` to the pipeline job and looks the key up
    in a dict built from its ``runtime_config.parameter_values``.
    """
    pipeline_job = tuned_model._job._job
    runtime_parameters = dict(
        pipeline_job._gca_resource.runtime_config.parameter_values
    )
    return runtime_parameters["model_display_name"]


def upload_to_gcs(bucket: str, name: str, data: str) -> None:
    """Upload the string ``data`` as object ``name`` in bucket ``bucket``."""
    gcs_bucket = storage.Client().get_bucket(bucket)
    gcs_bucket.blob(name).upload_from_string(data)


def download_from_gcs(bucket: str, name: str) -> str:
    """Return the first ten lines of object ``name`` in bucket ``bucket``.

    The object is downloaded as bytes, decoded, split into lines, and the
    leading ten lines are joined back together with "\\n".
    """
    target_blob = storage.Client().get_bucket(bucket).blob(name)
    text = target_blob.download_as_bytes().decode()
    leading_lines = text.splitlines()[:10]
    return "\n".join(leading_lines)


def delete_from_gcs(bucket: str, name: str) -> None:
    """Delete object ``name`` from bucket ``bucket``."""
    gcs_bucket = storage.Client().get_bucket(bucket)
    gcs_bucket.blob(name).delete()


@pytest.fixture(scope="function")
def training_data_filename() -> str:
    """Stage a small training file in the test bucket and yield its gs:// URI.

    The first ten lines of the public headline-classification sample are
    copied to a uniquely named object in ``_BUCKET``; the object is deleted
    once the test using the fixture finishes.
    """
    object_name = f"{uuid.uuid4()}.jsonl"
    upload_to_gcs(
        _BUCKET,
        object_name,
        download_from_gcs(
            "cloud-samples-data",
            "ai-platform/generative_ai/headline_classification.jsonl",
        ),
    )
    gcs_uri = f"gs://{_BUCKET}/{object_name}"
    try:
        yield gcs_uri
    finally:
        delete_from_gcs(_BUCKET, object_name)


def teardown_model(
    tuned_model: TextGenerationModel, training_data_filename: str
) -> None:
    """Delete the tuned models created from ``training_data_filename``.

    A tuned model is selected when ``training_data_filename`` occurs in the
    display name of its version "1". For each selected model, endpoints with
    the same display name that have deployed models are undeployed and
    deleted, and then the model itself is deleted.

    Args:
        tuned_model: Model object whose ``list_tuned_model_names()`` is used
            to enumerate candidates.
        training_data_filename: Substring to look for in model display names.
    """
    for tuned_model_name in tuned_model.list_tuned_model_names():
        model_registry = aiplatform.models.ModelRegistry(model=tuned_model_name)
        # Fetch the display name once instead of once for the check and
        # again for the endpoint comparison.
        display_name = model_registry.get_version_info("1").model_display_name
        if training_data_filename not in display_name:
            continue

        for endpoint in aiplatform.Endpoint.list():
            if endpoint.display_name != display_name:
                continue
            # Undeploy and delete at most once per endpoint. Looping over the
            # deployed models would repeat both calls on an endpoint that has
            # already been deleted.
            if endpoint.list_models():
                endpoint.undeploy_all()
                endpoint.delete()

        aiplatform.Model(model_registry.model_resource_name).delete()


@pytest.mark.skip("Blocked on b/277959219")
def test_tuning_code_generation_model(training_data_filename: str) -> None:
    """Tune with a single training step and expect the pipeline to succeed.

    Takes approx. 20 minutes. Tuned resources are torn down whether or not
    the assertion holds.
    """
    tuned_model = tune_code_generation_model.tune_code_generation_model(
        project_id=_PROJECT_ID,
        location=_LOCATION,
        training_data=training_data_filename,
        train_steps=1,
    )
    try:
        job_state = tuned_model._job.status
        assert job_state == pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED
    finally:
        teardown_model(tuned_model, training_data_filename)