From 6ae4f26fd262cc04d742f6243abb72da3514fa9a Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Tue, 2 Apr 2024 16:44:58 -0500 Subject: [PATCH 01/19] Add Rag Sample --- generative_ai/rag.py | 258 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 258 insertions(+) create mode 100644 generative_ai/rag.py diff --git a/generative_ai/rag.py b/generative_ai/rag.py new file mode 100644 index 00000000000..7631af8468b --- /dev/null +++ b/generative_ai/rag.py @@ -0,0 +1,258 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# [START generativeaionvertexai_rag_create_corpus] +# [START generativeaionvertexai_rag_get_corpus] +# [START generativeaionvertexai_rag_list_corpora] +# [START generativeaionvertexai_rag_upload_file] +# [START generativeaionvertexai_rag_import_files] +# [START generativeaionvertexai_rag_get_file] +# [START generativeaionvertexai_rag_list_files] +# [START generativeaionvertexai_rag_delete_file] +# [START generativeaionvertexai_rag_delete_corpus] +# [START generativeaionvertexai_rag_retrieval_query] +# [START generativeaionvertexai_rag_generate_content] + +from typing import List, Union, Optional +import vertexai + +from google.cloud.aiplatform.private_preview.vertex_rag import rag + +# [END generativeaionvertexai_rag_create_corpus] +# [END generativeaionvertexai_rag_get_corpus] +# [END generativeaionvertexai_rag_list_corpora] +# [END generativeaionvertexai_rag_upload_file] +# [END generativeaionvertexai_rag_import_files] +# [END generativeaionvertexai_rag_get_file] +# [END generativeaionvertexai_rag_list_files] +# [END generativeaionvertexai_rag_delete_file] +# [END generativeaionvertexai_rag_delete_corpus] +# [END generativeaionvertexai_rag_retrieval_query] +# [END generativeaionvertexai_rag_generate_content] + + +# [START generativeaionvertexai_rag_create_corpus] +def create_corpus( + project_id: str, + location: str, + display_name: Optional[str] = None, + description: Optional[str] = None, +): + # Initialize Vertex AI + vertexai.init(project=project_id, location=location) + + corpus = rag.create_corpus(display_name=display_name, description=description) + print(corpus) + return corpus + + +# [END generativeaionvertexai_rag_create_corpus] + + +# [START generativeaionvertexai_rag_get_corpus] +def get_corpus(project_id: str, location: str, corpus_name: str): + # Initialize Vertex AI + vertexai.init(project=project_id, location=location) + + corpus = rag.get_corpus(name=corpus_name) + print(corpus) + return corpus + + +# [END generativeaionvertexai_rag_get_corpus] + + +# [START generativeaionvertexai_rag_list_corpora] +def list_corpora(project_id: str, location: str): + # Initialize Vertex AI + vertexai.init(project=project_id, location=location) + + corpora = rag.list_corpora() + print(corpora) + return corpora + + +# [END generativeaionvertexai_rag_list_corpora] + + +# [START generativeaionvertexai_rag_upload_file] +def upload_file( + project_id: str, + location: str, + corpus_name: str, + path: str, + display_name: Optional[str] = None, + description: Optional[str] = None, +): + # Initialize Vertex AI + vertexai.init(project=project_id, location=location) + + rag_file = rag.upload_file( + corpus_name=corpus_name, + path=path, + display_name=display_name, + description=description, + ) + print(rag_file) + return rag_file + + +# [END generativeaionvertexai_rag_upload_file] + + +# [START generativeaionvertexai_rag_import_files] +def import_files( + project_id: str, + location: str, + corpus_name: str, + path: Union[str, List[str]], + chunk_size: Optional[int] = 1024, + chunk_overlap: Optional[int] = 200, +): + # Initialize Vertex AI + vertexai.init(project=project_id, location=location) + + response = rag.import_files( + corpus_name=corpus_name, + path=path, + chunk_size=chunk_size, + chunk_overlap=chunk_overlap, + ) + print(f"Imported {response.imported_rag_files_count} files.") + return response + + +# [END generativeaionvertexai_rag_import_files] + + +# [START generativeaionvertexai_rag_get_file] +def get_file(project_id: str, location: str, file_name: str): + # Initialize Vertex AI + vertexai.init(project=project_id, location=location) + + rag_file = rag.get_file(name=file_name) + print(rag_file) + return rag_file + + +# [END generativeaionvertexai_rag_get_file] + + +# [START generativeaionvertexai_rag_list_files] +def list_files(project_id: str, location: str, corpus_name: str): + # Initialize Vertex AI + vertexai.init(project=project_id, location=location) + + files = rag.list_files(corpus_name=corpus_name) + for file in files: + print(file) + return files + + +# [END generativeaionvertexai_rag_list_files] + + +# [START generativeaionvertexai_rag_delete_file] +def delete_file(project_id: str, location: str, file_name: str): + # Initialize Vertex AI + vertexai.init(project=project_id, location=location) + + rag.delete_file(name=file_name) + print(f"File {file_name} deleted.") + + +# [END generativeaionvertexai_rag_delete_file] + + +# [START generativeaionvertexai_rag_delete_corpus] +def delete_corpus(project_id: str, location: str, corpus_name: str): + # Initialize Vertex AI + vertexai.init(project=project_id, location=location) + + rag.delete_corpus(name=corpus_name) + print(f"Corpus {corpus_name} deleted.") + + +# [END generativeaionvertexai_rag_delete_corpus] + + +# [START generativeaionvertexai_rag_retrieval_query] +def retrieval_query( + project_id: str, + location: str, + rag_corpus: str, + text: str, + similarity_top_k: Optional[int] = 10, +): + # Initialize Vertex AI + vertexai.init(project=project_id, location=location) + + response = rag.retrieval_query( + rag_corpora=rag_corpus, text=text, similarity_top_k=similarity_top_k + ) + print(response) + return response + + +# [END generativeaionvertexai_rag_retrieval_query] + + +# [START generativeaionvertexai_rag_generate_content] +from vertexai.preview.generative_models import GenerativeModel, Tool +from typing import List + + +def generate_content_with_rag( + project_id: str, location: str, corpus_display_name: str, paths: List[str] +): + """ + Creates and loads a RAG Corpus and generates text using Gemini. + + Args: + project_id (str): The project ID for Vertex AI. + location (str): The location for Vertex AI resources. + corpus_display_name (str): The display name for the corpus to be created. + paths (List[str]): List of file paths to import into the corpus. + Supports GCS URIs `gs://my-bucket/my_file` and Google Drive URLs + `https://drive.google.com/file/123` + """ + # Initialize Vertex AI + vertexai.init(project=project_id, location=location) + + corpus = rag.create_corpus(display_name=corpus_display_name) + + response = rag.import_files( + corpus.name, + paths, + chunk_size=1024, + chunk_overlap=200, + ) + + rag_retrieval_tool = Tool.from_retrieval( + retrieval=rag.Retrieval( + source=rag.VertexRagStore( + rag_corpora=corpus.name, # Only 1 corpus is allowed. + similarity_top_k=3, + ), + ) + ) + + rag_model = GenerativeModel("gemini-1.0-pro", tools=[rag_retrieval_tool]) + response = rag_model.generate_content("Why is the sky blue?") + print(response.text) + return response + + +# [END generativeaionvertexai_rag_generate_content] From c8ab44a1e1f9cd6144e6ac481cde62e7df3d445f Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Wed, 3 Apr 2024 10:15:43 -0500 Subject: [PATCH 02/19] Fix import - init rag_test --- generative_ai/rag.py | 1 - generative_ai/rag_test.py | 145 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 145 insertions(+), 1 deletion(-) create mode 100644 generative_ai/rag_test.py diff --git a/generative_ai/rag.py b/generative_ai/rag.py index 7631af8468b..8d79ff1a486 100644 --- a/generative_ai/rag.py +++ b/generative_ai/rag.py @@ -211,7 +211,6 @@ def retrieval_query( # [START generativeaionvertexai_rag_generate_content] from vertexai.preview.generative_models import GenerativeModel, Tool -from typing import List def generate_content_with_rag( diff --git a/generative_ai/rag_test.py b/generative_ai/rag_test.py new file mode 100644 index 00000000000..93ca58e001e --- /dev/null +++ b/generative_ai/rag_test.py @@ -0,0 +1,145 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pytest +import vertexai + +import gemini_chat_example +import gemini_count_token_example +import gemini_grounding_example +import gemini_guide_example +import gemini_multi_image_example +import gemini_pro_basic_example +import gemini_pro_config_example +import gemini_safety_config_example +import gemini_single_turn_video_example + +PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT") +LOCATION = "us-central1" + +vertexai.init(project=PROJECT_ID, location=LOCATION) + + +def test_gemini_guide_example() -> None: + text = gemini_guide_example.generate_text(PROJECT_ID, LOCATION) + text = text.lower() + assert len(text) > 0 + assert "scones" in text + + +def test_gemini_pro_basic_example() -> None: + text = gemini_pro_basic_example.generate_text(PROJECT_ID, LOCATION) + text = text.lower() + assert len(text) > 0 + assert "recipe" in text or "ingredients" in text or "table" in text + + +def test_gemini_pro_config_example() -> None: + import urllib.request + + # download the image + fname = "scones.jpg" + url = "https://storage.googleapis.com/generativeai-downloads/images/scones.jpg" + urllib.request.urlretrieve(url, fname) + + if os.path.isfile(fname): + text = gemini_pro_config_example.generate_text(PROJECT_ID, LOCATION) + text = text.lower() + assert len(text) > 0 + assert "recipe" in text or "table" in text + + # clean-up + os.remove(fname) + else: + raise Exception("File(scones.jpg) not found!") + + +def test_gemini_multi_image_example() -> None: + text = gemini_multi_image_example.generate_text_multimodal(PROJECT_ID, LOCATION) + text = text.lower() + assert len(text) > 0 + assert "city" in text + assert "landmark" in text + + +def test_gemini_count_token_example() -> None: + text = gemini_count_token_example.generate_text(PROJECT_ID, LOCATION) + text = text.lower() + assert len(text) > 0 + assert "sky" in text + + +def test_gemini_safety_config_example() -> None: + import http + import typing + import urllib + + from vertexai.preview.generative_models import Image + + def load_image_from_url(image_url: str) -> str: + with urllib.request.urlopen(image_url) as response: + response = typing.cast(http.client.HTTPResponse, response) + image_bytes = response.read() + return Image.from_bytes(image_bytes) + + # import base64 + + # base64_image_data = base64.b64encode( + # open('scones.jpg', 'rb').read()).decode("utf-8") + # image = generative_models.Part.from_data( + # data=base64.b64decode(base64_image_data), mime_type="image/png") + image = load_image_from_url( + "https://storage.googleapis.com/generativeai-downloads/images/scones.jpg" + ) + + vertexai.init(project=PROJECT_ID, location=LOCATION) + text = gemini_safety_config_example.generate_text(PROJECT_ID, LOCATION, image) + text = text.lower() + assert len(text) > 0 + assert any( + [_ in text for _ in ("scone", "blueberry", "coffee,", "flower", "table")] + ) + + +def test_gemini_single_turn_video_example() -> None: + text = gemini_single_turn_video_example.generate_text(PROJECT_ID, LOCATION) + text = text.lower() + assert len(text) > 0 + assert any([_ in text for _ in ("zoo", "tiger", "leaf", "water")]) + + +def test_gemini_chat_example() -> None: + text = gemini_chat_example.chat_text_example(PROJECT_ID, LOCATION) + text = text.lower() + assert len(text) > 0 + assert any([_ in text for _ in ("hi", "hello", "greeting")]) + + text = gemini_chat_example.chat_stream_example(PROJECT_ID, LOCATION) + text = text.lower() + assert len(text) > 0 + assert any([_ in text for _ in ("hi", "hello", "greeting")]) + + +@pytest.mark.skip( + "Unable to test Google Search grounding due to allowlist restrictions." +) +def test_gemini_grounding_example() -> None: + data_store_id = "test-search-engine_1689960780551" + data_store_path = f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/default_collection/dataStores/{data_store_id}" + response = gemini_grounding_example.generate_text_with_grounding( + PROJECT_ID, LOCATION, data_store_path=data_store_path + ) + assert response From 24eed1bd9f3c8a30c04414a9080099ba2531d0a6 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Tue, 16 Apr 2024 17:31:24 +0000 Subject: [PATCH 03/19] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?= =?UTF-8?q?post-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- generative_ai/embedding_model_tuning.py | 13 ++++++++----- generative_ai/embedding_model_tuning_test.py | 9 ++++++--- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/generative_ai/embedding_model_tuning.py b/generative_ai/embedding_model_tuning.py index 21f357cffed..55f58d17943 100644 --- a/generative_ai/embedding_model_tuning.py +++ b/generative_ai/embedding_model_tuning.py @@ -32,7 +32,7 @@ def tune_embedding_model( train_label_path: str = "gs://embedding-customization-pipeline/dataset/train.tsv", test_label_path: str = "gs://embedding-customization-pipeline/dataset/test.tsv", batch_size: int = 50, - iterations: int = 300 + iterations: int = 300, ) -> pipeline_jobs.PipelineJob: match = re.search(r"(.+)(-autopush|-staging)?-aiplatform.+", api_endpoint) location = match.group(1) if match else "us-central1" @@ -50,7 +50,8 @@ def tune_embedding_model( train_label_path=train_label_path, test_label_path=test_label_path, batch_size=batch_size, - iterations=iterations) + iterations=iterations, + ), ) job.submit() return job @@ -58,6 +59,8 @@ def tune_embedding_model( # [END aiplatform_sdk_embedding] if __name__ == "__main__": - tune_embedding_model(aiplatform_init.global_config.api_endpoint, - aiplatform_init.global_config.project, - aiplatform_init.global_config.staging_bucket) + tune_embedding_model( + aiplatform_init.global_config.api_endpoint, + aiplatform_init.global_config.project, + aiplatform_init.global_config.staging_bucket, + ) diff --git a/generative_ai/embedding_model_tuning_test.py b/generative_ai/embedding_model_tuning_test.py index 09eeea75647..7ddb6154fd4 100644 --- a/generative_ai/embedding_model_tuning_test.py +++ b/generative_ai/embedding_model_tuning_test.py @@ -35,16 +35,19 @@ def dispose(job: pipeline_jobs.PipelineJob) -> None: def test_tune_embedding_model() -> None: credentials, _ = google.auth.default( # Set explicit credentials with Oauth scopes. - scopes=["https://www.googleapis.com/auth/cloud-platform"]) + scopes=["https://www.googleapis.com/auth/cloud-platform"] + ) aiplatform.init( api_endpoint="us-central1-aiplatform.googleapis.com:443", project=os.getenv("GOOGLE_CLOUD_PROJECT"), staging_bucket="gs://ucaip-samples-us-central1/training_pipeline_output", - credentials=credentials) + credentials=credentials, + ) job = embedding_model_tuning.tune_embedding_model( aiplatform_init.global_config.api_endpoint, aiplatform_init.global_config.project, - aiplatform_init.global_config.staging_bucket) + aiplatform_init.global_config.staging_bucket, + ) try: assert job.state != "PIPELINE_STATE_FAILED" finally: From cfdba65f5625eecdb3a58510382c122378a168c1 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Tue, 16 Apr 2024 15:42:12 -0500 Subject: [PATCH 04/19] Change import order --- generative_ai/rag.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generative_ai/rag.py b/generative_ai/rag.py index 8d79ff1a486..8fb9814fca3 100644 --- a/generative_ai/rag.py +++ b/generative_ai/rag.py @@ -26,9 +26,9 @@ # [START generativeaionvertexai_rag_generate_content] from typing import List, Union, Optional -import vertexai from google.cloud.aiplatform.private_preview.vertex_rag import rag +import vertexai # [END generativeaionvertexai_rag_create_corpus] # [END generativeaionvertexai_rag_get_corpus] From dd6c0228f157856fd492b8c8420ec4e3877f4e44 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Tue, 16 Apr 2024 15:42:18 -0500 Subject: [PATCH 05/19] Update vertexai library version --- generative_ai/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generative_ai/requirements.txt b/generative_ai/requirements.txt index 975c55934e1..a242ab49c63 100644 --- a/generative_ai/requirements.txt +++ b/generative_ai/requirements.txt @@ -2,6 +2,6 @@ pandas==1.3.5; python_version == '3.7' pandas==2.0.1; python_version > '3.7' pillow==9.5.0; python_version < '3.8' pillow==10.0.1; python_version >= '3.8' -google-cloud-aiplatform[pipelines]==1.42.1 +google-cloud-aiplatform[pipelines]==1.47.0 google-auth==2.17.3 anthropic[vertex]==0.21.3 From e60421467ca470177ad0a4765cc2079534849d17 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Tue, 16 Apr 2024 15:44:06 -0500 Subject: [PATCH 06/19] Move rag tags --- generative_ai/rag.py | 39 +++++++++++++-------------------------- 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/generative_ai/rag.py b/generative_ai/rag.py index 8fb9814fca3..f7b7ef4a84d 100644 --- a/generative_ai/rag.py +++ b/generative_ai/rag.py @@ -55,12 +55,10 @@ def create_corpus( corpus = rag.create_corpus(display_name=display_name, description=description) print(corpus) + # [END generativeaionvertexai_rag_create_corpus] return corpus -# [END generativeaionvertexai_rag_create_corpus] - - # [START generativeaionvertexai_rag_get_corpus] def get_corpus(project_id: str, location: str, corpus_name: str): # Initialize Vertex AI @@ -68,12 +66,10 @@ def get_corpus(project_id: str, location: str, corpus_name: str): corpus = rag.get_corpus(name=corpus_name) print(corpus) + # [END generativeaionvertexai_rag_get_corpus] return corpus -# [END generativeaionvertexai_rag_get_corpus] - - # [START generativeaionvertexai_rag_list_corpora] def list_corpora(project_id: str, location: str): # Initialize Vertex AI @@ -81,12 +77,10 @@ def list_corpora(project_id: str, location: str): corpora = rag.list_corpora() print(corpora) + # [END generativeaionvertexai_rag_list_corpora] return corpora -# [END generativeaionvertexai_rag_list_corpora] - - # [START generativeaionvertexai_rag_upload_file] def upload_file( project_id: str, @@ -106,12 +100,10 @@ def upload_file( description=description, ) print(rag_file) + # [END generativeaionvertexai_rag_upload_file] return rag_file -# [END generativeaionvertexai_rag_upload_file] - - # [START generativeaionvertexai_rag_import_files] def import_files( project_id: str, @@ -131,12 +123,10 @@ def import_files( chunk_overlap=chunk_overlap, ) print(f"Imported {response.imported_rag_files_count} files.") + # [END generativeaionvertexai_rag_import_files] return response -# [END generativeaionvertexai_rag_import_files] - - # [START generativeaionvertexai_rag_get_file] def get_file(project_id: str, location: str, file_name: str): # Initialize Vertex AI @@ -144,10 +134,9 @@ def get_file(project_id: str, location: str, file_name: str): rag_file = rag.get_file(name=file_name) print(rag_file) - return rag_file + # [END generativeaionvertexai_rag_get_file] - -# [END generativeaionvertexai_rag_get_file] + return rag_file # [START generativeaionvertexai_rag_list_files] @@ -158,14 +147,13 @@ def list_files(project_id: str, location: str, corpus_name: str): files = rag.list_files(corpus_name=corpus_name) for file in files: print(file) - return files - + # [END generativeaionvertexai_rag_list_files] -# [END generativeaionvertexai_rag_list_files] + return files # [START generativeaionvertexai_rag_delete_file] -def delete_file(project_id: str, location: str, file_name: str): +def delete_file(project_id: str, location: str, file_name: str) -> None: # Initialize Vertex AI vertexai.init(project=project_id, location=location) @@ -177,7 +165,7 @@ def delete_file(project_id: str, location: str, file_name: str): # [START generativeaionvertexai_rag_delete_corpus] -def delete_corpus(project_id: str, location: str, corpus_name: str): +def delete_corpus(project_id: str, location: str, corpus_name: str) -> None: # Initialize Vertex AI vertexai.init(project=project_id, location=location) @@ -251,7 +239,6 @@ def generate_content_with_rag( rag_model = GenerativeModel("gemini-1.0-pro", tools=[rag_retrieval_tool]) response = rag_model.generate_content("Why is the sky blue?") print(response.text) - return response + # [END generativeaionvertexai_rag_generate_content] - -# [END generativeaionvertexai_rag_generate_content] + return response From 188efd65e2b802d4153474e6078b7110ab084656 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Wed, 17 Apr 2024 10:27:36 -0500 Subject: [PATCH 07/19] Add real tests for RAG --- generative_ai/rag_test.py | 147 +++++++++++++------------------------- 1 file changed, 51 insertions(+), 96 deletions(-) diff --git a/generative_ai/rag_test.py b/generative_ai/rag_test.py index 93ca58e001e..4cd8ba1e2ff 100644 --- a/generative_ai/rag_test.py +++ b/generative_ai/rag_test.py @@ -1,4 +1,4 @@ -# Copyright 2023 Google LLC +# Copyright 2024 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,129 +17,84 @@ import pytest import vertexai -import gemini_chat_example -import gemini_count_token_example -import gemini_grounding_example -import gemini_guide_example -import gemini_multi_image_example -import gemini_pro_basic_example -import gemini_pro_config_example -import gemini_safety_config_example -import gemini_single_turn_video_example +import rag PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT") LOCATION = "us-central1" +CORPUS_NAME = "test_corpus" +FILE_PATH = "./hello_world.txt" # Replace with a valid file path vertexai.init(project=PROJECT_ID, location=LOCATION) -def test_gemini_guide_example() -> None: - text = gemini_guide_example.generate_text(PROJECT_ID, LOCATION) - text = text.lower() - assert len(text) > 0 - assert "scones" in text +@pytest.fixture(scope="module") +def test_file(): + file_path = "./hello.txt" + file_path.write_text("Hello World") + yield file_path + file_path.unlink() # Delete the file after tests -def test_gemini_pro_basic_example() -> None: - text = gemini_pro_basic_example.generate_text(PROJECT_ID, LOCATION) - text = text.lower() - assert len(text) > 0 - assert "recipe" in text or "ingredients" in text or "table" in text +@pytest.fixture(scope="module") +def test_corpus(): + """Creates a corpus for testing and deletes it after tests are complete.""" + corpus = rag.create_corpus(PROJECT_ID, LOCATION, CORPUS_NAME) + yield corpus + rag.delete_corpus(PROJECT_ID, LOCATION, corpus.name) -def test_gemini_pro_config_example() -> None: - import urllib.request +@pytest.fixture +def uploaded_file(test_corpus): + """Uploads a file to the corpus and deletes it after the test.""" + rag_file = rag.upload_file(PROJECT_ID, LOCATION, test_corpus.name, test_file) + yield rag_file + rag.delete_file(PROJECT_ID, LOCATION, rag_file.name) - # download the image - fname = "scones.jpg" - url = "https://storage.googleapis.com/generativeai-downloads/images/scones.jpg" - urllib.request.urlretrieve(url, fname) - if os.path.isfile(fname): - text = gemini_pro_config_example.generate_text(PROJECT_ID, LOCATION) - text = text.lower() - assert len(text) > 0 - assert "recipe" in text or "table" in text +def test_create_corpus(): + corpus = rag.create_corpus(PROJECT_ID, LOCATION, "test_create_corpus") + assert corpus.display_name == "test_create_corpus" + rag.delete_corpus(PROJECT_ID, LOCATION, corpus.name) - # clean-up - os.remove(fname) - else: - raise Exception("File(scones.jpg) not found!") +def test_get_corpus(test_corpus): + retrieved_corpus = rag.get_corpus(PROJECT_ID, LOCATION, test_corpus.name) + assert retrieved_corpus.name == test_corpus.name -def test_gemini_multi_image_example() -> None: - text = gemini_multi_image_example.generate_text_multimodal(PROJECT_ID, LOCATION) - text = text.lower() - assert len(text) > 0 - assert "city" in text - assert "landmark" in text +def test_list_corpora(): + corpora = rag.list_corpora(PROJECT_ID, LOCATION) + assert any(c.name == test_corpus.name for c in corpora) -def test_gemini_count_token_example() -> None: - text = gemini_count_token_example.generate_text(PROJECT_ID, LOCATION) - text = text.lower() - assert len(text) > 0 - assert "sky" in text +def test_upload_file(test_corpus): + rag_file = rag.upload_file(PROJECT_ID, LOCATION, test_corpus.name, test_file) + assert rag_file -def test_gemini_safety_config_example() -> None: - import http - import typing - import urllib - from vertexai.preview.generative_models import Image +def test_import_files(test_corpus): + response = rag.import_files(PROJECT_ID, LOCATION, test_corpus.name, test_file) + assert response.imported_rag_files_count > 0 - def load_image_from_url(image_url: str) -> str: - with urllib.request.urlopen(image_url) as response: - response = typing.cast(http.client.HTTPResponse, response) - image_bytes = response.read() - return Image.from_bytes(image_bytes) - # import base64 +def test_get_file(uploaded_file): + retrieved_file = rag.get_file(PROJECT_ID, LOCATION, uploaded_file.name) + assert retrieved_file.name == uploaded_file.name - # base64_image_data = base64.b64encode( - # open('scones.jpg', 'rb').read()).decode("utf-8") - # image = generative_models.Part.from_data( - # data=base64.b64decode(base64_image_data), mime_type="image/png") - image = load_image_from_url( - "https://storage.googleapis.com/generativeai-downloads/images/scones.jpg" - ) - - vertexai.init(project=PROJECT_ID, location=LOCATION) - text = gemini_safety_config_example.generate_text(PROJECT_ID, LOCATION, image) - text = text.lower() - assert len(text) > 0 - assert any( - [_ in text for _ in ("scone", "blueberry", "coffee,", "flower", "table")] - ) - - -def test_gemini_single_turn_video_example() -> None: - text = gemini_single_turn_video_example.generate_text(PROJECT_ID, LOCATION) - text = text.lower() - assert len(text) > 0 - assert any([_ in text for _ in ("zoo", "tiger", "leaf", "water")]) +def test_list_files(test_corpus, uploaded_file): + files = rag.list_files(PROJECT_ID, LOCATION, test_corpus.name) + assert any(f.name == uploaded_file.name for f in files) -def test_gemini_chat_example() -> None: - text = gemini_chat_example.chat_text_example(PROJECT_ID, LOCATION) - text = text.lower() - assert len(text) > 0 - assert any([_ in text for _ in ("hi", "hello", "greeting")]) - text = gemini_chat_example.chat_stream_example(PROJECT_ID, LOCATION) - text = text.lower() - assert len(text) > 0 - assert any([_ in text for _ in ("hi", "hello", "greeting")]) +def test_retrieval_query(test_corpus): + response = rag.retrieval_query(PROJECT_ID, LOCATION, test_corpus.name, "test query") + assert len(response.results) > 0 -@pytest.mark.skip( - "Unable to test Google Search grounding due to allowlist restrictions." -) -def test_gemini_grounding_example() -> None: - data_store_id = "test-search-engine_1689960780551" - data_store_path = f"projects/{PROJECT_ID}/locations/{LOCATION}/collections/default_collection/dataStores/{data_store_id}" - response = gemini_grounding_example.generate_text_with_grounding( - PROJECT_ID, LOCATION, data_store_path=data_store_path +def test_generate_content_with_rag(): + corpus, response = rag.generate_content_with_rag( + PROJECT_ID, LOCATION, "test_corpus_generate_content", [test_file] ) assert response + rag.delete_corpus(PROJECT_ID, LOCATION, corpus.name) From acbc2876fbbd8768429142b0e50a1492546a0917 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Wed, 17 Apr 2024 10:27:48 -0500 Subject: [PATCH 08/19] Change import for rag --- generative_ai/rag.py | 4 ++-- generative_ai/requirements.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/generative_ai/rag.py b/generative_ai/rag.py index f7b7ef4a84d..7ccdde67d18 100644 --- a/generative_ai/rag.py +++ b/generative_ai/rag.py @@ -27,7 +27,7 @@ from typing import List, Union, Optional -from google.cloud.aiplatform.private_preview.vertex_rag import rag +from vertexai.preview import rag import vertexai # [END generativeaionvertexai_rag_create_corpus] @@ -241,4 +241,4 @@ def generate_content_with_rag( print(response.text) # [END generativeaionvertexai_rag_generate_content] - return response + return corpus, response diff --git a/generative_ai/requirements.txt b/generative_ai/requirements.txt index 04539f4bc11..13a6bd5075f 100644 --- a/generative_ai/requirements.txt +++ b/generative_ai/requirements.txt @@ -2,6 +2,6 @@ pandas==1.3.5; python_version == '3.7' pandas==2.0.1; python_version > '3.7' pillow==9.5.0; python_version < '3.8' pillow==10.3.0; python_version >= '3.8' -google-cloud-aiplatform[pipelines]==1.47.0 +google-cloud-aiplatform[pipelines]==1.48.0 google-auth==2.17.3 anthropic[vertex]==0.21.3 From f39d9314157d56d076a41a0bd05bf382b68c7cd8 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Wed, 17 Apr 2024 10:55:35 -0500 Subject: [PATCH 09/19] Add lint ignore --- generative_ai/rag.py | 3 ++- generative_ai/rag_test.py | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/generative_ai/rag.py b/generative_ai/rag.py index 7ccdde67d18..2f21486daaf 100644 --- a/generative_ai/rag.py +++ b/generative_ai/rag.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +# flake8: noqa ANN001, ANN201 # [START generativeaionvertexai_rag_create_corpus] # [START generativeaionvertexai_rag_get_corpus] @@ -25,7 +26,7 @@ # [START generativeaionvertexai_rag_retrieval_query] # [START generativeaionvertexai_rag_generate_content] -from typing import List, Union, Optional +from typing import List, Optional, Union from vertexai.preview import rag import vertexai diff --git a/generative_ai/rag_test.py b/generative_ai/rag_test.py index 4cd8ba1e2ff..1adb7439c12 100644 --- a/generative_ai/rag_test.py +++ b/generative_ai/rag_test.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +# flake8: noqa ANN001, ANN201 + import os import pytest From d2656e4c6f6a51bfe19fb149ee4636cc297c44e2 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Wed, 17 Apr 2024 13:53:54 -0500 Subject: [PATCH 10/19] Addressed comments and changed structure to follow new guidelines --- generative_ai/rag.py | 257 ++++++++++++++++++++++++++------------ generative_ai/rag_test.py | 44 ++++--- 2 files changed, 203 insertions(+), 98 deletions(-) diff --git a/generative_ai/rag.py b/generative_ai/rag.py index 2f21486daaf..642ebcbdc1a 100644 --- a/generative_ai/rag.py +++ b/generative_ai/rag.py @@ -14,19 +14,21 @@ # flake8: noqa ANN001, ANN201 +from typing import List, Optional + # [START generativeaionvertexai_rag_create_corpus] # [START generativeaionvertexai_rag_get_corpus] # [START generativeaionvertexai_rag_list_corpora] # [START generativeaionvertexai_rag_upload_file] # [START generativeaionvertexai_rag_import_files] +# [START generativeaionvertexai_rag_import_files_async] # [START generativeaionvertexai_rag_get_file] # [START generativeaionvertexai_rag_list_files] # [START generativeaionvertexai_rag_delete_file] # [START generativeaionvertexai_rag_delete_corpus] # [START generativeaionvertexai_rag_retrieval_query] # [START generativeaionvertexai_rag_generate_content] - -from typing import List, Optional, Union +# [START generativeaionvertexai_rag_quickstart] from vertexai.preview import rag import vertexai @@ -36,23 +38,29 @@ # [END generativeaionvertexai_rag_list_corpora] # [END generativeaionvertexai_rag_upload_file] # [END generativeaionvertexai_rag_import_files] +# [END generativeaionvertexai_rag_import_files_async] # [END generativeaionvertexai_rag_get_file] # [END generativeaionvertexai_rag_list_files] # [END generativeaionvertexai_rag_delete_file] # [END generativeaionvertexai_rag_delete_corpus] # [END generativeaionvertexai_rag_retrieval_query] # [END generativeaionvertexai_rag_generate_content] +# [END generativeaionvertexai_rag_quickstart] -# [START generativeaionvertexai_rag_create_corpus] def create_corpus( project_id: str, - location: str, display_name: Optional[str] = None, description: Optional[str] = None, ): - # Initialize Vertex AI - vertexai.init(project=project_id, location=location) + # [START generativeaionvertexai_rag_create_corpus] + + # TODO(developer): Update and un-comment below lines + # project_id = "PROJECT_ID" + # display_name = "test_corpus" + # description = "Corpus Description" + + vertexai.init(project=project_id, location="us-central1") corpus = rag.create_corpus(display_name=display_name, description=description) print(corpus) @@ -60,10 +68,14 @@ def create_corpus( return corpus -# [START generativeaionvertexai_rag_get_corpus] -def get_corpus(project_id: str, location: str, corpus_name: str): - # Initialize Vertex AI - vertexai.init(project=project_id, location=location) +def get_corpus(project_id: str, corpus_name: str): + # [START generativeaionvertexai_rag_get_corpus] + + # TODO(developer): Update and un-comment below lines + # project_id = "PROJECT_ID" + # corpus_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}" + + vertexai.init(project=project_id, location="us-central1") corpus = rag.get_corpus(name=corpus_name) print(corpus) @@ -71,10 +83,13 @@ def get_corpus(project_id: str, location: str, corpus_name: str): return corpus -# [START generativeaionvertexai_rag_list_corpora] -def list_corpora(project_id: str, location: str): - # Initialize Vertex AI - vertexai.init(project=project_id, location=location) +def list_corpora(project_id: str): + # [START generativeaionvertexai_rag_list_corpora] + + # TODO(developer): Update and un-comment below lines + # project_id = "PROJECT_ID" + + vertexai.init(project=project_id, location="us-central1") corpora = rag.list_corpora() print(corpora) @@ -82,17 +97,22 @@ def list_corpora(project_id: str, location: str): return corpora -# [START generativeaionvertexai_rag_upload_file] def upload_file( project_id: str, - location: str, corpus_name: str, path: str, display_name: Optional[str] = None, description: Optional[str] = None, ): - # Initialize Vertex AI - vertexai.init(project=project_id, location=location) + # [START generativeaionvertexai_rag_upload_file] + + # TODO(developer): Update and un-comment below lines + # project_id = "PROJECT_ID" + # corpus_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}" + # display_name = "file_display_name" + # description = "file description" + + vertexai.init(project=project_id, location="us-central1") rag_file = rag.upload_file( corpus_name=corpus_name, @@ -105,33 +125,68 @@ def upload_file( return rag_file -# [START generativeaionvertexai_rag_import_files] def import_files( project_id: str, - location: str, corpus_name: str, - path: Union[str, List[str]], - chunk_size: Optional[int] = 1024, - chunk_overlap: Optional[int] = 200, + paths: List[str], ): - # Initialize Vertex AI - vertexai.init(project=project_id, location=location) + # [START generativeaionvertexai_rag_import_files] + + # TODO(developer): Update and un-comment below lines + # project_id = "PROJECT_ID" + # corpus_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}" + # paths = ["https://drive.google.com/file/123", "gs://my_bucket/my_files_dir"] # Supports Google Cloud Storage and Google Drive Links + + vertexai.init(project=project_id, location="us-central1") response = rag.import_files( corpus_name=corpus_name, - path=path, - chunk_size=chunk_size, - chunk_overlap=chunk_overlap, + paths=paths, + chunk_size=512, # Optional + chunk_overlap=100, # Optional ) print(f"Imported {response.imported_rag_files_count} files.") # [END generativeaionvertexai_rag_import_files] return response -# [START generativeaionvertexai_rag_get_file] -def get_file(project_id: str, location: str, file_name: str): - # Initialize Vertex AI - vertexai.init(project=project_id, location=location) +async def import_files_async( + project_id: str, + corpus_name: str, + paths: List[str], +): + # [START generativeaionvertexai_rag_import_files_async] + + # TODO(developer): Update and un-comment below lines + # project_id = "PROJECT_ID" + # corpus_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}" + + # Supports Google Cloud Storage and Google Drive Links + # paths = ["https://drive.google.com/file/123", "gs://my_bucket/my_files_dir"] + + vertexai.init(project=project_id, location="us-central1") + + response = await rag.import_files_async( + corpus_name=corpus_name, + paths=paths, + chunk_size=512, # Optional + chunk_overlap=100, # Optional + ) + + await response.result() + print(f"Imported {response.imported_rag_files_count} files.") + # [END generativeaionvertexai_rag_import_files_async] + return response + + +def get_file(project_id: str, file_name: str): + # [START generativeaionvertexai_rag_get_file] + + # TODO(developer): Update and un-comment below lines + # project_id = "PROJECT_ID" + # file_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}/ragFiles/{rag_file_id}" + + vertexai.init(project=project_id, location="us-central1") rag_file = rag.get_file(name=file_name) print(rag_file) @@ -140,10 +195,14 @@ def get_file(project_id: str, location: str, file_name: str): return rag_file -# [START generativeaionvertexai_rag_list_files] -def list_files(project_id: str, location: str, corpus_name: str): - # Initialize Vertex AI - vertexai.init(project=project_id, location=location) +def list_files(project_id: str, corpus_name: str): + # [START generativeaionvertexai_rag_list_files] + + # TODO(developer): Update and un-comment below lines + # project_id = "PROJECT_ID" + # corpus_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}" + + vertexai.init(project=project_id, location="us-central1") files = rag.list_files(corpus_name=corpus_name) for file in files: @@ -153,86 +212,123 @@ def list_files(project_id: str, location: str, corpus_name: str): return files -# [START generativeaionvertexai_rag_delete_file] -def delete_file(project_id: str, location: str, file_name: str) -> None: - # Initialize Vertex AI - vertexai.init(project=project_id, location=location) +def delete_file(project_id: str, file_name: str) -> None: + # [START generativeaionvertexai_rag_delete_file] + + # TODO(developer): Update and un-comment below lines + # project_id = "PROJECT_ID" + # file_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}/ragFiles/{rag_file_id}" + + vertexai.init(project=project_id, location="us-central1") rag.delete_file(name=file_name) print(f"File {file_name} deleted.") + # [END generativeaionvertexai_rag_delete_file] -# [END generativeaionvertexai_rag_delete_file] +def delete_corpus(project_id: str, corpus_name: str) -> None: + # [START generativeaionvertexai_rag_delete_corpus] + # TODO(developer): Update and un-comment below lines + # project_id = "PROJECT_ID" + # corpus_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}" -# [START generativeaionvertexai_rag_delete_corpus] -def delete_corpus(project_id: str, location: str, corpus_name: str) -> None: - # Initialize Vertex AI - vertexai.init(project=project_id, location=location) + vertexai.init(project=project_id, location="us-central1") rag.delete_corpus(name=corpus_name) print(f"Corpus {corpus_name} deleted.") + # [END generativeaionvertexai_rag_delete_corpus] -# [END generativeaionvertexai_rag_delete_corpus] - - -# [START generativeaionvertexai_rag_retrieval_query] def retrieval_query( project_id: str, - location: str, - rag_corpus: str, + rag_corpora: List[str], text: str, - similarity_top_k: Optional[int] = 10, ): - # Initialize Vertex AI - vertexai.init(project=project_id, location=location) + # [START generativeaionvertexai_rag_retrieval_query] + + # TODO(developer): Update and un-comment below lines + # project_id = "PROJECT_ID" + # rag_corpora = ["9183965540115283968"] # Only one corpus is supported at this time + # text = "Your Query" + + vertexai.init(project=project_id, location="us-central1") response = rag.retrieval_query( - rag_corpora=rag_corpus, text=text, similarity_top_k=similarity_top_k + rag_corpora=rag_corpora, + text=text, + similarity_top_k=10, # Optional ) print(response) + # [END generativeaionvertexai_rag_retrieval_query] + return response -# [END generativeaionvertexai_rag_retrieval_query] +def generate_content_with_rag( + project_id: str, + rag_corpora: List[str], +): + # [START generativeaionvertexai_rag_generate_content] + from vertexai.preview.generative_models import GenerativeModel, Tool -# [START generativeaionvertexai_rag_generate_content] -from vertexai.preview.generative_models import GenerativeModel, Tool + # TODO(developer): Update and un-comment below lines + # project_id = "PROJECT_ID" + # rag_corpora = ["9183965540115283968"] # Only one corpus is supported at this time + vertexai.init(project=project_id, location="us-central1") -def generate_content_with_rag( - project_id: str, location: str, corpus_display_name: str, paths: List[str] + rag_retrieval_tool = Tool.from_retrieval( + retrieval=rag.Retrieval( + source=rag.VertexRagStore( + rag_corpora=rag_corpora, + similarity_top_k=3, # Optional + vector_distance_threshold=0.3, # Optional + ), + ) + ) + + rag_model = GenerativeModel("gemini-1.0-pro", tools=[rag_retrieval_tool]) + response = rag_model.generate_content("Why is the sky blue?") + print(response.text) + # [END generativeaionvertexai_rag_generate_content] + + return response + + +def quickstart( + project_id: str, + display_name: str, + paths: List[str], ): - """ - Creates and loads a RAG Corpus and generates text using Gemini. - - Args: - project_id (str): The project ID for Vertex AI. - location (str): The location for Vertex AI resources. - corpus_display_name (str): The display name for the corpus to be created. - paths (List[str]): List of file paths to import into the corpus. - Supports GCS URIs `gs://my-bucket/my_file` and Google Drive URLs - `https://drive.google.com/file/123` - """ - # Initialize Vertex AI - vertexai.init(project=project_id, location=location) - - corpus = rag.create_corpus(display_name=corpus_display_name) + # [START generativeaionvertexai_rag_quickstart] + from vertexai.preview.generative_models import GenerativeModel, Tool + + # Create a RAG Corpus, Import Files, and Generate a response + + # TODO(developer): Update and un-comment below lines + # project_id = "PROJECT_ID" + # display_name = "test_corpus" + # paths = ["https://drive.google.com/file/123", "gs://my_bucket/my_files_dir"] # Supports Google Cloud Storage and Google Drive Links + + vertexai.init(project=project_id, location="us-central1") + + corpus = rag.create_corpus(display_name=display_name) response = rag.import_files( corpus.name, paths, - chunk_size=1024, - chunk_overlap=200, + chunk_size=512, # Optional + chunk_overlap=100, # Optional ) rag_retrieval_tool = Tool.from_retrieval( retrieval=rag.Retrieval( source=rag.VertexRagStore( - rag_corpora=corpus.name, # Only 1 corpus is allowed. - similarity_top_k=3, + rag_corpora=[corpus.name], + similarity_top_k=3, # Optional + vector_distance_threshold=0.4, # Optional ), ) ) @@ -240,6 +336,5 @@ def generate_content_with_rag( rag_model = GenerativeModel("gemini-1.0-pro", tools=[rag_retrieval_tool]) response = rag_model.generate_content("Why is the sky blue?") print(response.text) - # [END generativeaionvertexai_rag_generate_content] - + # [END generativeaionvertexai_rag_quickstart] return corpus, response diff --git a/generative_ai/rag_test.py b/generative_ai/rag_test.py index 1adb7439c12..26aa35e78a4 100644 --- a/generative_ai/rag_test.py +++ b/generative_ai/rag_test.py @@ -40,63 +40,73 @@ def test_file(): @pytest.fixture(scope="module") def test_corpus(): """Creates a corpus for testing and deletes it after tests are complete.""" - corpus = rag.create_corpus(PROJECT_ID, LOCATION, CORPUS_NAME) + corpus = rag.create_corpus(PROJECT_ID, CORPUS_NAME) yield corpus - rag.delete_corpus(PROJECT_ID, LOCATION, corpus.name) + rag.delete_corpus(PROJECT_ID, corpus.name) @pytest.fixture def uploaded_file(test_corpus): """Uploads a file to the corpus and deletes it after the test.""" - rag_file = rag.upload_file(PROJECT_ID, LOCATION, test_corpus.name, test_file) + rag_file = rag.upload_file(PROJECT_ID, test_corpus.name, test_file) yield rag_file - rag.delete_file(PROJECT_ID, LOCATION, rag_file.name) + rag.delete_file(PROJECT_ID, rag_file.name) def test_create_corpus(): - corpus = rag.create_corpus(PROJECT_ID, LOCATION, "test_create_corpus") + corpus = rag.create_corpus(PROJECT_ID, "test_create_corpus") assert corpus.display_name == "test_create_corpus" - rag.delete_corpus(PROJECT_ID, LOCATION, corpus.name) + rag.delete_corpus(PROJECT_ID, corpus.name) def test_get_corpus(test_corpus): - retrieved_corpus = rag.get_corpus(PROJECT_ID, LOCATION, test_corpus.name) + retrieved_corpus = rag.get_corpus(PROJECT_ID, test_corpus.name) assert retrieved_corpus.name == test_corpus.name def test_list_corpora(): - corpora = rag.list_corpora(PROJECT_ID, LOCATION) + corpora = rag.list_corpora(PROJECT_ID) assert any(c.name == test_corpus.name for c in corpora) def test_upload_file(test_corpus): - rag_file = rag.upload_file(PROJECT_ID, LOCATION, test_corpus.name, test_file) + rag_file = rag.upload_file(PROJECT_ID, test_corpus.name, test_file) assert rag_file def test_import_files(test_corpus): - response = rag.import_files(PROJECT_ID, LOCATION, test_corpus.name, test_file) + response = rag.import_files(PROJECT_ID, test_corpus.name, test_file) + assert response.imported_rag_files_count > 0 + + +def test_import_files_async(test_corpus): + response = rag.import_files_async(PROJECT_ID, test_corpus.name, test_file) assert response.imported_rag_files_count > 0 def test_get_file(uploaded_file): - retrieved_file = rag.get_file(PROJECT_ID, LOCATION, uploaded_file.name) + retrieved_file = rag.get_file(PROJECT_ID, uploaded_file.name) assert retrieved_file.name == uploaded_file.name def test_list_files(test_corpus, uploaded_file): - files = rag.list_files(PROJECT_ID, LOCATION, test_corpus.name) + files = rag.list_files(PROJECT_ID, test_corpus.name) assert any(f.name == uploaded_file.name for f in files) def test_retrieval_query(test_corpus): - response = rag.retrieval_query(PROJECT_ID, LOCATION, test_corpus.name, "test query") + response = rag.retrieval_query(PROJECT_ID, test_corpus.name, "test query") assert len(response.results) > 0 -def test_generate_content_with_rag(): - corpus, response = rag.generate_content_with_rag( - PROJECT_ID, LOCATION, "test_corpus_generate_content", [test_file] +def test_generate_content_with_rag(test_corpus): + response = rag.generate_content_with_rag(PROJECT_ID, [test_corpus.name]) + assert response + + +def test_quickstart(): + corpus, response = rag.quickstart( + PROJECT_ID, "test_corpus_generate_content", [test_file] ) assert response - rag.delete_corpus(PROJECT_ID, LOCATION, corpus.name) + rag.delete_corpus(PROJECT_ID, corpus.name) From bb19ce0642ec71adbddcd29dee482c32fe30fd57 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Wed, 17 Apr 2024 16:36:29 -0500 Subject: [PATCH 11/19] Attempts at fixing tests --- generative_ai/rag_test.py | 48 ++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/generative_ai/rag_test.py b/generative_ai/rag_test.py index 26aa35e78a4..b3f4e2a4a40 100644 --- a/generative_ai/rag_test.py +++ b/generative_ai/rag_test.py @@ -15,38 +15,37 @@ # flake8: noqa ANN001, ANN201 import os +from pathlib import Path import pytest -import vertexai - import rag +import vertexai PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT") LOCATION = "us-central1" -CORPUS_NAME = "test_corpus" -FILE_PATH = "./hello_world.txt" # Replace with a valid file path +GCS_FILE = "gs://cloud-samples-data/generative-ai/pdf/earnings_statement.pdf" vertexai.init(project=PROJECT_ID, location=LOCATION) -@pytest.fixture(scope="module") -def test_file(): - file_path = "./hello.txt" - file_path.write_text("Hello World") - yield file_path +@pytest.fixture(scope="module", name="test_file") +def test_file_fixture(): + file_path = Path("./hello.txt") + file_path.write_text("Hello World", encoding="utf-8") + yield file_path.absolute().as_posix() file_path.unlink() # Delete the file after tests -@pytest.fixture(scope="module") -def test_corpus(): +@pytest.fixture(scope="module", name="test_corpus") +def test_corpus_fixture(): """Creates a corpus for testing and deletes it after tests are complete.""" - corpus = rag.create_corpus(PROJECT_ID, CORPUS_NAME) + corpus = rag.create_corpus(PROJECT_ID, "test_corpus") yield corpus rag.delete_corpus(PROJECT_ID, corpus.name) -@pytest.fixture -def uploaded_file(test_corpus): +@pytest.fixture(scope="module", name="uploaded_file") +def uploaded_file_fixture(test_corpus, test_file): """Uploads a file to the corpus and deletes it after the test.""" rag_file = rag.upload_file(PROJECT_ID, test_corpus.name, test_file) yield rag_file @@ -64,24 +63,24 @@ def test_get_corpus(test_corpus): assert retrieved_corpus.name == test_corpus.name -def test_list_corpora(): +def test_list_corpora(test_corpus): corpora = rag.list_corpora(PROJECT_ID) assert any(c.name == test_corpus.name for c in corpora) -def test_upload_file(test_corpus): +def test_upload_file(test_corpus, test_file): rag_file = rag.upload_file(PROJECT_ID, test_corpus.name, test_file) assert rag_file def test_import_files(test_corpus): - response = rag.import_files(PROJECT_ID, test_corpus.name, test_file) + response = rag.import_files(PROJECT_ID, test_corpus.name, [GCS_FILE]) assert response.imported_rag_files_count > 0 -def test_import_files_async(test_corpus): - response = rag.import_files_async(PROJECT_ID, test_corpus.name, test_file) - assert response.imported_rag_files_count > 0 +# def test_import_files_async(test_corpus): +# response = rag.import_files_async(PROJECT_ID, test_corpus.name, [GCS_FILE]) +# assert response def test_get_file(uploaded_file): @@ -95,18 +94,21 @@ def test_list_files(test_corpus, uploaded_file): def test_retrieval_query(test_corpus): - response = rag.retrieval_query(PROJECT_ID, test_corpus.name, "test query") - assert len(response.results) > 0 + response = rag.retrieval_query(PROJECT_ID, [test_corpus.name], "test query") + assert response + assert response.contexts def test_generate_content_with_rag(test_corpus): response = rag.generate_content_with_rag(PROJECT_ID, [test_corpus.name]) assert response + assert response.text def test_quickstart(): corpus, response = rag.quickstart( - PROJECT_ID, "test_corpus_generate_content", [test_file] + PROJECT_ID, "test_corpus_generate_content", [GCS_FILE] ) assert response + assert response.text rag.delete_corpus(PROJECT_ID, corpus.name) From 1ead635daaae971e283dac805139a0de978b307a Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Fri, 19 Apr 2024 13:03:34 -0500 Subject: [PATCH 12/19] Add fixes for tests - One change is due to b/335864815 --- generative_ai/rag_test.py | 14 +++++++------- generative_ai/requirements-test.txt | 1 + 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/generative_ai/rag_test.py b/generative_ai/rag_test.py index b3f4e2a4a40..96ff58818bf 100644 --- a/generative_ai/rag_test.py +++ b/generative_ai/rag_test.py @@ -48,8 +48,8 @@ def test_corpus_fixture(): def uploaded_file_fixture(test_corpus, test_file): """Uploads a file to the corpus and deletes it after the test.""" rag_file = rag.upload_file(PROJECT_ID, test_corpus.name, test_file) + rag_file.name = rag_file.name.replace("RagFile", "ragFiles") yield rag_file - rag.delete_file(PROJECT_ID, rag_file.name) def test_create_corpus(): @@ -71,6 +71,7 @@ def test_list_corpora(test_corpus): def test_upload_file(test_corpus, test_file): rag_file = rag.upload_file(PROJECT_ID, test_corpus.name, test_file) assert rag_file + rag.delete_file(PROJECT_ID, rag_file.name) def test_import_files(test_corpus): @@ -78,9 +79,10 @@ def test_import_files(test_corpus): assert response.imported_rag_files_count > 0 -# def test_import_files_async(test_corpus): -# response = rag.import_files_async(PROJECT_ID, test_corpus.name, [GCS_FILE]) -# assert response +@pytest.mark.asyncio +async def test_import_files_async(test_corpus): + response = await rag.import_files_async(PROJECT_ID, test_corpus.name, [GCS_FILE]) + assert response.imported_rag_files_count > 0 def test_get_file(uploaded_file): @@ -106,9 +108,7 @@ def test_generate_content_with_rag(test_corpus): def test_quickstart(): - corpus, response = rag.quickstart( - PROJECT_ID, "test_corpus_generate_content", [GCS_FILE] - ) + corpus, response = rag.quickstart(PROJECT_ID, "test_corpus_quickstart", [GCS_FILE]) assert response assert response.text rag.delete_corpus(PROJECT_ID, corpus.name) diff --git a/generative_ai/requirements-test.txt b/generative_ai/requirements-test.txt index 27f70f46f2a..6ebf6cef00c 100644 --- a/generative_ai/requirements-test.txt +++ b/generative_ai/requirements-test.txt @@ -1,3 +1,4 @@ backoff==2.2.1 google-api-core==2.17.1 pytest==6.2.4 +pytest-asyncio==0.23.6 From d0b6b2f0fd00c9ca582cdd634f5500aeda64c103 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Fri, 19 Apr 2024 13:08:43 -0500 Subject: [PATCH 13/19] Fixes to Async import function --- generative_ai/rag.py | 6 +++--- generative_ai/rag_test.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/generative_ai/rag.py b/generative_ai/rag.py index 642ebcbdc1a..20c204f281a 100644 --- a/generative_ai/rag.py +++ b/generative_ai/rag.py @@ -173,10 +173,10 @@ async def import_files_async( chunk_overlap=100, # Optional ) - await response.result() - print(f"Imported {response.imported_rag_files_count} files.") + result = await response.result() + print(f"Imported {result.imported_rag_files_count} files.") # [END generativeaionvertexai_rag_import_files_async] - return response + return result def get_file(project_id: str, file_name: str): diff --git a/generative_ai/rag_test.py b/generative_ai/rag_test.py index 96ff58818bf..7ace6a1be69 100644 --- a/generative_ai/rag_test.py +++ b/generative_ai/rag_test.py @@ -81,8 +81,8 @@ def test_import_files(test_corpus): @pytest.mark.asyncio async def test_import_files_async(test_corpus): - response = await rag.import_files_async(PROJECT_ID, test_corpus.name, [GCS_FILE]) - assert response.imported_rag_files_count > 0 + result = await rag.import_files_async(PROJECT_ID, test_corpus.name, [GCS_FILE]) + assert result.imported_rag_files_count > 0 def test_get_file(uploaded_file): From 2e626298caf6861216f6965847086dade9c74894 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Fri, 19 Apr 2024 13:47:01 -0500 Subject: [PATCH 14/19] Add comment to init vertex ai once per session --- generative_ai/rag.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/generative_ai/rag.py b/generative_ai/rag.py index 20c204f281a..2def6f80855 100644 --- a/generative_ai/rag.py +++ b/generative_ai/rag.py @@ -60,6 +60,7 @@ def create_corpus( # display_name = "test_corpus" # description = "Corpus Description" + # Initialize Vertex AI API once per session vertexai.init(project=project_id, location="us-central1") corpus = rag.create_corpus(display_name=display_name, description=description) @@ -75,6 +76,7 @@ def get_corpus(project_id: str, corpus_name: str): # project_id = "PROJECT_ID" # corpus_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}" + # Initialize Vertex AI API once per session vertexai.init(project=project_id, location="us-central1") corpus = rag.get_corpus(name=corpus_name) @@ -89,6 +91,7 @@ def list_corpora(project_id: str): # TODO(developer): Update and un-comment below lines # project_id = "PROJECT_ID" + # Initialize Vertex AI API once per session vertexai.init(project=project_id, location="us-central1") corpora = rag.list_corpora() @@ -112,6 +115,7 @@ def upload_file( # display_name = "file_display_name" # description = "file description" + # Initialize Vertex AI API once per session vertexai.init(project=project_id, location="us-central1") rag_file = rag.upload_file( @@ -137,6 +141,7 @@ def import_files( # corpus_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}" # paths = ["https://drive.google.com/file/123", "gs://my_bucket/my_files_dir"] # Supports Google Cloud Storage and Google Drive Links + # Initialize Vertex AI API once per session vertexai.init(project=project_id, location="us-central1") response = rag.import_files( @@ -164,6 +169,7 @@ async def import_files_async( # Supports Google Cloud Storage and Google Drive Links # paths = ["https://drive.google.com/file/123", "gs://my_bucket/my_files_dir"] + # Initialize Vertex AI API once per session vertexai.init(project=project_id, location="us-central1") response = await rag.import_files_async( @@ -186,6 +192,7 @@ def get_file(project_id: str, file_name: str): # project_id = "PROJECT_ID" # file_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}/ragFiles/{rag_file_id}" + # Initialize Vertex AI API once per session vertexai.init(project=project_id, location="us-central1") rag_file = rag.get_file(name=file_name) @@ -202,6 +209,7 @@ def list_files(project_id: str, corpus_name: str): # project_id = "PROJECT_ID" # corpus_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}" + # Initialize Vertex AI API once per session vertexai.init(project=project_id, location="us-central1") files = rag.list_files(corpus_name=corpus_name) @@ -219,6 +227,7 @@ def delete_file(project_id: str, file_name: str) -> None: # project_id = "PROJECT_ID" # file_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}/ragFiles/{rag_file_id}" + # Initialize Vertex AI API once per session vertexai.init(project=project_id, location="us-central1") rag.delete_file(name=file_name) @@ -233,6 +242,7 @@ def delete_corpus(project_id: str, corpus_name: str) -> None: # project_id = "PROJECT_ID" # corpus_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}" + # Initialize Vertex AI API once per session vertexai.init(project=project_id, location="us-central1") rag.delete_corpus(name=corpus_name) @@ -252,6 +262,7 @@ def retrieval_query( # rag_corpora = ["9183965540115283968"] # Only one corpus is supported at this time # text = "Your Query" + # Initialize Vertex AI API once per session vertexai.init(project=project_id, location="us-central1") response = rag.retrieval_query( @@ -277,6 +288,7 @@ def generate_content_with_rag( # project_id = "PROJECT_ID" # rag_corpora = ["9183965540115283968"] # Only one corpus is supported at this time + # Initialize Vertex AI API once per session vertexai.init(project=project_id, location="us-central1") rag_retrieval_tool = Tool.from_retrieval( @@ -312,6 +324,7 @@ def quickstart( # display_name = "test_corpus" # paths = ["https://drive.google.com/file/123", "gs://my_bucket/my_files_dir"] # Supports Google Cloud Storage and Google Drive Links + # Initialize Vertex AI API once per session vertexai.init(project=project_id, location="us-central1") corpus = rag.create_corpus(display_name=display_name) From 665bafb9e9ee1af6aca7603c5988ded779a2c6d4 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Fri, 19 Apr 2024 13:56:30 -0500 Subject: [PATCH 15/19] Update quickstart --- generative_ai/rag.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/generative_ai/rag.py b/generative_ai/rag.py index 2def6f80855..37ca1971df0 100644 --- a/generative_ai/rag.py +++ b/generative_ai/rag.py @@ -327,27 +327,41 @@ def quickstart( # Initialize Vertex AI API once per session vertexai.init(project=project_id, location="us-central1") - corpus = rag.create_corpus(display_name=display_name) + # Create RagCorpus + rag_corpus = rag.create_corpus(display_name=display_name) + # Import Files to the RagCorpus response = rag.import_files( - corpus.name, + rag_corpus.name, paths, chunk_size=512, # Optional chunk_overlap=100, # Optional ) + # Direct context retrieval + response = rag.retrieval_query( + rag_corpora=[rag_corpus.name], + text="What is RAG and why it is helpful?", + similarity_top_k=10, + ) + print(response) + + # Enhance generation + # Create a RAG retrieval tool rag_retrieval_tool = Tool.from_retrieval( retrieval=rag.Retrieval( source=rag.VertexRagStore( - rag_corpora=[corpus.name], + rag_corpora=[rag_corpus.name], # Currently only 1 corpus is allowed. similarity_top_k=3, # Optional vector_distance_threshold=0.4, # Optional ), ) ) - + # Create a gemini-pro model instance rag_model = GenerativeModel("gemini-1.0-pro", tools=[rag_retrieval_tool]) - response = rag_model.generate_content("Why is the sky blue?") + + # Generate response + response = rag_model.generate_content("What is RAG and why it is helpful?") print(response.text) # [END generativeaionvertexai_rag_quickstart] - return corpus, response + return rag_corpus, response From c1203b51219cdc68049dbcce22948ed666caf8ac Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Fri, 19 Apr 2024 14:10:37 -0500 Subject: [PATCH 16/19] Update pytest --- generative_ai/requirements-test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generative_ai/requirements-test.txt b/generative_ai/requirements-test.txt index 6ebf6cef00c..fceefa0d35e 100644 --- a/generative_ai/requirements-test.txt +++ b/generative_ai/requirements-test.txt @@ -1,4 +1,4 @@ backoff==2.2.1 google-api-core==2.17.1 -pytest==6.2.4 +pytest==8.1.1 pytest-asyncio==0.23.6 From 427cb41da20e7340b4387cc56254a95b81d21983 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Fri, 19 Apr 2024 15:33:10 -0500 Subject: [PATCH 17/19] Move general imports to fit with indentation --- generative_ai/rag.py | 68 ++++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/generative_ai/rag.py b/generative_ai/rag.py index 37ca1971df0..20a66c3890a 100644 --- a/generative_ai/rag.py +++ b/generative_ai/rag.py @@ -16,37 +16,6 @@ from typing import List, Optional -# [START generativeaionvertexai_rag_create_corpus] -# [START generativeaionvertexai_rag_get_corpus] -# [START generativeaionvertexai_rag_list_corpora] -# [START generativeaionvertexai_rag_upload_file] -# [START generativeaionvertexai_rag_import_files] -# [START generativeaionvertexai_rag_import_files_async] -# [START generativeaionvertexai_rag_get_file] -# [START generativeaionvertexai_rag_list_files] -# [START generativeaionvertexai_rag_delete_file] -# [START generativeaionvertexai_rag_delete_corpus] -# [START generativeaionvertexai_rag_retrieval_query] -# [START generativeaionvertexai_rag_generate_content] -# [START generativeaionvertexai_rag_quickstart] - -from vertexai.preview import rag -import vertexai - -# [END generativeaionvertexai_rag_create_corpus] -# [END generativeaionvertexai_rag_get_corpus] -# [END generativeaionvertexai_rag_list_corpora] -# [END generativeaionvertexai_rag_upload_file] -# [END generativeaionvertexai_rag_import_files] -# [END generativeaionvertexai_rag_import_files_async] -# [END generativeaionvertexai_rag_get_file] -# [END generativeaionvertexai_rag_list_files] -# [END generativeaionvertexai_rag_delete_file] -# [END generativeaionvertexai_rag_delete_corpus] -# [END generativeaionvertexai_rag_retrieval_query] -# [END generativeaionvertexai_rag_generate_content] -# [END generativeaionvertexai_rag_quickstart] - def create_corpus( project_id: str, @@ -55,6 +24,9 @@ def create_corpus( ): # [START generativeaionvertexai_rag_create_corpus] + from vertexai.preview import rag + import vertexai + # TODO(developer): Update and un-comment below lines # project_id = "PROJECT_ID" # display_name = "test_corpus" @@ -72,6 +44,9 @@ def create_corpus( def get_corpus(project_id: str, corpus_name: str): # [START generativeaionvertexai_rag_get_corpus] + from vertexai.preview import rag + import vertexai + # TODO(developer): Update and un-comment below lines # project_id = "PROJECT_ID" # corpus_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}" @@ -88,6 +63,9 @@ def get_corpus(project_id: str, corpus_name: str): def list_corpora(project_id: str): # [START generativeaionvertexai_rag_list_corpora] + from vertexai.preview import rag + import vertexai + # TODO(developer): Update and un-comment below lines # project_id = "PROJECT_ID" @@ -109,6 +87,9 @@ def upload_file( ): # [START generativeaionvertexai_rag_upload_file] + from vertexai.preview import rag + import vertexai + # TODO(developer): Update and un-comment below lines # project_id = "PROJECT_ID" # corpus_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}" @@ -136,6 +117,9 @@ def import_files( ): # [START generativeaionvertexai_rag_import_files] + from vertexai.preview import rag + import vertexai + # TODO(developer): Update and un-comment below lines # project_id = "PROJECT_ID" # corpus_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}" @@ -162,6 +146,9 @@ async def import_files_async( ): # [START generativeaionvertexai_rag_import_files_async] + from vertexai.preview import rag + import vertexai + # TODO(developer): Update and un-comment below lines # project_id = "PROJECT_ID" # corpus_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}" @@ -188,6 +175,9 @@ async def import_files_async( def get_file(project_id: str, file_name: str): # [START generativeaionvertexai_rag_get_file] + from vertexai.preview import rag + import vertexai + # TODO(developer): Update and un-comment below lines # project_id = "PROJECT_ID" # file_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}/ragFiles/{rag_file_id}" @@ -205,6 +195,9 @@ def get_file(project_id: str, file_name: str): def list_files(project_id: str, corpus_name: str): # [START generativeaionvertexai_rag_list_files] + from vertexai.preview import rag + import vertexai + # TODO(developer): Update and un-comment below lines # project_id = "PROJECT_ID" # corpus_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}" @@ -223,6 +216,9 @@ def list_files(project_id: str, corpus_name: str): def delete_file(project_id: str, file_name: str) -> None: # [START generativeaionvertexai_rag_delete_file] + from vertexai.preview import rag + import vertexai + # TODO(developer): Update and un-comment below lines # project_id = "PROJECT_ID" # file_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}/ragFiles/{rag_file_id}" @@ -238,6 +234,9 @@ def delete_file(project_id: str, file_name: str) -> None: def delete_corpus(project_id: str, corpus_name: str) -> None: # [START generativeaionvertexai_rag_delete_corpus] + from vertexai.preview import rag + import vertexai + # TODO(developer): Update and un-comment below lines # project_id = "PROJECT_ID" # corpus_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}" @@ -257,6 +256,9 @@ def retrieval_query( ): # [START generativeaionvertexai_rag_retrieval_query] + from vertexai.preview import rag + import vertexai + # TODO(developer): Update and un-comment below lines # project_id = "PROJECT_ID" # rag_corpora = ["9183965540115283968"] # Only one corpus is supported at this time @@ -282,7 +284,9 @@ def generate_content_with_rag( ): # [START generativeaionvertexai_rag_generate_content] + from vertexai.preview import rag from vertexai.preview.generative_models import GenerativeModel, Tool + import vertexai # TODO(developer): Update and un-comment below lines # project_id = "PROJECT_ID" @@ -315,7 +319,9 @@ def quickstart( paths: List[str], ): # [START generativeaionvertexai_rag_quickstart] + from vertexai.preview import rag from vertexai.preview.generative_models import GenerativeModel, Tool + import vertexai # Create a RAG Corpus, Import Files, and Generate a response From a3d631a9914bfba587cabba9b2d4efca44ac32f5 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Mon, 22 Apr 2024 14:59:34 +0000 Subject: [PATCH 18/19] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?= =?UTF-8?q?post-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- generative_ai/imagen/edit_image_inpainting_insert_mask.py | 1 - generative_ai/imagen/edit_image_inpainting_insert_mask_mode.py | 1 - generative_ai/imagen/edit_image_inpainting_remove_mask.py | 1 - generative_ai/imagen/edit_image_inpainting_remove_mask_mode.py | 1 - generative_ai/imagen/edit_image_mask.py | 1 - generative_ai/imagen/edit_image_mask_free.py | 1 - generative_ai/imagen/edit_image_outpainting_mask.py | 1 - generative_ai/imagen/edit_image_product_image.py | 1 - generative_ai/imagen/get_short_form_image_captions.py | 1 - generative_ai/imagen/get_short_form_image_responses.py | 1 - 10 files changed, 10 deletions(-) diff --git a/generative_ai/imagen/edit_image_inpainting_insert_mask.py b/generative_ai/imagen/edit_image_inpainting_insert_mask.py index 72902698887..d36fd6fd5fe 100644 --- a/generative_ai/imagen/edit_image_inpainting_insert_mask.py +++ b/generative_ai/imagen/edit_image_inpainting_insert_mask.py @@ -27,7 +27,6 @@ def edit_image_inpainting_insert_mask( output_file: str, prompt: str, ) -> vision_models.ImageGenerationResponse: - # [START generativeaionvertexai_imagen_edit_image_inpainting_insert_mask] import vertexai diff --git a/generative_ai/imagen/edit_image_inpainting_insert_mask_mode.py b/generative_ai/imagen/edit_image_inpainting_insert_mask_mode.py index a09d796297d..7922a929c4b 100644 --- a/generative_ai/imagen/edit_image_inpainting_insert_mask_mode.py +++ b/generative_ai/imagen/edit_image_inpainting_insert_mask_mode.py @@ -28,7 +28,6 @@ def edit_image_inpainting_insert_mask_mode( output_file: str, prompt: str, ) -> vision_models.ImageGenerationResponse: - # [START generativeaionvertexai_imagen_edit_image_inpainting_insert_mask_mode] import vertexai diff --git a/generative_ai/imagen/edit_image_inpainting_remove_mask.py b/generative_ai/imagen/edit_image_inpainting_remove_mask.py index 6c7074b49ff..b6fdf4e4d31 100644 --- a/generative_ai/imagen/edit_image_inpainting_remove_mask.py +++ b/generative_ai/imagen/edit_image_inpainting_remove_mask.py @@ -26,7 +26,6 @@ def edit_image_inpainting_remove_mask( output_file: str, prompt: str, ) -> vision_models.ImageGenerationResponse: - # [START generativeaionvertexai_imagen_edit_image_inpainting_remove_mask] import vertexai diff --git a/generative_ai/imagen/edit_image_inpainting_remove_mask_mode.py b/generative_ai/imagen/edit_image_inpainting_remove_mask_mode.py index e525e9fd962..d7e3e40c7d9 100644 --- a/generative_ai/imagen/edit_image_inpainting_remove_mask_mode.py +++ b/generative_ai/imagen/edit_image_inpainting_remove_mask_mode.py @@ -28,7 +28,6 @@ def edit_image_inpainting_remove_mask_mode( output_file: str, prompt: str, ) -> vision_models.ImageGenerationResponse: - # [START generativeaionvertexai_imagen_edit_image_inpainting_remove_mask_mode] import vertexai diff --git a/generative_ai/imagen/edit_image_mask.py b/generative_ai/imagen/edit_image_mask.py index 4437bc9e838..9a33ca1a47a 100644 --- a/generative_ai/imagen/edit_image_mask.py +++ b/generative_ai/imagen/edit_image_mask.py @@ -26,7 +26,6 @@ def edit_image_mask( output_file: str, prompt: str, ) -> vision_models.ImageGenerationResponse: - # [START generativeaionvertexai_imagen_edit_image_mask] import vertexai diff --git a/generative_ai/imagen/edit_image_mask_free.py b/generative_ai/imagen/edit_image_mask_free.py index fe3f199d9de..8193c4dcf8d 100644 --- a/generative_ai/imagen/edit_image_mask_free.py +++ b/generative_ai/imagen/edit_image_mask_free.py @@ -22,7 +22,6 @@ def edit_image_mask_free( project_id: str, input_file: str, output_file: str, prompt: str ) -> vision_models.ImageGenerationResponse: - # [START generativeaionvertexai_imagen_edit_image_mask_free] import vertexai diff --git a/generative_ai/imagen/edit_image_outpainting_mask.py b/generative_ai/imagen/edit_image_outpainting_mask.py index a283689b410..c12dd33f3ef 100644 --- a/generative_ai/imagen/edit_image_outpainting_mask.py +++ b/generative_ai/imagen/edit_image_outpainting_mask.py @@ -27,7 +27,6 @@ def edit_image_outpainting_mask( output_file: str, prompt: str, ) -> vision_models.ImageGenerationResponse: - # [START generativeaionvertexai_imagen_edit_image_outpainting_mask] import vertexai diff --git a/generative_ai/imagen/edit_image_product_image.py b/generative_ai/imagen/edit_image_product_image.py index f79ba769609..904a02742a2 100644 --- a/generative_ai/imagen/edit_image_product_image.py +++ b/generative_ai/imagen/edit_image_product_image.py @@ -25,7 +25,6 @@ def edit_image_product_image( output_file: str, prompt: str, ) -> vision_models.ImageGenerationResponse: - # [START generativeaionvertexai_imagen_edit_image_product_image] import vertexai diff --git a/generative_ai/imagen/get_short_form_image_captions.py b/generative_ai/imagen/get_short_form_image_captions.py index 2864ff860d8..9112ececf2a 100644 --- a/generative_ai/imagen/get_short_form_image_captions.py +++ b/generative_ai/imagen/get_short_form_image_captions.py @@ -17,7 +17,6 @@ def get_short_form_image_captions(project_id: str, input_file: str) -> list: - # [START generativeaionvertexai_imagen_get_short_form_image_captions] import vertexai diff --git a/generative_ai/imagen/get_short_form_image_responses.py b/generative_ai/imagen/get_short_form_image_responses.py index 6608fde9ff6..cd2f011b4d6 100644 --- a/generative_ai/imagen/get_short_form_image_responses.py +++ b/generative_ai/imagen/get_short_form_image_responses.py @@ -20,7 +20,6 @@ def get_short_form_image_responses( project_id: str, input_file: str, question: str ) -> list: - # [START generativeaionvertexai_imagen_get_short_form_image_responses] import vertexai From 6e5d5db053f52de498775d710b825f629500af71 Mon Sep 17 00:00:00 2001 From: Holt Skinner Date: Mon, 22 Apr 2024 10:57:27 -0500 Subject: [PATCH 19/19] Add TODOs and Skip Tests --- generative_ai/rag_test.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/generative_ai/rag_test.py b/generative_ai/rag_test.py index 7ace6a1be69..a710dc11e93 100644 --- a/generative_ai/rag_test.py +++ b/generative_ai/rag_test.py @@ -21,6 +21,9 @@ import rag import vertexai +# TODO: Remove once SDK is updated +pytest.skip(allow_module_level=True) + PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT") LOCATION = "us-central1" GCS_FILE = "gs://cloud-samples-data/generative-ai/pdf/earnings_statement.pdf" @@ -48,6 +51,7 @@ def test_corpus_fixture(): def uploaded_file_fixture(test_corpus, test_file): """Uploads a file to the corpus and deletes it after the test.""" rag_file = rag.upload_file(PROJECT_ID, test_corpus.name, test_file) + # TODO: Remove when upload_file service response changes to the correct format. rag_file.name = rag_file.name.replace("RagFile", "ragFiles") yield rag_file @@ -71,6 +75,8 @@ def test_list_corpora(test_corpus): def test_upload_file(test_corpus, test_file): rag_file = rag.upload_file(PROJECT_ID, test_corpus.name, test_file) assert rag_file + # TODO: Remove when upload_file service response changes to the correct format. + rag_file.name = rag_file.name.replace("RagFile", "ragFiles") rag.delete_file(PROJECT_ID, rag_file.name) @@ -96,7 +102,7 @@ def test_list_files(test_corpus, uploaded_file): def test_retrieval_query(test_corpus): - response = rag.retrieval_query(PROJECT_ID, [test_corpus.name], "test query") + response = rag.retrieval_query(PROJECT_ID, [test_corpus.name], "hello") assert response assert response.contexts