diff --git a/generative_ai/imagen/edit_image_inpainting_insert_mask.py b/generative_ai/imagen/edit_image_inpainting_insert_mask.py index 72902698887..d36fd6fd5fe 100644 --- a/generative_ai/imagen/edit_image_inpainting_insert_mask.py +++ b/generative_ai/imagen/edit_image_inpainting_insert_mask.py @@ -27,7 +27,6 @@ def edit_image_inpainting_insert_mask( output_file: str, prompt: str, ) -> vision_models.ImageGenerationResponse: - # [START generativeaionvertexai_imagen_edit_image_inpainting_insert_mask] import vertexai diff --git a/generative_ai/imagen/edit_image_inpainting_insert_mask_mode.py b/generative_ai/imagen/edit_image_inpainting_insert_mask_mode.py index a09d796297d..7922a929c4b 100644 --- a/generative_ai/imagen/edit_image_inpainting_insert_mask_mode.py +++ b/generative_ai/imagen/edit_image_inpainting_insert_mask_mode.py @@ -28,7 +28,6 @@ def edit_image_inpainting_insert_mask_mode( output_file: str, prompt: str, ) -> vision_models.ImageGenerationResponse: - # [START generativeaionvertexai_imagen_edit_image_inpainting_insert_mask_mode] import vertexai diff --git a/generative_ai/imagen/edit_image_inpainting_remove_mask.py b/generative_ai/imagen/edit_image_inpainting_remove_mask.py index 6c7074b49ff..b6fdf4e4d31 100644 --- a/generative_ai/imagen/edit_image_inpainting_remove_mask.py +++ b/generative_ai/imagen/edit_image_inpainting_remove_mask.py @@ -26,7 +26,6 @@ def edit_image_inpainting_remove_mask( output_file: str, prompt: str, ) -> vision_models.ImageGenerationResponse: - # [START generativeaionvertexai_imagen_edit_image_inpainting_remove_mask] import vertexai diff --git a/generative_ai/imagen/edit_image_inpainting_remove_mask_mode.py b/generative_ai/imagen/edit_image_inpainting_remove_mask_mode.py index e525e9fd962..d7e3e40c7d9 100644 --- a/generative_ai/imagen/edit_image_inpainting_remove_mask_mode.py +++ b/generative_ai/imagen/edit_image_inpainting_remove_mask_mode.py @@ -28,7 +28,6 @@ def edit_image_inpainting_remove_mask_mode( output_file: str, prompt: 
str, ) -> vision_models.ImageGenerationResponse: - # [START generativeaionvertexai_imagen_edit_image_inpainting_remove_mask_mode] import vertexai diff --git a/generative_ai/imagen/edit_image_mask.py b/generative_ai/imagen/edit_image_mask.py index 4437bc9e838..9a33ca1a47a 100644 --- a/generative_ai/imagen/edit_image_mask.py +++ b/generative_ai/imagen/edit_image_mask.py @@ -26,7 +26,6 @@ def edit_image_mask( output_file: str, prompt: str, ) -> vision_models.ImageGenerationResponse: - # [START generativeaionvertexai_imagen_edit_image_mask] import vertexai diff --git a/generative_ai/imagen/edit_image_mask_free.py b/generative_ai/imagen/edit_image_mask_free.py index fe3f199d9de..8193c4dcf8d 100644 --- a/generative_ai/imagen/edit_image_mask_free.py +++ b/generative_ai/imagen/edit_image_mask_free.py @@ -22,7 +22,6 @@ def edit_image_mask_free( project_id: str, input_file: str, output_file: str, prompt: str ) -> vision_models.ImageGenerationResponse: - # [START generativeaionvertexai_imagen_edit_image_mask_free] import vertexai diff --git a/generative_ai/imagen/edit_image_outpainting_mask.py b/generative_ai/imagen/edit_image_outpainting_mask.py index a283689b410..c12dd33f3ef 100644 --- a/generative_ai/imagen/edit_image_outpainting_mask.py +++ b/generative_ai/imagen/edit_image_outpainting_mask.py @@ -27,7 +27,6 @@ def edit_image_outpainting_mask( output_file: str, prompt: str, ) -> vision_models.ImageGenerationResponse: - # [START generativeaionvertexai_imagen_edit_image_outpainting_mask] import vertexai diff --git a/generative_ai/imagen/edit_image_product_image.py b/generative_ai/imagen/edit_image_product_image.py index f79ba769609..904a02742a2 100644 --- a/generative_ai/imagen/edit_image_product_image.py +++ b/generative_ai/imagen/edit_image_product_image.py @@ -25,7 +25,6 @@ def edit_image_product_image( output_file: str, prompt: str, ) -> vision_models.ImageGenerationResponse: - # [START generativeaionvertexai_imagen_edit_image_product_image] import vertexai diff --git 
a/generative_ai/imagen/get_short_form_image_captions.py b/generative_ai/imagen/get_short_form_image_captions.py index 2864ff860d8..9112ececf2a 100644 --- a/generative_ai/imagen/get_short_form_image_captions.py +++ b/generative_ai/imagen/get_short_form_image_captions.py @@ -17,7 +17,6 @@ def get_short_form_image_captions(project_id: str, input_file: str) -> list: - # [START generativeaionvertexai_imagen_get_short_form_image_captions] import vertexai diff --git a/generative_ai/imagen/get_short_form_image_responses.py b/generative_ai/imagen/get_short_form_image_responses.py index 6608fde9ff6..cd2f011b4d6 100644 --- a/generative_ai/imagen/get_short_form_image_responses.py +++ b/generative_ai/imagen/get_short_form_image_responses.py @@ -20,7 +20,6 @@ def get_short_form_image_responses( project_id: str, input_file: str, question: str ) -> list: - # [START generativeaionvertexai_imagen_get_short_form_image_responses] import vertexai diff --git a/generative_ai/rag.py b/generative_ai/rag.py new file mode 100644 index 00000000000..20a66c3890a --- /dev/null +++ b/generative_ai/rag.py @@ -0,0 +1,373 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# flake8: noqa ANN001, ANN201

from typing import List, Optional


def create_corpus(
    project_id: str,
    display_name: Optional[str] = None,
    description: Optional[str] = None,
):
    """Create a RAG corpus, print it, and return it.

    Args:
        project_id: Project passed to ``vertexai.init``.
        display_name: Forwarded to ``rag.create_corpus``.
        description: Forwarded to ``rag.create_corpus``.

    Returns:
        The object returned by ``rag.create_corpus``.
    """
    # [START generativeaionvertexai_rag_create_corpus]

    from vertexai.preview import rag
    import vertexai

    # TODO(developer): Update and un-comment below lines
    # project_id = "PROJECT_ID"
    # display_name = "test_corpus"
    # description = "Corpus Description"

    # Initialize Vertex AI API once per session
    vertexai.init(project=project_id, location="us-central1")

    corpus = rag.create_corpus(display_name=display_name, description=description)
    print(corpus)
    # [END generativeaionvertexai_rag_create_corpus]
    return corpus


def get_corpus(project_id: str, corpus_name: str):
    """Fetch a RAG corpus by resource name, print it, and return it.

    Args:
        project_id: Project passed to ``vertexai.init``.
        corpus_name: Passed to ``rag.get_corpus`` as ``name``.

    Returns:
        The object returned by ``rag.get_corpus``.
    """
    # [START generativeaionvertexai_rag_get_corpus]

    from vertexai.preview import rag
    import vertexai

    # TODO(developer): Update and un-comment below lines
    # project_id = "PROJECT_ID"
    # corpus_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}"

    # Initialize Vertex AI API once per session
    vertexai.init(project=project_id, location="us-central1")

    corpus = rag.get_corpus(name=corpus_name)
    print(corpus)
    # [END generativeaionvertexai_rag_get_corpus]
    return corpus


def list_corpora(project_id: str):
    """List the RAG corpora, print the result, and return it.

    Args:
        project_id: Project passed to ``vertexai.init``.

    Returns:
        The object returned by ``rag.list_corpora``.
    """
    # [START generativeaionvertexai_rag_list_corpora]

    from vertexai.preview import rag
    import vertexai

    # TODO(developer): Update and un-comment below lines
    # project_id = "PROJECT_ID"

    # Initialize Vertex AI API once per session
    vertexai.init(project=project_id, location="us-central1")

    corpora = rag.list_corpora()
    print(corpora)
    # [END generativeaionvertexai_rag_list_corpora]
    return corpora


def upload_file(
    project_id: str,
    corpus_name: str,
    path: str,
    display_name: Optional[str] = None,
    description: Optional[str] = None,
):
    """Upload a single file to a RAG corpus, print it, and return it.

    Args:
        project_id: Project passed to ``vertexai.init``.
        corpus_name: Target corpus, forwarded to ``rag.upload_file``.
        path: Path of the file to upload, forwarded to ``rag.upload_file``.
        display_name: Forwarded to ``rag.upload_file``.
        description: Forwarded to ``rag.upload_file``.

    Returns:
        The object returned by ``rag.upload_file``.
    """
    # [START generativeaionvertexai_rag_upload_file]

    from vertexai.preview import rag
    import vertexai

    # TODO(developer): Update and un-comment below lines
    # project_id = "PROJECT_ID"
    # corpus_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}"
    # display_name = "file_display_name"
    # description = "file description"

    # Initialize Vertex AI API once per session
    vertexai.init(project=project_id, location="us-central1")

    rag_file = rag.upload_file(
        corpus_name=corpus_name,
        path=path,
        display_name=display_name,
        description=description,
    )
    print(rag_file)
    # [END generativeaionvertexai_rag_upload_file]
    return rag_file


def import_files(
    project_id: str,
    corpus_name: str,
    paths: List[str],
):
    """Import files into a RAG corpus and report how many were imported.

    Args:
        project_id: Project passed to ``vertexai.init``.
        corpus_name: Target corpus, forwarded to ``rag.import_files``.
        paths: Source locations, forwarded to ``rag.import_files``.

    Returns:
        The response of ``rag.import_files``; its
        ``imported_rag_files_count`` is printed.
    """
    # [START generativeaionvertexai_rag_import_files]

    from vertexai.preview import rag
    import vertexai

    # TODO(developer): Update and un-comment below lines
    # project_id = "PROJECT_ID"
    # corpus_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}"
    # paths = ["https://drive.google.com/file/123", "gs://my_bucket/my_files_dir"]  # Supports Google Cloud Storage and Google Drive Links

    # Initialize Vertex AI API once per session
    vertexai.init(project=project_id, location="us-central1")

    response = rag.import_files(
        corpus_name=corpus_name,
        paths=paths,
        chunk_size=512,  # Optional
        chunk_overlap=100,  # Optional
    )
    print(f"Imported {response.imported_rag_files_count} files.")
    # [END generativeaionvertexai_rag_import_files]
    return response


async def import_files_async(
    project_id: str,
    corpus_name: str,
    paths: List[str],
):
    """Import files into a RAG corpus asynchronously.

    Awaits ``rag.import_files_async`` and then awaits ``.result()`` on what
    it returns, printing the imported file count of that result.

    Args:
        project_id: Project passed to ``vertexai.init``.
        corpus_name: Target corpus, forwarded to ``rag.import_files_async``.
        paths: Source locations, forwarded to ``rag.import_files_async``.

    Returns:
        The awaited result of the import operation.
    """
    # [START generativeaionvertexai_rag_import_files_async]

    from vertexai.preview import rag
    import vertexai

    # TODO(developer): Update and un-comment below lines
    # project_id = "PROJECT_ID"
    # corpus_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}"

    # Supports Google Cloud Storage and Google Drive Links
    # paths = ["https://drive.google.com/file/123", "gs://my_bucket/my_files_dir"]

    # Initialize Vertex AI API once per session
    vertexai.init(project=project_id, location="us-central1")

    response = await rag.import_files_async(
        corpus_name=corpus_name,
        paths=paths,
        chunk_size=512,  # Optional
        chunk_overlap=100,  # Optional
    )

    result = await response.result()
    print(f"Imported {result.imported_rag_files_count} files.")
    # [END generativeaionvertexai_rag_import_files_async]
    return result


def get_file(project_id: str, file_name: str):
    """Fetch a RAG file by resource name, print it, and return it.

    Args:
        project_id: Project passed to ``vertexai.init``.
        file_name: Passed to ``rag.get_file`` as ``name``.

    Returns:
        The object returned by ``rag.get_file``.
    """
    # [START generativeaionvertexai_rag_get_file]

    from vertexai.preview import rag
    import vertexai

    # TODO(developer): Update and un-comment below lines
    # project_id = "PROJECT_ID"
    # file_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}/ragFiles/{rag_file_id}"

    # Initialize Vertex AI API once per session
    vertexai.init(project=project_id, location="us-central1")

    rag_file = rag.get_file(name=file_name)
    print(rag_file)
    # [END generativeaionvertexai_rag_get_file]

    return rag_file


def list_files(project_id: str, corpus_name: str):
    """List the files of a RAG corpus, printing each one.

    Args:
        project_id: Project passed to ``vertexai.init``.
        corpus_name: Corpus whose files are listed.

    Returns:
        The object returned by ``rag.list_files`` (already iterated once
        here for printing).
    """
    # [START generativeaionvertexai_rag_list_files]

    from vertexai.preview import rag
    import vertexai

    # TODO(developer): Update and un-comment below lines
    # project_id = "PROJECT_ID"
    # corpus_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}"

    # Initialize Vertex AI API once per session
    vertexai.init(project=project_id, location="us-central1")

    files = rag.list_files(corpus_name=corpus_name)
    for file in files:
        print(file)
    # [END generativeaionvertexai_rag_list_files]

    return files


def delete_file(project_id: str, file_name: str) -> None:
    """Delete a RAG file by resource name and print a confirmation.

    Args:
        project_id: Project passed to ``vertexai.init``.
        file_name: Passed to ``rag.delete_file`` as ``name``.
    """
    # [START generativeaionvertexai_rag_delete_file]

    from vertexai.preview import rag
    import vertexai

    # TODO(developer): Update and un-comment below lines
    # project_id = "PROJECT_ID"
    # file_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}/ragFiles/{rag_file_id}"

    # Initialize Vertex AI API once per session
    vertexai.init(project=project_id, location="us-central1")

    rag.delete_file(name=file_name)
    print(f"File {file_name} deleted.")
    # [END generativeaionvertexai_rag_delete_file]


def delete_corpus(project_id: str, corpus_name: str) -> None:
    """Delete a RAG corpus by resource name and print a confirmation.

    Args:
        project_id: Project passed to ``vertexai.init``.
        corpus_name: Passed to ``rag.delete_corpus`` as ``name``.
    """
    # [START generativeaionvertexai_rag_delete_corpus]

    from vertexai.preview import rag
    import vertexai

    # TODO(developer): Update and un-comment below lines
    # project_id = "PROJECT_ID"
    # corpus_name = "projects/{project_id}/locations/us-central1/ragCorpora/{rag_corpus_id}"

    # Initialize Vertex AI API once per session
    vertexai.init(project=project_id, location="us-central1")

    rag.delete_corpus(name=corpus_name)
    print(f"Corpus {corpus_name} deleted.")
    # [END generativeaionvertexai_rag_delete_corpus]


def retrieval_query(
    project_id: str,
    rag_corpora: List[str],
    text: str,
):
    """Run a retrieval query against RAG corpora, print and return the response.

    Args:
        project_id: Project passed to ``vertexai.init``.
        rag_corpora: Corpora to query, forwarded to ``rag.retrieval_query``.
        text: Query text, forwarded to ``rag.retrieval_query``.

    Returns:
        The response of ``rag.retrieval_query`` (called with
        ``similarity_top_k=10``).
    """
    # [START generativeaionvertexai_rag_retrieval_query]

    from vertexai.preview import rag
    import vertexai

    # TODO(developer): Update and un-comment below lines
    # project_id = "PROJECT_ID"
    # rag_corpora = ["9183965540115283968"]  # Only one corpus is supported at this time
    # text = "Your Query"

    # Initialize Vertex AI API once per session
    vertexai.init(project=project_id, location="us-central1")

    response = rag.retrieval_query(
        rag_corpora=rag_corpora,
        text=text,
        similarity_top_k=10,  # Optional
    )
    print(response)
    # [END generativeaionvertexai_rag_retrieval_query]

    return response


def generate_content_with_rag(
    project_id: str,
    rag_corpora: List[str],
):
    """Generate content with a Gemini model that uses a RAG retrieval tool.

    Builds a ``Tool`` from a ``rag.VertexRagStore`` over ``rag_corpora``,
    attaches it to a ``"gemini-1.0-pro"`` model, sends a fixed prompt, and
    prints the response text.

    Args:
        project_id: Project passed to ``vertexai.init``.
        rag_corpora: Corpora backing the retrieval tool.

    Returns:
        The response of ``GenerativeModel.generate_content``.
    """
    # [START generativeaionvertexai_rag_generate_content]

    from vertexai.preview import rag
    from vertexai.preview.generative_models import GenerativeModel, Tool
    import vertexai

    # TODO(developer): Update and un-comment below lines
    # project_id = "PROJECT_ID"
    # rag_corpora = ["9183965540115283968"]  # Only one corpus is supported at this time

    # Initialize Vertex AI API once per session
    vertexai.init(project=project_id, location="us-central1")

    rag_retrieval_tool = Tool.from_retrieval(
        retrieval=rag.Retrieval(
            source=rag.VertexRagStore(
                rag_corpora=rag_corpora,
                similarity_top_k=3,  # Optional
                vector_distance_threshold=0.3,  # Optional
            ),
        )
    )

    rag_model = GenerativeModel("gemini-1.0-pro", tools=[rag_retrieval_tool])
    response = rag_model.generate_content("Why is the sky blue?")
    print(response.text)
    # [END generativeaionvertexai_rag_generate_content]

    return response


def quickstart(
    project_id: str,
    display_name: str,
    paths: List[str],
):
    """Create a corpus, import files, query it, and generate a grounded answer.

    Args:
        project_id: Project passed to ``vertexai.init``.
        display_name: Display name for the corpus that is created.
        paths: Source locations imported into the new corpus.

    Returns:
        A ``(rag_corpus, response)`` tuple: the created corpus and the
        response of the final ``generate_content`` call. The corpus is not
        deleted here; callers are responsible for cleanup.
    """
    # [START generativeaionvertexai_rag_quickstart]
    from vertexai.preview import rag
    from vertexai.preview.generative_models import GenerativeModel, Tool
    import vertexai

    # Create a RAG Corpus, Import Files, and Generate a response

    # TODO(developer): Update and un-comment below lines
    # project_id = "PROJECT_ID"
    # display_name = "test_corpus"
    # paths = ["https://drive.google.com/file/123", "gs://my_bucket/my_files_dir"]  # Supports Google Cloud Storage and Google Drive Links

    # Initialize Vertex AI API once per session
    vertexai.init(project=project_id, location="us-central1")

    # Create RagCorpus
    rag_corpus = rag.create_corpus(display_name=display_name)

    # Import Files to the RagCorpus
    # (the import result is not used below, so it is intentionally not bound)
    rag.import_files(
        rag_corpus.name,
        paths,
        chunk_size=512,  # Optional
        chunk_overlap=100,  # Optional
    )

    # Direct context retrieval
    response = rag.retrieval_query(
        rag_corpora=[rag_corpus.name],
        text="What is RAG and why it is helpful?",
        similarity_top_k=10,
    )
    print(response)

    # Enhance generation
    # Create a RAG retrieval tool
    rag_retrieval_tool = Tool.from_retrieval(
        retrieval=rag.Retrieval(
            source=rag.VertexRagStore(
                rag_corpora=[rag_corpus.name],  # Currently only 1 corpus is allowed.
                similarity_top_k=3,  # Optional
                vector_distance_threshold=0.4,  # Optional
            ),
        )
    )
    # Create a gemini-pro model instance
    rag_model = GenerativeModel("gemini-1.0-pro", tools=[rag_retrieval_tool])

    # Generate response
    response = rag_model.generate_content("What is RAG and why it is helpful?")
    print(response.text)
    # [END generativeaionvertexai_rag_quickstart]
    return rag_corpus, response


# diff --git a/generative_ai/rag_test.py b/generative_ai/rag_test.py
# new file mode 100644
# index 00000000000..a710dc11e93
# --- /dev/null
# +++ b/generative_ai/rag_test.py
# @@ -0,0 +1,120 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# flake8: noqa ANN001, ANN201

import os
from pathlib import Path

import pytest
import rag
import vertexai

# TODO: Remove once SDK is updated
pytest.skip(allow_module_level=True)

PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
LOCATION = "us-central1"
GCS_FILE = "gs://cloud-samples-data/generative-ai/pdf/earnings_statement.pdf"

vertexai.init(project=PROJECT_ID, location=LOCATION)


@pytest.fixture(scope="module", name="test_file")
def test_file_fixture():
    """Writes ./hello.txt, yields its absolute POSIX path, then removes it."""
    file_path = Path("./hello.txt")
    file_path.write_text("Hello World", encoding="utf-8")
    yield file_path.absolute().as_posix()
    file_path.unlink()  # Delete the file after tests


@pytest.fixture(scope="module", name="test_corpus")
def test_corpus_fixture():
    """Creates a corpus for testing and deletes it after tests are complete."""
    corpus = rag.create_corpus(PROJECT_ID, "test_corpus")
    yield corpus
    rag.delete_corpus(PROJECT_ID, corpus.name)


@pytest.fixture(scope="module", name="uploaded_file")
def uploaded_file_fixture(test_corpus, test_file):
    """Uploads a file to the module-scoped corpus and yields it.

    The file is not deleted by this fixture.
    NOTE(review): presumably it is removed when ``test_corpus`` deletes the
    corpus during teardown -- confirm against the service behaviour.
    """
    rag_file = rag.upload_file(PROJECT_ID, test_corpus.name, test_file)
    # TODO: Remove when upload_file service response changes to the correct format.
    rag_file.name = rag_file.name.replace("RagFile", "ragFiles")
    yield rag_file


def test_create_corpus():
    """A created corpus carries the requested display name."""
    corpus = rag.create_corpus(PROJECT_ID, "test_create_corpus")
    try:
        assert corpus.display_name == "test_create_corpus"
    finally:
        # Clean up even when the assertion fails so no corpus is leaked.
        rag.delete_corpus(PROJECT_ID, corpus.name)


def test_get_corpus(test_corpus):
    """get_corpus returns the corpus with the requested name."""
    retrieved_corpus = rag.get_corpus(PROJECT_ID, test_corpus.name)
    assert retrieved_corpus.name == test_corpus.name


def test_list_corpora(test_corpus):
    """The fixture corpus appears in the listing."""
    corpora = rag.list_corpora(PROJECT_ID)
    assert any(c.name == test_corpus.name for c in corpora)


def test_upload_file(test_corpus, test_file):
    """upload_file returns a truthy file object, which is then deleted."""
    rag_file = rag.upload_file(PROJECT_ID, test_corpus.name, test_file)
    assert rag_file
    # TODO: Remove when upload_file service response changes to the correct format.
    rag_file.name = rag_file.name.replace("RagFile", "ragFiles")
    rag.delete_file(PROJECT_ID, rag_file.name)


def test_import_files(test_corpus):
    """Importing the sample GCS file reports at least one imported file."""
    response = rag.import_files(PROJECT_ID, test_corpus.name, [GCS_FILE])
    assert response.imported_rag_files_count > 0


@pytest.mark.asyncio
async def test_import_files_async(test_corpus):
    """The async import reports at least one imported file."""
    result = await rag.import_files_async(PROJECT_ID, test_corpus.name, [GCS_FILE])
    assert result.imported_rag_files_count > 0


def test_get_file(uploaded_file):
    """get_file returns the file with the requested name."""
    retrieved_file = rag.get_file(PROJECT_ID, uploaded_file.name)
    assert retrieved_file.name == uploaded_file.name


def test_list_files(test_corpus, uploaded_file):
    """The uploaded fixture file appears in the corpus listing."""
    files = rag.list_files(PROJECT_ID, test_corpus.name)
    assert any(f.name == uploaded_file.name for f in files)


def test_retrieval_query(test_corpus):
    """A retrieval query returns a response with contexts."""
    response = rag.retrieval_query(PROJECT_ID, [test_corpus.name], "hello")
    assert response
    assert response.contexts


def test_generate_content_with_rag(test_corpus):
    """RAG-backed generation returns a response with text."""
    response = rag.generate_content_with_rag(PROJECT_ID, [test_corpus.name])
    assert response
    assert response.text


def test_quickstart():
    """The quickstart returns a response with text; its corpus is removed."""
    corpus, response = rag.quickstart(PROJECT_ID, "test_corpus_quickstart", [GCS_FILE])
    try:
        assert response
        assert response.text
    finally:
        # Clean up even when an assertion fails so no corpus is leaked.
        rag.delete_corpus(PROJECT_ID, corpus.name)


# diff --git a/generative_ai/requirements-test.txt b/generative_ai/requirements-test.txt
# index 27f70f46f2a..fceefa0d35e 100644
# --- a/generative_ai/requirements-test.txt
# +++ b/generative_ai/requirements-test.txt
# @@ -1,3 +1,4 @@
#  backoff==2.2.1
#  google-api-core==2.17.1
# -pytest==6.2.4
# +pytest==8.1.1
# +pytest-asyncio==0.23.6
# diff --git a/generative_ai/requirements.txt b/generative_ai/requirements.txt
# index 6021926a973..13a6bd5075f 100644
# --- a/generative_ai/requirements.txt
# +++ b/generative_ai/requirements.txt
# @@ -1,5 +1,7 @@
#  pandas==1.3.5; python_version == '3.7'
#  pandas==2.0.1; python_version > '3.7'
# -google-cloud-aiplatform[pipelines]==1.47.0
# +pillow==9.5.0; python_version < '3.8'
# +pillow==10.3.0; python_version >= '3.8'
# +google-cloud-aiplatform[pipelines]==1.48.0
#  google-auth==2.17.3
#  anthropic[vertex]==0.21.3