From 791d95908165290c858bcdb78e3c922b16149e71 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 15 Jul 2025 19:24:27 +0200 Subject: [PATCH 01/61] chore(deps): update googleapis/langchain-google-alloydb-pg-python digest to 6fbd4b5 (#311) --- .github/workflows/schedule_reporter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/schedule_reporter.yml b/.github/workflows/schedule_reporter.yml index b449c599..1fe0ce47 100644 --- a/.github/workflows/schedule_reporter.yml +++ b/.github/workflows/schedule_reporter.yml @@ -24,6 +24,6 @@ jobs: issues: 'write' checks: 'read' contents: 'read' - uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@fb89146037a50ca9d96801be3208bc1c3efcd50d + uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@6fbd4b557d0784569dd246e386acd01a264d7de4 with: trigger_names: "pg-integration-test-nightly,pg-continuous-test-on-merge" From 6e4ba0c0093ffdd78bd5f5eb10e30d0a3ea9ebd0 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 22 Jul 2025 14:30:39 +0200 Subject: [PATCH 02/61] chore(deps): update googleapis/langchain-google-alloydb-pg-python digest to 9f1715c (#313) --- .github/workflows/schedule_reporter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/schedule_reporter.yml b/.github/workflows/schedule_reporter.yml index 1fe0ce47..045e0170 100644 --- a/.github/workflows/schedule_reporter.yml +++ b/.github/workflows/schedule_reporter.yml @@ -24,6 +24,6 @@ jobs: issues: 'write' checks: 'read' contents: 'read' - uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@6fbd4b557d0784569dd246e386acd01a264d7de4 + uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@9f1715c5b93b52ede70f858c78420b8a708ef541 with: trigger_names: 
"pg-integration-test-nightly,pg-continuous-test-on-merge" From f13aac64896a8e61675acdce846cbe703cd6c548 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Sat, 9 Aug 2025 01:33:29 +0200 Subject: [PATCH 03/61] chore(deps): update googleapis/langchain-google-alloydb-pg-python digest to fdb0967 (#315) --- .github/workflows/schedule_reporter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/schedule_reporter.yml b/.github/workflows/schedule_reporter.yml index 045e0170..d374101a 100644 --- a/.github/workflows/schedule_reporter.yml +++ b/.github/workflows/schedule_reporter.yml @@ -24,6 +24,6 @@ jobs: issues: 'write' checks: 'read' contents: 'read' - uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@9f1715c5b93b52ede70f858c78420b8a708ef541 + uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@fdb096794ec246cc8eb250a7332682a21925f2d9 with: trigger_names: "pg-integration-test-nightly,pg-continuous-test-on-merge" From b08bc127bc21687e7f2f4a4da8bf8432a5ee62ec Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 11 Aug 2025 18:53:52 +0200 Subject: [PATCH 04/61] chore(deps): update actions/checkout action to v5 (#318) --- .github/workflows/docs.yml | 4 ++-- .github/workflows/lint.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index d6cbeaef..90ff4b84 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 - name: Setup Python uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: @@ -26,7 +26,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: 
actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 - name: Setup Python uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index b9721507..ba304f22 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -31,7 +31,7 @@ jobs: steps: - name: Checkout Repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 - name: Setup Python uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 From c207bc12be2932f4c6132f200018478d6040db85 Mon Sep 17 00:00:00 2001 From: dishaprakash <57954147+dishaprakash@users.noreply.github.com> Date: Tue, 12 Aug 2025 09:35:10 +0000 Subject: [PATCH 05/61] chore: Upgrade Langgraph deps (#320) * chore: Upgrade Langgraph deps * Update pyproject.toml * Update test_async_checkpoint.py --- pyproject.toml | 2 +- requirements.txt | 2 +- tests/test_async_checkpoint.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8f656b2c..837c2db4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,7 +51,7 @@ test = [ "pytest==8.4.1", "pytest-cov==6.2.1", "langchain-tests==0.3.20", - "langgraph==0.5.2" + "langgraph==0.6.0" ] [build-system] diff --git a/requirements.txt b/requirements.txt index e3045bbc..190d69af 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,4 @@ numpy==2.2.6; python_version == "3.10" numpy==2.0.2; python_version <= "3.9" pgvector==0.4.1 SQLAlchemy[asyncio]==2.0.41 -langgraph==0.5.2 +langgraph==0.6.0 diff --git a/tests/test_async_checkpoint.py b/tests/test_async_checkpoint.py index f3d8b5ed..c78fc726 100644 --- a/tests/test_async_checkpoint.py +++ b/tests/test_async_checkpoint.py @@ -39,7 +39,7 @@ empty_checkpoint, ) from langgraph.checkpoint.serde.jsonplus 
import JsonPlusSerializer -from langgraph.prebuilt import ( # type: ignore[import-not-found] +from langgraph.prebuilt import ( # type: ignore ToolNode, ValidationNode, create_react_agent, From 40278de1adaf8dbccf3cbaf18d4a3a14ecb5015b Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 13 Aug 2025 00:02:04 +0200 Subject: [PATCH 06/61] chore(deps): update googleapis/langchain-google-alloydb-pg-python digest to 4ccc09a (#317) --- .github/workflows/schedule_reporter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/schedule_reporter.yml b/.github/workflows/schedule_reporter.yml index d374101a..5fb9d0ae 100644 --- a/.github/workflows/schedule_reporter.yml +++ b/.github/workflows/schedule_reporter.yml @@ -24,6 +24,6 @@ jobs: issues: 'write' checks: 'read' contents: 'read' - uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@fdb096794ec246cc8eb250a7332682a21925f2d9 + uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@4ccc09a2194a752a8e112f2c88ee1a6efd7a8512 with: trigger_names: "pg-integration-test-nightly,pg-continuous-test-on-merge" From 7917d62c3f9ea2c6ca8ab8d6284cfa2c7e535401 Mon Sep 17 00:00:00 2001 From: dishaprakash <57954147+dishaprakash@users.noreply.github.com> Date: Wed, 13 Aug 2025 17:57:18 +0000 Subject: [PATCH 07/61] refactor!: Refactor PostgresVectorStore and PostgresEngine to depend on PGVectorstore and PGEngine respectively (#316) * refactor: Refactor PostgresEngine to depend on PGEngine * update deps * update deps * Linter fix * fix tests * linter fix * refactor!: Refactor PostgresVectorStore to depend on PGVectorstore (#319) * refactor!: Refactor PostgresVectorStore to depend on PGVectorstore * Liter fix * fix tests * re-expose hybrid search config * header fix * fix tests * fix tests * fix tests * fix tests * fix tests * expose hybrid search config through init --------- Co-authored-by: Averi Kitsch --- 
docs/vector_store.ipynb | 43 +- pyproject.toml | 4 +- requirements.txt | 4 +- src/langchain_google_cloud_sql_pg/__init__.py | 12 +- .../async_vectorstore.py | 1184 +---------------- src/langchain_google_cloud_sql_pg/engine.py | 265 +--- src/langchain_google_cloud_sql_pg/indexes.py | 104 +- .../vectorstore.py | 742 +---------- tests/test_async_vectorstore.py | 2 +- tests/test_async_vectorstore_from_methods.py | 4 +- tests/test_async_vectorstore_index.py | 97 +- tests/test_async_vectorstore_search.py | 397 +++++- tests/test_engine.py | 78 +- tests/test_standard_test_suite.py | 4 +- tests/test_vectorstore.py | 2 +- tests/test_vectorstore_from_methods.py | 6 +- tests/test_vectorstore_index.py | 8 +- tests/test_vectorstore_search.py | 84 +- 18 files changed, 767 insertions(+), 2273 deletions(-) diff --git a/docs/vector_store.ipynb b/docs/vector_store.ipynb index cf2814fe..74bc3f54 100644 --- a/docs/vector_store.ipynb +++ b/docs/vector_store.ipynb @@ -585,10 +585,49 @@ "all_texts = [\"Apples and oranges\", \"Cars and airplanes\", \"Pineapple\", \"Train\", \"Banana\"]\n", "metadatas = [{\"len\": len(t)} for t in all_texts]\n", "ids = [str(uuid.uuid4()) for _ in all_texts]\n", - "await custom_store.aadd_texts(all_texts, metadatas=metadatas, ids=ids)\n", + "await custom_store.aadd_texts(all_texts, metadatas=metadatas, ids=ids)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### For v0.15.0+\n", + "\n", + "**Important Update:** Support for string filters has been deprecated. Please use dictionaries to add filters." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Use filter on search\n", + "docs = await custom_store.asimilarity_search_by_vector(\n", + " query_vector, filter={\"len\": {\"$gte\": 6}}\n", + ")\n", "\n", + "print(docs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### For v0.14 and under\n", + "\n", + "You can make use of the string filters to filter on metadata" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "# Use filter on search\n", - "docs = await custom_store.asimilarity_search_by_vector(query_vector, filter=\"len >= 6\")\n", + "docs = await custom_store.asimilarity_search(query, filter=\"len >= 6\")\n", "\n", "print(docs)" ] diff --git a/pyproject.toml b/pyproject.toml index 837c2db4..8e691bda 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,12 +11,10 @@ authors = [ dependencies = [ "cloud-sql-python-connector[asyncpg] >= 1.10.0, <2.0.0", - "langchain-core>=0.2.36, <1.0.0 ", "numpy>=1.24.4, <3.0.0; python_version >= '3.11'", "numpy>=1.24.4, <=2.2.6; python_version == '3.10'", "numpy>=1.24.4, <=2.0.2; python_version <= '3.9'", - "pgvector>=0.2.5, <1.0.0", - "SQLAlchemy[asyncio]>=2.0.25, <3.0.0" + "langchain-postgres>=0.0.15", ] classifiers = [ diff --git a/requirements.txt b/requirements.txt index 190d69af..ac28b2c2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,6 @@ cloud-sql-python-connector[asyncpg]==1.18.2 -langchain-core==0.3.68 numpy==2.3.1; python_version >= "3.11" numpy==2.2.6; python_version == "3.10" numpy==2.0.2; python_version <= "3.9" -pgvector==0.4.1 -SQLAlchemy[asyncio]==2.0.41 langgraph==0.6.0 +langchain-postgres==0.0.15 diff --git a/src/langchain_google_cloud_sql_pg/__init__.py b/src/langchain_google_cloud_sql_pg/__init__.py index ca8ab9ef..34bf7514 100644 --- a/src/langchain_google_cloud_sql_pg/__init__.py +++ 
b/src/langchain_google_cloud_sql_pg/__init__.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +from langchain_postgres import Column +from langchain_postgres.v2.hybrid_search_config import ( + HybridSearchConfig, + reciprocal_rank_fusion, + weighted_sum_ranking, +) + from . import indexes from .chat_message_history import PostgresChatMessageHistory from .checkpoint import PostgresSaver -from .engine import Column, PostgresEngine +from .engine import PostgresEngine from .loader import PostgresDocumentSaver, PostgresLoader from .vectorstore import PostgresVectorStore from .version import __version__ @@ -29,5 +36,8 @@ "PostgresLoader", "PostgresDocumentSaver", "PostgresSaver", + "HybridSearchConfig", + "reciprocal_rank_fusion", + "weighted_sum_ranking", "__version__", ] diff --git a/src/langchain_google_cloud_sql_pg/async_vectorstore.py b/src/langchain_google_cloud_sql_pg/async_vectorstore.py index 0cde1f8d..d40470f3 100644 --- a/src/langchain_google_cloud_sql_pg/async_vectorstore.py +++ b/src/langchain_google_cloud_sql_pg/async_vectorstore.py @@ -15,1187 +15,11 @@ # TODO: Remove below import when minimum supported Python version is 3.10 from __future__ import annotations -import copy -import json -import uuid -from typing import Any, Callable, Iterable, Optional, Sequence +from langchain_postgres.v2.async_vectorstore import AsyncPGVectorStore -import numpy as np -from langchain_core.documents import Document -from langchain_core.embeddings import Embeddings -from langchain_core.vectorstores import VectorStore, utils -from sqlalchemy import text -from sqlalchemy.engine.row import RowMapping -from sqlalchemy.ext.asyncio import AsyncEngine -from .engine import PostgresEngine -from .indexes import ( - DEFAULT_DISTANCE_STRATEGY, - DEFAULT_INDEX_NAME_SUFFIX, - BaseIndex, - DistanceStrategy, - ExactNearestNeighbor, - QueryOptions, -) - -COMPARISONS_TO_NATIVE = { - "$eq": "=", - "$ne": "!=", - 
"$lt": "<", - "$lte": "<=", - "$gt": ">", - "$gte": ">=", -} - -SPECIAL_CASED_OPERATORS = { - "$in", - "$nin", - "$between", - "$exists", -} - -TEXT_OPERATORS = { - "$like", - "$ilike", -} - -LOGICAL_OPERATORS = {"$and", "$or", "$not"} - -SUPPORTED_OPERATORS = ( - set(COMPARISONS_TO_NATIVE) - .union(TEXT_OPERATORS) - .union(LOGICAL_OPERATORS) - .union(SPECIAL_CASED_OPERATORS) -) - - -class AsyncPostgresVectorStore(VectorStore): +class AsyncPostgresVectorStore(AsyncPGVectorStore): """Google Cloud SQL for PostgreSQL Vector Store class""" - __create_key = object() - - def __init__( - self, - key: object, - pool: AsyncEngine, - embedding_service: Embeddings, - table_name: str, - schema_name: str = "public", - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[str] = [], - id_column: str = "langchain_id", - metadata_json_column: Optional[str] = "langchain_metadata", - distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY, - k: int = 4, - fetch_k: int = 20, - lambda_mult: float = 0.5, - index_query_options: Optional[QueryOptions] = None, - ): - """AsyncPostgresVectorStore constructor. - Args: - key (object): Prevent direct constructor usage. - pool (PostgresEngine): Connection pool engine for managing connections to Postgres database. - embedding_service (Embeddings): Text embedding model to use. - table_name (str): Name of the existing table or the table to be created. - schema_name (str, optional): Database schema name of the table. Defaults to "public". - content_column (str): Column that represent a Document's page_content. Defaults to "content". - embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". - metadata_columns (list[str]): Column(s) that represent a document's metadata. - id_column (str): Column that represents the Document's id. Defaults to "langchain_id". - metadata_json_column (str): Column to store metadata as JSON. 
Defaults to "langchain_metadata". - distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE. - k (int): Number of Documents to return from search. Defaults to 4. - fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. - lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. - index_query_options (QueryOptions): Index query option. - - - Raises: - Exception: If called directly by user. - """ - if key != AsyncPostgresVectorStore.__create_key: - raise Exception( - "Only create class through 'create' or 'create_sync' methods!" - ) - - self.pool = pool - self.embedding_service = embedding_service - self.table_name = table_name - self.schema_name = schema_name - self.content_column = content_column - self.embedding_column = embedding_column - self.metadata_columns = metadata_columns - self.id_column = id_column - self.metadata_json_column = metadata_json_column - self.distance_strategy = distance_strategy - self.k = k - self.fetch_k = fetch_k - self.lambda_mult = lambda_mult - self.index_query_options = index_query_options - - @classmethod - async def create( - cls, - engine: PostgresEngine, - embedding_service: Embeddings, - table_name: str, - schema_name: str = "public", - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[str] = [], - ignore_metadata_columns: Optional[list[str]] = None, - id_column: str = "langchain_id", - metadata_json_column: Optional[str] = "langchain_metadata", - distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY, - k: int = 4, - fetch_k: int = 20, - lambda_mult: float = 0.5, - index_query_options: Optional[QueryOptions] = None, - ) -> AsyncPostgresVectorStore: - """Create a new AsyncPostgresVectorStore instance. 
- - Args: - engine (PostgresEngine): Connection pool engine for managing connections to Cloud SQL for PostgreSQL database. - embedding_service (Embeddings): Text embedding model to use. - table_name (str): Name of an existing table or table to be created. - schema_name (str, optional): Database schema name of the table. Defaults to "public". - content_column (str): Column that represent a Document's page_content. Defaults to "content". - embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". - metadata_columns (list[str]): Column(s) that represent a document's metadata. - ignore_metadata_columns (list[str]): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None. - id_column (str): Column that represents the Document's id. Defaults to "langchain_id". - metadata_json_column (str): Column to store metadata as JSON. Defaults to "langchain_metadata". - distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE. - k (int): Number of Documents to return from search. Defaults to 4. - fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. - lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. - index_query_options (QueryOptions): Index query option. - - Returns: - AsyncPostgresVectorStore - """ - if metadata_columns and ignore_metadata_columns: - raise ValueError( - "Can not use both metadata_columns and ignore_metadata_columns." 
- ) - # Get field type information - async with engine._pool.connect() as conn: - result = await conn.execute( - text( - f"SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '{table_name}'AND table_schema = '{schema_name}'" - ) - ) - result_map = result.mappings() - results = result_map.fetchall() - - columns = {} - for field in results: - columns[field["column_name"]] = field["data_type"] - - # Check columns - if id_column not in columns: - raise ValueError(f"Id column, {id_column}, does not exist.") - if content_column not in columns: - raise ValueError(f"Content column, {content_column}, does not exist.") - content_type = columns[content_column] - if content_type != "text" and "char" not in content_type: - raise ValueError( - f"Content column, {content_column}, is type, {content_type}. It must be a type of character string." - ) - if embedding_column not in columns: - raise ValueError(f"Embedding column, {embedding_column}, does not exist.") - if columns[embedding_column] != "USER-DEFINED": - raise ValueError( - f"Embedding column, {embedding_column}, is not type Vector." 
- ) - - metadata_json_column = ( - None if metadata_json_column not in columns else metadata_json_column - ) - - # If using metadata_columns check to make sure column exists - for column in metadata_columns: - if column not in columns: - raise ValueError(f"Metadata column, {column}, does not exist.") - - # If using ignore_metadata_columns, filter out known columns and set known metadata columns - all_columns = columns - if ignore_metadata_columns: - for column in ignore_metadata_columns: - del all_columns[column] - - del all_columns[id_column] - del all_columns[content_column] - del all_columns[embedding_column] - metadata_columns = [k for k in all_columns.keys()] - - return cls( - cls.__create_key, - engine._pool, - embedding_service, - table_name, - schema_name, - content_column, - embedding_column, - metadata_columns, - id_column, - metadata_json_column, - distance_strategy, - k, - fetch_k, - lambda_mult, - index_query_options, - ) - - @property - def embeddings(self) -> Embeddings: - return self.embedding_service - - async def __aadd_embeddings( - self, - texts: Iterable[str], - embeddings: list[list[float]], - metadatas: Optional[list[dict]] = None, - ids: Optional[list] = None, - **kwargs: Any, - ) -> list[str]: - """Add embeddings to the table. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. 
- """ - if not ids: - ids = [str(uuid.uuid4()) for _ in texts] - else: - # This is done to fill in any missing ids - ids = [id if id is not None else str(uuid.uuid4()) for id in ids] - if not metadatas: - metadatas = [{} for _ in texts] - # Insert embeddings - for id, content, embedding, metadata in zip(ids, texts, embeddings, metadatas): - metadata_col_names = ( - ", " + ", ".join(f'"{col}"' for col in self.metadata_columns) - if len(self.metadata_columns) > 0 - else "" - ) - insert_stmt = f'INSERT INTO "{self.schema_name}"."{self.table_name}"("{self.id_column}", "{self.content_column}", "{self.embedding_column}"{metadata_col_names}' - values = { - "langchain_id": id, - "content": content, - "embedding": str([float(dimension) for dimension in embedding]), - } - values_stmt = "VALUES (:langchain_id, :content, :embedding" - - # Add metadata - extra = copy.deepcopy(metadata) - for metadata_column in self.metadata_columns: - if metadata_column in metadata: - values_stmt += f", :{metadata_column}" - values[metadata_column] = metadata[metadata_column] - del extra[metadata_column] - else: - values_stmt += ",null" - - # Add JSON column and/or close statement - insert_stmt += ( - f""", "{self.metadata_json_column}")""" - if self.metadata_json_column - else ")" - ) - if self.metadata_json_column: - values_stmt += ", :extra)" - values["extra"] = json.dumps(extra) - else: - values_stmt += ")" - - upsert_stmt = f' ON CONFLICT ("{self.id_column}") DO UPDATE SET "{self.content_column}" = EXCLUDED."{self.content_column}", "{self.embedding_column}" = EXCLUDED."{self.embedding_column}"' - - if self.metadata_json_column: - upsert_stmt += f', "{self.metadata_json_column}" = EXCLUDED."{self.metadata_json_column}"' - - for column in self.metadata_columns: - upsert_stmt += f', "{column}" = EXCLUDED."{column}"' - - upsert_stmt += ";" - - query = insert_stmt + values_stmt + upsert_stmt - async with self.pool.connect() as conn: - await conn.execute(text(query), values) - await 
conn.commit() - - return ids - - async def aget_by_ids(self, ids: Sequence[str]) -> list[Document]: - """Get documents by ids.""" - - quoted_ids = [f"'{id_val}'" for id_val in ids] - id_list_str = ", ".join(quoted_ids) - - columns = self.metadata_columns + [ - self.id_column, - self.content_column, - ] - if self.metadata_json_column: - columns.append(self.metadata_json_column) - - column_names = ", ".join(f'"{col}"' for col in columns) - - query = f'SELECT {column_names} FROM "{self.schema_name}"."{self.table_name}" WHERE "{self.id_column}" IN ({id_list_str});' - - async with self.pool.connect() as conn: - result = await conn.execute(text(query)) - result_map = result.mappings() - results = result_map.fetchall() - - documents = [] - for row in results: - metadata = ( - row[self.metadata_json_column] - if self.metadata_json_column and row[self.metadata_json_column] - else {} - ) - for col in self.metadata_columns: - metadata[col] = row[col] - documents.append( - ( - Document( - page_content=row[self.content_column], - metadata=metadata, - id=str(row[self.id_column]), - ) - ) - ) - - return documents - - async def aadd_texts( - self, - texts: Iterable[str], - metadatas: Optional[list[dict]] = None, - ids: Optional[list] = None, - **kwargs: Any, - ) -> list[str]: - """Embed texts and add to the table. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. - """ - embeddings = self.embedding_service.embed_documents(list(texts)) - ids = await self.__aadd_embeddings( - texts, embeddings, metadatas=metadatas, ids=ids, **kwargs - ) - return ids - - async def aadd_documents( - self, - documents: list[Document], - ids: Optional[list] = None, - **kwargs: Any, - ) -> list[str]: - """Embed documents and add to the table. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. 
- """ - texts = [doc.page_content for doc in documents] - metadatas = [doc.metadata for doc in documents] - if not ids: - ids = [doc.id for doc in documents] - ids = await self.aadd_texts(texts, metadatas=metadatas, ids=ids, **kwargs) - return ids - - async def adelete( - self, - ids: Optional[list] = None, - **kwargs: Any, - ) -> Optional[bool]: - """Delete records from the table. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. - """ - if not ids: - return False - - id_list = ", ".join([f"'{id}'" for id in ids]) - query = f'DELETE FROM "{self.schema_name}"."{self.table_name}" WHERE {self.id_column} in ({id_list})' - async with self.pool.connect() as conn: - await conn.execute(text(query)) - await conn.commit() - return True - - @classmethod - async def afrom_texts( # type: ignore[override] - cls: type[AsyncPostgresVectorStore], - texts: list[str], - embedding: Embeddings, - engine: PostgresEngine, - table_name: str, - schema_name: str = "public", - metadatas: Optional[list[dict]] = None, - ids: Optional[list] = None, - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[str] = [], - ignore_metadata_columns: Optional[list[str]] = None, - id_column: str = "langchain_id", - metadata_json_column: str = "langchain_metadata", - distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY, - k: int = 4, - fetch_k: int = 20, - lambda_mult: float = 0.5, - index_query_options: Optional[QueryOptions] = None, - **kwargs: Any, - ) -> AsyncPostgresVectorStore: - """Create an AsyncPostgresVectorStore instance from texts. - - Args: - texts (list[str]): Texts to add to the vector store. - embedding (Embeddings): Text embedding model to use. - engine (PostgresEngine): Connection pool engine for managing connections to Postgres database. - table_name (str): Name of the existing table or the table to be created. 
- schema_name (str, optional): Database schema name of the table. Defaults to "public". - metadatas (Optional[list[dict]]): List of metadatas to add to table records. - ids: (Optional[list[str]]): List of IDs to add to table records. - content_column (str): Column that represent a Document’s page_content. Defaults to "content". - embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". - metadata_columns (list[str]): Column(s) that represent a document's metadata. - ignore_metadata_columns (list[str]): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None. - id_column (str): Column that represents the Document's id. Defaults to "langchain_id". - metadata_json_column (str): Column to store metadata as JSON. Defaults to "langchain_metadata". - distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE. - k (int): Number of Documents to return from search. Defaults to 4. - fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. - lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. - index_query_options (QueryOptions): Index query option. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. 
- - Returns: - AsyncPostgresVectorStore - """ - vs = await cls.create( - engine, - embedding, - table_name, - schema_name, - content_column, - embedding_column, - metadata_columns, - ignore_metadata_columns, - id_column, - metadata_json_column, - distance_strategy, - k, - fetch_k, - lambda_mult, - index_query_options, - ) - await vs.aadd_texts(texts, metadatas=metadatas, ids=ids, **kwargs) - return vs - - @classmethod - async def afrom_documents( # type: ignore[override] - cls: type[AsyncPostgresVectorStore], - documents: list[Document], - embedding: Embeddings, - engine: PostgresEngine, - table_name: str, - schema_name: str = "public", - ids: Optional[list] = None, - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[str] = [], - ignore_metadata_columns: Optional[list[str]] = None, - id_column: str = "langchain_id", - metadata_json_column: str = "langchain_metadata", - distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY, - k: int = 4, - fetch_k: int = 20, - lambda_mult: float = 0.5, - index_query_options: Optional[QueryOptions] = None, - **kwargs: Any, - ) -> AsyncPostgresVectorStore: - """Create an AsyncPostgresVectorStore instance from documents. - - Args: - documents (list[Document]): Documents to add to the vector store. - embedding (Embeddings): Text embedding model to use. - engine (PostgresEngine): Connection pool engine for managing connections to Postgres database. - table_name (str): Name of the existing table or the table to be created. - schema_name (str, optional): Database schema name of the table. Defaults to "public". - metadatas (Optional[list[dict]]): List of metadatas to add to table records. - ids: (Optional[list[str]]): List of IDs to add to table records. - content_column (str): Column that represent a Document's page_content. Defaults to "content". - embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". 
- metadata_columns (list[str]): Column(s) that represent a document's metadata. - ignore_metadata_columns (list[str]): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None. - id_column (str): Column that represents the Document's id. Defaults to "langchain_id". - metadata_json_column (str): Column to store metadata as JSON. Defaults to "langchain_metadata". - distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE. - k (int): Number of Documents to return from search. Defaults to 4. - fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. - lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. - index_query_options (QueryOptions): Index query option. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. 
- - Returns: - AsyncPostgresVectorStore - """ - vs = await cls.create( - engine, - embedding, - table_name, - schema_name, - content_column, - embedding_column, - metadata_columns, - ignore_metadata_columns, - id_column, - metadata_json_column, - distance_strategy, - k, - fetch_k, - lambda_mult, - index_query_options, - ) - texts = [doc.page_content for doc in documents] - metadatas = [doc.metadata for doc in documents] - await vs.aadd_texts(texts, metadatas=metadatas, ids=ids, **kwargs) - return vs - - async def __query_collection( - self, - embedding: list[float], - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> Sequence[RowMapping]: - """Perform similarity search query on the vector store table.""" - k = k if k else self.k - operator = self.distance_strategy.operator - search_function = self.distance_strategy.search_function - - columns = self.metadata_columns + [ - self.id_column, - self.content_column, - self.embedding_column, - ] - if self.metadata_json_column: - columns.append(self.metadata_json_column) - - column_names = ", ".join(f'"{col}"' for col in columns) - - if filter and isinstance(filter, dict): - filter = self._create_filter_clause(filter) - filter = f"WHERE {filter}" if filter else "" - embedding_string = f"'{[float(dimension) for dimension in embedding]}'" - stmt = f'SELECT {column_names}, {search_function}({self.embedding_column}, {embedding_string}) as distance FROM "{self.schema_name}"."{self.table_name}" {filter} ORDER BY {self.embedding_column} {operator} {embedding_string} LIMIT {k};' - if self.index_query_options: - async with self.pool.connect() as conn: - await conn.execute( - text(f"SET LOCAL {self.index_query_options.to_string()};") - ) - result = await conn.execute(text(stmt)) - result_map = result.mappings() - results = result_map.fetchall() - else: - async with self.pool.connect() as conn: - result = await conn.execute(text(stmt)) - result_map = result.mappings() - results = 
result_map.fetchall() - return results - - async def asimilarity_search( - self, - query: str, - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - """Return docs selected by similarity search on query.""" - embedding = self.embedding_service.embed_query(text=query) - - return await self.asimilarity_search_by_vector( - embedding=embedding, k=k, filter=filter, **kwargs - ) - - def _select_relevance_score_fn(self) -> Callable[[float], float]: - """Select a relevance function based on distance strategy.""" - # Calculate distance strategy provided in - # vectorstore constructor - if self.distance_strategy == DistanceStrategy.COSINE_DISTANCE: - return self._cosine_relevance_score_fn - if self.distance_strategy == DistanceStrategy.INNER_PRODUCT: - return self._max_inner_product_relevance_score_fn - elif self.distance_strategy == DistanceStrategy.EUCLIDEAN: - return self._euclidean_relevance_score_fn - - async def asimilarity_search_with_score( - self, - query: str, - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[tuple[Document, float]]: - """Return docs and distance scores selected by similarity search on query.""" - embedding = self.embedding_service.embed_query(query) - docs = await self.asimilarity_search_with_score_by_vector( - embedding=embedding, k=k, filter=filter, **kwargs - ) - return docs - - async def asimilarity_search_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - """Return docs selected by vector similarity search.""" - docs_and_scores = await self.asimilarity_search_with_score_by_vector( - embedding=embedding, k=k, filter=filter, **kwargs - ) - - return [doc for doc, _ in docs_and_scores] - - async def asimilarity_search_with_score_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - filter: 
Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[tuple[Document, float]]: - """Return docs and distance scores selected by vector similarity search.""" - results = await self.__query_collection( - embedding=embedding, k=k, filter=filter, **kwargs - ) - - documents_with_scores = [] - for row in results: - metadata = ( - row[self.metadata_json_column] - if self.metadata_json_column and row[self.metadata_json_column] - else {} - ) - for col in self.metadata_columns: - metadata[col] = row[col] - documents_with_scores.append( - ( - Document( - page_content=row[self.content_column], - metadata=metadata, - id=str(row[self.id_column]), - ), - row["distance"], - ) - ) - - return documents_with_scores - - async def amax_marginal_relevance_search( - self, - query: str, - k: Optional[int] = None, - fetch_k: Optional[int] = None, - lambda_mult: Optional[float] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - """Return docs selected using the maximal marginal relevance.""" - embedding = self.embedding_service.embed_query(text=query) - - return await self.amax_marginal_relevance_search_by_vector( - embedding=embedding, - k=k, - fetch_k=fetch_k, - lambda_mult=lambda_mult, - filter=filter, - **kwargs, - ) - - async def amax_marginal_relevance_search_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - fetch_k: Optional[int] = None, - lambda_mult: Optional[float] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - """Return docs selected using the maximal marginal relevance.""" - docs_and_scores = ( - await self.amax_marginal_relevance_search_with_score_by_vector( - embedding, - k=k, - fetch_k=fetch_k, - lambda_mult=lambda_mult, - filter=filter, - **kwargs, - ) - ) - - return [result[0] for result in docs_and_scores] - - async def amax_marginal_relevance_search_with_score_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - 
fetch_k: Optional[int] = None, - lambda_mult: Optional[float] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[tuple[Document, float]]: - """Return docs and distance scores selected using the maximal marginal relevance.""" - results = await self.__query_collection( - embedding=embedding, k=fetch_k, filter=filter, **kwargs - ) - - k = k if k else self.k - fetch_k = fetch_k if fetch_k else self.fetch_k - lambda_mult = lambda_mult if lambda_mult else self.lambda_mult - embedding_list = [json.loads(row[self.embedding_column]) for row in results] - mmr_selected = utils.maximal_marginal_relevance( - np.array(embedding, dtype=np.float32), - embedding_list, - k=k, - lambda_mult=lambda_mult, - ) - - documents_with_scores = [] - for row in results: - metadata = ( - row[self.metadata_json_column] - if self.metadata_json_column and row[self.metadata_json_column] - else {} - ) - for col in self.metadata_columns: - metadata[col] = row[col] - documents_with_scores.append( - ( - Document( - page_content=row[self.content_column], - metadata=metadata, - id=str(row[self.id_column]), - ), - row["distance"], - ) - ) - - return [r for i, r in enumerate(documents_with_scores) if i in mmr_selected] - - async def aapply_vector_index( - self, - index: BaseIndex, - name: Optional[str] = None, - concurrently: bool = False, - ) -> None: - """Create an index on the vector store table.""" - if isinstance(index, ExactNearestNeighbor): - await self.adrop_vector_index() - return - - filter = f"WHERE ({index.partial_indexes})" if index.partial_indexes else "" - params = "WITH " + index.index_options() - function = index.distance_strategy.index_function - if name is None: - if index.name == None: - index.name = self.table_name + DEFAULT_INDEX_NAME_SUFFIX - name = index.name - stmt = f'CREATE INDEX {"CONCURRENTLY" if concurrently else ""} {name} ON "{self.schema_name}"."{self.table_name}" USING {index.index_type} ({self.embedding_column} {function}) {params} 
{filter};' - if concurrently: - async with self.pool.connect() as conn: - await conn.execute(text("COMMIT")) - await conn.execute(text(stmt)) - else: - async with self.pool.connect() as conn: - await conn.execute(text(stmt)) - await conn.commit() - - async def areindex(self, index_name: Optional[str] = None) -> None: - """Re-index the vector store table.""" - index_name = index_name or self.table_name + DEFAULT_INDEX_NAME_SUFFIX - query = f"REINDEX INDEX {index_name};" - async with self.pool.connect() as conn: - await conn.execute(text(query)) - await conn.commit() - - async def adrop_vector_index( - self, - index_name: Optional[str] = None, - ) -> None: - """Drop the vector index.""" - index_name = index_name or self.table_name + DEFAULT_INDEX_NAME_SUFFIX - query = f"DROP INDEX IF EXISTS {index_name};" - async with self.pool.connect() as conn: - await conn.execute(text(query)) - await conn.commit() - - async def is_valid_index( - self, - index_name: Optional[str] = None, - ) -> bool: - """Check if index exists in the table.""" - index_name = index_name or self.table_name + DEFAULT_INDEX_NAME_SUFFIX - stmt = f""" - SELECT tablename, indexname - FROM pg_indexes - WHERE tablename = '{self.table_name}' AND schemaname = '{self.schema_name}' AND indexname = '{index_name}'; - """ - async with self.pool.connect() as conn: - result = await conn.execute(text(stmt)) - result_map = result.mappings() - results = result_map.fetchall() - - return bool(len(results) == 1) - - def _handle_field_filter( - self, - field: str, - value: Any, - ) -> str: - """Create a filter for a specific field. 
- Args: - field: name of field - value: value to filter - If provided as is then this will be an equality filter - If provided as a dictionary then this will be a filter, the key - will be the operator and the value will be the value to filter by - Returns: - sql where query as a string - """ - if not isinstance(field, str): - raise ValueError( - f"field should be a string but got: {type(field)} with value: {field}" - ) - - if field.startswith("$"): - raise ValueError( - f"Invalid filter condition. Expected a field but got an operator: " - f"{field}" - ) - - # Allow [a-zA-Z0-9_], disallow $ for now until we support escape characters - if not field.isidentifier(): - raise ValueError( - f"Invalid field name: {field}. Expected a valid identifier." - ) - - if isinstance(value, dict): - # This is a filter specification - if len(value) != 1: - raise ValueError( - "Invalid filter condition. Expected a value which " - "is a dictionary with a single key that corresponds to an operator " - f"but got a dictionary with {len(value)} keys. The first few " - f"keys are: {list(value.keys())[:3]}" - ) - operator, filter_value = list(value.items())[0] - # Verify that that operator is an operator - if operator not in SUPPORTED_OPERATORS: - raise ValueError( - f"Invalid operator: {operator}. 
" - f"Expected one of {SUPPORTED_OPERATORS}" - ) - else: # Then we assume an equality operator - operator = "$eq" - filter_value = value - - if operator in COMPARISONS_TO_NATIVE: - # Then we implement an equality filter - # native is trusted input - if isinstance(filter_value, str): - filter_value = f"'{filter_value}'" - native = COMPARISONS_TO_NATIVE[operator] - return f"({field} {native} {filter_value})" - elif operator == "$between": - # Use AND with two comparisons - low, high = filter_value - - return f"({field} BETWEEN {low} AND {high})" - elif operator in {"$in", "$nin", "$like", "$ilike"}: - # We'll do force coercion to text - if operator in {"$in", "$nin"}: - for val in filter_value: - if not isinstance(val, (str, int, float)): - raise NotImplementedError( - f"Unsupported type: {type(val)} for value: {val}" - ) - - if isinstance(val, bool): # b/c bool is an instance of int - raise NotImplementedError( - f"Unsupported type: {type(val)} for value: {val}" - ) - - if operator in {"$in"}: - values = str(tuple(val for val in filter_value)) - return f"({field} IN {values})" - elif operator in {"$nin"}: - values = str(tuple(val for val in filter_value)) - return f"({field} NOT IN {values})" - elif operator in {"$like"}: - return f"({field} LIKE '{filter_value}')" - elif operator in {"$ilike"}: - return f"({field} ILIKE '{filter_value}')" - else: - raise NotImplementedError() - elif operator == "$exists": - if not isinstance(filter_value, bool): - raise ValueError( - "Expected a boolean value for $exists " - f"operator, but got: {filter_value}" - ) - else: - if filter_value: - return f"({field} IS NOT NULL)" - else: - return f"({field} IS NULL)" - else: - raise NotImplementedError() - - def _create_filter_clause(self, filters: Any) -> str: - """Create LangChain filter representation to matching SQL where clauses - Args: - filters: Dictionary of filters to apply to the query. - Returns: - String containing the sql where query. 
- """ - - if not isinstance(filters, dict): - raise ValueError( - f"Invalid type: Expected a dictionary but got type: {type(filters)}" - ) - if len(filters) == 1: - # The only operators allowed at the top level are $AND, $OR, and $NOT - # First check if an operator or a field - key, value = list(filters.items())[0] - if key.startswith("$"): - # Then it's an operator - if key.lower() not in ["$and", "$or", "$not"]: - raise ValueError( - f"Invalid filter condition. Expected $and, $or or $not " - f"but got: {key}" - ) - else: - # Then it's a field - return self._handle_field_filter(key, filters[key]) - - if key.lower() == "$and" or key.lower() == "$or": - if not isinstance(value, list): - raise ValueError( - f"Expected a list, but got {type(value)} for value: {value}" - ) - op = key[1:].upper() # Extract the operator - filter_clause = [self._create_filter_clause(el) for el in value] - if len(filter_clause) > 1: - return f"({f' {op} '.join(filter_clause)})" - elif len(filter_clause) == 1: - return filter_clause[0] - else: - raise ValueError( - "Invalid filter condition. Expected a dictionary " - "but got an empty dictionary" - ) - elif key.lower() == "$not": - if isinstance(value, list): - not_conditions = [ - self._create_filter_clause(item) for item in value - ] - not_stmts = [f"NOT {condition}" for condition in not_conditions] - return f"({' AND '.join(not_stmts)})" - elif isinstance(value, dict): - not_ = self._create_filter_clause(value) - return f"(NOT {not_})" - else: - raise ValueError( - f"Invalid filter condition. Expected a dictionary " - f"or a list but got: {type(value)}" - ) - else: - raise ValueError( - f"Invalid filter condition. Expected $and, $or or $not " - f"but got: {key}" - ) - elif len(filters) > 1: - # Then all keys have to be fields (they cannot be operators) - for key in filters.keys(): - if key.startswith("$"): - raise ValueError( - f"Invalid filter condition. 
Expected a field but got: {key}" - ) - # These should all be fields and combined using an $and operator - and_ = [self._handle_field_filter(k, v) for k, v in filters.items()] - if len(and_) > 1: - return f"({' AND '.join(and_)})" - elif len(and_) == 1: - return and_[0] - else: - raise ValueError( - "Invalid filter condition. Expected a dictionary " - "but got an empty dictionary" - ) - else: - return "" - - def get_by_ids(self, ids: Sequence[str]) -> list[Document]: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." - ) - - def similarity_search( - self, - query: str, - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." - ) - - def add_texts( - self, - texts: Iterable[str], - metadatas: Optional[list[dict]] = None, - ids: Optional[list] = None, - **kwargs: Any, - ) -> list[str]: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." - ) - - def add_documents( - self, - documents: list[Document], - ids: Optional[list] = None, - **kwargs: Any, - ) -> list[str]: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." - ) - - def delete( - self, - ids: Optional[list] = None, - **kwargs: Any, - ) -> Optional[bool]: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." 
- ) - - @classmethod - def from_texts( # type: ignore[override] - cls: type[AsyncPostgresVectorStore], - texts: list[str], - embedding: Embeddings, - engine: PostgresEngine, - table_name: str, - metadatas: Optional[list[dict]] = None, - ids: Optional[list] = None, - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[str] = [], - ignore_metadata_columns: Optional[list[str]] = None, - id_column: str = "langchain_id", - metadata_json_column: str = "langchain_metadata", - **kwargs: Any, - ) -> AsyncPostgresVectorStore: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." - ) - - @classmethod - def from_documents( # type: ignore[override] - cls: type[AsyncPostgresVectorStore], - documents: list[Document], - embedding: Embeddings, - engine: PostgresEngine, - table_name: str, - ids: Optional[list] = None, - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[str] = [], - ignore_metadata_columns: Optional[list[str]] = None, - id_column: str = "langchain_id", - metadata_json_column: str = "langchain_metadata", - **kwargs: Any, - ) -> AsyncPostgresVectorStore: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." - ) - - def similarity_search_with_score( - self, - query: str, - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[tuple[Document, float]]: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." 
- ) - - def similarity_search_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." - ) - - def similarity_search_with_score_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[tuple[Document, float]]: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." - ) - - def max_marginal_relevance_search( - self, - query: str, - k: Optional[int] = None, - fetch_k: Optional[int] = None, - lambda_mult: Optional[float] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." - ) - - def max_marginal_relevance_search_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - fetch_k: Optional[int] = None, - lambda_mult: Optional[float] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." - ) - - def max_marginal_relevance_search_with_score_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - fetch_k: Optional[int] = None, - lambda_mult: Optional[float] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[tuple[Document, float]]: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." 
- ) + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) diff --git a/src/langchain_google_cloud_sql_pg/engine.py b/src/langchain_google_cloud_sql_pg/engine.py index c40462b5..102467bd 100644 --- a/src/langchain_google_cloud_sql_pg/engine.py +++ b/src/langchain_google_cloud_sql_pg/engine.py @@ -24,6 +24,7 @@ import google.auth # type: ignore import google.auth.transport.requests # type: ignore from google.cloud.sql.connector import Connector, IPTypes, RefreshStrategy +from langchain_postgres import Column, PGEngine from sqlalchemy import MetaData, Table, text from sqlalchemy.engine import URL from sqlalchemy.exc import InvalidRequestError @@ -78,58 +79,10 @@ async def _get_iam_principal_email( return email.replace(".gserviceaccount.com", "") -@dataclass -class Column: - name: str - data_type: str - nullable: bool = True - - def __post_init__(self): - """Check if initialization parameters are valid. - - Raises: - ValueError: Raises error if Column name is not string. - ValueError: Raises error if data_type is not type string. - """ - if not isinstance(self.name, str): - raise ValueError("Column name must be type string") - if not isinstance(self.data_type, str): - raise ValueError("Column data_type must be type string") - - -class PostgresEngine: +class PostgresEngine(PGEngine): """A class for managing connections to a Cloud SQL for Postgres database.""" _connector: Optional[Connector] = None - _default_loop: Optional[asyncio.AbstractEventLoop] = None - _default_thread: Optional[Thread] = None - __create_key = object() - - def __init__( - self, - key: object, - pool: AsyncEngine, - loop: Optional[asyncio.AbstractEventLoop], - thread: Optional[Thread], - ): - """PostgresEngine constructor. - - Args: - key (object): Prevent direct constructor usage. - pool (AsyncEngine): Async engine connection pool. - loop (Optional[asyncio.AbstractEventLoop]): Async event loop used to create the engine. 
- thread (Optional[Thread]): Thread used to create the engine async. - - Raises: - Exception: If the constructor is called directly by the user. - """ - if key != PostgresEngine.__create_key: - raise Exception( - "Only create class through 'create' or 'create_sync' methods!" - ) - self._pool = pool - self._loop = loop - self._thread = thread @classmethod async def _create( @@ -219,7 +172,7 @@ async def getconn() -> asyncpg.Connection: async_creator=getconn, **engine_args, ) - return cls(cls.__create_key, engine, loop, thread) + return cls(PGEngine._PGEngine__create_key, engine, loop, thread) # type: ignore @classmethod def __start_background_loop( @@ -354,13 +307,22 @@ async def afrom_instance( return await asyncio.wrap_future(future) @classmethod - def from_engine( + def from_connection_string( cls, - engine: AsyncEngine, - loop: Optional[asyncio.AbstractEventLoop] = None, + url: str | URL, + **kwargs: Any, ) -> PostgresEngine: - """Create an PostgresEngine instance from an AsyncEngine.""" - return cls(cls.__create_key, engine, loop, None) + """Create a PostgresEngine instance from arguments. These parameters are passed directly into sqlalchemy's create_async_engine function.
+ Args: + url (Union[str | URL]): the URL used to connect to a database + **kwargs (Any, optional): sqlalchemy `create_async_engine` arguments + Raises: + ValueError: If `postgresql+asyncpg` is not specified as the PG driver + Returns: + PostgresEngine + """ + + return PostgresEngine.from_engine_args(url=url, **kwargs) @classmethod def from_engine_args( @@ -396,198 +358,7 @@ def from_engine_args( raise ValueError("Driver must be type 'postgresql+asyncpg'") engine = create_async_engine(url, **kwargs) - return cls(cls.__create_key, engine, cls._default_loop, cls._default_thread) - - async def _run_as_async(self, coro: Awaitable[T]) -> T: - """Run an async coroutine asynchronously""" - # If a loop has not been provided, attempt to run in current thread - if not self._loop: - return await coro - # Otherwise, run in the background thread - return await asyncio.wrap_future( - asyncio.run_coroutine_threadsafe(coro, self._loop) - ) - - def _run_as_sync(self, coro: Awaitable[T]) -> T: - """Run an async coroutine synchronously""" - if not self._loop: - raise Exception( - "Engine was initialized without a background loop and cannot call sync methods." - ) - return asyncio.run_coroutine_threadsafe(coro, self._loop).result() - - async def close(self) -> None: - """Dispose of connection pool""" - await self._run_as_async(self._pool.dispose()) - - async def _ainit_vectorstore_table( - self, - table_name: str, - vector_size: int, - schema_name: str = "public", - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[Column] = [], - metadata_json_column: str = "langchain_metadata", - id_column: Union[str, Column] = "langchain_id", - overwrite_existing: bool = False, - store_metadata: bool = True, - ) -> None: - """ - Create a table for saving of vectors to be used with PostgresVectorStore. - - Args: - table_name (str): The Postgres database table name. - vector_size (int): Vector size for the embedding model to be used. 
- schema_name (str): The schema name to store Postgres database table. - Default: "public". - content_column (str): Name of the column to store document content. - Default: "page_content". - embedding_column (str) : Name of the column to store vector embeddings. - Default: "embedding". - metadata_columns (list[Column]): A list of Columns to create for custom - metadata. Default: []. Optional. - metadata_json_column (str): The column to store extra metadata in JSON format. - Default: "langchain_metadata". Optional. - id_column (Union[str, Column]) : Column to store ids. - Default: "langchain_id" column name with data type UUID. Optional. - overwrite_existing (bool): Whether to drop existing table. Default: False. - store_metadata (bool): Whether to store metadata in the table. - Default: True. - Raises: - :class:`DuplicateTableError `: if table already exists and overwrite flag is not set. - :class:`UndefinedObjectError `: if the data type of the id column is not a postgreSQL data type. - """ - async with self._pool.connect() as conn: - await conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector")) - await conn.commit() - - if overwrite_existing: - async with self._pool.connect() as conn: - await conn.execute( - text(f'DROP TABLE IF EXISTS "{schema_name}"."{table_name}"') - ) - await conn.commit() - - id_data_type = "UUID" if isinstance(id_column, str) else id_column.data_type - id_column_name = id_column if isinstance(id_column, str) else id_column.name - - query = f"""CREATE TABLE "{schema_name}"."{table_name}"( - "{id_column_name}" {id_data_type} PRIMARY KEY, - "{content_column}" TEXT NOT NULL, - "{embedding_column}" vector({vector_size}) NOT NULL""" - for column in metadata_columns: - nullable = "NOT NULL" if not column.nullable else "" - query += f',\n"{column.name}" {column.data_type} {nullable}' - if store_metadata: - query += f""",\n"{metadata_json_column}" JSON""" - query += "\n);" - - async with self._pool.connect() as conn: - await 
conn.execute(text(query)) - await conn.commit() - - async def ainit_vectorstore_table( - self, - table_name: str, - vector_size: int, - schema_name: str = "public", - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[Column] = [], - metadata_json_column: str = "langchain_metadata", - id_column: Union[str, Column] = "langchain_id", - overwrite_existing: bool = False, - store_metadata: bool = True, - ) -> None: - """ - Create a table for saving of vectors to be used with PostgresVectorStore. - - Args: - table_name (str): The Postgres database table name. - vector_size (int): Vector size for the embedding model to be used. - schema_name (str): The schema name to store Postgres database table. - Default: "public". - content_column (str): Name of the column to store document content. - Default: "page_content". - embedding_column (str) : Name of the column to store vector embeddings. - Default: "embedding". - metadata_columns (list[Column]): A list of Columns to create for custom - metadata. Default: []. Optional. - metadata_json_column (str): The column to store extra metadata in JSON format. - Default: "langchain_metadata". Optional. - id_column (Union[str, Column]) : Column to store ids. - Default: "langchain_id" column name with data type UUID. Optional. - overwrite_existing (bool): Whether to drop existing table. Default: False. - store_metadata (bool): Whether to store metadata in the table. - Default: True. 
- """ - await self._run_as_async( - self._ainit_vectorstore_table( - table_name, - vector_size, - schema_name, - content_column, - embedding_column, - metadata_columns, - metadata_json_column, - id_column, - overwrite_existing, - store_metadata, - ) - ) - - def init_vectorstore_table( - self, - table_name: str, - vector_size: int, - schema_name: str = "public", - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[Column] = [], - metadata_json_column: str = "langchain_metadata", - id_column: Union[str, Column] = "langchain_id", - overwrite_existing: bool = False, - store_metadata: bool = True, - ) -> None: - """ - Create a table for saving of vectors to be used with PostgresVectorStore. - - Args: - table_name (str): The Postgres database table name. - vector_size (int): Vector size for the embedding model to be used. - schema_name (str): The schema name to store Postgres database table. - Default: "public". - content_column (str): Name of the column to store document content. - Default: "page_content". - embedding_column (str) : Name of the column to store vector embeddings. - Default: "embedding". - metadata_columns (list[Column]): A list of Columns to create for custom - metadata. Default: []. Optional. - metadata_json_column (str): The column to store extra metadata in JSON format. - Default: "langchain_metadata". Optional. - id_column (Union[str, Column]) : Column to store ids. - Default: "langchain_id" column name with data type UUID. Optional. - overwrite_existing (bool): Whether to drop existing table. Default: False. - store_metadata (bool): Whether to store metadata in the table. - Default: True. - Raises: - :class:`UndefinedObjectError `: if the `ids` data type does not match that of the `id_column`. 
- """ - self._run_as_sync( - self._ainit_vectorstore_table( - table_name, - vector_size, - schema_name, - content_column, - embedding_column, - metadata_columns, - metadata_json_column, - id_column, - overwrite_existing, - store_metadata, - ) - ) + return cls(PGEngine._PGEngine__create_key, engine, cls._default_loop, cls._default_thread) # type: ignore async def _ainit_chat_history_table( self, table_name: str, schema_name: str = "public" diff --git a/src/langchain_google_cloud_sql_pg/indexes.py b/src/langchain_google_cloud_sql_pg/indexes.py index 18d7a740..7f5dd187 100644 --- a/src/langchain_google_cloud_sql_pg/indexes.py +++ b/src/langchain_google_cloud_sql_pg/indexes.py @@ -12,94 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -import enum -from abc import ABC, abstractmethod -from dataclasses import dataclass, field -from typing import Optional - - -@dataclass -class StrategyMixin: - operator: str - search_function: str - index_function: str - - -class DistanceStrategy(StrategyMixin, enum.Enum): - """Enumerator of the Distance strategies.""" - - EUCLIDEAN = "<->", "l2_distance", "vector_l2_ops" - COSINE_DISTANCE = "<=>", "cosine_distance", "vector_cosine_ops" - INNER_PRODUCT = "<#>", "inner_product", "vector_ip_ops" - - -DEFAULT_DISTANCE_STRATEGY = DistanceStrategy.COSINE_DISTANCE -DEFAULT_INDEX_NAME_SUFFIX: str = "langchainvectorindex" - - -@dataclass -class BaseIndex(ABC): - name: Optional[str] = None - index_type: str = "base" - distance_strategy: DistanceStrategy = field( - default_factory=lambda: DistanceStrategy.COSINE_DISTANCE - ) - partial_indexes: Optional[list[str]] = None - - @abstractmethod - def index_options(self) -> str: - """Set index query options for vector store initialization.""" - raise NotImplementedError( - "index_options method must be implemented by subclass" - ) - - -@dataclass -class ExactNearestNeighbor(BaseIndex): - index_type: str = "exactnearestneighbor" - - 
-@dataclass -class HNSWIndex(BaseIndex): - index_type: str = "hnsw" - m: int = 16 - ef_construction: int = 64 - - def index_options(self) -> str: - """Set index query options for vector store initialization.""" - return f"(m = {self.m}, ef_construction = {self.ef_construction})" - - -@dataclass -class QueryOptions(ABC): - def to_string(self) -> str: - """Convert index attributes to string.""" - raise NotImplementedError("to_string method must be implemented by subclass") - - -@dataclass -class HNSWQueryOptions(QueryOptions): - ef_search: int = 40 - - def to_string(self): - """Convert index attributes to string.""" - return f"hnsw.ef_search = {self.ef_search}" - - -@dataclass -class IVFFlatIndex(BaseIndex): - index_type: str = "ivfflat" - lists: int = 100 - - def index_options(self) -> str: - """Set index query options for vector store initialization.""" - return f"(lists = {self.lists})" - - -@dataclass -class IVFFlatQueryOptions(QueryOptions): - probes: int = 1 - - def to_string(self): - """Convert index attributes to string.""" - return f"ivflfat.probes = {self.probes}" +from langchain_postgres.v2.indexes import ( + DEFAULT_DISTANCE_STRATEGY, + DEFAULT_INDEX_NAME_SUFFIX, + BaseIndex, + DistanceStrategy, + ExactNearestNeighbor, + HNSWIndex, + HNSWQueryOptions, + IVFFlatIndex, + IVFFlatQueryOptions, + QueryOptions, + StrategyMixin, +) diff --git a/src/langchain_google_cloud_sql_pg/vectorstore.py b/src/langchain_google_cloud_sql_pg/vectorstore.py index f5333fd6..75598b85 100644 --- a/src/langchain_google_cloud_sql_pg/vectorstore.py +++ b/src/langchain_google_cloud_sql_pg/vectorstore.py @@ -15,12 +15,12 @@ # TODO: Remove below import when minimum supported Python version is 3.10 from __future__ import annotations -from typing import Any, Callable, Iterable, Optional, Sequence +from typing import Optional -import numpy as np -from langchain_core.documents import Document from langchain_core.embeddings import Embeddings -from langchain_core.vectorstores import 
VectorStore +from langchain_postgres import PGVectorStore + +from langchain_google_cloud_sql_pg import HybridSearchConfig from .async_vectorstore import AsyncPostgresVectorStore from .engine import PostgresEngine @@ -32,41 +32,22 @@ ) -class PostgresVectorStore(VectorStore): +class PostgresVectorStore(PGVectorStore): """Google Cloud SQL for PostgreSQL Vector Store class""" - __create_key = object() - - def __init__( - self, key: object, engine: PostgresEngine, vs: AsyncPostgresVectorStore - ): - """PostgresVectorStore constructor. - Args: - key (object): Prevent direct constructor usage. - engine (PostgresEngine): Connection pool engine for managing connections to Postgres database. - vs (AsyncPostgresVectorstore): The async only VectorStore implementation - - Raises: - Exception: If called directly by user. - """ - if key != PostgresVectorStore.__create_key: - raise Exception( - "Only create class through 'create' or 'create_sync' methods!" - ) - - self._engine = engine - self.__vs = vs + _engine: PostgresEngine + __vs: AsyncPostgresVectorStore @classmethod async def create( cls, - engine: PostgresEngine, + engine: PostgresEngine, # type: ignore embedding_service: Embeddings, table_name: str, schema_name: str = "public", content_column: str = "content", embedding_column: str = "embedding", - metadata_columns: list[str] = [], + metadata_columns: Optional[list[str]] = None, ignore_metadata_columns: Optional[list[str]] = None, id_column: str = "langchain_id", metadata_json_column: Optional[str] = "langchain_metadata", @@ -75,6 +56,7 @@ async def create( fetch_k: int = 20, lambda_mult: float = 0.5, index_query_options: Optional[QueryOptions] = None, + hybrid_search_config: Optional[HybridSearchConfig] = None, ) -> PostgresVectorStore: """Create a new PostgresVectorStore instance. @@ -94,6 +76,7 @@ async def create( fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. 
lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. index_query_options (QueryOptions): Index query option. + hybrid_search_config (HybridSearchConfig): Hybrid search configuration. Defaults to None. Returns: PostgresVectorStore @@ -102,32 +85,33 @@ async def create( engine, embedding_service, table_name, - schema_name, - content_column, - embedding_column, - metadata_columns, - ignore_metadata_columns, - id_column, - metadata_json_column, - distance_strategy, - k, - fetch_k, - lambda_mult, - index_query_options, + schema_name=schema_name, + content_column=content_column, + embedding_column=embedding_column, + metadata_columns=metadata_columns, + ignore_metadata_columns=ignore_metadata_columns, + metadata_json_column=metadata_json_column, + id_column=id_column, + distance_strategy=distance_strategy, + k=k, + fetch_k=fetch_k, + lambda_mult=lambda_mult, + index_query_options=index_query_options, + hybrid_search_config=hybrid_search_config, ) vs = await engine._run_as_async(coro) - return cls(cls.__create_key, engine, vs) + return cls(cls._PGVectorStore__create_key, engine, vs) # type: ignore @classmethod def create_sync( cls, - engine: PostgresEngine, + engine: PostgresEngine, # type: ignore embedding_service: Embeddings, table_name: str, schema_name: str = "public", content_column: str = "content", embedding_column: str = "embedding", - metadata_columns: list[str] = [], + metadata_columns: Optional[list[str]] = None, ignore_metadata_columns: Optional[list[str]] = None, id_column: str = "langchain_id", metadata_json_column: str = "langchain_metadata", @@ -136,6 +120,7 @@ def create_sync( fetch_k: int = 20, lambda_mult: float = 0.5, index_query_options: Optional[QueryOptions] = None, + hybrid_search_config: Optional[HybridSearchConfig] = None, ) -> PostgresVectorStore: """Create a new PostgresVectorStore instance. 
@@ -155,6 +140,7 @@ def create_sync( fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. index_query_options (QueryOptions): Index query option. + hybrid_search_config (HybridSearchConfig): Hybrid search configuration. Defaults to None. Returns: PostgresVectorStore @@ -163,661 +149,19 @@ def create_sync( engine, embedding_service, table_name, - schema_name, - content_column, - embedding_column, - metadata_columns, - ignore_metadata_columns, - id_column, - metadata_json_column, - distance_strategy, - k, - fetch_k, - lambda_mult, - index_query_options, + schema_name=schema_name, + content_column=content_column, + embedding_column=embedding_column, + metadata_columns=metadata_columns, + ignore_metadata_columns=ignore_metadata_columns, + metadata_json_column=metadata_json_column, + id_column=id_column, + distance_strategy=distance_strategy, + k=k, + fetch_k=fetch_k, + lambda_mult=lambda_mult, + index_query_options=index_query_options, + hybrid_search_config=hybrid_search_config, ) vs = engine._run_as_sync(coro) - return cls(cls.__create_key, engine, vs) - - @property - def embeddings(self) -> Embeddings: - return self.__vs.embedding_service - - async def aadd_texts( - self, - texts: Iterable[str], - metadatas: Optional[list[dict]] = None, - ids: Optional[list] = None, - **kwargs: Any, - ) -> list[str]: - """Embed texts and add to the table. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. - """ - return await self._engine._run_as_async( - self.__vs.aadd_texts(texts, metadatas, ids, **kwargs) - ) - - def add_texts( - self, - texts: Iterable[str], - metadatas: Optional[list[dict]] = None, - ids: Optional[list] = None, - **kwargs: Any, - ) -> list[str]: - """Embed texts and add to the table. 
- - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. - """ - return self._engine._run_as_sync( - self.__vs.aadd_texts(texts, metadatas, ids, **kwargs) - ) - - async def aadd_documents( - self, - documents: list[Document], - ids: Optional[list] = None, - **kwargs: Any, - ) -> list[str]: - """Embed documents and add to the table. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. - """ - return await self._engine._run_as_async( - self.__vs.aadd_documents(documents, ids, **kwargs) - ) - - def add_documents( - self, - documents: list[Document], - ids: Optional[list] = None, - **kwargs: Any, - ) -> list[str]: - """Embed documents and add to the table. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. - """ - return self._engine._run_as_sync( - self.__vs.aadd_documents(documents, ids, **kwargs) - ) - - async def adelete( - self, - ids: Optional[list] = None, - **kwargs: Any, - ) -> Optional[bool]: - """Delete records from the table. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. - """ - return await self._engine._run_as_async(self.__vs.adelete(ids, **kwargs)) - - def delete( - self, - ids: Optional[list] = None, - **kwargs: Any, - ) -> Optional[bool]: - """Delete records from the table. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. 
- """ - return self._engine._run_as_sync(self.__vs.adelete(ids, **kwargs)) - - @classmethod - async def afrom_texts( # type: ignore[override] - cls: type[PostgresVectorStore], - texts: list[str], - embedding: Embeddings, - engine: PostgresEngine, - table_name: str, - schema_name: str = "public", - metadatas: Optional[list[dict]] = None, - ids: Optional[list] = None, - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[str] = [], - ignore_metadata_columns: Optional[list[str]] = None, - id_column: str = "langchain_id", - metadata_json_column: str = "langchain_metadata", - distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY, - k: int = 4, - fetch_k: int = 20, - lambda_mult: float = 0.5, - index_query_options: Optional[QueryOptions] = None, - ) -> PostgresVectorStore: - """Create an PostgresVectorStore instance from texts. - - Args: - texts (list[str]): Texts to add to the vector store. - embedding (Embeddings): Text embedding model to use. - engine (PostgresEngine): Connection pool engine for managing connections to Postgres database. - table_name (str): Name of the existing table or the table to be created. - schema_name (str, optional): Database schema name of the table. Defaults to "public". - metadatas (Optional[list[dict]]): List of metadatas to add to table records. - ids: (Optional[list]): List of IDs to add to table records. - content_column (str): Column that represent a Document’s page_content. Defaults to "content". - embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". - metadata_columns (list[str]): Column(s) that represent a document's metadata. - ignore_metadata_columns (list[str]): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None. - id_column (str): Column that represents the Document's id. Defaults to "langchain_id". 
- metadata_json_column (str): Column to store metadata as JSON. Defaults to "langchain_metadata". - distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE. - k (int): Number of Documents to return from search. Defaults to 4. - fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. - lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. - index_query_options (QueryOptions): Index query option. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. - - Returns: - PostgresVectorStore - """ - vs = await cls.create( - engine, - embedding, - table_name, - schema_name, - content_column, - embedding_column, - metadata_columns, - ignore_metadata_columns, - id_column, - metadata_json_column, - distance_strategy, - k, - fetch_k, - lambda_mult, - index_query_options, - ) - await vs.aadd_texts(texts, metadatas=metadatas, ids=ids) - return vs - - @classmethod - async def afrom_documents( # type: ignore[override] - cls: type[PostgresVectorStore], - documents: list[Document], - embedding: Embeddings, - engine: PostgresEngine, - table_name: str, - schema_name: str = "public", - ids: Optional[list] = None, - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[str] = [], - ignore_metadata_columns: Optional[list[str]] = None, - id_column: str = "langchain_id", - metadata_json_column: str = "langchain_metadata", - distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY, - k: int = 4, - fetch_k: int = 20, - lambda_mult: float = 0.5, - index_query_options: Optional[QueryOptions] = None, - ) -> PostgresVectorStore: - """Create an PostgresVectorStore instance from documents. - - Args: - documents (list[Document]): Documents to add to the vector store. 
- embedding (Embeddings): Text embedding model to use. - engine (PostgresEngine): Connection pool engine for managing connections to Postgres database. - table_name (str): Name of the existing table or the table to be created. - schema_name (str, optional): Database schema name of the table. Defaults to "public". - metadatas (Optional[list[dict]]): List of metadatas to add to table records. - ids: (Optional[list]): List of IDs to add to table records. - content_column (str): Column that represent a Document’s page_content. Defaults to "content". - embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". - metadata_columns (list[str]): Column(s) that represent a document's metadata. - ignore_metadata_columns (list[str]): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None. - id_column (str): Column that represents the Document's id. Defaults to "langchain_id". - metadata_json_column (str): Column to store metadata as JSON. Defaults to "langchain_metadata". - distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE. - k (int): Number of Documents to return from search. Defaults to 4. - fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. - lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. - index_query_options (QueryOptions): Index query option. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. 
- - Returns: - PostgresVectorStore - """ - vs = await cls.create( - engine, - embedding, - table_name, - schema_name, - content_column, - embedding_column, - metadata_columns, - ignore_metadata_columns, - id_column, - metadata_json_column, - distance_strategy, - k, - fetch_k, - lambda_mult, - index_query_options, - ) - await vs.aadd_documents(documents, ids=ids) - return vs - - @classmethod - def from_texts( # type: ignore[override] - cls: type[PostgresVectorStore], - texts: list[str], - embedding: Embeddings, - engine: PostgresEngine, - table_name: str, - schema_name: str = "public", - metadatas: Optional[list[dict]] = None, - ids: Optional[list] = None, - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[str] = [], - ignore_metadata_columns: Optional[list[str]] = None, - id_column: str = "langchain_id", - metadata_json_column: str = "langchain_metadata", - distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY, - k: int = 4, - fetch_k: int = 20, - lambda_mult: float = 0.5, - index_query_options: Optional[QueryOptions] = None, - ) -> PostgresVectorStore: - """Create an PostgresVectorStore instance from texts. - - Args: - texts (list[str]): Texts to add to the vector store. - embedding (Embeddings): Text embedding model to use. - engine (PostgresEngine): Connection pool engine for managing connections to Postgres database. - table_name (str): Name of the existing table or the table to be created. - schema_name (str, optional): Database schema name of the table. Defaults to "public". - metadatas (Optional[list[dict]]): List of metadatas to add to table records. - ids: (Optional[list]): List of IDs to add to table records. - content_column (str): Column that represent a Document’s page_content. Defaults to "content". - embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". 
- metadata_columns (list[str]): Column(s) that represent a document's metadata. - ignore_metadata_columns (list[str]): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None. - id_column (str): Column that represents the Document's id. Defaults to "langchain_id". - metadata_json_column (str): Column to store metadata as JSON. Defaults to "langchain_metadata". - distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE. - k (int): Number of Documents to return from search. Defaults to 4. - fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. - lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. - index_query_options (QueryOptions): Index query option. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. 
- - Returns: - PostgresVectorStore - """ - vs = cls.create_sync( - engine, - embedding, - table_name, - schema_name, - content_column, - embedding_column, - metadata_columns, - ignore_metadata_columns, - id_column, - metadata_json_column, - distance_strategy, - k, - fetch_k, - lambda_mult, - index_query_options, - ) - vs.add_texts(texts, metadatas=metadatas, ids=ids) - return vs - - @classmethod - def from_documents( # type: ignore[override] - cls: type[PostgresVectorStore], - documents: list[Document], - embedding: Embeddings, - engine: PostgresEngine, - table_name: str, - schema_name: str = "public", - ids: Optional[list] = None, - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[str] = [], - ignore_metadata_columns: Optional[list[str]] = None, - id_column: str = "langchain_id", - metadata_json_column: str = "langchain_metadata", - distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY, - k: int = 4, - fetch_k: int = 20, - lambda_mult: float = 0.5, - index_query_options: Optional[QueryOptions] = None, - ) -> PostgresVectorStore: - """Create an PostgresVectorStore instance from documents. - - Args: - documents (list[Document]): Documents to add to the vector store. - embedding (Embeddings): Text embedding model to use. - engine (PostgresEngine): Connection pool engine for managing connections to Postgres database. - table_name (str): Name of the existing table or the table to be created. - schema_name (str, optional): Database schema name of the table. Defaults to "public". - metadatas (Optional[list[dict]]): List of metadatas to add to table records. - ids: (Optional[list]): List of IDs to add to table records. - content_column (str): Column that represent a Document’s page_content. Defaults to "content". - embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". 
- metadata_columns (list[str]): Column(s) that represent a document's metadata. - ignore_metadata_columns (list[str]): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None. - id_column (str): Column that represents the Document's id. Defaults to "langchain_id". - metadata_json_column (str): Column to store metadata as JSON. Defaults to "langchain_metadata". - distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE. - k (int): Number of Documents to return from search. Defaults to 4. - fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. - lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. - index_query_options (QueryOptions): Index query option. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. 
- - Returns: - PostgresVectorStore - """ - vs = cls.create_sync( - engine, - embedding, - table_name, - schema_name, - content_column, - embedding_column, - metadata_columns, - ignore_metadata_columns, - id_column, - metadata_json_column, - distance_strategy, - k, - fetch_k, - lambda_mult, - index_query_options, - ) - vs.add_documents(documents, ids=ids) - return vs - - async def asimilarity_search( - self, - query: str, - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - """Return docs selected by similarity search on query.""" - return await self._engine._run_as_async( - self.__vs.asimilarity_search(query, k, filter, **kwargs) - ) - - def similarity_search( - self, - query: str, - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - """Return docs selected by similarity search on query.""" - return self._engine._run_as_sync( - self.__vs.asimilarity_search(query, k, filter, **kwargs) - ) - - # Required for (a)similarity_search_with_relevance_scores - def _select_relevance_score_fn(self) -> Callable[[float], float]: - """Select a relevance function based on distance strategy.""" - # Calculate distance strategy provided in vectorstore constructor - if self.__vs.distance_strategy == DistanceStrategy.COSINE_DISTANCE: - return self._cosine_relevance_score_fn - if self.__vs.distance_strategy == DistanceStrategy.INNER_PRODUCT: - return self._max_inner_product_relevance_score_fn - elif self.__vs.distance_strategy == DistanceStrategy.EUCLIDEAN: - return self._euclidean_relevance_score_fn - - async def asimilarity_search_with_score( - self, - query: str, - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[tuple[Document, float]]: - """Return docs and distance scores selected by similarity search on query.""" - return await self._engine._run_as_async( - 
self.__vs.asimilarity_search_with_score(query, k, filter, **kwargs) - ) - - def similarity_search_with_score( - self, - query: str, - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[tuple[Document, float]]: - """Return docs and distance scores selected by similarity search on query.""" - return self._engine._run_as_sync( - self.__vs.asimilarity_search_with_score(query, k, filter, **kwargs) - ) - - async def asimilarity_search_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - """Return docs selected by vector similarity search.""" - return await self._engine._run_as_async( - self.__vs.asimilarity_search_by_vector(embedding, k, filter, **kwargs) - ) - - def similarity_search_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - """Return docs selected by vector similarity search.""" - return self._engine._run_as_sync( - self.__vs.asimilarity_search_by_vector(embedding, k, filter, **kwargs) - ) - - async def asimilarity_search_with_score_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[tuple[Document, float]]: - """Return docs and distance scores selected by vector similarity search.""" - return await self._engine._run_as_async( - self.__vs.asimilarity_search_with_score_by_vector( - embedding, k, filter, **kwargs - ) - ) - - def similarity_search_with_score_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[tuple[Document, float]]: - """Return docs and distance scores selected by similarity search on vector.""" - return self._engine._run_as_sync( - self.__vs.asimilarity_search_with_score_by_vector( - 
embedding, k, filter, **kwargs - ) - ) - - async def amax_marginal_relevance_search( - self, - query: str, - k: Optional[int] = None, - fetch_k: Optional[int] = None, - lambda_mult: Optional[float] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - """Return docs selected using the maximal marginal relevance.""" - return await self._engine._run_as_async( - self.__vs.amax_marginal_relevance_search( - query, k, fetch_k, lambda_mult, filter, **kwargs - ) - ) - - def max_marginal_relevance_search( - self, - query: str, - k: Optional[int] = None, - fetch_k: Optional[int] = None, - lambda_mult: Optional[float] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - """Return docs selected using the maximal marginal relevance.""" - return self._engine._run_as_sync( - self.__vs.amax_marginal_relevance_search( - query, k, fetch_k, lambda_mult, filter, **kwargs - ) - ) - - async def amax_marginal_relevance_search_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - fetch_k: Optional[int] = None, - lambda_mult: Optional[float] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - """Return docs selected using the maximal marginal relevance.""" - return await self._engine._run_as_async( - self.__vs.amax_marginal_relevance_search_by_vector( - embedding, k, fetch_k, lambda_mult, filter, **kwargs - ) - ) - - def max_marginal_relevance_search_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - fetch_k: Optional[int] = None, - lambda_mult: Optional[float] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - """Return docs selected using the maximal marginal relevance.""" - return self._engine._run_as_sync( - self.__vs.amax_marginal_relevance_search_by_vector( - embedding, k, fetch_k, lambda_mult, filter, **kwargs - ) - ) - - async def 
amax_marginal_relevance_search_with_score_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - fetch_k: Optional[int] = None, - lambda_mult: Optional[float] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[tuple[Document, float]]: - """Return docs and distance scores selected using the maximal marginal relevance.""" - return await self._engine._run_as_async( - self.__vs.amax_marginal_relevance_search_with_score_by_vector( - embedding, k, fetch_k, lambda_mult, filter, **kwargs - ) - ) - - def max_marginal_relevance_search_with_score_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - fetch_k: Optional[int] = None, - lambda_mult: Optional[float] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[tuple[Document, float]]: - """Return docs and distance scores selected using the maximal marginal relevance.""" - return self._engine._run_as_sync( - self.__vs.amax_marginal_relevance_search_with_score_by_vector( - embedding, k, fetch_k, lambda_mult, filter, **kwargs - ) - ) - - async def aapply_vector_index( - self, - index: BaseIndex, - name: Optional[str] = None, - concurrently: bool = False, - ) -> None: - """Create an index on the vector store table.""" - return await self._engine._run_as_async( - self.__vs.aapply_vector_index(index, name, concurrently) - ) - - def apply_vector_index( - self, - index: BaseIndex, - name: Optional[str] = None, - concurrently: bool = False, - ) -> None: - """Create an index on the vector store table.""" - return self._engine._run_as_sync( - self.__vs.aapply_vector_index(index, name, concurrently) - ) - - async def areindex(self, index_name: Optional[str] = None) -> None: - """Re-index the vector store table.""" - return await self._engine._run_as_async(self.__vs.areindex(index_name)) - - def reindex(self, index_name: Optional[str] = None) -> None: - """Re-index the vector store table.""" - return 
self._engine._run_as_sync(self.__vs.areindex(index_name)) - - async def adrop_vector_index( - self, - index_name: Optional[str] = None, - ) -> None: - """Drop the vector index.""" - return await self._engine._run_as_async( - self.__vs.adrop_vector_index(index_name) - ) - - def drop_vector_index( - self, - index_name: Optional[str] = None, - ) -> None: - """Drop the vector index.""" - return self._engine._run_as_sync(self.__vs.adrop_vector_index(index_name)) - - async def ais_valid_index( - self, - index_name: Optional[str] = None, - ) -> bool: - """Check if index exists in the table.""" - return await self._engine._run_as_async(self.__vs.is_valid_index(index_name)) - - def is_valid_index( - self, - index_name: Optional[str] = None, - ) -> bool: - """Check if index exists in the table.""" - return self._engine._run_as_sync(self.__vs.is_valid_index(index_name)) - - async def aget_by_ids(self, ids: Sequence[str]) -> list[Document]: - """Get documents by ids.""" - return await self._engine._run_as_async(self.__vs.aget_by_ids(ids=ids)) - - def get_by_ids(self, ids: Sequence[str]) -> list[Document]: - """Get documents by ids.""" - return self._engine._run_as_sync(self.__vs.aget_by_ids(ids=ids)) + return cls(cls._PGVectorStore__create_key, engine, vs) # type: ignore diff --git a/tests/test_async_vectorstore.py b/tests/test_async_vectorstore.py index 12fb6506..d0e85d0b 100644 --- a/tests/test_async_vectorstore.py +++ b/tests/test_async_vectorstore.py @@ -28,7 +28,7 @@ DEFAULT_TABLE = "test_table" + str(uuid.uuid4()) DEFAULT_TABLE_SYNC = "test_table_sync" + str(uuid.uuid4()) -CUSTOM_TABLE = "test-table-custom" + str(uuid.uuid4()) +CUSTOM_TABLE = "table-custom" + str(uuid.uuid4()) VECTOR_SIZE = 768 embeddings_service = DeterministicFakeEmbedding(size=VECTOR_SIZE) diff --git a/tests/test_async_vectorstore_from_methods.py b/tests/test_async_vectorstore_from_methods.py index 59274f6a..529675c2 100644 --- a/tests/test_async_vectorstore_from_methods.py +++ 
b/tests/test_async_vectorstore_from_methods.py @@ -29,9 +29,7 @@ DEFAULT_TABLE = "test_table" + str(uuid.uuid4()).replace("-", "_") DEFAULT_TABLE_SYNC = "test_table_sync" + str(uuid.uuid4()).replace("-", "_") CUSTOM_TABLE = "test_table_custom" + str(uuid.uuid4()).replace("-", "_") -CUSTOM_TABLE_WITH_INT_ID = "test_table_custom_with_int_it" + str(uuid.uuid4()).replace( - "-", "_" -) +CUSTOM_TABLE_WITH_INT_ID = "custom_int" + str(uuid.uuid4()).replace("-", "_") VECTOR_SIZE = 768 diff --git a/tests/test_async_vectorstore_index.py b/tests/test_async_vectorstore_index.py index 68bc4e72..d45e114f 100644 --- a/tests/test_async_vectorstore_index.py +++ b/tests/test_async_vectorstore_index.py @@ -14,7 +14,6 @@ import os -import sys import uuid import pytest @@ -23,7 +22,10 @@ from langchain_core.embeddings import DeterministicFakeEmbedding from sqlalchemy import text -from langchain_google_cloud_sql_pg import PostgresEngine +from langchain_google_cloud_sql_pg import ( # type: ignore + HybridSearchConfig, + PostgresEngine, +) from langchain_google_cloud_sql_pg.async_vectorstore import AsyncPostgresVectorStore from langchain_google_cloud_sql_pg.indexes import ( DEFAULT_INDEX_NAME_SUFFIX, @@ -32,9 +34,11 @@ IVFFlatIndex, ) -DEFAULT_TABLE = "test_table" + str(uuid.uuid4()).replace("-", "_") -CUSTOM_TABLE = "test_table_custom" + str(uuid.uuid4()).replace("-", "_") -DEFAULT_INDEX_NAME = DEFAULT_TABLE + DEFAULT_INDEX_NAME_SUFFIX +UUID_STR = str(uuid.uuid4()).replace("-", "_") +DEFAULT_TABLE = "table" + UUID_STR +SIMPLE_TABLE = "simple" + UUID_STR +DEFAULT_HYBRID_TABLE = "hybrid" + UUID_STR +DEFAULT_INDEX_NAME = DEFAULT_INDEX_NAME_SUFFIX + UUID_STR VECTOR_SIZE = 768 embeddings_service = DeterministicFakeEmbedding(size=VECTOR_SIZE) @@ -90,11 +94,15 @@ async def engine(self, db_project, db_region, db_instance, db_name): ) yield engine await aexecute(engine, f"DROP TABLE IF EXISTS {DEFAULT_TABLE}") + await aexecute(engine, f"DROP TABLE IF EXISTS {DEFAULT_HYBRID_TABLE}") + await 
aexecute(engine, f"DROP TABLE IF EXISTS {SIMPLE_TABLE}") await engine.close() @pytest_asyncio.fixture(scope="class") async def vs(self, engine): - await engine._ainit_vectorstore_table(DEFAULT_TABLE, VECTOR_SIZE) + await engine._ainit_vectorstore_table( + DEFAULT_TABLE, VECTOR_SIZE, overwrite_existing=True + ) vs = await AsyncPostgresVectorStore.create( engine, embedding_service=embeddings_service, @@ -105,9 +113,26 @@ async def vs(self, engine): await vs.adrop_vector_index() yield vs - async def test_aapply_vector_index(self, vs): + async def test_apply_default_name_vector_index(self, engine): + await engine._ainit_vectorstore_table( + SIMPLE_TABLE, VECTOR_SIZE, overwrite_existing=True + ) + vs = await AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=SIMPLE_TABLE, + ) + await vs.aadd_texts(texts, ids=ids) + await vs.adrop_vector_index() index = HNSWIndex() await vs.aapply_vector_index(index) + assert await vs.is_valid_index() + await vs.adrop_vector_index() + + async def test_aapply_vector_index(self, vs): + await vs.adrop_vector_index(DEFAULT_INDEX_NAME) + index = HNSWIndex(name=DEFAULT_INDEX_NAME) + await vs.aapply_vector_index(index) assert await vs.is_valid_index(DEFAULT_INDEX_NAME) await vs.adrop_vector_index() @@ -115,18 +140,21 @@ async def test_areindex(self, vs): if not await vs.is_valid_index(DEFAULT_INDEX_NAME): index = HNSWIndex() await vs.aapply_vector_index(index) - await vs.areindex() + await vs.areindex(DEFAULT_INDEX_NAME) await vs.areindex(DEFAULT_INDEX_NAME) assert await vs.is_valid_index(DEFAULT_INDEX_NAME) await vs.adrop_vector_index() async def test_dropindex(self, vs): - await vs.adrop_vector_index() + await vs.adrop_vector_index(DEFAULT_INDEX_NAME) result = await vs.is_valid_index(DEFAULT_INDEX_NAME) assert not result async def test_aapply_vector_index_ivfflat(self, vs): - index = IVFFlatIndex(distance_strategy=DistanceStrategy.EUCLIDEAN) + await vs.adrop_vector_index(DEFAULT_INDEX_NAME) + index 
= IVFFlatIndex( + name=DEFAULT_INDEX_NAME, distance_strategy=DistanceStrategy.EUCLIDEAN + ) await vs.aapply_vector_index(index, concurrently=True) assert await vs.is_valid_index(DEFAULT_INDEX_NAME) index = IVFFlatIndex( @@ -136,8 +164,55 @@ async def test_aapply_vector_index_ivfflat(self, vs): await vs.aapply_vector_index(index) assert await vs.is_valid_index("secondindex") await vs.adrop_vector_index("secondindex") - await vs.adrop_vector_index() + await vs.adrop_vector_index(DEFAULT_INDEX_NAME) async def test_is_valid_index(self, vs): is_valid = await vs.is_valid_index("invalid_index") assert is_valid == False + + async def test_aapply_hybrid_search_index_table_without_tsv_column( + self, engine, vs + ): + # overwriting vs to get a hybrid vs + tsv_index_name = "index_without_tsv_column_" + UUID_STR + vs = await AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=DEFAULT_TABLE, + hybrid_search_config=HybridSearchConfig(index_name=tsv_index_name), + ) + is_valid_index = await vs.is_valid_index(tsv_index_name) + assert is_valid_index == False + await vs.aapply_hybrid_search_index() + assert await vs.is_valid_index(tsv_index_name) + await vs.adrop_vector_index(tsv_index_name) + is_valid_index = await vs.is_valid_index(tsv_index_name) + assert is_valid_index == False + + async def test_aapply_hybrid_search_index_table_with_tsv_column(self, engine): + tsv_index_name = "index_without_tsv_column_" + UUID_STR + config = HybridSearchConfig( + tsv_column="tsv_column", + tsv_lang="pg_catalog.english", + index_name=tsv_index_name, + ) + await engine._ainit_vectorstore_table( + DEFAULT_HYBRID_TABLE, + VECTOR_SIZE, + hybrid_search_config=config, + ) + vs = await AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=DEFAULT_HYBRID_TABLE, + hybrid_search_config=config, + ) + is_valid_index = await vs.is_valid_index(tsv_index_name) + assert is_valid_index == False + await 
vs.aapply_hybrid_search_index() + assert await vs.is_valid_index(tsv_index_name) + await vs.areindex(tsv_index_name) + assert await vs.is_valid_index(tsv_index_name) + await vs.adrop_vector_index(tsv_index_name) + is_valid_index = await vs.is_valid_index(tsv_index_name) + assert is_valid_index == False diff --git a/tests/test_async_vectorstore_search.py b/tests/test_async_vectorstore_search.py index 418dbbad..7e4effdf 100644 --- a/tests/test_async_vectorstore_search.py +++ b/tests/test_async_vectorstore_search.py @@ -22,15 +22,22 @@ from metadata_filtering_data import FILTERING_TEST_CASES, METADATAS from sqlalchemy import text -from langchain_google_cloud_sql_pg import Column, PostgresEngine +from langchain_google_cloud_sql_pg import ( # type: ignore + Column, + HybridSearchConfig, + PostgresEngine, + reciprocal_rank_fusion, + weighted_sum_ranking, +) from langchain_google_cloud_sql_pg.async_vectorstore import AsyncPostgresVectorStore from langchain_google_cloud_sql_pg.indexes import DistanceStrategy, HNSWQueryOptions DEFAULT_TABLE = "test_table" + str(uuid.uuid4()).replace("-", "_") CUSTOM_TABLE = "test_table_custom" + str(uuid.uuid4()).replace("-", "_") -CUSTOM_FILTER_TABLE = "test_table_custom_filter" + str(uuid.uuid4()).replace("-", "_") +CUSTOM_FILTER_TABLE = "custom_filter" + str(uuid.uuid4()).replace("-", "_") +HYBRID_SEARCH_TABLE1 = "hybrid1" + str(uuid.uuid4()).replace("-", "_") +HYBRID_SEARCH_TABLE2 = "hybrid2" + str(uuid.uuid4()).replace("-", "_") VECTOR_SIZE = 768 -sync_method_exception_str = "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." embeddings_service = DeterministicFakeEmbedding(size=VECTOR_SIZE) @@ -45,6 +52,19 @@ ] embeddings = [embeddings_service.embed_query("foo") for i in range(len(texts))] +# Documents designed for hybrid search testing +hybrid_docs_content = { + "hs_doc_apple_fruit": "An apple is a sweet and edible fruit produced by an apple tree. 
Apples are very common.", + "hs_doc_apple_tech": "Apple Inc. is a multinational technology company. Their latest tech is amazing.", + "hs_doc_orange_fruit": "The orange is the fruit of various citrus species. Oranges are tasty.", + "hs_doc_generic_tech": "Technology drives innovation in the modern world. Tech is evolving.", + "hs_doc_unrelated_cat": "A fluffy cat sat on a mat quietly observing a mouse.", +} +hybrid_docs = [ + Document(page_content=content, metadata={"doc_id_key": key}) + for key, content in hybrid_docs_content.items() +] + def get_env_var(key: str, desc: str) -> str: v = os.environ.get(key) @@ -92,6 +112,8 @@ async def engine(self, db_project, db_region, db_instance, db_name): await aexecute(engine, f"DROP TABLE IF EXISTS {DEFAULT_TABLE}") await aexecute(engine, f"DROP TABLE IF EXISTS {CUSTOM_TABLE}") await aexecute(engine, f"DROP TABLE IF EXISTS {CUSTOM_FILTER_TABLE}") + await aexecute(engine, f"DROP TABLE IF EXISTS {HYBRID_SEARCH_TABLE1}") + await aexecute(engine, f"DROP TABLE IF EXISTS {HYBRID_SEARCH_TABLE2}") await engine.close() @pytest_asyncio.fixture(scope="class") @@ -170,11 +192,54 @@ async def vs_custom_filter(self, engine): await vs_custom_filter.aadd_documents(filter_docs, ids=ids) yield vs_custom_filter + @pytest_asyncio.fixture(scope="class") + async def vs_hybrid_search_with_tsv_column(self, engine): + hybrid_search_config = HybridSearchConfig( + tsv_column="mycontent_tsv", + tsv_lang="pg_catalog.english", + fts_query="my_fts_query", + fusion_function=reciprocal_rank_fusion, + fusion_function_parameters={ + "rrf_k": 60, + "fetch_top_k": 10, + }, + ) + await engine._ainit_vectorstore_table( + HYBRID_SEARCH_TABLE1, + VECTOR_SIZE, + id_column=Column("myid", "TEXT"), + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=[ + Column("page", "TEXT"), + Column("source", "TEXT"), + Column("doc_id_key", "TEXT"), + ], + metadata_json_column="mymetadata", # ignored + store_metadata=False, + 
hybrid_search_config=hybrid_search_config, + ) + + vs_custom = await AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=HYBRID_SEARCH_TABLE1, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + metadata_json_column="mymetadata", + metadata_columns=["doc_id_key"], + index_query_options=HNSWQueryOptions(ef_search=1), + hybrid_search_config=hybrid_search_config, + ) + await vs_custom.aadd_documents(hybrid_docs) + yield vs_custom + async def test_asimilarity_search(self, vs): results = await vs.asimilarity_search("foo", k=1) assert len(results) == 1 assert results == [Document(page_content="foo", id=ids[0])] - results = await vs.asimilarity_search("foo", k=1, filter="content = 'bar'") + results = await vs.asimilarity_search("foo", k=1, filter={"content": "bar"}) assert results == [Document(page_content="bar", id=ids[1])] async def test_asimilarity_search_score(self, vs): @@ -242,7 +307,7 @@ async def test_amax_marginal_relevance_search(self, vs): results = await vs.amax_marginal_relevance_search("bar") assert results[0] == Document(page_content="bar", id=ids[1]) results = await vs.amax_marginal_relevance_search( - "bar", filter="content = 'boo'" + "bar", filter={"content": "boo"} ) assert results[0] == Document(page_content="boo", id=ids[3]) @@ -268,7 +333,7 @@ async def test_similarity_search(self, vs_custom): assert len(results) == 1 assert results == [Document(page_content="foo", id=ids[0])] results = await vs_custom.asimilarity_search( - "foo", k=1, filter="mycontent = 'bar'" + "foo", k=1, filter={"mycontent": "bar"} ) assert results == [Document(page_content="bar", id=ids[1])] @@ -291,7 +356,7 @@ async def test_max_marginal_relevance_search(self, vs_custom): results = await vs_custom.amax_marginal_relevance_search("bar") assert results[0] == Document(page_content="bar", id=ids[1]) results = await vs_custom.amax_marginal_relevance_search( - "bar", filter="mycontent = 'boo'" + "bar", 
filter={"mycontent": "boo"} ) assert results[0] == Document(page_content="boo", id=ids[3]) @@ -326,7 +391,7 @@ async def test_aget_by_ids_custom_vs(self, vs_custom): def test_get_by_ids(self, vs): test_ids = [ids[0]] - with pytest.raises(Exception, match=sync_method_exception_str): + with pytest.raises(Exception): vs.get_by_ids(ids=test_ids) @pytest.mark.parametrize("test_filter, expected_ids", FILTERING_TEST_CASES) @@ -341,3 +406,319 @@ async def test_vectorstore_with_metadata_filters( "meow", k=5, filter=test_filter ) assert [doc.metadata["code"] for doc in docs] == expected_ids, test_filter + + async def test_asimilarity_hybrid_search_rrk(self, vs): + results = await vs.asimilarity_search( + "foo", + k=1, + hybrid_search_config=HybridSearchConfig( + fusion_function=reciprocal_rank_fusion + ), + ) + assert len(results) == 1 + assert results == [Document(page_content="foo", id=ids[0])] + + results = await vs.asimilarity_search( + "bar", + k=1, + filter={"content": {"$ne": "baz"}}, + hybrid_search_config=HybridSearchConfig( + fusion_function=reciprocal_rank_fusion, + fusion_function_parameters={ + "rrf_k": 100, + "fetch_top_k": 10, + }, + primary_top_k=1, + secondary_top_k=1, + ), + ) + assert results == [Document(page_content="bar", id=ids[1])] + + async def test_hybrid_search_weighted_sum_default( + self, vs_hybrid_search_with_tsv_column + ): + """Test hybrid search with default weighted sum (0.5 vector, 0.5 FTS).""" + query = "apple" # Should match "apple" in FTS and vector + + # The vs_hybrid_search_with_tsv_column instance is already configured for hybrid search. + # NOTE(review): the fixture builds this store with fusion_function=reciprocal_rank_fusion + # and fts_query="my_fts_query", so the weighted-sum defaults may not be what runs here; confirm.
+ results_with_scores = ( + await vs_hybrid_search_with_tsv_column.asimilarity_search_with_score( + query, k=3 + ) + ) + + assert len(results_with_scores) > 1 + result_ids = [doc.metadata["doc_id_key"] for doc, score in results_with_scores] + + # Expect "hs_doc_apple_fruit" and "hs_doc_apple_tech" to be highly ranked. + assert "hs_doc_apple_fruit" in result_ids + + # Scores should be floats (fused scores) + for doc, score in results_with_scores: + assert isinstance(score, float) + + # Check if sorted by score (descending for weighted_sum_ranking with positive scores) + assert results_with_scores[0][1] >= results_with_scores[1][1] + + async def test_hybrid_search_weighted_sum_vector_bias( + self, vs_hybrid_search_with_tsv_column + ): + """Test weighted sum with higher weight for vector results.""" + query = "Apple Inc technology" # More specific for vector similarity + + config = HybridSearchConfig( + tsv_column="mycontent_tsv", # Must match table setup + fusion_function_parameters={ + "primary_results_weight": 0.8, # Vector bias + "secondary_results_weight": 0.2, + }, + # fts_query will default to main query + ) + results = await vs_hybrid_search_with_tsv_column.asimilarity_search( + query, k=2, hybrid_search_config=config + ) + result_ids = [doc.metadata["doc_id_key"] for doc in results] + + assert len(result_ids) > 0 + assert result_ids[0] == "hs_doc_orange_fruit" + + async def test_hybrid_search_weighted_sum_fts_bias( + self, vs_hybrid_search_with_tsv_column + ): + """Test weighted sum with higher weight for FTS results.""" + query = "fruit common tasty" # Strong FTS signal for fruit docs + + config = HybridSearchConfig( + tsv_column="mycontent_tsv", + fusion_function=weighted_sum_ranking, + fusion_function_parameters={ + "primary_results_weight": 0.01, + "secondary_results_weight": 0.99, # FTS bias + }, + ) + results = await vs_hybrid_search_with_tsv_column.asimilarity_search( + query, k=2, hybrid_search_config=config + ) + result_ids = 
[doc.metadata["doc_id_key"] for doc in results] + + assert len(result_ids) == 2 + assert "hs_doc_apple_fruit" in result_ids + + async def test_hybrid_search_reciprocal_rank_fusion( + self, vs_hybrid_search_with_tsv_column + ): + """Test hybrid search with Reciprocal Rank Fusion.""" + query = "technology company" + + # Configure RRF. primary_top_k and secondary_top_k control inputs to fusion. + # fusion_function_parameters.fetch_top_k controls output count from RRF. + config = HybridSearchConfig( + tsv_column="mycontent_tsv", + fusion_function=reciprocal_rank_fusion, + primary_top_k=3, # How many dense results to consider + secondary_top_k=3, # How many sparse results to consider + fusion_function_parameters={ + "rrf_k": 60, + "fetch_top_k": 2, + }, # RRF specific params + ) + # The `k` in asimilarity_search here is the final desired number of results, + # which should align with fusion_function_parameters.fetch_top_k for RRF. + results = await vs_hybrid_search_with_tsv_column.asimilarity_search( + query, k=2, hybrid_search_config=config + ) + result_ids = [doc.metadata["doc_id_key"] for doc in results] + + assert len(result_ids) == 2 + # "hs_doc_apple_tech" (FTS: technology, company; Vector: Apple Inc technology) + # "hs_doc_generic_tech" (FTS: technology; Vector: Technology drives innovation) + # RRF should combine these ranks. "hs_doc_apple_tech" is likely higher. + assert "hs_doc_apple_tech" in result_ids + assert result_ids[0] == "hs_doc_apple_tech" # Stronger combined signal + + async def test_hybrid_search_explicit_fts_query( + self, vs_hybrid_search_with_tsv_column + ): + """Test hybrid search when fts_query in HybridSearchConfig is different from main query.""" + main_vector_query = "Apple Inc." 
# For vector search + fts_specific_query = "fruit" # For FTS + + config = HybridSearchConfig( + tsv_column="mycontent_tsv", + fts_query=fts_specific_query, # Override FTS query + fusion_function_parameters={ # Using default weighted_sum_ranking + "primary_results_weight": 0.5, + "secondary_results_weight": 0.5, + }, + ) + results = await vs_hybrid_search_with_tsv_column.asimilarity_search( + main_vector_query, k=2, hybrid_search_config=config + ) + result_ids = [doc.metadata["doc_id_key"] for doc in results] + + # Vector search for "Apple Inc.": hs_doc_apple_tech + # FTS search for "fruit": hs_doc_apple_fruit, hs_doc_orange_fruit + # Combined: hs_doc_apple_fruit (strong FTS) and hs_doc_apple_tech (strong vector) are candidates. + # "hs_doc_apple_fruit" might get a boost if "Apple Inc." vector has some similarity to "apple fruit" doc. + assert len(result_ids) > 0 + assert ( + "hs_doc_apple_fruit" in result_ids + or "hs_doc_apple_tech" in result_ids + or "hs_doc_orange_fruit" in result_ids + ) + + async def test_hybrid_search_with_filter(self, vs_hybrid_search_with_tsv_column): + """Test hybrid search with a metadata filter applied.""" + query = "apple" + # Filter to only include "tech" related apple docs using metadata + # Assuming metadata_columns=["doc_id_key"] was set up for vs_hybrid_search_with_tsv_column + doc_filter = {"doc_id_key": {"$eq": "hs_doc_apple_tech"}} + + config = HybridSearchConfig( + tsv_column="mycontent_tsv", + ) + results = await vs_hybrid_search_with_tsv_column.asimilarity_search( + query, k=2, filter=doc_filter, hybrid_search_config=config + ) + result_ids = [doc.metadata["doc_id_key"] for doc in results] + + assert len(results) == 1 + assert result_ids[0] == "hs_doc_apple_tech" + + async def test_hybrid_search_fts_empty_results( + self, vs_hybrid_search_with_tsv_column + ): + """Test when FTS query yields no results, should fall back to vector search.""" + vector_query = "apple" + no_match_fts_query = "zzyyxx_gibberish_term_for_fts_nomatch" 
+ + config = HybridSearchConfig( + tsv_column="mycontent_tsv", + fts_query=no_match_fts_query, + fusion_function_parameters={ + "primary_results_weight": 0.6, + "secondary_results_weight": 0.4, + }, + ) + results = await vs_hybrid_search_with_tsv_column.asimilarity_search( + vector_query, k=2, hybrid_search_config=config + ) + result_ids = [doc.metadata["doc_id_key"] for doc in results] + + # Expect results based purely on vector search for "apple" + assert len(result_ids) > 0 + assert "hs_doc_apple_fruit" in result_ids or "hs_doc_apple_tech" in result_ids + # NOTE(review): the top result is pinned to the unrelated cat document, not an apple document (presumably an artifact of the fake embeddings); confirm + assert results[0].metadata["doc_id_key"].startswith("hs_doc_unrelated_cat") + + async def test_hybrid_search_vector_empty_results_effectively( + self, vs_hybrid_search_with_tsv_column + ): + """Test when vector query is very dissimilar to docs, should rely on FTS.""" + # This is hard to guarantee with fake embeddings, but we try. + # A better way might be to use a filter that excludes all docs for the vector part, + # but filters are applied to both. + vector_query_far_off = "supercalifragilisticexpialidocious_vector_nomatch" + fts_query_match = "orange fruit" # Should match hs_doc_orange_fruit + + config = HybridSearchConfig( + tsv_column="mycontent_tsv", + fts_query=fts_query_match, + fusion_function_parameters={ + "primary_results_weight": 0.4, + "secondary_results_weight": 0.6, + }, + ) + results = await vs_hybrid_search_with_tsv_column.asimilarity_search( + vector_query_far_off, k=1, hybrid_search_config=config + ) + result_ids = [doc.metadata["doc_id_key"] for doc in results] + + # NOTE(review): the FTS query is "orange fruit", yet the pinned top result below is hs_doc_generic_tech; confirm this is intended + assert len(result_ids) == 1 + assert result_ids[0] == "hs_doc_generic_tech" + + async def test_hybrid_search_without_tsv_column(self, engine): + """Test hybrid search without a TSV column.""" + # This is hard to guarantee with fake embeddings, but we try.
+ # A better way might be to use a filter that excludes all docs for the vector part, + # but filters are applied to both. + vector_query_far_off = "apple iphone tech is better designed than macs" + fts_query_match = "apple fruit" + + config = HybridSearchConfig( + tsv_column="mycontent_tsv", + fts_query=fts_query_match, + fusion_function_parameters={ + "primary_results_weight": 0.1, + "secondary_results_weight": 0.9, + }, + ) + await engine._ainit_vectorstore_table( + HYBRID_SEARCH_TABLE2, + VECTOR_SIZE, + id_column=Column("myid", "TEXT"), + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=[ + Column("page", "TEXT"), + Column("source", "TEXT"), + Column("doc_id_key", "TEXT"), + ], + store_metadata=False, + hybrid_search_config=config, + ) + + vs_with_tsv_column = await AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=HYBRID_SEARCH_TABLE2, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=["doc_id_key"], + index_query_options=HNSWQueryOptions(ef_search=1), + hybrid_search_config=config, + ) + await vs_with_tsv_column.aadd_documents(hybrid_docs) + + config = HybridSearchConfig( + tsv_column="", # no TSV column + fts_query=fts_query_match, + fusion_function_parameters={ + "primary_results_weight": 0.9, + "secondary_results_weight": 0.1, + }, + ) + vs_without_tsv_column = await AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=HYBRID_SEARCH_TABLE2, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=["doc_id_key"], + index_query_options=HNSWQueryOptions(ef_search=1), + hybrid_search_config=config, + ) + + results_with_tsv_column = await vs_with_tsv_column.asimilarity_search( + vector_query_far_off, k=1, hybrid_search_config=config + ) + results_without_tsv_column = await vs_without_tsv_column.asimilarity_search( + vector_query_far_off, k=1, 
hybrid_search_config=config + ) + result_ids_with_tsv_column = [ + doc.metadata["doc_id_key"] for doc in results_with_tsv_column + ] + result_ids_without_tsv_column = [ + doc.metadata["doc_id_key"] for doc in results_without_tsv_column + ] + + # Expect the same top result with and without a TSV column (FTS query is "apple fruit") + assert len(result_ids_with_tsv_column) == 1 + assert len(result_ids_without_tsv_column) == 1 + assert result_ids_with_tsv_column[0] == "hs_doc_apple_tech" + assert result_ids_without_tsv_column[0] == "hs_doc_apple_tech" diff --git a/tests/test_engine.py b/tests/test_engine.py index 7883cf4b..4a34c575 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -27,16 +27,18 @@ from sqlalchemy.ext.asyncio import create_async_engine from sqlalchemy.pool import NullPool -from langchain_google_cloud_sql_pg import Column, PostgresEngine +from langchain_google_cloud_sql_pg import Column, HybridSearchConfig, PostgresEngine DEFAULT_TABLE = "test_table" + str(uuid.uuid4()).replace("-", "_") CUSTOM_TABLE = "test_table_custom" + str(uuid.uuid4()).replace("-", "_") INT_ID_CUSTOM_TABLE = "test_table_custom_int_id" + str(uuid.uuid4()).replace("-", "_") +HYBRID_SEARCH_TABLE = "hybrid" + str(uuid.uuid4()).replace("-", "_") DEFAULT_TABLE_SYNC = "test_table" + str(uuid.uuid4()).replace("-", "_") CUSTOM_TABLE_SYNC = "test_table_custom" + str(uuid.uuid4()).replace("-", "_") INT_ID_CUSTOM_TABLE_SYNC = "test_table_custom_int_id" + str(uuid.uuid4()).replace( "-", "_" ) +HYBRID_SEARCH_TABLE_SYNC = "hybrid_sync" + str(uuid.uuid4()).replace("-", "_") VECTOR_SIZE = 768 embeddings_service = DeterministicFakeEmbedding(size=VECTOR_SIZE) @@ -120,6 +122,7 @@ async def engine(self, db_project, db_region, db_instance, db_name): await aexecute(engine, f'DROP TABLE "{CUSTOM_TABLE}"') await aexecute(engine, f'DROP TABLE "{DEFAULT_TABLE}"') await aexecute(engine, f'DROP TABLE "{INT_ID_CUSTOM_TABLE}"') + await aexecute(engine, f'DROP TABLE "{HYBRID_SEARCH_TABLE}"') await engine.close() async def
test_engine_args(self, engine): @@ -236,6 +239,28 @@ async def getconn() -> asyncpg.Connection: await aexecute(engine, "SELECT 1") await engine.close() + async def test_from_connection_string( + self, + db_name, + user, + password, + ): + port = "5432" + url = f"postgresql+asyncpg://{user}:{password}@{host}:{port}/{db_name}" + engine = PostgresEngine.from_connection_string( + url, + echo=True, + poolclass=NullPool, + ) + await aexecute(engine, "SELECT 1") + await engine.close() + + engine = PostgresEngine.from_connection_string( + URL.create("postgresql+asyncpg", user, password, host, port, db_name) + ) + await aexecute(engine, "SELECT 1") + await engine.close() + async def test_from_engine_args_url( self, db_name, @@ -338,6 +363,31 @@ async def test_ainit_checkpoint_writes_table(self, engine): await aexecute(engine, f'DROP TABLE IF EXISTS "{table_name}"') await aexecute(engine, f'DROP TABLE IF EXISTS "{table_name_writes}"') + async def test_init_table_hybrid_search(self, engine): + await engine.ainit_vectorstore_table( + HYBRID_SEARCH_TABLE, + VECTOR_SIZE, + id_column="uuid", + content_column="my-content", + embedding_column="my_embedding", + metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], + store_metadata=True, + hybrid_search_config=HybridSearchConfig(), + ) + stmt = f"SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '{HYBRID_SEARCH_TABLE}';" + results = await afetch(engine, stmt) + expected = [ + {"column_name": "uuid", "data_type": "uuid"}, + {"column_name": "my_embedding", "data_type": "USER-DEFINED"}, + {"column_name": "langchain_metadata", "data_type": "json"}, + {"column_name": "my-content", "data_type": "text"}, + {"column_name": "my-content_tsv", "data_type": "tsvector"}, + {"column_name": "page", "data_type": "text"}, + {"column_name": "source", "data_type": "text"}, + ] + for row in results: + assert row in expected + @pytest.mark.asyncio(scope="module") class TestEngineSync: @@ -381,6 +431,7 @@ 
async def engine(self, db_project, db_region, db_instance, db_name): await aexecute(engine, f'DROP TABLE "{CUSTOM_TABLE_SYNC}"') await aexecute(engine, f'DROP TABLE "{DEFAULT_TABLE_SYNC}"') await aexecute(engine, f'DROP TABLE "{INT_ID_CUSTOM_TABLE_SYNC}"') + await aexecute(engine, f'DROP TABLE "{HYBRID_SEARCH_TABLE_SYNC}"') await engine.close() async def test_init_table(self, engine): @@ -525,3 +576,28 @@ async def test_init_checkpoints_table(self, engine): assert row in expected await aexecute(engine, f'DROP TABLE IF EXISTS "{table_name}"') await aexecute(engine, f'DROP TABLE IF EXISTS "{table_name_writes}"') + + async def test_init_table_hybrid_search(self, engine): + engine.init_vectorstore_table( + HYBRID_SEARCH_TABLE_SYNC, + VECTOR_SIZE, + id_column="uuid", + content_column="my-content", + embedding_column="my_embedding", + metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], + store_metadata=True, + hybrid_search_config=HybridSearchConfig(), + ) + stmt = f"SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '{HYBRID_SEARCH_TABLE_SYNC}';" + results = await afetch(engine, stmt) + expected = [ + {"column_name": "uuid", "data_type": "uuid"}, + {"column_name": "my_embedding", "data_type": "USER-DEFINED"}, + {"column_name": "langchain_metadata", "data_type": "json"}, + {"column_name": "my-content", "data_type": "text"}, + {"column_name": "my-content_tsv", "data_type": "tsvector"}, + {"column_name": "page", "data_type": "text"}, + {"column_name": "source", "data_type": "text"}, + ] + for row in results: + assert row in expected diff --git a/tests/test_standard_test_suite.py b/tests/test_standard_test_suite.py index 19c77128..2a853bdd 100644 --- a/tests/test_standard_test_suite.py +++ b/tests/test_standard_test_suite.py @@ -23,8 +23,8 @@ from langchain_google_cloud_sql_pg import Column, PostgresEngine, PostgresVectorStore -DEFAULT_TABLE = "test_table_standard_test_suite" + str(uuid.uuid4()) -DEFAULT_TABLE_SYNC = 
"test_table_sync_standard_test_suite" + str(uuid.uuid4()) +DEFAULT_TABLE = "test_table" + str(uuid.uuid4()) +DEFAULT_TABLE_SYNC = "test_table_sync" + str(uuid.uuid4()) def get_env_var(key: str, desc: str) -> str: diff --git a/tests/test_vectorstore.py b/tests/test_vectorstore.py index 7995cd63..4e82cab6 100644 --- a/tests/test_vectorstore.py +++ b/tests/test_vectorstore.py @@ -31,7 +31,7 @@ DEFAULT_TABLE = "test_table" + str(uuid.uuid4()) DEFAULT_TABLE_SYNC = "test_table_sync" + str(uuid.uuid4()) -CUSTOM_TABLE = "test-table-custom" + str(uuid.uuid4()) +CUSTOM_TABLE = "custom" + str(uuid.uuid4()) VECTOR_SIZE = 768 embeddings_service = DeterministicFakeEmbedding(size=VECTOR_SIZE) diff --git a/tests/test_vectorstore_from_methods.py b/tests/test_vectorstore_from_methods.py index fadf8fc1..5d054dfb 100644 --- a/tests/test_vectorstore_from_methods.py +++ b/tests/test_vectorstore_from_methods.py @@ -29,10 +29,8 @@ DEFAULT_TABLE = "test_table" + str(uuid.uuid4()).replace("-", "_") DEFAULT_TABLE_SYNC = "test_table_sync" + str(uuid.uuid4()).replace("-", "_") CUSTOM_TABLE = "test_table_custom" + str(uuid.uuid4()).replace("-", "_") -CUSTOM_TABLE_WITH_INT_ID = "test_table_with_int_id" + str(uuid.uuid4()).replace( - "-", "_" -) -CUSTOM_TABLE_WITH_INT_ID_SYNC = "test_table_with_int_id" + str(uuid.uuid4()).replace( +CUSTOM_TABLE_WITH_INT_ID = "test_table_int_id" + str(uuid.uuid4()).replace("-", "_") +CUSTOM_TABLE_WITH_INT_ID_SYNC = "test_table_int_id" + str(uuid.uuid4()).replace( "-", "_" ) VECTOR_SIZE = 768 diff --git a/tests/test_vectorstore_index.py b/tests/test_vectorstore_index.py index cb797219..72a99b00 100644 --- a/tests/test_vectorstore_index.py +++ b/tests/test_vectorstore_index.py @@ -31,8 +31,8 @@ IVFFlatIndex, ) -DEFAULT_TABLE = "test_table" + str(uuid.uuid4()).replace("-", "_") -CUSTOM_TABLE = "test_table_custom" + str(uuid.uuid4()).replace("-", "_") +DEFAULT_TABLE = "table" + str(uuid.uuid4()).replace("-", "_") +CUSTOM_TABLE = "custom" + 
str(uuid.uuid4()).replace("-", "_") DEFAULT_INDEX_NAME = DEFAULT_TABLE + DEFAULT_INDEX_NAME_SUFFIX VECTOR_SIZE = 768 @@ -120,7 +120,7 @@ async def test_areindex(self, vs): if not vs.is_valid_index(DEFAULT_INDEX_NAME): index = HNSWIndex() vs.apply_vector_index(index) - vs.reindex() + vs.reindex(DEFAULT_INDEX_NAME) vs.reindex(DEFAULT_INDEX_NAME) assert vs.is_valid_index(DEFAULT_INDEX_NAME) vs.drop_vector_index(DEFAULT_INDEX_NAME) @@ -201,7 +201,7 @@ async def test_areindex(self, vs): if not await vs.ais_valid_index(DEFAULT_INDEX_NAME): index = HNSWIndex() await vs.aapply_vector_index(index) - await vs.areindex() + await vs.areindex(DEFAULT_INDEX_NAME) await vs.areindex(DEFAULT_INDEX_NAME) assert await vs.ais_valid_index(DEFAULT_INDEX_NAME) await vs.adrop_vector_index(DEFAULT_INDEX_NAME) diff --git a/tests/test_vectorstore_search.py b/tests/test_vectorstore_search.py index ae1341ed..963bc41b 100644 --- a/tests/test_vectorstore_search.py +++ b/tests/test_vectorstore_search.py @@ -22,16 +22,21 @@ from metadata_filtering_data import FILTERING_TEST_CASES, METADATAS, NEGATIVE_TEST_CASES from sqlalchemy import text -from langchain_google_cloud_sql_pg import Column, PostgresEngine, PostgresVectorStore +from langchain_google_cloud_sql_pg import ( # type: ignore + Column, + HybridSearchConfig, + PostgresEngine, + PostgresVectorStore, + reciprocal_rank_fusion, + weighted_sum_ranking, +) from langchain_google_cloud_sql_pg.indexes import DistanceStrategy, HNSWQueryOptions -DEFAULT_TABLE = "test_table" + str(uuid.uuid4()).replace("-", "_") -CUSTOM_TABLE = "test_table_custom" + str(uuid.uuid4()).replace("-", "_") -CUSTOM_TABLE_SYNC = "test_table_sync" + str(uuid.uuid4()).replace("-", "_") -CUSTOM_FILTER_TABLE = "test_table_custom_filter" + str(uuid.uuid4()).replace("-", "_") -CUSTOM_FILTER_TABLE_SYNC = "test_table_custom_filter_sync" + str(uuid.uuid4()).replace( - "-", "_" -) +DEFAULT_TABLE = "default" + str(uuid.uuid4()).replace("-", "_") +CUSTOM_TABLE = "custom" + 
str(uuid.uuid4()).replace("-", "_") +CUSTOM_TABLE_SYNC = "custom_sync" + str(uuid.uuid4()).replace("-", "_") +CUSTOM_FILTER_TABLE = "custom_filter" + str(uuid.uuid4()).replace("-", "_") +CUSTOM_FILTER_TABLE_SYNC = "custom_filter_sync" + str(uuid.uuid4()).replace("-", "_") VECTOR_SIZE = 768 embeddings_service = DeterministicFakeEmbedding(size=VECTOR_SIZE) @@ -191,7 +196,7 @@ async def test_asimilarity_search(self, vs): results = await vs.asimilarity_search("foo", k=1) assert len(results) == 1 assert results == [Document(page_content="foo", id=ids[0])] - results = await vs.asimilarity_search("foo", k=1, filter="content = 'bar'") + results = await vs.asimilarity_search("foo", k=1, filter={"content": "bar"}) assert results == [Document(page_content="bar", id=ids[1])] async def test_asimilarity_search_score(self, vs): @@ -252,7 +257,7 @@ async def test_amax_marginal_relevance_search(self, vs): results = await vs.amax_marginal_relevance_search("bar") assert results[0] == Document(page_content="bar", id=ids[1]) results = await vs.amax_marginal_relevance_search( - "bar", filter="content = 'boo'" + "bar", filter={"content": "boo"} ) assert results[0] == Document(page_content="boo", id=ids[3]) @@ -298,6 +303,37 @@ async def test_vectorstore_with_metadata_filters( ) assert [doc.metadata["code"] for doc in docs] == expected_ids, test_filter + async def test_asimilarity_hybrid_search(self, vs): + results = await vs.asimilarity_search( + "foo", k=1, hybrid_search_config=HybridSearchConfig() + ) + assert len(results) == 1 + assert results == [Document(page_content="foo", id=ids[0])] + + results = await vs.asimilarity_search( + "bar", + k=1, + hybrid_search_config=HybridSearchConfig(), + ) + assert results[0] == Document(page_content="bar", id=ids[1]) + + results = await vs.asimilarity_search( + "foo", + k=1, + filter={"content": {"$ne": "baz"}}, + hybrid_search_config=HybridSearchConfig( + fusion_function=weighted_sum_ranking, + fusion_function_parameters={ + 
"primary_results_weight": 0.1, + "secondary_results_weight": 0.9, + "fetch_top_k": 10, + }, + primary_top_k=1, + secondary_top_k=1, + ), + ) + assert results == [Document(page_content="foo", id=ids[0])] + class TestVectorStoreSearchSync: @pytest.fixture(scope="module") @@ -398,7 +434,7 @@ def test_similarity_search(self, vs_custom): results = vs_custom.similarity_search("foo", k=1) assert len(results) == 1 assert results == [Document(page_content="foo", id=ids[0])] - results = vs_custom.similarity_search("foo", k=1, filter="mycontent = 'bar'") + results = vs_custom.similarity_search("foo", k=1, filter={"mycontent": "bar"}) assert results == [Document(page_content="bar", id=ids[1])] def test_similarity_search_score(self, vs_custom): @@ -420,7 +456,7 @@ def test_max_marginal_relevance_search(self, vs_custom): results = vs_custom.max_marginal_relevance_search("bar") assert results[0] == Document(page_content="bar", id=ids[1]) results = vs_custom.max_marginal_relevance_search( - "bar", filter="mycontent = 'boo'" + "bar", filter={"mycontent": "boo"} ) assert results[0] == Document(page_content="boo", id=ids[3]) @@ -465,3 +501,27 @@ def test_metadata_filter_negative_tests(self, vs_custom_filter_sync, test_filter docs = vs_custom_filter_sync.similarity_search( "meow", k=5, filter=test_filter ) + + def test_similarity_hybrid_search(self, vs_custom): + results = vs_custom.similarity_search( + "foo", k=1, hybrid_search_config=HybridSearchConfig() + ) + assert len(results) == 1 + assert results == [Document(page_content="foo", id=ids[0])] + + results = vs_custom.similarity_search( + "bar", + k=1, + hybrid_search_config=HybridSearchConfig(), + ) + assert results == [Document(page_content="bar", id=ids[1])] + + results = vs_custom.similarity_search( + "foo", + k=1, + filter={"mycontent": {"$ne": "baz"}}, + hybrid_search_config=HybridSearchConfig( + fusion_function=reciprocal_rank_fusion + ), + ) + assert results == [Document(page_content="foo", id=ids[0])] From 
303d89f3a83fa74709ea8ab08f31412483b8549c Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 13 Aug 2025 20:46:28 +0200 Subject: [PATCH 08/61] chore(deps): update googleapis/langchain-google-alloydb-pg-python digest to 2293bd7 (#321) --- .github/workflows/schedule_reporter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/schedule_reporter.yml b/.github/workflows/schedule_reporter.yml index 5fb9d0ae..9c8b8f06 100644 --- a/.github/workflows/schedule_reporter.yml +++ b/.github/workflows/schedule_reporter.yml @@ -24,6 +24,6 @@ jobs: issues: 'write' checks: 'read' contents: 'read' - uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@4ccc09a2194a752a8e112f2c88ee1a6efd7a8512 + uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@2293bd76f294c3e62f08560311621f58a1628cdd with: trigger_names: "pg-integration-test-nightly,pg-continuous-test-on-merge" From f8808cfeb9e8d9b18a79a6db5b60d181b224f905 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 18 Aug 2025 12:04:23 +0200 Subject: [PATCH 09/61] chore(deps): update python-nonmajor (#312) * chore(deps): update python-nonmajor * Update pyproject.toml * Update requirements.txt * Update requirements.txt * Update requirements.txt * Update requirements.txt * Update requirements.txt --------- Co-authored-by: dishaprakash <57954147+dishaprakash@users.noreply.github.com> --- pyproject.toml | 4 ++-- requirements.txt | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8e691bda..38db0f27 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,12 +44,12 @@ langgraph = [ test = [ "black[jupyter]==25.1.0", "isort==6.0.1", - "mypy==1.15.0", + "mypy==1.17.1", "pytest-asyncio==0.26.0", "pytest==8.4.1", "pytest-cov==6.2.1", "langchain-tests==0.3.20", - "langgraph==0.6.0" + "langgraph==0.6.5" ] [build-system] diff --git a/requirements.txt 
b/requirements.txt index ac28b2c2..55636030 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ -cloud-sql-python-connector[asyncpg]==1.18.2 -numpy==2.3.1; python_version >= "3.11" +cloud-sql-python-connector[asyncpg]==1.18.4 +numpy==2.3.2; python_version >= "3.11" numpy==2.2.6; python_version == "3.10" numpy==2.0.2; python_version <= "3.9" -langgraph==0.6.0 +langgraph==0.6.5 langchain-postgres==0.0.15 From ea4566d5d04398e7cb673132e5bfc26dde2c8de5 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 18 Aug 2025 12:07:19 +0200 Subject: [PATCH 10/61] chore(deps): update googleapis/langchain-google-alloydb-pg-python digest to 3423e9a (#326) Co-authored-by: dishaprakash <57954147+dishaprakash@users.noreply.github.com> --- .github/workflows/schedule_reporter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/schedule_reporter.yml b/.github/workflows/schedule_reporter.yml index 9c8b8f06..99bae1f1 100644 --- a/.github/workflows/schedule_reporter.yml +++ b/.github/workflows/schedule_reporter.yml @@ -24,6 +24,6 @@ jobs: issues: 'write' checks: 'read' contents: 'read' - uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@2293bd76f294c3e62f08560311621f58a1628cdd + uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@3423e9aea159d63dee57d11d2667358ae233c081 with: trigger_names: "pg-integration-test-nightly,pg-continuous-test-on-merge" From 14098ca7a6cf7116e6edbcb7a5c6c3ccbce76b4a Mon Sep 17 00:00:00 2001 From: dishaprakash <57954147+dishaprakash@users.noreply.github.com> Date: Wed, 20 Aug 2025 07:28:57 +0000 Subject: [PATCH 11/61] docs: Add Hybrid Search documentation (#329) * docs: Add Hybrid Search documentation * minor changes * minor fixes --- README.rst | 20 +++ docs/vector_store.ipynb | 351 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 371 insertions(+) diff --git a/README.rst b/README.rst index 
6833433b..9839b661 100644 --- a/README.rst +++ b/README.rst @@ -111,6 +111,26 @@ Use a Vector Store to store embedded data and perform vector search. embeddings=embedding_service ) +Hybrid search +~~~~~~~~~~~~~ + +The `PostgresVectorStore` supports hybrid search (dense vectors + full text) for more comprehensive and relevant search results. + +.. code-block:: python + + from langchain_google_cloud_sql_pg import HybridSearchConfig, reciprocal_rank_fusion + + vs = PostgresVectorStore.create_sync( + engine=engine, + table_name=TABLE_NAME, + embedding_service=embedding, + hybrid_search_config=HybridSearchConfig( + fusion_function=reciprocal_rank_fusion + ), + ) + hybrid_docs = vs.similarity_search("products", k=5) + + See the full `Vector Store`_ tutorial. .. _`Vector Store`: https://github.com/googleapis/langchain-google-cloud-sql-pg-python/tree/main/docs/vector_store.ipynb diff --git a/docs/vector_store.ipynb b/docs/vector_store.ipynb index 74bc3f54..ddc5ce30 100644 --- a/docs/vector_store.ipynb +++ b/docs/vector_store.ipynb @@ -662,6 +662,357 @@ "docs = await loader.aload()\n", "print(docs)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a Vector Store using an existing table\n", + "\n", + "A Vector Store can be built on top of an existing table.\n", + "\n", + "Assume there is a pre-existing table named `products` in Cloud SQL for PostgreSQL, which stores product details for an e-commerce venture.\n", + "\n", + "
\n", + " Click for Table Schema Details\n", + " \n", + " ### SQL query for table creation\n", + " ```\n", + " CREATE TABLE products (\n", + " product_id SERIAL PRIMARY KEY,\n", + " name VARCHAR(255) NOT NULL,\n", + " description TEXT,\n", + " price_usd DECIMAL(10, 2) NOT NULL,\n", + " category VARCHAR(255),\n", + " quantity INT DEFAULT 0,\n", + " sku VARCHAR(255) UNIQUE NOT NULL,\n", + " image_url VARCHAR(255),\n", + " metadata JSON,\n", + " embed vector(768) DEFAULT NULL --> vector dimension depends on the embedding model\n", + " );\n", + " ```\n", + " ### Insertion of records\n", + " ```\n", + "INSERT INTO\n", + " products (name,\n", + " description,\n", + " price_usd,\n", + " category,\n", + " quantity,\n", + " sku,\n", + " image_url,\n", + " METADATA,\n", + " embed)\n", + "VALUES\n", + " ('Laptop', 'High-performance gaming laptop', 1200.00, 'Electronics', 10, 'SKU12345', 'https://example.com/laptop.jpg', '{\"category\" : \"Electronics\", \"name\" : \"Laptop\", \"description\" : \"High-performance gaming laptop\"}', 
ARRAY[0.028855365,-0.012488421,0.006031946,0.0041402685,0.058347773,0.034766156,0.0033533745,0.02021188,0.022670388,0.049201276,0.029006215,-0.00986186,-0.052214462,-0.012280585,0.023684537,-0.059519604,0.001378169,-0.04670758,0.020753963,0.0013795564,0.013659675,0.013842887,-0.011299884,-0.03746782,-0.024693582,-0.07013125,0.030126512,-0.028513059,-0.045777187,0.020505989,-0.05952914,0.0015648323,-0.050879195,0.006477519,-0.007886009,-0.02629686,-0.0161126,0.0314275,-0.0328995,0.0265609,-0.01530363,-0.019561788,-0.04535006,0.030131247,0.05462397,-0.0122205755,0.009777537,-0.0049046725,0.02023674,-0.064513534,0.041379478,0.006994005,0.045187026,-0.029661352,0.019398877,-0.02221874,-0.017291287,-0.016321573,-0.033429787,-0.009547383,0.031690586,0.009064364,-0.015285908,0.076494075,0.010917006,-0.016593782,-0.018348552,0.017040739,0.05943369,-0.020822933,0.009285482,0.027736548,0.07029796,-0.0644397,-0.037717465,-0.047550958,-0.0054535423,0.047678974,0.060069297,-0.015072207,-0.04320405,-0.0019738402,-0.061910342,-0.034316592,-0.023359261,0.057676528,0.0054635284,0.042063717,0.020484874,0.005591504,-0.008757174,-0.0153757995,0.04932489,-0.04626516,0.0004756786,0.03749645,0.018522505,-0.015642159,-0.00842546,-0.06284679,-0.006150201,-0.061204597,0.0008340049,0.0040505463,0.014210282,-0.009027461,-0.014203488,0.030791085,-0.022282222,0.0011378798,-0.047313087,-0.008226634,-0.03726029,-0.04307269,0.04519085,-0.021895533,0.019570287,0.08584432,-0.003815025,0.021276724,0.027253378,-0.01660856,0.056772888,0.053538952,0.02739156,0.04655151,0.021516826,0.064367436,-0.021094408,0.0149244,-0.009901731,-0.04166729,-0.0032499651,0.022982895,0.063407354,0.04826923,0.056767307,-0.024418632,0.063300684,0.08071309,-0.054988176,0.01652395,-0.014671885,0.000837919,-0.044569198,0.03651631,-0.016364796,0.0053244857,0.051150765,-0.01878448,0.005112729,-0.0011729974,-0.052268386,0.034706745,0.05072015,0.0052968785,0.021704907,0.045661792,0.002976117,-0.02205154,0.037168674,0.002627892,0.01
8275578,0.032312263,-0.06719407,-0.056915596,-0.019727554,0.0009450171,0.0029568567,0.047435578,0.033826437,-0.009351167,-0.05718618,-0.062166944,-0.005684254,-0.009788955,0.016364967,0.0122847315,-0.016126394,0.012999976,-0.075272575,0.017478324,0.03005914,0.024401167,0.0099941185,-0.043311242,0.032115143,0.0047207233,-0.034337096,0.0054743756,-0.0024234303,0.012045114,0.032277416,-0.019994166,0.012312445,0.021211047,-0.037350595,0.0017910452,0.04450775,0.0054527316,0.03591427,0.029365221,0.0009824947,-0.006488191,0.034008037,0.01649739,0.07955305,-0.035204325,0.0056851353,-0.0086927805,-0.032573096,0.0010878195,-0.061459325,0.027879931,0.015068312,0.032717325,0.03890655,0.01902891,0.016527452,-0.0020142202,0.025338948,-0.0016015576,-0.06429177,-0.0041105347,-0.025726322,0.09078289,-0.03174613,0.015951345,0.009411334,-0.03598392,0.034463316,0.010011217,-0.009883364,-0.008042991,0.040896636,-0.025115138,0.048056312,0.028382989,0.007793395,0.019581616,-0.02584373,0.04317992,0.025689745,0.02035658,-0.05990108,-0.0007803719,-0.06793038,-0.02130707,0.0048890263,0.042799927,-0.009928141,-0.003192067,0.008781545,0.024785394,-0.07565836,-0.043356933,-0.067785084,-0.019649943,-0.024896448,-0.008327102,-0.015189734,-0.0140810255,0.0049958434,-0.015353841,0.020730853,0.028829988,-0.022614283,-0.03751693,0.011577282,0.031927988,-0.024855413,-0.042680055,0.08018929,-0.0021632465,-0.017928878,-0.0030442774,-0.005651566,-0.0010570051,-0.040446285,-0.00189408,0.06388222,0.0024985478,0.004886204,-0.05113467,-0.019480383,0.049765434,0.0077566532,-0.07356923,0.011988718,-0.020965552,-0.04025921,-0.032686763,-0.0053743063,-0.015599607,-0.03576176,0.00907552,-0.044702522,0.038329247,0.046024352,0.02194124,0.01844749,0.004619246,-0.029577129,-0.031205669,0.00896738,0.0115034515,0.013058729,0.01372364,0.03063813,-0.0316296,-0.04826321,-0.049244087,-0.037644744,0.019473651,0.059536345,0.04033204,-0.06602803,0.050612085,-0.027031716,0.04213856,-0.015262794,0.07257449,0.044631373,-0.0151061
565,0.012033797,0.0009732858,-0.014827035,0.046652585,-0.042083394,-0.0436095,-0.035586536,0.026696088,-0.004066648,0.06954644,-0.029623765,-0.020358749,-0.04957031,0.01740737,-0.017026579,0.011162373,0.0487351,-0.031720005,-0.050231773,-0.089686565,-0.014156863,-0.02636994,0.015916161,-0.025308851,0.02081637,-0.02257452,0.021604244,0.10139386,-0.03208752,-0.008580313,-0.008898747,-0.06853021,0.04102758,0.041922912,0.047566738,-0.0341902,-0.07725792,0.005653997,0.00021225312,-0.0104829185,0.001749244,0.011929626,0.078264005,0.036519475,-0.0073295147,0.021337496,-0.008336836,-0.035804152,0.010720447,0.007127837,-0.053885818,-0.009795316,-0.05424524,-0.003111704,0.019710006,-0.012413589,0.02320744,0.024137065,0.023079542,0.0030920266,0.013961592,0.0040291087,0.020265838,0.041183334,-0.0029272675,-0.018539282,-0.011489972,0.017938145,0.025854694,0.033188265,-0.042004097,-0.0106819095,-0.045249976,-0.06986475,0.030204961,-0.032193515,-0.00095170306,-0.0107111735,-0.017970158,-0.02740307,-0.06307846,-0.031544626,0.004178074,0.016592229,-0.032037992,-0.030618787,0.008946463,0.03110429,0.0207187,-0.016861247,-0.08070464,-0.03067543,0.067448415,-0.041909873,-0.0048193526,0.018761802,0.020243261,0.024184326,0.002299031,-0.014152546,-0.035749547,-0.0071563246,0.050069712,-0.027215304,0.049641047,0.02778935,0.070745096,0.023794815,0.0029510225,0.0069351746,-0.034430653,-0.085317925,-0.036851004,0.023848707,0.035138704,-0.017030267,0.041982725,0.014077844,0.012787886,-0.029716792,-0.024732213,-0.059604853,0.024058796,-0.027469097,0.02969232,-0.06889772,-0.034953564,-0.0678685,0.02039748,-0.073483475,-0.04067064,-0.023628144,0.052601792,0.10005532,0.0027910264,-0.00044562414,0.025615653,0.008896907,-0.016369712,-0.030180404,0.026393086,-0.02041892,0.0072918,-0.018448602,0.020845268,0.006290655,-0.010850651,-0.035378493,-0.01083432,0.012116494,-0.045438327,0.05191333,-0.082797736,0.042320468,0.039703712,0.00923727,0.03598509,-0.064069025,0.049349498,0.007205401,-0.0079013845,0.01
5407162,-0.049755134,-0.0335355,-0.033252683,0.025886077,-0.043650113,-0.021745201,-0.046847582,-0.02873071,-0.01435186,0.01642749,-0.030346846,0.00564007,0.0074587157,0.027222605,-0.024691164,0.007528186,-0.04551536,-0.011026097,0.091698915,-0.062147886,0.0013525741,-0.0065618614,-0.030818032,0.024246406,-0.010786434,0.006758053,-0.016815495,0.071824,0.022536254,-0.026362726,-0.066206455,0.011966612,0.06430261,0.021586932,0.032340884,-0.015460002,-0.0963993,-0.0041012894,0.026189657,-0.101343565,0.038662393,0.07043264,-0.0373032,0.0038455573,-0.017408002,0.12948644,-0.056175977,0.02693295,-0.033682294,-0.032874268,0.0016187532,0.023056049,0.06884863,0.04350595,0.02135146,-0.059129357,-0.0055416543,0.0098204445,-0.008596177,-0.04332969,-0.012624592,-0.09298762,0.041691724,-0.014171953,0.004045705,0.009756654,0.059401184,-0.02852561,0.006892971,-0.019445946,-0.013781522,-0.03458903,-0.001079532,-0.008455719,-0.025446072,-0.03641567,-0.034449898,0.004487285,0.07899037,0.031314176,-0.031828023,0.031026838,0.034468375,0.0166286,0.032397788,0.02265452,0.07575427,0.015329588,0.05969185,-0.049144097,-0.043501142,0.031721197,-0.03434621,0.04558533,-0.00039121095,0.00093291467,0.033810064,0.0131731015,-0.0161992,0.039637238,0.0018543458,-0.041811496,-0.01406263,-0.020126836,-0.011859638,0.029031854,0.018889664,0.015262868,-0.03756649,-0.024570176,0.02538295,0.0038968727,-0.06393701,0.00093783275,-0.05943941,-0.062095385,0.08169533,-0.026443593,0.045758378,-0.026765708,-0.023990292,-0.028646782,0.0013627055,0.0022589415,0.009424216,-0.004252787,0.01159273,-0.0393901,-0.02593045,-0.04785985,0.023880653,0.012857186,-0.028907716,-0.05117687,-0.017512657,-0.035777926,0.01183514,0.025101895,0.089760125,-0.009716518,0.012040118,-0.023447596,0.057904292,0.03486462,-0.014875794,0.05191007,0.002385196,0.016686346,-0.052348964,-0.029286617,0.023832947,-0.02915365,0.007727999,-0.012708917,-0.055755604,-0.0073897606,0.032306697,0.02891973,-0.029123511,0.08987496,0.049180396,-0.08122004,-
0.029804248,0.03262262,-0.06680825,0.016717656,0.0038353673,0.021287518,0.0018424556,-0.0041867862,-0.0011719886,-0.044280436,0.02019424,-0.052992586,-0.05063449,0.039644204,-0.0494374,-0.033791043,-0.0041454337,-0.032513123,-0.073564336,-0.04585872,0.0023792102,0.027335508,-0.06999816,0.04888005,0.026423248,0.021874929,0.010904174,0.060097646,-0.034017522,0.05548881,-0.024519302,0.049890403,-0.015645353,-0.060680103,0.017045638,0.019808227,0.025153033,0.0040058065,0.053807795,0.034485374,-0.053428553,-0.0034872151,0.033813756,-0.03047597,0.007858348,0.024711734,0.060215656,0.008143574,-0.0070263194,0.0048007956,0.015641727,0.052094024,-0.049206913,0.016296484,-0.0059813466,0.040864628,0.013278136,-0.012139221,-0.04106141,0.0144868875,0.0013842004,0.021345256,0.04826021,-0.06929805,-0.021199407,0.00090551435,0.009481861,-0.0017141728,0.028452767,-0.019797614,0.038415838,0.056153923,-0.014074272,-0.00823969,-0.00050664565,-0.07698735,-0.025168924,0.057516575,-0.07501726,0.037316702,-0.02765656,-0.011325112,0.058868058,-0.010426108,-0.013318932,-0.0016809561,-0.062076304,0.027063645,-0.020674324,0.06843111,0.018448142,-0.04226709,-0.015164476,-0.008888517,0.040828817,0.048462827,0.00942803,-0.019631634,0.020950766,-0.0003345382,-0.030098192,0.022870619,-0.0017267349,-0.009055838,-0.012781693,0.07583533,0.045031916,-0.02076535,-0.07310905,-0.011597339,-0.00062336307,0.005723161,-0.018269768,0.020560576,0.023111053,-0.00881239,0.0052197427,0.022200806,0.013797317,0.019722437]::vector(768)),\n", + " ('Smartphone', 'Latest model with high-resolution camera', 800.00, 'Electronics', 15, 'SKU12346', 'https://example.com/smartphone.jpg', '{\"category\" : \"Electronics\", \"name\" : \"Smartphone\", \"description\" : \"Latest model with high-resolution camera\"}', 
ARRAY[0.031757303,-0.030950155,-0.058881454,-0.05073203,0.053704526,-0.01064694,0.030361004,0.0036670829,-0.014013894,0.022840602,0.06545107,0.0108244,0.009321064,-0.0236112,0.0098358095,-0.038861487,-0.011348891,-0.011887714,0.011245335,-0.018139482,0.03049321,-0.030338986,-0.001923893,0.011787388,-0.01825618,-0.050398953,0.0036137043,-0.04487695,-0.021582587,0.023590472,-0.051335085,0.08021365,-0.06793676,-0.00514603,0.024418706,-0.054447155,-0.050472837,0.010439093,-0.017847419,0.07124281,0.004419413,-0.028902968,-0.062286377,-0.02737251,0.048311986,-0.029160773,0.0059961462,0.0344943,0.037635062,-0.081315145,0.025175434,-0.0050063017,0.023545247,-0.015210805,-0.035123624,-0.020403884,0.014771475,0.015879042,0.0029214756,0.011768866,0.004276383,-0.009031657,-0.050000243,0.059927624,-0.03906005,-0.027238877,-0.04796615,0.03084268,0.07360646,-0.028875567,0.027232852,0.015592421,0.07156161,-0.059652634,-0.04831314,-0.049740285,-0.017305655,0.10253246,0.016519215,-0.0021727297,-0.0063062175,-0.0015423468,-0.03617129,-0.03982753,-0.059866134,0.082323685,-0.01662162,-0.0048025097,0.011876321,0.08410362,-0.006159452,-0.0008565244,0.04274695,-0.08079417,0.04427687,0.04110836,0.04812812,-0.053979542,-0.004387368,-0.04829328,-0.022975856,-0.015012431,-0.0056774826,-0.03936704,0.023132714,-0.007810687,-0.011018049,0.031620245,-0.02713872,0.0018347959,-0.024968592,0.02253628,-0.00809666,-0.0076680584,0.06435103,-0.020083368,-0.0049473317,0.07430767,0.01915259,0.040656384,0.00998682,-0.014684721,0.026354978,0.032759093,0.037668057,-0.009659323,0.006720873,0.063525185,0.03982695,0.04567435,-0.02619304,-0.030550981,-0.014520635,0.0010599799,0.034034356,0.06294083,0.07422565,0.01973267,0.05249243,0.010003681,-0.034319345,-0.023254821,0.0019625498,0.033209592,-0.015176091,0.056498263,0.0041291295,-0.046049923,0.054690883,-0.021583585,-0.019928787,-0.010311507,-0.03155074,0.038876258,0.055084117,0.0006716143,-0.005959439,0.02702423,-0.0041947966,0.015374709,0.057063535,0.028639654
,0.069971144,0.019529812,-0.026227735,-0.083985895,-0.0041349265,0.009833876,-0.015811538,0.016993256,-0.010458223,0.040068664,0.009195164,-0.03924835,-0.007896623,-0.06261605,0.015779363,-0.018634042,-0.0013783163,0.016493134,-0.041971806,-0.039205268,0.020863583,-0.00169911,0.026609324,-0.07237093,0.07898098,-0.008871385,0.017599586,0.018514562,-0.01763139,0.00015460308,-0.03443664,0.026305566,0.0019577034,0.049758997,-0.014016935,0.01580608,-0.005885855,-0.014773614,0.008331391,0.011858725,0.047954902,0.016360788,0.040261615,-0.014324732,0.062151354,-0.037888777,0.02075746,0.039549813,-0.077434056,0.00096539775,-0.044017132,-0.012209571,0.034755055,-0.020098051,0.008095624,0.031291816,0.04792529,-0.008659437,0.01759492,0.009537845,-0.05313831,-0.010890252,-0.03342564,0.061369378,-0.031681072,-0.053262327,8.374469e-05,-0.027414132,-0.013404388,0.033906803,0.025408141,-0.035230264,0.030235829,-0.0014981066,0.023731904,0.029274339,0.047021322,0.025153603,-0.050763946,0.042003185,0.028869675,0.023947056,-0.045773767,-0.029348088,-0.04498305,0.03974547,0.021556387,0.032411546,-0.028107764,-0.01917967,0.020117322,0.035401057,-0.087708965,0.028180089,-0.07627729,0.010020432,-0.055026818,0.013467507,0.05156387,0.030606749,-0.012557438,0.0075980667,-0.049580842,0.025251655,0.011958476,-0.05784425,-0.00688397,-0.026897762,-0.0073929257,-0.082809925,0.0707716,0.0044888635,-0.023634167,0.00959699,0.027249858,0.009045479,-0.008601681,0.007323367,0.014609572,0.007073427,0.0055342577,-0.047172364,-0.023501316,-0.03593993,-0.022744065,-0.031178312,0.007601522,0.01038201,-0.040641543,-0.02084411,-0.04739785,0.0016813428,-0.022378212,-0.024991153,-0.019224035,0.033300195,0.04363394,-0.0072962623,0.0044990415,0.00530943,-0.0061862995,-0.1226422,-0.0048183375,-0.010383665,-0.043834127,-0.010673082,0.00016926302,0.026351877,-0.03451933,-0.017912712,-0.06287377,-0.00329357,0.056648213,-0.005951308,-0.017310314,0.06057505,0.00529039,0.04522765,0.009986563,0.09290384,0.0046436884,-0.027
085476,-0.0051616537,0.014926508,-0.027059292,0.07819409,0.0018491915,-0.034066174,-0.04200668,0.017987153,-0.054097146,0.0263208,-0.030290576,0.012135319,-0.053635724,0.0040904377,-0.06391213,-0.012962556,0.039401833,0.029892938,-0.010509396,-0.09667328,-0.004525119,-0.0660734,0.005074788,-0.0043580704,0.048569698,-0.029491736,-0.00813117,0.099913284,-0.02152916,-0.0046480033,-0.004279434,-0.022350302,0.07403285,2.6268553e-05,0.024700351,-0.070556544,-0.046257928,0.047623277,0.013440511,0.022684522,0.0105078975,0.029062217,0.036317576,0.012476447,-0.025555858,0.0043436335,0.006260482,-0.030046312,0.012665346,-0.060015686,-0.042867333,-0.043334395,-0.09350731,-0.015882127,-0.023036648,0.0035012013,0.019168707,-0.029792963,0.014690395,-0.03232301,0.04318316,-0.023454774,0.024906443,0.033632547,0.026205529,0.021056164,-0.014863617,0.03884084,0.019737227,0.0643725,-0.015622061,0.010209574,-0.042415053,-0.041623153,0.020822845,-0.020490937,-0.0542278,-0.0033205135,-0.041752372,-0.069488324,-0.016277319,-0.0044792043,-0.02016524,-0.03959827,-0.032634977,-0.0039365673,-0.0132405395,0.0067148125,0.075648956,-0.05606617,-0.06265819,-0.019359354,0.05813966,-0.01447109,-0.010593954,-0.00086784246,0.00957173,0.02843471,0.00845407,0.024766237,-0.017594881,-0.02089351,-0.023622723,-0.033868976,0.01189866,0.04348284,0.017560178,0.0044504236,0.0201572,-0.010445271,0.016996963,-0.063251264,0.036506347,0.014985517,-0.004923813,-0.019643096,0.004065921,-0.03441569,0.02174584,-0.022037273,-0.105745554,-0.017520802,0.024135107,-0.056571614,0.065653384,-0.11961944,-0.019004421,-0.048515763,-0.018267322,-0.02178645,-0.00048087785,-0.042244278,0.041203473,0.039137937,-0.028382456,0.0027469762,-0.035103243,-0.008536376,-0.022003518,0.013834031,0.04035347,-0.05127768,-0.021083988,-0.019288905,0.030957388,0.03837377,-0.0003459004,-0.043197013,0.059090964,0.03584024,-0.009635979,0.049144205,-0.113005035,0.012198436,0.0030250824,-0.0005766731,0.010016404,-0.004630926,0.036304604,0.030682925,-0
.028248072,0.0053004674,-0.028463472,-0.045950726,-0.016214147,0.02234844,-0.024365503,0.0045087263,0.0015641076,-0.046219032,0.019860927,0.011021814,-0.024108216,-0.048900776,0.012885111,-0.0022583513,-0.030102832,-0.016490621,0.024889058,-0.0009473834,0.015075038,-0.040798195,-0.005642347,-0.0029682147,-0.050329093,0.0009567131,0.007919075,0.01719906,-0.018685095,-0.016243592,0.010302834,4.1979074e-06,-0.042400364,0.055864133,0.033395868,-0.017874744,0.0013070442,-0.05331383,-0.10789571,0.0074728676,0.03525642,-0.07436872,0.04979144,0.046753135,0.0027637088,0.014162893,-0.026069263,0.06226656,-0.056384422,0.008216318,-0.02018645,-0.007397228,0.0074180462,0.035483476,-0.01882623,-0.02706421,0.04596009,-0.013163229,-0.021003753,0.037058793,0.052453898,0.013129776,0.015402059,-0.048313417,0.023352273,0.009391176,-0.044023603,-0.0107533,0.054881006,-0.019277383,0.02055352,-0.030710667,-0.02347742,0.0092705265,-0.047558293,-0.024285497,-0.03519891,-0.0038767713,-0.005330039,-0.026968258,0.06881978,0.06537581,-0.023353418,0.01331013,0.045053896,0.032502707,0.065926,0.0009946732,0.051750924,0.005718337,-0.0038732293,-0.029579317,-0.06977859,-0.0048092776,-0.025378013,0.023722455,0.032475006,-0.031788938,-0.00917764,0.0056064464,-0.016738426,0.021969007,0.012666437,-0.046921335,-0.02513667,-0.028311022,-0.009224157,0.05264038,-0.026426777,0.02599612,-0.018745475,-0.015264339,-0.013577108,0.0011754846,0.020499794,0.01423578,-0.015937831,-0.034813095,0.06295408,-0.033208452,0.041733917,0.0022288205,-0.0036853347,-0.015074669,-0.00813031,-0.004992453,0.010502773,0.017247686,0.03162546,-0.006212466,-0.06321386,0.022924462,0.03354761,-0.02742972,-0.018287206,-0.05058406,-0.02762529,0.014693771,-0.009422438,-0.0113650765,0.04500726,-0.009418481,0.023177318,-0.0394831,0.07899207,0.010970399,0.01519068,0.060208563,-0.014248415,-0.027108915,-0.055970594,-0.05615517,0.00082430604,-0.02946103,0.012972071,-0.034580585,-0.092063755,0.023562009,0.09187191,0.03979375,-0.048233856,0.0891
921,0.0054705814,-0.07132956,-0.03294508,0.015985591,-0.06979576,-0.008607954,0.03748406,0.018775256,-0.00055046624,-0.0018972756,0.010640039,-0.039262787,0.045647603,-0.052634962,-0.04485457,0.059673585,0.005487001,0.005677175,-0.040526956,-0.0023886457,-0.051557075,-0.026969707,-0.020169057,0.020184118,-0.06750348,0.014797761,0.043389246,0.022667736,0.012956063,0.056346934,0.038232267,0.02334661,-0.002965094,0.053386245,-0.016282998,-0.08433834,-0.005240998,0.020763554,0.0041468525,0.011248255,0.013354228,0.0062226793,0.01238483,-0.042322755,0.017076539,-0.024617095,-0.03331688,-0.001430632,0.05623171,0.0073584137,0.013339925,-0.0041607106,0.015201854,0.029444456,-0.039367896,0.032675862,0.016636375,0.04101005,0.0073330533,0.03937178,-0.01699229,0.026922127,-0.00465699,0.014691186,0.07985071,-0.045738634,-0.040622048,0.040370528,-0.0070402357,-0.048223954,0.048428483,-0.013764062,0.02645368,0.030109879,-0.01834218,-0.0045400057,0.036011115,-0.010352046,-0.068165384,0.037795525,-0.036501475,0.020713413,-1.2293508e-05,-0.00038850267,0.073334076,0.01821627,0.003559663,0.017506005,-0.02564981,0.039007656,-0.026543219,0.018282859,-0.038226757,-0.04996024,0.01010447,-0.012900636,-0.020180488,0.042488355,0.0135185765,-0.0083626835,-0.019743606,0.025633369,0.035687257,-0.053833067,-0.053783447,0.007418253,-0.04581871,0.032362275,0.050387084,-0.010103674,-0.051880397,0.010476682,0.015898407,0.04970622,-0.04664034,0.036457486,-0.017625386,-0.0058598807,-0.011529857,0.018154921,0.013366902,0.0021690137]::vector(768)),\n", + " ('Coffee Maker', 'Brews coffee in under 5 minutes', 99.99, 'Kitchen Appliances', 20, 'SKU12347', 'https://example.com/coffeemaker.jpg', '{\"category\" : \"Kitchen Appliances\", \"name\" : \"Coffee Maker\", \"description\" : \"Brews coffee in under 5 minutes\"}', 
ARRAY[0.025002815,-0.052869678,-0.010500825,-0.024296444,0.049798742,0.043427017,-0.01307104,0.0077243242,0.022190414,0.037746448,0.029453197,-0.009484218,0.0028156517,-0.03531512,-0.012121426,0.0091221025,0.025652027,-0.009445565,-0.02820549,-0.04105274,-0.0010839493,0.015024874,0.053036522,-0.018628811,0.014746092,-0.049109433,0.026801802,-0.0070828577,-0.02369395,0.010975214,-0.03531074,0.04859645,-0.004710616,-0.018579654,-0.0076328423,-0.030808363,-0.012824788,0.03848257,0.014652247,0.058704656,0.00325119,-0.007205416,-0.04686223,-0.028575234,0.02045449,-0.008556303,-0.009746742,0.018289749,0.00093424425,-0.046003163,0.0039943205,-0.023993168,0.05866197,0.008093339,-0.00565744,-0.008198263,-0.001283407,-0.0007927462,-0.018114842,-0.008134085,-0.00014443924,0.021404255,-0.014830747,0.050932012,-0.032427747,-0.027500387,-0.020814912,0.025367612,0.061494272,-0.028271751,-0.002093295,-0.005629965,0.054627255,-0.062579386,-0.01051155,-0.06421958,-0.012094066,0.06576773,0.05998704,0.10272862,-0.021875817,-0.062225047,0.022178214,0.010618126,-0.05723891,0.040955715,-0.038523626,0.021909224,0.018677043,0.056335997,-0.01599579,0.015702266,0.025712736,-0.024550503,0.041618552,0.031751215,-0.0013378685,-0.042116627,-0.033073347,-0.011056941,0.022297822,-0.052519917,-0.06455736,0.030026494,0.04122688,0.0435459,-0.021909805,0.025392938,-0.05491582,0.022167888,-0.06104317,0.021199005,0.021531114,0.0003258208,0.051008765,-0.0056826724,0.0019850046,0.08186525,0.014742098,0.01913513,-0.026228607,-0.023587128,0.041640177,0.016765678,0.028365733,0.057187237,0.011515794,0.0734812,0.048084594,-0.0028821004,0.00025123838,-0.010272774,0.025670059,-0.049766205,0.0862307,0.07104121,0.008422137,0.026603732,0.06897059,-0.0013259795,-0.003537648,-0.016978277,-0.03289158,0.019160148,-0.030429484,0.03210423,-0.0025404708,-0.052619252,0.0020272017,-0.014941184,0.0026864705,0.012819193,-0.043763664,0.057997666,0.043563023,0.03174006,-0.04444913,0.0060016355,-0.029776296,-0.017748147,0.0361853
95,-0.0014833601,-0.017309692,0.04368944,0.020283954,-0.0160715,0.03019354,0.02680017,0.013467745,0.010598811,-0.009857402,0.0035379697,-0.04074403,-0.015414817,0.016311716,-0.0669727,0.0034562463,-0.024640094,-0.023524309,0.0028607736,-0.06249814,-0.058054965,-0.007223816,0.012088017,0.029124737,-0.030978883,0.07969112,-0.05076358,0.015344627,-0.00898595,-0.0097088795,-0.019155432,-0.035673082,0.027780814,0.006400352,0.055502266,-0.046420977,0.03919276,0.040964182,-0.024075434,-0.014520242,0.07941375,0.023109328,0.030869437,0.06598536,0.00059927267,0.076354064,-0.048273984,0.0025508753,0.0066330666,-0.070879966,0.03704847,-0.0650441,0.01176703,0.033744898,0.0400285,0.024317512,0.028281165,-0.008897873,-0.029537052,-0.0047060223,0.026686963,-0.07052627,0.023556747,-0.056385886,0.0714133,0.007949809,0.011887155,0.0029032454,-0.015065537,0.011513513,0.050219424,0.010533179,-0.009971522,0.03655571,-0.0066924663,-0.012303563,0.016773308,0.013691093,0.025839401,-0.044451136,0.049260832,0.05713467,0.013278825,-0.022100078,-0.0017930771,-0.016181005,0.0217466,-0.02600776,0.046996534,-0.022629611,-0.023503313,0.0074507482,0.0134722935,-0.04945182,0.022608835,-0.026130896,-0.01177188,-0.027667308,0.026118958,0.0025001818,0.021639917,-0.015105975,0.02968347,-0.043928802,0.03762012,0.019912925,-0.004347233,-0.006596504,0.016333994,-0.025137693,-0.01686705,0.04786869,0.034643404,0.011117003,-0.011134983,-0.0074818125,-0.006335571,0.022040822,-0.006491301,0.0054816976,0.038022403,0.016072717,-0.06609374,-0.03203102,-0.059326455,-0.04408214,-0.03787348,0.014894112,-0.02038928,-0.044823527,-0.015866352,-0.047105137,0.002020473,-0.04468357,0.018793538,-0.029475007,0.06967502,0.04684481,0.048074055,-0.0010090554,-0.0027273456,0.047790546,-0.030050496,0.023022242,-0.028264726,0.03571066,-0.0164874,-0.019399788,0.0076415916,-0.0060172956,-0.010469042,-0.045296766,-0.0071801674,0.032818798,0.034934863,-0.0737483,0.0327411,-0.006032433,0.05928009,-0.00927453,0.07627373,0.0050010816,-0.0
48511107,-0.0037969523,-0.007150538,-0.010152546,0.025746513,-0.02757783,-0.049112115,-0.029450508,0.037618097,-0.04765299,0.021502782,0.04031621,-0.021789404,-0.03477437,-0.0029428764,-0.04645585,0.015724704,0.0061205924,0.027327916,-0.016831782,-0.07413835,-0.009106179,-0.005994898,0.0015746661,-0.0066348854,0.08860898,0.026653405,-0.010490873,0.01737892,-0.036203787,0.0019658727,-0.05349199,-0.031604912,0.059320047,-0.0035595773,-0.013159466,-0.043662973,-0.000936755,0.037883844,1.1725969e-05,0.008455511,0.028007427,0.026448535,0.03587197,0.034501214,-0.020195212,-0.036874935,0.008322776,-0.038275808,0.014955824,-0.066956565,-0.03433901,-0.043297864,-0.07503335,-0.037108134,0.032691672,-0.05912909,0.023559488,-0.023238983,0.0012042256,0.0074822125,0.0058873207,-0.021845229,0.0054280413,0.05752058,-0.026954507,0.026175871,0.012301664,0.06307251,0.07353519,0.011740042,-0.012562488,-0.025707787,0.011014364,-0.064245604,-0.018075097,-0.04286179,-0.06992585,-0.031043975,-0.0022823277,-0.05855018,0.015864456,0.00024379989,0.0070141326,-0.00035948,-0.023150876,-0.063177474,0.008194795,0.023019124,0.014603101,-0.06850171,-0.07586402,-0.029384725,0.09732399,-0.023403296,0.00983274,0.00043465907,-0.037277438,0.060318034,-0.010698135,-0.0012939094,-0.015873678,-0.006272459,0.0014064384,-0.041425075,-0.021238888,0.021737115,0.030599548,0.043125883,0.01929081,-0.0011234619,-0.031159677,-0.05745639,0.0146679375,0.046521254,-0.01835481,-0.033141162,0.00036415283,-0.06466151,0.043580752,0.011921412,-0.07292401,-0.047980927,0.02159395,-0.023352068,0.0425091,-0.09635663,0.0060955146,-0.06484201,-0.029811602,-0.026076958,-0.014945281,-0.04334233,-0.00242451,0.047840517,-0.02103297,-0.0191666,-0.0074735563,1.0544848e-05,-0.028074,-0.037163526,0.030064873,-0.02934737,0.050285384,-0.023986174,0.025914317,0.10199452,-0.021887174,-0.0066847154,-0.023618985,0.03283886,0.045797225,0.047762897,-0.07030183,0.026901271,0.008702326,0.017019885,0.033345792,-0.03833666,0.031567782,-0.013102635,
-0.009532979,0.025451964,-0.021708276,-0.023218581,-0.07980661,0.03028782,-0.021675726,0.03096571,-0.018742265,-0.04427001,0.009433704,0.03455316,-0.035231985,0.04002238,0.012793141,0.025124295,-0.04512409,-0.06486318,0.019942157,-0.030111039,-0.0069209165,0.0015545462,0.028818183,0.0014206765,-0.032698274,0.008883163,0.058960456,-0.00906729,0.0298577,-0.0070162034,-0.014469902,-0.0032146918,-0.04448409,0.03293327,0.040138587,0.01842061,0.0055912337,-0.03388838,-0.071546026,0.02821449,0.033089,-0.04839594,0.016159212,0.08211776,-0.08987595,0.036964364,-0.051373526,0.1035708,-0.053108595,-0.01896186,0.01644011,-0.012502358,0.008263514,0.04065409,-0.015298684,0.0011162056,0.04276282,0.0027434586,0.0324373,0.03511016,0.02446925,0.002442109,0.049384676,-0.05747281,-0.0020478321,-0.03639974,0.011938583,-0.031114291,0.03284646,-0.03238849,0.08670559,-0.07415254,-0.036738325,-0.025126172,-0.045095183,-0.015307702,-0.06554373,-0.05546525,0.005472855,-0.006981692,0.04587679,0.111925036,0.013912294,0.014268016,0.058842134,-0.011192024,0.034922387,0.012045642,0.008008024,-0.014226386,0.06913233,-0.04700873,-0.06164794,-0.0024386728,0.043209903,0.051432677,-0.017323477,0.013788927,0.012737198,0.06472892,-0.070449375,0.005222667,0.050599333,0.0015403829,0.015714316,-0.008632714,0.014941663,0.06433311,-0.021354778,-0.0071928906,-0.028242689,0.018915592,0.021451298,0.0063637616,0.0019523413,-0.017883593,0.028570741,-0.016318232,0.053636383,-0.028484613,-0.006531752,0.022900375,0.023723338,-0.024363475,-0.015181002,0.024642847,-0.002409233,-0.0001194501,0.013567875,-0.046026736,-0.016705032,-0.013025837,0.020370122,-0.027258568,-0.04735096,0.011894463,0.0019317217,-0.0031460563,0.040866848,0.00464604,0.03964947,0.027275842,-0.0030081465,-0.008669969,0.0462421,0.010375526,-0.024637504,0.08480695,-0.02768799,-0.005021901,-0.009944692,0.015040328,-0.0051919715,-0.043738216,0.054622557,-0.0116185825,-0.044851393,-0.01769878,0.06967592,-0.026938388,0.0030814619,0.07516173,-0.022243993,-
0.09390373,-0.056307606,0.011178256,-0.058882743,0.016906237,0.010931337,0.011277608,-0.03310829,0.008875099,-0.017342865,-0.049926963,-0.0021014255,-0.019715691,-0.024091842,0.029629463,-0.06452303,0.009643791,-0.025999011,-0.017722748,-0.09347366,-0.019748896,-0.011190205,-0.0044534663,-0.04336357,-0.01312215,0.056558847,-0.022783643,0.0004763564,0.04152026,-0.03813543,0.0038315274,0.021157283,-0.007934057,0.0004752217,-0.057082873,-0.011285772,-0.014152046,0.03181829,0.033805694,0.04453719,-0.02024123,-0.0038247174,-0.0262423,0.007036252,-0.012817323,-0.025822328,0.06599188,0.067939,-0.022174655,-0.022773167,9.6714546e-05,-0.017627345,0.08549309,-0.06266334,-0.00575442,-0.011873023,-0.07250961,0.0056728884,0.017012162,-0.025071641,0.022021066,0.030550413,-0.010627088,0.050028834,-0.01721913,-0.050976366,-0.024867795,0.011782799,-0.075504154,-0.004392594,-0.01807583,0.031157117,0.030725744,-0.014750008,0.005684259,0.047403537,-0.08811708,0.007985649,0.043377616,-0.037903026,0.029741386,-0.0011720062,-0.010578729,0.051289707,-0.024345556,0.017949736,0.02636295,-0.059689533,0.06373776,-0.049072567,0.013506145,-0.040476285,-0.02940512,-0.023568999,-0.00035632766,0.056101788,0.061561547,0.03079068,-0.02166795,0.009211557,0.0030255727,-0.0036865661,0.023821775,0.0015869564,0.0064414316,-0.057368714,0.061502002,0.023947174,0.0046180966,-0.05202509,0.002360597,0.03557417,0.036739,-0.03005605,0.047780115,0.025282156,0.034349978,0.034781702,0.0276351,-0.040908,0.081558466]::vector(768)),\n", + " ('Bluetooth Headphones', 'Noise cancelling, over the ear headphones', 250.00, 'Accessories', 5, 'SKU12348', 'https://example.com/headphones.jpg', '{\"category\" : \"Accessories\", \"name\" : \"Bluetooth Headphones\", \"description\" : \"Noise cancelling, over the ear headphones\"}', 
ARRAY[0.022783848,-0.057248034,-0.047374193,-0.04242414,0.049324054,0.0077371066,0.017048897,0.00500827,0.008471851,0.010170231,0.054357704,0.018568166,-0.024179503,0.026519066,0.026404649,-0.06330503,0.014405935,-0.015520485,0.0052459002,-0.0398403,0.0026082278,-0.026374431,0.020055598,-0.009738811,0.013321584,-0.033184614,0.034118295,-0.0011876881,-0.04513898,0.04878162,-0.0725106,0.018109042,-0.075869314,-0.023766529,0.015067321,-0.019572936,0.024169574,-0.01577634,-0.048197363,0.049358875,-0.030935159,-0.0363981,-0.04534119,-0.044748895,-0.004167742,-0.02121328,-0.052715167,0.0006209187,0.036595955,-0.085123576,0.052309636,-0.01926014,0.00049565616,-0.0057477825,0.010993081,-0.06675727,0.0037074706,-0.033420403,-0.052601676,0.023439946,-0.01880516,-0.009576131,-0.0114066675,0.10504714,0.00022831495,0.029810086,-0.0044366047,0.043377023,0.06093195,-0.004545408,0.013371212,-0.029174658,0.06625106,-0.0077476054,-0.0163617,-0.056035727,-0.024698364,0.06076837,0.020102862,0.038081013,-0.018504761,-0.027918378,0.03942784,0.004596525,-0.057653908,0.034515597,0.010063118,0.04525672,0.023651283,0.03596632,-0.0378574,-0.013078957,0.021554954,-0.0606351,-0.007272484,0.044470455,-0.015513987,-0.018171282,-0.014020262,-0.040379126,-0.032836802,-0.055859733,-0.05644243,-0.001610613,-0.05527219,-0.00052593346,-0.00546389,0.02911079,-0.0037673921,0.036246333,-0.057133533,0.043779045,-0.0028422247,-0.044305976,0.05993566,-0.005543668,-0.0015800337,0.07515586,-0.00020748413,0.03876171,0.026035579,0.012980581,0.056657698,0.020252425,0.029382393,0.011205804,0.039896134,0.04349186,0.08402962,-0.0031059172,-0.022395832,-0.023471512,0.029480197,0.0038065156,0.07106566,0.07560159,0.019708911,0.0063190344,0.06826459,0.05426478,-0.016353253,-0.016603524,0.035430502,0.01285351,-0.044608854,0.06445639,0.027575186,-0.020047447,0.07155171,-0.024042875,0.007684551,-0.057774883,-0.05863421,0.04027459,0.034241315,0.029786138,-0.011771758,-0.008067332,0.005154275,0.017256541,0.012795448,0.036120
6,0.046198364,0.007581977,-0.0643159,-0.032997373,0.025989803,0.039828006,0.00950064,0.043332074,0.016609278,0.034839373,-0.022875424,-0.028605282,-0.017703732,-0.06238004,0.010994231,-0.0007306017,-0.034711856,-0.0440203,-0.025970237,-0.04595589,0.030582627,0.0073314123,-0.017986864,-0.055571377,0.082270294,-0.018736921,-0.0012149982,-0.0060279733,0.0044796504,0.025173035,-0.037219252,0.00027956237,-0.010430433,0.02825617,-0.046855696,0.018841878,0.0435598,0.005803966,0.0019149927,0.092197396,0.022937872,-0.0033373323,0.072473325,-0.014439769,0.047117453,-0.08000118,-0.012863106,0.0260884,-0.04135028,0.0070068296,-0.07510927,0.03672727,0.033531025,0.042364623,-0.019229556,0.0048453975,0.031276144,0.014006409,0.016036421,-0.017694592,-0.036794797,0.014908425,0.030831292,0.03190712,-0.022060342,0.041704472,0.017002491,-0.06408182,0.03923344,-0.02587273,-0.017719302,-0.025430005,0.06814103,-0.009046621,0.033220492,-0.033640996,-0.02523642,0.048086986,-0.035158273,0.048114188,0.043751266,0.01995209,-0.0295469,-0.020247698,-0.053099316,0.032099206,-0.045260355,0.0326798,-0.0043251985,-0.052964494,0.07017924,-0.0037189184,-0.03395965,0.040903587,-0.060891,-0.010537573,-0.030650055,-0.029651405,0.013975478,0.007255845,-0.010439494,-0.011794211,-0.05466926,0.024609366,-0.017408509,-0.05243266,-0.020957882,0.037831362,0.0216147,-0.035116594,0.03829302,-0.016048789,-0.035066966,-0.013764898,0.00042713518,0.030633073,-0.008326726,-0.015224956,0.012373721,0.0844943,0.0245434,-0.046264216,-0.011655971,-0.013199105,-0.05529712,0.006216126,0.038966317,0.04622981,-0.039118554,-0.044550307,-0.009771392,-0.006652356,-0.023040479,0.010476257,-0.004093151,0.008969803,0.010324751,-0.022387082,0.023577597,0.019100022,0.008391375,-0.07391311,-0.02210422,0.021720598,-0.0109519595,-0.0820701,0.022086475,-0.003670014,0.0019491176,-0.053155318,-0.022906458,0.0148452455,0.015515676,0.019605495,-0.02868708,-0.01828674,-0.0005499542,0.06639364,-0.01821442,0.09175476,-0.0016622626,-0.059729476,-
0.019477114,0.025505545,-0.034742665,0.028956799,-0.019135797,-0.016046764,-0.03779796,0.06325585,-0.04046284,-0.0065921973,-0.0019740656,0.053527426,-0.06304376,-0.035805233,-0.04792203,-0.0012729234,0.048093352,0.007456611,-0.058022104,-0.07442454,0.012629627,-0.027595298,0.0021199721,-0.027464667,0.02698153,0.00060683774,0.044545636,0.06083593,-0.0031620082,-0.025901018,-0.034706157,0.013555886,0.042545,0.056980383,0.009854132,-0.06190446,-0.034308147,0.0043845526,0.017239122,-0.031214224,-0.010807414,0.026710719,0.022394834,-0.009421089,-0.04236166,0.022885358,0.01318956,-0.019174583,-0.0026612883,0.010784672,-0.010333064,-0.043234736,-0.054500565,-0.027753199,-0.022639737,-0.03062474,0.008183766,-0.017117208,0.03024305,-0.03615811,-0.01150264,-0.03863528,0.04852956,0.024548976,-0.012997513,-0.0041008275,0.03406041,-0.0070994645,0.072934166,0.02805505,-0.030694276,-0.035828616,-0.017640414,-0.03957751,0.06840472,0.0046152286,-0.020437988,-0.025648775,-0.083415866,-0.04167123,-0.035016168,-0.015291769,0.009293348,0.04628708,-0.014721913,-0.0033228637,0.04403616,0.061276685,0.037830554,-0.041214965,-0.084479295,-0.0012414041,0.030978376,-0.017235488,0.04445431,0.05231969,-0.0008037167,0.045372415,0.02067265,0.024952972,-0.033815585,-0.03739797,0.034983158,-0.016312862,0.017926387,-0.02016297,-0.019343764,0.017820694,-0.011671569,0.02410841,-0.042012513,-0.03900872,0.032663334,0.011938514,-0.029834026,0.047740217,-0.0058686035,-0.046729274,0.05985927,0.007610642,-0.060446266,-0.04216537,0.017497085,-0.06986214,0.076023735,-0.10476386,-0.020937927,-0.073560745,-0.014322972,-0.048601817,-0.0056885225,-0.03637434,0.04715089,0.054749545,0.014689732,0.006048463,0.046543427,-0.017363597,-0.03678888,-0.08802858,0.063708976,0.021423126,0.04030153,-0.036243204,0.036450744,0.024569608,0.016401349,-0.022465378,-0.0034262848,0.060547307,-0.014745138,-0.020591581,-0.0054737274,-0.0074623367,0.06138278,-0.016604895,0.0032445828,0.009028142,0.002864045,0.001341044,-0.03825005,0.0
3237135,-0.009647875,-0.0470159,-0.024240978,0.017859152,0.010279892,-0.014414872,-0.017152937,0.020384172,0.008366546,-0.003495199,-0.024638942,-0.031768326,0.06240018,-0.0067493794,-0.04322142,-0.0030645356,-0.0027114467,-0.0072583677,0.06152745,-0.05525731,0.01201016,0.034348775,-0.032004267,0.027236925,0.05926736,-0.010569189,-0.023563573,0.0018119658,0.04231199,0.01966649,-0.014960187,0.029649874,0.01606933,0.0033748902,0.021692606,0.00794783,-0.113133654,0.012736659,0.03742399,-0.010987754,0.02547777,0.026347551,-0.09020402,0.009588993,-0.043276373,0.106708415,-0.049185734,0.007007848,0.030148245,-0.026434064,-0.017702276,0.0007948643,0.026716268,0.013030543,0.013651695,-0.019745413,-0.0087912055,0.0046337135,0.038207695,0.0059329793,0.016351476,-0.069271706,-0.006662047,0.028512167,0.0038343759,-0.00027066944,0.042824432,-0.038911343,0.012483791,-0.06324616,-0.0023198558,-0.0028892683,-0.043326154,-0.035926916,-0.006348816,-0.025913015,-0.015930604,0.040185526,0.044628017,0.039083507,0.009474702,0.017115341,0.05052131,-0.01357451,0.020331299,0.038159154,0.0349774,0.015846666,0.022699736,-0.022343196,-0.056707054,-0.0010885954,0.020071063,-0.000391925,0.06397024,-0.024627347,-0.005184313,0.05034518,0.009061781,0.034097236,-0.007981921,-0.03801412,-0.0028578758,0.013567372,-0.008190868,0.033633735,-0.053976685,-0.025468381,-0.044378527,0.032747604,0.036202736,0.038062613,0.014995585,0.0036792904,-0.01603846,-0.047275733,0.066113465,0.0045884387,0.08915791,-0.0068142917,-0.0064188805,-0.060516927,-0.016080644,0.041549493,-0.008397882,-0.0071816393,0.0064753946,-0.0017467311,-0.019128935,0.0164788,0.022168875,0.011003241,-0.026863558,-0.05437178,-0.032724023,-0.0042122444,0.010392475,0.0042387135,0.04948556,-0.013747793,0.051330764,-0.0050607547,0.054571416,0.025556272,-0.00022029632,0.047628347,0.01427685,-0.020254403,-0.03590239,0.011610469,0.041846078,-0.02470694,-0.013697807,-0.021193847,-0.04341633,-0.0041078446,0.053439837,-0.021625757,-0.037942924,0.077743
85,0.005912317,-0.0929516,-0.025328774,0.025199909,-0.041145753,0.017296704,-0.0050417483,0.012186051,0.0024183579,-0.025558045,-0.0051468383,-0.07548276,0.028603543,-0.04549798,0.007635448,0.010916566,-0.029269122,0.01546215,-0.024502348,-0.021702306,-0.025917016,-0.016031386,-0.0012059321,0.031981774,-0.056502126,0.025166377,0.04160211,-0.020680273,0.010293909,0.029529357,-0.01568588,0.026115898,-0.01032236,0.02089118,-0.01709118,-0.0597839,-0.029326,0.045068808,0.00761455,0.034416553,0.022160128,-0.025166225,-0.04248117,-0.029465536,0.027829373,0.006342403,-0.05032602,0.040032476,0.07705231,-0.01583979,-0.0049204687,-0.0140532,0.022657644,0.05293866,-0.009608256,-0.005003684,-0.02478457,0.029608795,-0.022698086,0.003895031,-0.0039730286,0.023749523,0.07514458,-0.036099195,0.04289709,-0.02329434,-0.06419562,0.037709154,0.0004289863,-0.02686404,0.0032855049,-0.030955583,0.018836787,0.033646755,0.022193655,-0.028475504,0.00394302,-0.05765806,-0.062945105,0.024937302,-0.06828151,0.016094193,-0.036405172,-0.016450962,0.04907368,-0.05975235,-0.04858766,0.07675482,-0.06289323,0.052024625,0.018600732,0.038572676,-0.0011811757,-0.07612922,0.03844793,-0.015206284,0.05163554,0.046980042,0.023522004,0.00037627618,0.011324654,-0.028600419,6.0430884e-05,0.00431597,-0.023766082,-0.015001608,-0.018692184,0.08730754,0.032889076,-0.018612336,-0.019428827,-0.002722986,-0.020110032,0.04016962,-0.043657966,0.044247244,0.019661218,0.042629678,0.016911589,0.038489193,-0.0036892071,0.015036206]::vector(768)),\n", + " ('Backpack', 'Waterproof backpack with laptop compartment', 59.99, 'Accessories', 30, 'SKU12349', 'https://example.com/backpack.jpg', '{\"category\" : \"Accessories\", \"name\" : \"Backpack\", \"description\" : \"Waterproof backpack with laptop compartment\"}', 
ARRAY[-0.0028279827,-0.02903348,-0.02541054,-0.025740657,0.06572692,-0.01105207,-0.018005589,0.014476618,0.0039552255,0.04976717,0.034852527,0.018194634,-0.010718678,0.012003344,-0.008418802,-0.026018273,0.029329967,-0.016163627,0.009272989,-0.03639675,0.011046671,-0.008078595,0.023365447,-0.0033083789,0.020028763,-0.025491415,0.033595297,-0.0116388025,-0.057485484,0.06268812,-0.05302806,0.033510745,-0.06083909,-0.03115934,-0.014793818,-0.028653687,-0.011399838,0.03950949,-0.03437827,-0.001663737,-0.01088612,-0.01894241,-0.055767413,-0.0044360803,0.043946534,0.012161133,0.03891473,0.001239441,0.009908146,-0.07272227,0.055397917,0.003453955,0.016562339,-0.041937787,0.05197343,-0.026436094,-0.025229415,-0.034988422,-0.02628748,0.022921052,0.013600747,-0.0020118777,-0.033795673,0.06700571,0.016018055,-0.024256106,-0.02621731,0.045516666,0.05339654,0.0040287147,-0.03260985,0.0014520925,0.064204894,-0.07453437,-0.05054596,-0.042698923,-0.010596,0.013536595,0.0057951836,0.02499754,-0.008574824,-0.0074555897,-0.03567392,-0.016175417,-0.048651025,0.051804803,0.032162882,0.015001442,-0.015329716,0.028219966,-0.031235777,-0.011996138,0.001956758,-0.057833184,-0.022306677,0.031238675,-0.006414606,-0.06930158,-0.017475452,-0.027142663,0.020731354,-0.02221535,0.031049741,0.02081393,-0.022421336,0.0264318,-0.009509332,0.03522677,-0.004379289,0.011600757,-0.022017384,0.010730822,-0.010784208,-0.032706123,0.011207074,-0.023580823,0.013793131,0.05083659,0.047280807,0.048402432,0.05347524,-0.01837716,0.005956893,0.038448945,0.056967188,0.0107236095,0.03256511,0.06276655,0.04472847,0.04416061,-0.010116117,-0.048367113,0.029135885,0.010681488,0.036315914,0.056885246,0.03745567,-0.045721106,0.060501557,0.07454113,-0.018330548,-0.0113306865,-0.011580698,0.020741342,0.020118712,0.08663372,-0.009871896,0.0153012,0.05436686,-0.032210644,-0.029824084,0.023739373,-0.024163425,0.025129095,-0.016016128,0.04870382,0.013377057,0.012678613,0.011070294,-0.0072714896,0.042209458,0.029714484,0.042831
365,0.032464053,-0.047759824,-0.032160178,-0.014084912,0.016434442,0.009782443,0.0013573115,-0.015243139,0.007621731,-0.037185922,-0.054615762,-0.008570435,-0.00029953485,-0.012346052,0.00016998274,-0.03163527,-0.0139267165,-0.07079747,-0.007061694,0.020720486,0.0025725542,0.019498186,-0.03700232,0.10145702,-0.004775887,-0.042089477,-0.023965659,-0.04021527,-0.0004672301,0.007410538,-0.0024715534,0.013863051,0.02261263,-0.027591249,0.020157337,0.012993745,-0.0067202765,-0.029478177,0.052134037,0.020799996,0.014809602,0.06626069,0.0069596902,0.063764,-0.04220143,-0.0040134583,0.007221788,0.014255095,0.059271786,-0.04741277,0.014235989,0.067689635,-0.005667792,0.03801926,0.0117749525,0.025480399,0.011015113,0.0037910545,0.00022392142,-0.044315543,0.010447604,0.010668871,0.0779741,-0.08010141,0.04994428,0.0024064495,-0.04755275,-0.0114773,0.014421721,-0.028229935,-0.06231835,0.05197635,-0.00798093,-0.0025467642,0.010583627,-0.017485484,0.048588324,-0.0008222008,0.033517472,0.007129084,0.0010124474,-0.05219366,0.017978905,-0.01833836,0.019664295,-0.008339645,0.013213594,8.404173e-05,-0.058585837,0.06634499,-0.032446846,-0.066239364,0.0011773852,-0.07504017,0.026009388,-0.026110237,-0.00089784985,0.004558591,-0.027107328,0.017480537,-0.0062587988,-0.008309775,0.024417007,0.022020336,-0.025295774,0.0089702625,0.026482984,0.008462929,-0.043885507,0.023143305,-0.012536918,-0.025114551,-0.030675266,-0.030063663,-0.004634334,-0.0024470752,-0.03869859,0.015594325,0.0131572345,0.0029243943,-0.046118148,-0.03834942,-0.022946607,-0.0071579637,-0.042097863,-0.01229437,0.024193348,-0.03535916,-0.05725744,-0.014191351,-0.034702625,-0.03553529,-0.0063754944,0.0024684118,0.042859882,0.013016258,-0.02985961,-0.0020391515,0.030625137,0.016144354,-0.049042817,0.024231678,-0.025589447,-0.05898161,-0.023193993,0.031626217,-0.028190944,0.017940147,-0.049932066,-0.04810013,0.047244985,0.1082508,0.001041191,-0.057233974,-0.006368648,-0.06945289,0.048442855,-0.021192377,0.10568124,0.053165488,
-0.0084766345,0.031292096,-0.009400329,-0.042162478,0.06982496,-0.014560452,-0.0073914286,-0.048956916,0.030368945,0.022202695,-0.0050742053,-0.012722453,-0.011377622,-0.051865157,-0.0070718606,-0.01745792,-0.02462795,0.030636197,0.030104883,-0.04482826,-0.11079195,-0.024324121,-0.002861835,-0.014245193,-0.020608244,0.03153579,-0.009367316,0.014898636,0.033479474,-0.015162162,0.01307384,-0.052216247,-0.025208864,0.014302212,0.023454865,0.030064361,-0.00028293114,-0.05237653,0.02271106,0.0057998034,0.021696828,0.0065965196,0.061783127,0.052609395,0.018527359,-0.012383652,0.036548115,6.0759903e-05,-0.027102679,0.0020538126,-0.026467739,-0.00931995,-0.056754645,-0.059189495,0.022508893,-0.037084196,0.008752761,0.011397571,-0.001640177,0.010061019,0.024978038,0.01750796,0.0017406448,0.0692028,0.042931892,0.008515072,-0.03527143,0.006649334,-0.0015101181,0.09099013,0.0423155,-0.060909722,-0.007118597,-0.0070489836,-0.05583034,0.035233498,-0.008949495,-0.021592604,-0.023997912,-0.030185444,-0.015039309,-0.07469254,-0.05510056,0.029319923,0.01650634,-0.0660325,-0.015404232,0.03715267,0.03294396,0.005133208,-0.071616374,-0.04183193,-0.039515678,0.06556278,-0.006204309,0.018765671,0.0087025305,0.04139539,0.039423864,-0.0096283825,-0.03788884,-0.030308004,0.016888767,0.033892095,-0.0046063373,0.036512673,0.046478424,0.030432703,-0.008351917,0.038958482,0.030963391,-0.0012744869,-0.068324916,0.035514664,0.029101191,0.019952206,-0.035990257,0.05016547,-0.0034300084,0.011099454,-0.01642832,-0.055300374,-0.07178654,0.023697836,-0.02809622,0.054089297,-0.1083301,-0.018408947,-0.075191386,-0.0048826155,-0.042217527,-0.069461025,-0.06703293,0.009000863,0.06276143,-0.0017238993,0.03036515,-0.009982445,0.055421855,-0.027764114,-0.05543302,0.022685751,0.022210898,0.049183954,-0.0047965907,0.055648796,0.011152965,-0.014035957,-0.02337775,-0.01123261,0.052066986,-0.006916061,0.03199984,-0.094863154,0.003547006,0.041498255,0.004490882,0.020994756,-0.07455022,0.036187306,-0.0051827626,-0.0
17956927,-0.00029976605,-0.044009093,0.0028350798,-0.052361596,0.07876513,-0.06365592,0.0017824164,0.017088404,-0.038679466,-0.008001763,-0.0013830748,-0.025812596,-0.0182766,-8.765931e-05,-0.0072022257,-0.046436142,-0.072371304,0.0057044053,-0.03468649,0.056389496,-0.020051511,0.031401794,0.0026272596,-0.045338016,-0.029466175,0.008883405,0.036455907,-0.012484258,0.0015844881,0.036832172,0.023578366,-0.043958467,0.00577308,0.055652507,-0.036696434,0.002894534,-0.032786682,-0.05258521,-0.006260205,0.030400572,-0.061743345,0.021158593,0.028482735,-0.061397683,-0.015825676,0.01941984,0.075950265,-0.11372872,-0.018362995,-0.010228874,-0.009783626,0.023449693,0.027557475,-0.0023083165,-0.0021188299,0.05987247,-0.00944442,-0.020868102,0.03482851,0.039515875,-0.026193311,0.023197955,-0.07931663,0.005395495,0.013140455,-0.061495673,0.0022219154,0.038023517,-0.05545234,0.020771723,-0.0067305462,-0.03169365,-0.021337083,0.019638145,-0.053754907,-0.035756346,-0.036120877,-0.05413345,-0.0077516357,0.03129875,0.016264724,-0.011121187,0.016678393,0.0678958,-0.014889522,-0.019517552,-0.0059457496,0.018003179,-0.0072531863,0.081852585,-0.030259738,-0.05358454,0.020454926,-0.009424692,0.10091245,-0.012819172,-0.011656013,0.031110896,0.08538375,-0.026021762,0.047623295,0.04384129,-0.05093276,0.014624959,0.026958883,-0.004577614,0.02551685,-0.019736024,0.0063903728,-0.024696782,-0.041850932,0.027209712,0.0050771283,-0.028201208,-0.03125501,-0.001541728,-0.06142714,0.054404832,-0.007287412,0.0626698,0.03180891,-0.015927717,-0.04500077,-0.0022995493,0.0124429,-0.015138294,-0.026622217,0.008842311,-0.010787062,0.0010311591,0.0013770667,0.039663706,-0.02192414,-0.019322718,-0.051264115,-0.011981459,-0.03414706,-0.006800422,-0.028382706,0.043155897,-0.007300542,0.02638807,-0.019196216,0.06930381,0.020622948,0.014042502,0.06754253,-0.043790415,0.015294639,-0.040941276,0.028382495,-0.013607999,-0.040120583,0.008768077,-0.0101868035,-0.060808867,-0.013499631,0.059239235,0.035230562,-0.019976
182,0.11870333,0.053272087,-0.08745547,-0.018802922,0.004555603,-0.028306624,0.0020639726,-0.018859716,0.026370116,0.0097041875,-0.0029847843,0.017317675,-0.0533067,0.038994376,-0.03322375,-0.052456018,0.050101582,-0.015041677,-0.03370439,-0.010739062,-0.039727744,-0.045931656,-0.08658831,0.05190126,0.055936754,-0.07664951,0.041408025,0.011245535,-0.012530026,0.024861438,0.016954603,0.017269976,0.06397909,-0.000105038154,0.036761504,0.006065827,-0.02139009,-0.025604198,0.010828613,0.023636553,0.04226646,0.041076142,0.025892248,-0.051934887,0.0029032188,0.040332098,-0.015436589,-0.057878137,0.005353198,0.064739525,-0.006427803,-0.024176747,0.011304507,0.03381613,0.08625095,-0.027353497,-0.039551895,-0.04934357,-0.016709028,0.024133967,0.00441431,-0.048314437,0.040782917,0.026620803,-0.02146332,0.030112874,-0.027528606,-0.016772546,0.005690125,-0.0047134855,-0.036793064,0.04092668,-0.02411072,0.023851473,0.07727627,-0.006492274,-0.0025583038,0.0017014288,-0.0541687,-0.010395329,0.031044465,-0.0536995,0.029957417,-0.040688735,-0.037072316,0.01663893,-0.04231374,-0.030213326,0.0061428403,-0.06634084,0.06036701,0.016658397,0.024410319,-0.03309207,-0.03735754,-0.04359427,-0.013476715,0.00078163255,0.033615876,0.022759296,-0.003551954,0.017715035,-0.0072518513,0.033236742,-0.0070533687,-0.05334901,-0.014660441,0.0025560227,0.03979979,-0.00433087,-0.018232862,-0.017161474,0.008870558,0.021989124,0.078787796,-0.009815632,0.022819351,0.020795409,0.028896132,-0.0061202813,0.012352534,-0.009014175,0.0024110335]::vector(768))\n", + " ```\n", + "
\n", + "\n", + "Here is how this table mapped to `PostgresVectorStore`:\n", + "\n", + "- **`id_column=\"product_id\"`**: ID column uniquely identifies each row in the products table.\n", + "\n", + "- **`content_column=\"description\"`**: The `description` column contains text descriptions of each product. This text is used by the `embedding_service` to create vectors that go in embedding_column and represent the semantic meaning of each description.\n", + "\n", + "- **`embedding_column=\"embed\"`**: The `embed` column stores the vectors created from the product descriptions. These vectors are used to find products with similar descriptions.\n", + "\n", + "- **`metadata_columns=[\"name\", \"category\", \"price_usd\", \"quantity\", \"sku\", \"image_url\"]`**: These columns are treated as metadata for each product. Metadata provides additional information about a product, such as its name, category, price, quantity available, SKU (Stock Keeping Unit), and an image URL. This information is useful for displaying product details in search results or for filtering and categorization.\n", + "\n", + "- **`metadata_json_column=\"metadata\"`**: The `metadata` column can store any additional information about the products in a flexible JSON format. 
This allows for storing varied and complex data that doesn't fit into the standard columns.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Set an existing table name\n", + "TABLE_NAME = \"products\"\n", + "# SCHEMA_NAME = \"my_schema\"\n", + "\n", + "# Initialize PostgresVectorStore\n", + "custom_store = await PostgresVectorStore.create(\n", + " engine=engine,\n", + " table_name=TABLE_NAME,\n", + " # schema_name=SCHEMA_NAME,\n", + " embedding_service=embedding,\n", + " # Connect to existing VectorStore by customizing below column names\n", + " id_column=\"product_id\",\n", + " content_column=\"description\",\n", + " embedding_column=\"embed\",\n", + " metadata_columns=[\"name\", \"category\", \"price_usd\", \"quantity\", \"sku\", \"image_url\"],\n", + " metadata_json_column=\"metadata\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note: \n", + "\n", + "1. Optional: If the `embed` column is newly created or has different dimensions than those supported by the embedding model, you must add the embeddings for the old records as a one-time step, like this: \n", + "\n", + " `ALTER TABLE products ADD COLUMN embed vector(768) DEFAULT NULL`\n", + "\n", + "1. For new records added via `VectorStore`, embeddings are automatically generated." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Hybrid Search with PostgresVectorStore\n", + "\n", + "A Hybrid Search combines multiple lookup strategies to provide more comprehensive and relevant search results. Specifically, it leverages both dense embedding vector search (for semantic similarity) and TSV (Text Search Vector) based keyword search (for lexical matching). 
This approach is particularly powerful for applications requiring efficient searching through customized text and metadata, especially when a specialized embedding model isn't feasible or necessary.\n", + "\n", + "By integrating both semantic and lexical capabilities, hybrid search helps overcome the limitations of each individual method:\n", + "* **Semantic Search**: Excellent for understanding the meaning of a query, even if the exact keywords aren't present. However, it can sometimes miss highly relevant documents that contain the precise keywords but have a slightly different semantic context.\n", + "* **Keyword Search**: Highly effective for finding documents with exact keyword matches and is generally fast. Its weakness lies in its inability to understand synonyms, misspellings, or conceptual relationships." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hybrid Search Config\n", + "\n", + "You can take advantage of hybrid search with PostgresVectorStore using the `HybridSearchConfig`.\n", + "\n", + "With a `HybridSearchConfig` provided, the `PostgresVectorStore` class can efficiently manage a hybrid search vector store using Cloud SQL Postgres as the backend, automatically handling the creation and population of the necessary TSV columns when possible." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Building the config\n", + "\n", + "Here are the parameters to the hybrid search config:\n", + "* **tsv_column:** The column name for TSV column. Default: `_tsv`\n", + "* **tsv_lang:** Value representing a supported language. 
Default: `pg_catalog.english`\n", + "* **fts_query:** If provided, this is used for secondary retrieval instead of the user-provided query.\n", + "* **fusion_function:** Determines how the results are merged; the default is equal-weighted sum ranking.\n", + "* **fusion_function_parameters:** Parameters for the fusion function\n", + "* **primary_top_k:** Max results fetched for primary retrieval. Default: `4`\n", + "* **secondary_top_k:** Max results fetched for secondary retrieval. Default: `4`\n", + "* **index_name:** Name of the index built on the `tsv_column`\n", + "* **index_type:** GIN or GIST. Default: `GIN`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here is an example `HybridSearchConfig`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_google_cloud_sql_pg import (\n", + " HybridSearchConfig,\n", + " reciprocal_rank_fusion,\n", + ")\n", + "\n", + "hybrid_search_config = HybridSearchConfig(\n", + " tsv_column=\"hybrid_description\",\n", + " tsv_lang=\"pg_catalog.english\",\n", + " fusion_function=reciprocal_rank_fusion,\n", + " fusion_function_parameters={\n", + " \"rrf_k\": 60,\n", + " \"fetch_top_k\": 10,\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Note:** In this case, we have set the fusion function to `reciprocal_rank_fusion`, but you can also use `weighted_sum_ranking`.\n", + "\n", + "Make sure to use the right fusion function parameters:\n", + "\n", + "`reciprocal_rank_fusion`:\n", + "* rrf_k: The RRF parameter k. Defaults to 60\n", + "* fetch_top_k: The number of documents to fetch after merging the results. Defaults to 4\n", + "\n", + "`weighted_sum_ranking`:\n", + "* primary_results_weight: The weight for the primary source's scores. Defaults to 0.5\n", + "* secondary_results_weight: The weight for the secondary source's scores. 
Defaults to 0.5\n", + "* fetch_top_k: The number of documents to fetch after merging the results. Defaults to 4\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Usage\n", + "\n", + "Let's assume we are using the previously mentioned table [`products`](#create-a-vector-store-using-existing-table), which stores product details for an eComm venture.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### With a new hybrid search table\n", + "To create a new postgres table with the tsv column, specify the hybrid search config during the initialization of the vector store.\n", + "\n", + "In this case, all the similarity searches will make use of hybrid search." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "TABLE_NAME = \"hybrid_search_products\"\n", + "VECTOR_SIZE = 768\n", + "\n", + "await engine.ainit_vectorstore_table(\n", + " table_name=TABLE_NAME,\n", + " # schema_name=SCHEMA_NAME,\n", + " vector_size=VECTOR_SIZE,\n", + " id_column=\"product_id\",\n", + " content_column=\"description\",\n", + " embedding_column=\"embed\",\n", + " metadata_columns=[\"name\", \"category\", \"price_usd\", \"quantity\", \"sku\", \"image_url\"],\n", + " metadata_json_column=\"metadata\",\n", + " hybrid_search_config=hybrid_search_config,\n", + " store_metadata=True,\n", + ")\n", + "\n", + "vs_hybrid = await PostgresVectorStore.create(\n", + " engine,\n", + " table_name=TABLE_NAME,\n", + " # schema_name=SCHEMA_NAME,\n", + " embedding_service=embedding,\n", + " # Connect to existing VectorStore by customizing below column names\n", + " id_column=\"product_id\",\n", + " content_column=\"description\",\n", + " embedding_column=\"embed\",\n", + " metadata_columns=[\"name\", \"category\", \"price_usd\", \"quantity\", \"sku\", \"image_url\"],\n", + " metadata_json_column=\"metadata\",\n", + " hybrid_search_config=hybrid_search_config,\n", + ")\n", + "\n", + "# Fetch 
documents from the previously created store to use as sample product data\n", + "docs = await custom_store.asimilarity_search(\"products\", k=5)\n", + "# Add data normally to the hybrid search vector store, which also populates the tsv_column with TSV values\n", + "await vs_hybrid.aadd_documents(docs)\n", + "\n", + "# Use hybrid search\n", + "hybrid_docs = await vs_hybrid.asimilarity_search(\"products\", k=5)\n", + "print(hybrid_docs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### With a pre-existing table\n", + "\n", + "If a hybrid search config is **NOT** provided during `init_vectorstore_table` while creating a table, the table will not contain a tsv_column. In this case, you can still take advantage of hybrid search using the `HybridSearchConfig`.\n", + "\n", + "Because the TSV column is not present in the table, the TSV vectors are created dynamically, on the fly, for hybrid search." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Set the existing table name\n", + "TABLE_NAME = \"products\"\n", + "# SCHEMA_NAME = \"my_schema\"\n", + "\n", + "hybrid_search_config = HybridSearchConfig(\n", + " tsv_lang=\"pg_catalog.english\",\n", + " fusion_function=reciprocal_rank_fusion,\n", + " fusion_function_parameters={\n", + " \"rrf_k\": 60,\n", + " \"fetch_top_k\": 10,\n", + " },\n", + ")\n", + "\n", + "# Initialize PostgresVectorStore with the hybrid search config\n", + "custom_hybrid_store = await PostgresVectorStore.create(\n", + " engine,\n", + " table_name=TABLE_NAME,\n", + " # schema_name=SCHEMA_NAME,\n", + " embedding_service=embedding,\n", + " # Connect to existing VectorStore by customizing below column names\n", + " id_column=\"product_id\",\n", + " content_column=\"description\",\n", + " embedding_column=\"embed\",\n", + " metadata_columns=[\"name\", \"category\", \"price_usd\", \"quantity\", \"sku\", \"image_url\"],\n", + " metadata_json_column=\"metadata\",\n", + " 
hybrid_search_config=hybrid_search_config,\n", + ")\n", + "\n", + "# Use hybrid search\n", + "hybrid_docs = await custom_hybrid_store.asimilarity_search(\"products\", k=5)\n", + "print(hybrid_docs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this case, all the similarity searches will make use of hybrid search." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Applying Hybrid Search to Specific Queries\n", + "\n", + "To use hybrid search only for certain queries, omit the configuration during initialization and pass it directly to the search method when needed." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Use hybrid search\n", + "hybrid_docs = await custom_store.asimilarity_search(\n", + " \"products\", k=5, hybrid_search_config=hybrid_search_config\n", + ")\n", + "print(hybrid_docs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hybrid Search Index\n", + "\n", + "Optionally, if you have created a Cloud SQL PG table with a tsv_column, you can create an index." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "await vs_hybrid.aapply_hybrid_search_index()" + ] } ], "metadata": { From 566d80b2015af2df22001f1f5f82e9bf2eabea26 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 20 Aug 2025 20:47:40 +0200 Subject: [PATCH 12/61] chore(config): migrate config .github/renovate.json5 (#314) Co-authored-by: Averi Kitsch --- .github/renovate.json5 | 75 +++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 38 deletions(-) diff --git a/.github/renovate.json5 b/.github/renovate.json5 index 995819bb..2cc3ed8f 100644 --- a/.github/renovate.json5 +++ b/.github/renovate.json5 @@ -1,49 +1,48 @@ { - "extends": [ - "config:base", // https://docs.renovatebot.com/presets-config/#configbase - ":semanticCommitTypeAll(chore)", // https://docs.renovatebot.com/presets-default/#semanticcommittypeallarg0 - ":ignoreUnstable", // https://docs.renovatebot.com/presets-default/#ignoreunstable - "group:allNonMajor", // https://docs.renovatebot.com/presets-group/#groupallnonmajor - ":separateMajorReleases", // https://docs.renovatebot.com/presets-default/#separatemajorreleases - ":prConcurrentLimitNone", // View complete backlog as PRs. https://docs.renovatebot.com/presets-default/#prconcurrentlimitnone - ":prHourlyLimitNone", // https://docs.renovatebot.com/presets-default/#prhourlylimitnone - ":preserveSemverRanges", + extends: [ + 'config:recommended', + ':semanticCommitTypeAll(chore)', + ':ignoreUnstable', + 'group:allNonMajor', + ':separateMajorReleases', + ':prConcurrentLimitNone', + ':prHourlyLimitNone', + ':preserveSemverRanges', ], - "ignorePaths": [".kokoro/**"], // Dependabot will make security updates - - // Give ecosystem time to catch up. - // npm allows maintainers to unpublish a release up to 3 days later. 
- // https://docs.renovatebot.com/configuration-options/#minimumreleaseage - "minimumReleaseAge": "3", - - // Create PRs, but do not update them without manual action. - // Reduces spurious retesting in repositories that have many PRs at a time. - // https://docs.renovatebot.com/configuration-options/#rebasewhen - "rebaseWhen": "never", - - // Organizational processes. - // https://docs.renovatebot.com/configuration-options/#dependencydashboardlabels - "dependencyDashboardLabels": [ - "type: process", + ignorePaths: [ + '.kokoro/**', ], - "packageRules": [ - + minimumReleaseAge: '3', + rebaseWhen: 'never', + dependencyDashboardLabels: [ + 'type: process', + ], + packageRules: [ { - "groupName": "GitHub Actions", - "matchManagers": ["github-actions"], - "pinDigests": true, + groupName: 'GitHub Actions', + matchManagers: [ + 'github-actions', + ], + pinDigests: true, }, - - // Python Specific { - "matchPackageNames": ["pytest"], - "matchUpdateTypes": ["minor", "major"] + matchPackageNames: [ + 'pytest', + ], + matchUpdateTypes: [ + 'minor', + 'major', + ], }, { - "groupName": "python-nonmajor", - "matchLanguages": ["python"], - "matchUpdateTypes": ["minor", "patch"], + groupName: 'python-nonmajor', + matchCategories: [ + 'python', + ], + matchUpdateTypes: [ + 'minor', + 'patch', + ], }, - ], } From 56188d2c244a2e828d0b91dbdea9d53c15547e4e Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 21 Aug 2025 23:09:51 +0200 Subject: [PATCH 13/61] chore(deps): update googleapis/langchain-google-alloydb-pg-python digest to 2f77d31 (#328) --- .github/workflows/schedule_reporter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/schedule_reporter.yml b/.github/workflows/schedule_reporter.yml index 99bae1f1..78aec84e 100644 --- a/.github/workflows/schedule_reporter.yml +++ b/.github/workflows/schedule_reporter.yml @@ -24,6 +24,6 @@ jobs: issues: 'write' checks: 'read' contents: 'read' - uses: 
googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@3423e9aea159d63dee57d11d2667358ae233c081 + uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@2f77d3151b53d3a38a7385bcf8cc1efeeb7dd364 with: trigger_names: "pg-integration-test-nightly,pg-continuous-test-on-merge" From bb3c02ad76c924cf2edcef6cb6b6b71b84ca2e18 Mon Sep 17 00:00:00 2001 From: dishaprakash <57954147+dishaprakash@users.noreply.github.com> Date: Thu, 21 Aug 2025 23:03:34 +0000 Subject: [PATCH 14/61] chore: release 0.15.0 (#331) Release-As: 0.15.0 From 15df66f48e15ba791e753237c8ee60213c303860 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 22 Aug 2025 01:06:21 +0200 Subject: [PATCH 15/61] chore(deps): update googleapis/langchain-google-alloydb-pg-python digest to 9e60b1f (#332) --- .github/workflows/schedule_reporter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/schedule_reporter.yml b/.github/workflows/schedule_reporter.yml index 78aec84e..7ae03df9 100644 --- a/.github/workflows/schedule_reporter.yml +++ b/.github/workflows/schedule_reporter.yml @@ -24,6 +24,6 @@ jobs: issues: 'write' checks: 'read' contents: 'read' - uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@2f77d3151b53d3a38a7385bcf8cc1efeeb7dd364 + uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@9e60b1f5145682e3ed761b7d64d3f98d3d244329 with: trigger_names: "pg-integration-test-nightly,pg-continuous-test-on-merge" From ee66c188ce48516fb45eb2776a4646889f909d02 Mon Sep 17 00:00:00 2001 From: dishaprakash <57954147+dishaprakash@users.noreply.github.com> Date: Fri, 22 Aug 2025 16:53:39 +0000 Subject: [PATCH 16/61] chore: Update renovate config (#330) Similar to https://github.com/googleapis/langchain-google-alloydb-pg-python/pull/458 Co-authored-by: Averi Kitsch --- .github/renovate.json5 | 42 
+++++++++++++++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/.github/renovate.json5 b/.github/renovate.json5 index 2cc3ed8f..9f59ed09 100644 --- a/.github/renovate.json5 +++ b/.github/renovate.json5 @@ -13,11 +13,19 @@ '.kokoro/**', ], minimumReleaseAge: '3', - rebaseWhen: 'never', + rebaseWhen: 'conflicted', dependencyDashboardLabels: [ 'type: process', ], packageRules: [ + { + matchFileNames: ["samples/index_tuning_sample/**"], + groupName: "samples-index_tuning_sample", + }, + { + matchFileNames: ["samples/langchain_on_vertexai/**"], + groupName: "samples-langchain_on_vertexai", + }, { groupName: 'GitHub Actions', matchManagers: [ @@ -35,14 +43,30 @@ ], }, { - groupName: 'python-nonmajor', - matchCategories: [ - 'python', - ], - matchUpdateTypes: [ - 'minor', - 'patch', - ], + "description": "Disable numpy updates for python 3.10 in requirements.txt", + "matchPackageNames": ["numpy"], + "matchCurrentVersion": "<=2.2.6", + "enabled": false + }, + { + "description": "Disable numpy updates for python <=3.9 in requirements.txt", + "matchPackageNames": ["numpy"], + "matchCurrentVersion": "<=2.0.2", + "enabled": false }, + { + "description": "Disable numpy updates for python 3.10 in pyproject.toml", + "matchFileNames": ["pyproject.toml"], + "matchPackageNames": ["numpy"], + "matchCurrentValue": ">=1.24.4, <=2.2.6", + "enabled": false + }, + { + "description": "Disable numpy updates for python <=3.9 in pyproject.toml", + "matchFileNames": ["pyproject.toml"], + "matchPackageNames": ["numpy"], + "matchCurrentValue": ">=1.24.4, <=2.0.2", + "enabled": false + } ], } From 4ffd46a6c0ba880e5ecf390d784f60991f342c9d Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 4 Sep 2025 23:27:34 +0200 Subject: [PATCH 17/61] chore(deps): update samples-index_tuning_sample (#333) --- samples/index_tuning_sample/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/samples/index_tuning_sample/requirements.txt b/samples/index_tuning_sample/requirements.txt index 94fe05d4..35c0439f 100644 --- a/samples/index_tuning_sample/requirements.txt +++ b/samples/index_tuning_sample/requirements.txt @@ -1,3 +1,3 @@ -langchain-community==0.3.27 -langchain-google-cloud-sql-pg==0.14.0 -langchain-google-vertexai==2.0.27 +langchain-community==0.3.29 +langchain-google-cloud-sql-pg==0.14.1 +langchain-google-vertexai==2.0.28 From 1c00981eb79c514a04e2e94b554ea0ed1a609672 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 25 Sep 2025 20:34:12 +0200 Subject: [PATCH 18/61] chore(deps): update dependency langchain-google-vertexai to v2.1.2 (#339) --- samples/index_tuning_sample/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/index_tuning_sample/requirements.txt b/samples/index_tuning_sample/requirements.txt index 35c0439f..42e720ac 100644 --- a/samples/index_tuning_sample/requirements.txt +++ b/samples/index_tuning_sample/requirements.txt @@ -1,3 +1,3 @@ langchain-community==0.3.29 langchain-google-cloud-sql-pg==0.14.1 -langchain-google-vertexai==2.0.28 +langchain-google-vertexai==2.1.2 From 5614bfcd077a66c6e92b1a43ff63b34b87c8c9d0 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 29 Sep 2025 13:30:55 +0100 Subject: [PATCH 19/61] chore(deps): update googleapis/langchain-google-alloydb-pg-python digest to 50fcca8 (#336) --- .github/workflows/schedule_reporter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/schedule_reporter.yml b/.github/workflows/schedule_reporter.yml index 7ae03df9..52f297d6 100644 --- a/.github/workflows/schedule_reporter.yml +++ b/.github/workflows/schedule_reporter.yml @@ -24,6 +24,6 @@ jobs: issues: 'write' checks: 'read' contents: 'read' - uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@9e60b1f5145682e3ed761b7d64d3f98d3d244329 + uses: 
googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@50fcca800a8725cdb979e32df13dd399864acfc2 with: trigger_names: "pg-integration-test-nightly,pg-continuous-test-on-merge" From d6a973f63e54a77b628b571a95644089a2b7a2a0 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 29 Sep 2025 13:45:53 +0100 Subject: [PATCH 20/61] chore(deps): update github actions to v6 (#337) Co-authored-by: dishaprakash <57954147+dishaprakash@users.noreply.github.com> --- .github/workflows/docs.yml | 4 ++-- .github/workflows/lint.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 90ff4b84..569858cd 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -12,7 +12,7 @@ jobs: - name: Checkout uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 - name: Setup Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: python-version: "3.10" - name: Install nox @@ -28,7 +28,7 @@ jobs: - name: Checkout uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 - name: Setup Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: python-version: "3.10" - name: Install nox diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index ba304f22..8a33dc15 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -34,7 +34,7 @@ jobs: uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 - name: Setup Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 with: python-version: "3.11" From 283c211bb7105c8c16e6e8d7a0e5de9570fa18b4 Mon Sep 17 00:00:00 2001 From: Mend 
Renovate Date: Mon, 29 Sep 2025 16:55:47 +0100 Subject: [PATCH 21/61] chore(deps): update dependency pytest-cov to v7 (#338) Co-authored-by: dishaprakash <57954147+dishaprakash@users.noreply.github.com> --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 38db0f27..c234d0c5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,7 @@ test = [ "mypy==1.17.1", "pytest-asyncio==0.26.0", "pytest==8.4.1", - "pytest-cov==6.2.1", + "pytest-cov==7.0.0", "langchain-tests==0.3.20", "langgraph==0.6.5" ] From 200e0385b325026dba58c3d49b2bfa7ffdcc3703 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 30 Sep 2025 10:02:55 +0100 Subject: [PATCH 22/61] chore(deps): update googleapis/langchain-google-alloydb-pg-python digest to c9b7ad5 (#341) --- .github/workflows/schedule_reporter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/schedule_reporter.yml b/.github/workflows/schedule_reporter.yml index 52f297d6..97b1773a 100644 --- a/.github/workflows/schedule_reporter.yml +++ b/.github/workflows/schedule_reporter.yml @@ -24,6 +24,6 @@ jobs: issues: 'write' checks: 'read' contents: 'read' - uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@50fcca800a8725cdb979e32df13dd399864acfc2 + uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@c9b7ad54573ff86f74c1902d731d47e37c76dc3b with: trigger_names: "pg-integration-test-nightly,pg-continuous-test-on-merge" From 184710f9a9e9390f39a2d2c5fecc2d5d2d7e4660 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 1 Oct 2025 10:08:15 +0100 Subject: [PATCH 23/61] chore(deps): update all non-major dependencies (#334) * chore(deps): update all non-major dependencies * Update requirements.txt * Update requirements.txt --------- Co-authored-by: Averi Kitsch --- pyproject.toml | 6 +++--- requirements.txt | 2 +- samples/requirements.txt 
| 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c234d0c5..b10f7305 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,10 +46,10 @@ test = [ "isort==6.0.1", "mypy==1.17.1", "pytest-asyncio==0.26.0", - "pytest==8.4.1", + "pytest==8.4.2", "pytest-cov==7.0.0", - "langchain-tests==0.3.20", - "langgraph==0.6.5" + "langchain-tests==0.3.21", + "langgraph==0.6.6" ] [build-system] diff --git a/requirements.txt b/requirements.txt index 55636030..bb7ef0b1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,5 +2,5 @@ cloud-sql-python-connector[asyncpg]==1.18.4 numpy==2.3.2; python_version >= "3.11" numpy==2.2.6; python_version == "3.10" numpy==2.0.2; python_version <= "3.9" -langgraph==0.6.5 +langgraph==0.6.6 langchain-postgres==0.0.15 diff --git a/samples/requirements.txt b/samples/requirements.txt index db0df6bc..b6b27ad1 100644 --- a/samples/requirements.txt +++ b/samples/requirements.txt @@ -1,5 +1,5 @@ google-cloud-aiplatform[reasoningengine,langchain]==1.97.0 google-cloud-resource-manager==1.14.2 -langchain-community==0.3.27 -langchain-google-cloud-sql-pg==0.14.0 +langchain-community==0.3.29 +langchain-google-cloud-sql-pg==0.14.1 langchain-google-vertexai==2.0.27 From 0d107c93c444017dfcafd636fcda3d8c78407da5 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 1 Oct 2025 14:50:27 +0100 Subject: [PATCH 24/61] chore(deps): update dependency langchain-community to v0.3.30 (#340) Co-authored-by: dishaprakash <57954147+dishaprakash@users.noreply.github.com> --- samples/index_tuning_sample/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/index_tuning_sample/requirements.txt b/samples/index_tuning_sample/requirements.txt index 42e720ac..345b0682 100644 --- a/samples/index_tuning_sample/requirements.txt +++ b/samples/index_tuning_sample/requirements.txt @@ -1,3 +1,3 @@ -langchain-community==0.3.29 +langchain-community==0.3.30 langchain-google-cloud-sql-pg==0.14.1 
langchain-google-vertexai==2.1.2 From f590017f9806d6bbf97c80cb70590c8294c16cb8 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 2 Oct 2025 18:22:42 +0100 Subject: [PATCH 25/61] chore(deps): update samples-langchain_on_vertexai (#335) Co-authored-by: dishaprakash <57954147+dishaprakash@users.noreply.github.com> --- samples/langchain_on_vertexai/requirements.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/samples/langchain_on_vertexai/requirements.txt b/samples/langchain_on_vertexai/requirements.txt index db0df6bc..317e45f9 100644 --- a/samples/langchain_on_vertexai/requirements.txt +++ b/samples/langchain_on_vertexai/requirements.txt @@ -1,5 +1,5 @@ -google-cloud-aiplatform[reasoningengine,langchain]==1.97.0 +google-cloud-aiplatform[reasoningengine,langchain]==1.118.0 google-cloud-resource-manager==1.14.2 -langchain-community==0.3.27 -langchain-google-cloud-sql-pg==0.14.0 -langchain-google-vertexai==2.0.27 +langchain-community==0.3.30 +langchain-google-cloud-sql-pg==0.14.1 +langchain-google-vertexai==2.1.2 From 30f3dc6f235c0e2126d29edd8cba6cd351f16b74 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 2 Oct 2025 18:44:36 +0100 Subject: [PATCH 26/61] chore(deps): update all non-major dependencies (#342) * chore(deps): update all non-major dependencies * Update requirements.txt --------- Co-authored-by: dishaprakash <57954147+dishaprakash@users.noreply.github.com> --- pyproject.toml | 10 +++++----- requirements.txt | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b10f7305..2e44c825 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,14 +42,14 @@ langgraph = [ "langgraph-checkpoint>=2.0.9, <3.0.0" ] test = [ - "black[jupyter]==25.1.0", - "isort==6.0.1", - "mypy==1.17.1", + "black[jupyter]==25.9.0", + "isort==6.1.0", + "mypy==1.18.2", "pytest-asyncio==0.26.0", "pytest==8.4.2", "pytest-cov==7.0.0", - "langchain-tests==0.3.21", - "langgraph==0.6.6" + 
"langchain-tests==0.3.22", + "langgraph==0.6.8" ] [build-system] diff --git a/requirements.txt b/requirements.txt index bb7ef0b1..a87bef7c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ cloud-sql-python-connector[asyncpg]==1.18.4 -numpy==2.3.2; python_version >= "3.11" +numpy==2.3.3; python_version >= "3.11" numpy==2.2.6; python_version == "3.10" numpy==2.0.2; python_version <= "3.9" -langgraph==0.6.6 +langgraph==0.6.8 langchain-postgres==0.0.15 From a1dbd588996ebfaed5d0275e8fda4264ae790a8c Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 6 Oct 2025 18:24:05 +0100 Subject: [PATCH 27/61] chore(deps): update dependency google-cloud-aiplatform to v1.119.0 (#344) --- samples/langchain_on_vertexai/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/langchain_on_vertexai/requirements.txt b/samples/langchain_on_vertexai/requirements.txt index 317e45f9..68d34946 100644 --- a/samples/langchain_on_vertexai/requirements.txt +++ b/samples/langchain_on_vertexai/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-aiplatform[reasoningengine,langchain]==1.118.0 +google-cloud-aiplatform[reasoningengine,langchain]==1.119.0 google-cloud-resource-manager==1.14.2 langchain-community==0.3.30 langchain-google-cloud-sql-pg==0.14.1 From adfca0c376b3f347e00fb37c45f45f6cd26c9b19 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 13 Oct 2025 20:07:33 +0100 Subject: [PATCH 28/61] chore(deps): update dependency langchain-community to v0.3.31 (#345) * chore(deps): update dependency langchain-community to v0.3.31 * fix tests * lint --------- Co-authored-by: Twisha Bansal --- samples/index_tuning_sample/requirements.txt | 2 +- tests/test_async_checkpoint.py | 2 ++ tests/test_checkpoint.py | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/samples/index_tuning_sample/requirements.txt b/samples/index_tuning_sample/requirements.txt index 345b0682..38630c9b 100644 --- a/samples/index_tuning_sample/requirements.txt +++ 
b/samples/index_tuning_sample/requirements.txt @@ -1,3 +1,3 @@ -langchain-community==0.3.30 +langchain-community==0.3.31 langchain-google-cloud-sql-pg==0.14.1 langchain-google-vertexai==2.1.2 diff --git a/tests/test_async_checkpoint.py b/tests/test_async_checkpoint.py index c78fc726..821b27c0 100644 --- a/tests/test_async_checkpoint.py +++ b/tests/test_async_checkpoint.py @@ -78,6 +78,7 @@ "__start__": {"__start__": 1}, "node": {"start:node": 2}, }, + "updated_channels": [], } @@ -212,6 +213,7 @@ def test_data() -> dict[str, Any]: "__start__": {"__start__": 1}, "node": {"start:node": 2}, }, + "updated_channels": [], } chkpnt_1: Checkpoint = empty_checkpoint() chkpnt_2: Checkpoint = create_checkpoint(chkpnt_1, {}, 1) diff --git a/tests/test_checkpoint.py b/tests/test_checkpoint.py index 05545168..a8b0f0d0 100644 --- a/tests/test_checkpoint.py +++ b/tests/test_checkpoint.py @@ -59,6 +59,7 @@ "__start__": {"__start__": 1}, "node": {"start:node": 2}, }, + "updated_channels": [], } @@ -235,6 +236,7 @@ def test_data() -> dict[str, Any]: "__start__": {"__start__": 1}, "node": {"start:node": 2}, }, + "updated_channels": [], } chkpnt_1: Checkpoint = empty_checkpoint() chkpnt_2: Checkpoint = create_checkpoint(chkpnt_1, {}, 1) From 8d8564b5c79593ce4177a7f9512547b6b870818b Mon Sep 17 00:00:00 2001 From: dishaprakash <57954147+dishaprakash@users.noreply.github.com> Date: Wed, 15 Oct 2025 18:51:37 +0000 Subject: [PATCH 29/61] chore: Update renovate to use `feat` type while updating langchain-postgres deps (#348) chore: Update renovate to use feat type while updating langchain-postgres deps --- .github/renovate.json5 | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/renovate.json5 b/.github/renovate.json5 index 9f59ed09..fdc02e29 100644 --- a/.github/renovate.json5 +++ b/.github/renovate.json5 @@ -67,6 +67,12 @@ "matchPackageNames": ["numpy"], "matchCurrentValue": ">=1.24.4, <=2.0.2", "enabled": false + }, + { + "description": "Use feat commit type for LangChain 
Postgres dependency updates", + "matchPackageNames": ["langchain-postgres"], + "semanticCommitType": "feat", + "groupName": "langchain-postgres" } ], } From 34cc17fe9bdef500247e697ac706ad8c950d5a4c Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 15 Oct 2025 23:43:26 +0100 Subject: [PATCH 30/61] chore(deps): update samples-langchain_on_vertexai (#346) Co-authored-by: dishaprakash <57954147+dishaprakash@users.noreply.github.com> Co-authored-by: Averi Kitsch --- samples/langchain_on_vertexai/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/langchain_on_vertexai/requirements.txt b/samples/langchain_on_vertexai/requirements.txt index 68d34946..f841a4c3 100644 --- a/samples/langchain_on_vertexai/requirements.txt +++ b/samples/langchain_on_vertexai/requirements.txt @@ -1,5 +1,5 @@ -google-cloud-aiplatform[reasoningengine,langchain]==1.119.0 +google-cloud-aiplatform[reasoningengine,langchain]==1.120.0 google-cloud-resource-manager==1.14.2 -langchain-community==0.3.30 +langchain-community==0.3.31 langchain-google-cloud-sql-pg==0.14.1 langchain-google-vertexai==2.1.2 From d82ac33d6e36918ad9eef6389239a5fa52dd51cb Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 16 Oct 2025 03:11:18 +0100 Subject: [PATCH 31/61] chore(deps): update googleapis/langchain-google-alloydb-pg-python digest to 58b58eb (#349) --- .github/workflows/schedule_reporter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/schedule_reporter.yml b/.github/workflows/schedule_reporter.yml index 97b1773a..b177bae9 100644 --- a/.github/workflows/schedule_reporter.yml +++ b/.github/workflows/schedule_reporter.yml @@ -24,6 +24,6 @@ jobs: issues: 'write' checks: 'read' contents: 'read' - uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@c9b7ad54573ff86f74c1902d731d47e37c76dc3b + uses: 
googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@58b58ebd4f5ea141d72f793c4c06735e7a733e31 with: trigger_names: "pg-integration-test-nightly,pg-continuous-test-on-merge" From 0509559cfc9c56401617a2d0cd0e1aa859d8781a Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 16 Oct 2025 10:34:59 +0100 Subject: [PATCH 32/61] chore(deps): update all non-major dependencies (#343) * chore(deps): update all non-major dependencies * Update requirements.txt * Update requirements.txt --------- Co-authored-by: dishaprakash <57954147+dishaprakash@users.noreply.github.com> Co-authored-by: Averi Kitsch --- pyproject.toml | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2e44c825..c092aa60 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,7 +49,7 @@ test = [ "pytest==8.4.2", "pytest-cov==7.0.0", "langchain-tests==0.3.22", - "langgraph==0.6.8" + "langgraph==0.6.10" ] [build-system] diff --git a/requirements.txt b/requirements.txt index a87bef7c..02eaa6f8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,5 +2,5 @@ cloud-sql-python-connector[asyncpg]==1.18.4 numpy==2.3.3; python_version >= "3.11" numpy==2.2.6; python_version == "3.10" numpy==2.0.2; python_version <= "3.9" -langgraph==0.6.8 +langgraph==0.6.10 langchain-postgres==0.0.15 From 1a85c2d24387edd6c8d4be87bdff4af77d78f06b Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 16 Oct 2025 10:37:33 +0100 Subject: [PATCH 33/61] chore(deps): update googleapis/langchain-google-alloydb-pg-python digest to 2728db1 (#351) Co-authored-by: dishaprakash <57954147+dishaprakash@users.noreply.github.com> --- .github/workflows/schedule_reporter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/schedule_reporter.yml b/.github/workflows/schedule_reporter.yml index b177bae9..4f1bf66e 100644 --- a/.github/workflows/schedule_reporter.yml +++ 
b/.github/workflows/schedule_reporter.yml @@ -24,6 +24,6 @@ jobs: issues: 'write' checks: 'read' contents: 'read' - uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@58b58ebd4f5ea141d72f793c4c06735e7a733e31 + uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@2728db12d6fcc88acbeecb1201d54fa77a95e407 with: trigger_names: "pg-integration-test-nightly,pg-continuous-test-on-merge" From 76dab16df89481e2d19022cb8a5f599d2fca66d8 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 16 Oct 2025 18:18:59 +0100 Subject: [PATCH 34/61] chore(deps): update googleapis/langchain-google-alloydb-pg-python digest to d5dd585 (#352) --- .github/workflows/schedule_reporter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/schedule_reporter.yml b/.github/workflows/schedule_reporter.yml index 4f1bf66e..28b65edb 100644 --- a/.github/workflows/schedule_reporter.yml +++ b/.github/workflows/schedule_reporter.yml @@ -24,6 +24,6 @@ jobs: issues: 'write' checks: 'read' contents: 'read' - uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@2728db12d6fcc88acbeecb1201d54fa77a95e407 + uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@d5dd58511d12347d8a233d492b3ad15e6d2b3721 with: trigger_names: "pg-integration-test-nightly,pg-continuous-test-on-merge" From 1c216065907374320eea442cd7cdfe7ed59540c6 Mon Sep 17 00:00:00 2001 From: dishaprakash <57954147+dishaprakash@users.noreply.github.com> Date: Fri, 17 Oct 2025 19:19:32 +0000 Subject: [PATCH 35/61] chore: Update dependency based on python version (#357) * chore: Update dependency based on python version chore: Update dependency based on python version * Update isort version constraints for Python versions --- .github/renovate.json5 | 7 +++++++ pyproject.toml | 3 ++- 2 files changed, 9 insertions(+), 1 
deletion(-) diff --git a/.github/renovate.json5 b/.github/renovate.json5 index fdc02e29..93a38cf3 100644 --- a/.github/renovate.json5 +++ b/.github/renovate.json5 @@ -73,6 +73,13 @@ "matchPackageNames": ["langchain-postgres"], "semanticCommitType": "feat", "groupName": "langchain-postgres" + }, + { + "description": "Disable isort updates for python <=3.9 in pyproject.toml", + "matchFileNames": ["pyproject.toml"], + "matchPackageNames": ["isort"], + "matchCurrentValue": "==6.1.0", + "enabled": false } ], } diff --git a/pyproject.toml b/pyproject.toml index c092aa60..cfb4d532 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,8 @@ langgraph = [ ] test = [ "black[jupyter]==25.9.0", - "isort==6.1.0", + "isort==6.1.0; python_version == '3.9'", + "isort==7.0.0; python_version >= '3.10'", "mypy==1.18.2", "pytest-asyncio==0.26.0", "pytest==8.4.2", From ce1a43b6ef0a411be623493de1e139ab29d1b9f6 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 21 Oct 2025 06:07:33 +0100 Subject: [PATCH 36/61] chore(deps): update dependency langchain-community to v0.4 (#361) --- samples/index_tuning_sample/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/index_tuning_sample/requirements.txt b/samples/index_tuning_sample/requirements.txt index 38630c9b..331bc988 100644 --- a/samples/index_tuning_sample/requirements.txt +++ b/samples/index_tuning_sample/requirements.txt @@ -1,3 +1,3 @@ -langchain-community==0.3.31 +langchain-community==0.4 langchain-google-cloud-sql-pg==0.14.1 langchain-google-vertexai==2.1.2 From adb3f32b60e1e3ce3e725ee97da280d09c531d42 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 21 Oct 2025 06:27:37 +0100 Subject: [PATCH 37/61] chore(deps): update dependency langchain-google-vertexai to v3 (#358) Co-authored-by: Twisha Bansal <58483338+twishabansal@users.noreply.github.com> --- samples/index_tuning_sample/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/samples/index_tuning_sample/requirements.txt b/samples/index_tuning_sample/requirements.txt index 331bc988..266b871f 100644 --- a/samples/index_tuning_sample/requirements.txt +++ b/samples/index_tuning_sample/requirements.txt @@ -1,3 +1,3 @@ langchain-community==0.4 langchain-google-cloud-sql-pg==0.14.1 -langchain-google-vertexai==2.1.2 +langchain-google-vertexai==3.0.0 From ca4232543ca83374f14dd6e3ae7d12ed4a8e7c52 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 24 Oct 2025 05:32:31 +0100 Subject: [PATCH 38/61] chore(deps): update dependency langchain-google-vertexai to v3.0.1 (#364) --- samples/index_tuning_sample/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/index_tuning_sample/requirements.txt b/samples/index_tuning_sample/requirements.txt index 266b871f..14e89a23 100644 --- a/samples/index_tuning_sample/requirements.txt +++ b/samples/index_tuning_sample/requirements.txt @@ -1,3 +1,3 @@ langchain-community==0.4 langchain-google-cloud-sql-pg==0.14.1 -langchain-google-vertexai==3.0.0 +langchain-google-vertexai==3.0.1 From 8f8f17c91d6545c9c30d2aee018439e9c43f4bb7 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 27 Oct 2025 17:05:46 +0000 Subject: [PATCH 39/61] chore(deps): update dependency langchain-community to v0.4.1 (#365) --- samples/index_tuning_sample/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/index_tuning_sample/requirements.txt b/samples/index_tuning_sample/requirements.txt index 14e89a23..d2c09cdf 100644 --- a/samples/index_tuning_sample/requirements.txt +++ b/samples/index_tuning_sample/requirements.txt @@ -1,3 +1,3 @@ -langchain-community==0.4 +langchain-community==0.4.1 langchain-google-cloud-sql-pg==0.14.1 langchain-google-vertexai==3.0.1 From e773505453683dad5681e6155831b710cbc7fcc1 Mon Sep 17 00:00:00 2001 From: dishaprakash <57954147+dishaprakash@users.noreply.github.com> Date: Fri, 31 Oct 2025 18:25:55 +0000 Subject: [PATCH 40/61] 
feat(deps): Update langchain-postgres to v0.0.16 (#366) * feat(deps): Update langchain-postgres to v0.0.16 * Update langchain-postgres version to 0.0.16 * Update requirements.txt --- pyproject.toml | 2 +- requirements.txt | 2 +- tests/test_async_vectorstore_search.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index cfb4d532..a68d6717 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ dependencies = [ "numpy>=1.24.4, <3.0.0; python_version >= '3.11'", "numpy>=1.24.4, <=2.2.6; python_version == '3.10'", "numpy>=1.24.4, <=2.0.2; python_version <= '3.9'", - "langchain-postgres>=0.0.15", + "langchain-postgres>=0.0.16", ] classifiers = [ diff --git a/requirements.txt b/requirements.txt index 02eaa6f8..d2aa371f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,4 @@ numpy==2.3.3; python_version >= "3.11" numpy==2.2.6; python_version == "3.10" numpy==2.0.2; python_version <= "3.9" langgraph==0.6.10 -langchain-postgres==0.0.15 +langchain-postgres==0.0.16 diff --git a/tests/test_async_vectorstore_search.py b/tests/test_async_vectorstore_search.py index 7e4effdf..9f496503 100644 --- a/tests/test_async_vectorstore_search.py +++ b/tests/test_async_vectorstore_search.py @@ -482,7 +482,7 @@ async def test_hybrid_search_weighted_sum_vector_bias( result_ids = [doc.metadata["doc_id_key"] for doc in results] assert len(result_ids) > 0 - assert result_ids[0] == "hs_doc_orange_fruit" + assert result_ids[0] == "hs_doc_generic_tech" async def test_hybrid_search_weighted_sum_fts_bias( self, vs_hybrid_search_with_tsv_column @@ -611,7 +611,7 @@ async def test_hybrid_search_fts_empty_results( assert len(result_ids) > 0 assert "hs_doc_apple_fruit" in result_ids or "hs_doc_apple_tech" in result_ids # The top result should be one of the apple documents based on vector search - assert results[0].metadata["doc_id_key"].startswith("hs_doc_unrelated_cat") + assert 
results[0].metadata["doc_id_key"].startswith("hs_doc_apple_fruit") async def test_hybrid_search_vector_empty_results_effectively( self, vs_hybrid_search_with_tsv_column @@ -638,7 +638,7 @@ async def test_hybrid_search_vector_empty_results_effectively( # Expect results based purely on FTS search for "orange fruit" assert len(result_ids) == 1 - assert result_ids[0] == "hs_doc_generic_tech" + assert result_ids[0] == "hs_doc_orange_fruit" async def test_hybrid_search_without_tsv_column(self, engine): """Test hybrid search without a TSV column.""" From 413243fb2817427aa610191718843fad9354e386 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 4 Nov 2025 07:15:00 +0000 Subject: [PATCH 41/61] chore(deps): update dependency langgraph-checkpoint to v3 (#363) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a68d6717..c8232d20 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ Changelog = "https://github.com/googleapis/langchain-google-cloud-sql-pg-python/ [project.optional-dependencies] langgraph = [ - "langgraph-checkpoint>=2.0.9, <3.0.0" + "langgraph-checkpoint>=3.0.0, <3.1.0" ] test = [ "black[jupyter]==25.9.0", From fb69bc277eb7cf26f135bde49edf63e6a91fb509 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 18 Nov 2025 12:15:29 +0000 Subject: [PATCH 42/61] chore(deps): update dependency langchain-google-vertexai to v3.0.3 (#367) --- samples/index_tuning_sample/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/index_tuning_sample/requirements.txt b/samples/index_tuning_sample/requirements.txt index d2c09cdf..cb85477a 100644 --- a/samples/index_tuning_sample/requirements.txt +++ b/samples/index_tuning_sample/requirements.txt @@ -1,3 +1,3 @@ langchain-community==0.4.1 langchain-google-cloud-sql-pg==0.14.1 -langchain-google-vertexai==3.0.1 +langchain-google-vertexai==3.0.3 From aa988cab813fb8cdf60688bb5ea69f514ec3099a Mon Sep 17 00:00:00 
2001 From: Mend Renovate Date: Tue, 18 Nov 2025 12:52:38 +0000 Subject: [PATCH 43/61] chore(deps): update github actions (#354) --- .github/workflows/docs.yml | 4 ++-- .github/workflows/lint.yml | 2 +- .github/workflows/schedule_reporter.yml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 569858cd..cb3b7062 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 - name: Setup Python uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: @@ -26,7 +26,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 - name: Setup Python uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 8a33dc15..f250dc0d 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -31,7 +31,7 @@ jobs: steps: - name: Checkout Repository - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5 + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 - name: Setup Python uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 diff --git a/.github/workflows/schedule_reporter.yml b/.github/workflows/schedule_reporter.yml index 28b65edb..76950bc1 100644 --- a/.github/workflows/schedule_reporter.yml +++ b/.github/workflows/schedule_reporter.yml @@ -24,6 +24,6 @@ jobs: issues: 'write' checks: 'read' contents: 'read' - uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@d5dd58511d12347d8a233d492b3ad15e6d2b3721 + 
uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@1333538681861eec5dd2a944171a9ea8a6306520 with: trigger_names: "pg-integration-test-nightly,pg-continuous-test-on-merge" From c5342a61a6c76f92897aa9846125946a2ada0c30 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 25 Nov 2025 06:58:28 +0000 Subject: [PATCH 44/61] chore(deps): update github actions (#369) --- .github/workflows/docs.yml | 4 ++-- .github/workflows/lint.yml | 2 +- .github/workflows/schedule_reporter.yml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index cb3b7062..87fa0468 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -12,7 +12,7 @@ jobs: - name: Checkout uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 - name: Setup Python - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6 with: python-version: "3.10" - name: Install nox @@ -28,7 +28,7 @@ jobs: - name: Checkout uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 - name: Setup Python - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6 with: python-version: "3.10" - name: Install nox diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index f250dc0d..256dec0c 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -34,7 +34,7 @@ jobs: uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 - name: Setup Python - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 with: python-version: "3.11" diff --git a/.github/workflows/schedule_reporter.yml b/.github/workflows/schedule_reporter.yml index 
76950bc1..ffa47e8e 100644 --- a/.github/workflows/schedule_reporter.yml +++ b/.github/workflows/schedule_reporter.yml @@ -24,6 +24,6 @@ jobs: issues: 'write' checks: 'read' contents: 'read' - uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@1333538681861eec5dd2a944171a9ea8a6306520 + uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@9eca5220f8f17a443f08b21e66dcdace0c5ac00f with: trigger_names: "pg-integration-test-nightly,pg-continuous-test-on-merge" From 26e67ecae4181339b569d6ddef436ac18cd82937 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 1 Dec 2025 21:17:50 +0000 Subject: [PATCH 45/61] chore(deps): update googleapis/langchain-google-alloydb-pg-python digest to 01b6622 (#373) --- .github/workflows/schedule_reporter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/schedule_reporter.yml b/.github/workflows/schedule_reporter.yml index ffa47e8e..d3eed028 100644 --- a/.github/workflows/schedule_reporter.yml +++ b/.github/workflows/schedule_reporter.yml @@ -24,6 +24,6 @@ jobs: issues: 'write' checks: 'read' contents: 'read' - uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@9eca5220f8f17a443f08b21e66dcdace0c5ac00f + uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@01b66222139586a87699ccccf938d1574ec94f85 with: trigger_names: "pg-integration-test-nightly,pg-continuous-test-on-merge" From 0970e085065729d2ed9f5e3ff06d68bc6c79ebb8 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 1 Dec 2025 21:42:44 +0000 Subject: [PATCH 46/61] chore(deps): update actions/checkout action to v6 (#370) Co-authored-by: Averi Kitsch --- .github/workflows/docs.yml | 4 ++-- .github/workflows/lint.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 
87fa0468..36f543dd 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6 - name: Setup Python uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6 with: @@ -26,7 +26,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6 - name: Setup Python uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6 with: diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 256dec0c..afe53954 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -31,7 +31,7 @@ jobs: steps: - name: Checkout Repository - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6 - name: Setup Python uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 From b3d8d82adecd1445014684564401278feacbc0ee Mon Sep 17 00:00:00 2001 From: dishaprakash <57954147+dishaprakash@users.noreply.github.com> Date: Tue, 2 Dec 2025 19:08:45 +0000 Subject: [PATCH 47/61] chore: Fix LoopConnectorError by enforcing background loop execution for Async Classes tests (#377) * chore: Update tests for Async Classes * update tests for async loader * fix tests * fix lint * fix lint * fix lint * update connector dep * fix tests * fix tests --- requirements.txt | 2 +- tests/test_async_chatmessagehistory.py | 98 ++- tests/test_async_checkpoint.py | 149 +++-- tests/test_async_loader.py | 467 ++++++++------ tests/test_async_vectorstore.py | 303 +++++---- tests/test_async_vectorstore_from_methods.py | 169 +++-- tests/test_async_vectorstore_index.py | 189 +++--- 
tests/test_async_vectorstore_search.py | 612 +++++++++++-------- tests/test_engine.py | 120 ++-- tests/test_vectorstore.py | 2 +- 10 files changed, 1317 insertions(+), 794 deletions(-) diff --git a/requirements.txt b/requirements.txt index d2aa371f..9eacfe4b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -cloud-sql-python-connector[asyncpg]==1.18.4 +cloud-sql-python-connector[asyncpg]==1.18.5 numpy==2.3.3; python_version >= "3.11" numpy==2.2.6; python_version == "3.10" numpy==2.0.2; python_version <= "3.9" diff --git a/tests/test_async_chatmessagehistory.py b/tests/test_async_chatmessagehistory.py index e5443b11..585661a1 100644 --- a/tests/test_async_chatmessagehistory.py +++ b/tests/test_async_chatmessagehistory.py @@ -11,8 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import asyncio import os import uuid +from typing import Any, Coroutine import pytest import pytest_asyncio @@ -33,10 +35,23 @@ table_name_async = "message_store" + str(uuid.uuid4()) +# Helper to bridge the Main Test Loop and the Engine Background Loop +async def run_on_background(engine: PostgresEngine, coro: Coroutine) -> Any: + """Runs a coroutine on the engine's background loop.""" + if engine._loop: + return await asyncio.wrap_future( + asyncio.run_coroutine_threadsafe(coro, engine._loop) + ) + return await coro + + async def aexecute(engine: PostgresEngine, query: str) -> None: - async with engine._pool.connect() as conn: - await conn.execute(text(query)) - await conn.commit() + async def _impl(): + async with engine._pool.connect() as conn: + await conn.execute(text(query)) + await conn.commit() + + await run_on_background(engine, _impl()) @pytest_asyncio.fixture @@ -47,7 +62,10 @@ async def async_engine(): instance=instance_id, database=db_name, ) - await async_engine._ainit_chat_history_table(table_name=table_name_async) + 
await run_on_background( + async_engine, + async_engine._ainit_chat_history_table(table_name=table_name_async), + ) yield async_engine # use default table for AsyncPostgresChatMessageHistory query = f'DROP TABLE IF EXISTS "{table_name_async}"' @@ -59,14 +77,19 @@ async def async_engine(): async def test_chat_message_history_async( async_engine: PostgresEngine, ) -> None: - history = await AsyncPostgresChatMessageHistory.create( - engine=async_engine, session_id="test", table_name=table_name_async + history = await run_on_background( + async_engine, + AsyncPostgresChatMessageHistory.create( + engine=async_engine, session_id="test", table_name=table_name_async + ), ) msg1 = HumanMessage(content="hi!") msg2 = AIMessage(content="whats up?") - await history.aadd_message(msg1) - await history.aadd_message(msg2) - messages = await history._aget_messages() + + await run_on_background(async_engine, history.aadd_message(msg1)) + await run_on_background(async_engine, history.aadd_message(msg2)) + + messages = await run_on_background(async_engine, history._aget_messages()) # verify messages are correct assert messages[0].content == "hi!" 
@@ -75,48 +98,71 @@ async def test_chat_message_history_async( assert type(messages[1]) is AIMessage # verify clear() clears message history - await history.aclear() - assert len(await history._aget_messages()) == 0 + await run_on_background(async_engine, history.aclear()) + messages_after_clear = await run_on_background( + async_engine, history._aget_messages() + ) + assert len(messages_after_clear) == 0 @pytest.mark.asyncio async def test_chat_message_history_sync_messages( async_engine: PostgresEngine, ) -> None: - history1 = await AsyncPostgresChatMessageHistory.create( - engine=async_engine, session_id="test", table_name=table_name_async + history1 = await run_on_background( + async_engine, + AsyncPostgresChatMessageHistory.create( + engine=async_engine, session_id="test", table_name=table_name_async + ), ) - history2 = await AsyncPostgresChatMessageHistory.create( - engine=async_engine, session_id="test", table_name=table_name_async + history2 = await run_on_background( + async_engine, + AsyncPostgresChatMessageHistory.create( + engine=async_engine, session_id="test", table_name=table_name_async + ), ) msg1 = HumanMessage(content="hi!") msg2 = AIMessage(content="whats up?") - await history1.aadd_message(msg1) - await history2.aadd_message(msg2) + await run_on_background(async_engine, history1.aadd_message(msg1)) + await run_on_background(async_engine, history2.aadd_message(msg2)) + + len_history1 = len(await run_on_background(async_engine, history1._aget_messages())) + len_history2 = len(await run_on_background(async_engine, history2._aget_messages())) - assert len(await history1._aget_messages()) == 2 - assert len(await history2._aget_messages()) == 2 + assert len_history1 == 2 + assert len_history2 == 2 # verify clear() clears message history - await history2.aclear() - assert len(await history2._aget_messages()) == 0 + await run_on_background(async_engine, history2.aclear()) + len_history2_after_clear = len( + await run_on_background(async_engine, 
history2._aget_messages()) + ) + assert len_history2_after_clear == 0 @pytest.mark.asyncio async def test_chat_table_async(async_engine): with pytest.raises(ValueError): - await AsyncPostgresChatMessageHistory.create( - engine=async_engine, session_id="test", table_name="doesnotexist" + await run_on_background( + async_engine, + AsyncPostgresChatMessageHistory.create( + engine=async_engine, session_id="test", table_name="doesnotexist" + ), ) @pytest.mark.asyncio async def test_chat_schema_async(async_engine): table_name = "test_table" + str(uuid.uuid4()) - await async_engine._ainit_document_table(table_name=table_name) + await run_on_background( + async_engine, async_engine._ainit_document_table(table_name=table_name) + ) with pytest.raises(IndexError): - await AsyncPostgresChatMessageHistory.create( - engine=async_engine, session_id="test", table_name=table_name + await run_on_background( + async_engine, + AsyncPostgresChatMessageHistory.create( + engine=async_engine, session_id="test", table_name=table_name + ), ) query = f'DROP TABLE IF EXISTS "{table_name}"' diff --git a/tests/test_async_checkpoint.py b/tests/test_async_checkpoint.py index 821b27c0..00d26b29 100644 --- a/tests/test_async_checkpoint.py +++ b/tests/test_async_checkpoint.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import asyncio import os import re import uuid -from typing import Any, List, Literal, Optional, Sequence, Tuple, Union +from typing import Any, Coroutine, List, Literal, Optional, Sequence, Tuple, Union import pytest import pytest_asyncio @@ -107,18 +108,33 @@ def _AnyIdToolMessage(**kwargs: Any) -> ToolMessage: return message +# Helper to bridge the Main Test Loop and the Engine Background Loop +async def run_on_background(engine: PostgresEngine, coro: Coroutine) -> Any: + """Runs a coroutine on the engine's background loop.""" + if engine._loop: + return await asyncio.wrap_future( + asyncio.run_coroutine_threadsafe(coro, engine._loop) + ) + return await coro + + async def aexecute(engine: PostgresEngine, query: str) -> None: - async with engine._pool.connect() as conn: - await conn.execute(text(query)) - await conn.commit() + async def _impl(): + async with engine._pool.connect() as conn: + await conn.execute(text(query)) + await conn.commit() + + await run_on_background(engine, _impl()) async def afetch(engine: PostgresEngine, query: str) -> Sequence[RowMapping]: - async with engine._pool.connect() as conn: - result = await conn.execute(text(query)) - result_map = result.mappings() - result_fetch = result_map.fetchall() - return result_fetch + async def _impl(): + async with engine._pool.connect() as conn: + result = await conn.execute(text(query)) + result_map = result.mappings() + return result_map.fetchall() + + return await run_on_background(engine, _impl()) @pytest_asyncio.fixture @@ -139,10 +155,15 @@ async def async_engine(): @pytest_asyncio.fixture async def checkpointer(async_engine): - await async_engine._ainit_checkpoint_table(table_name=table_name) - checkpointer = await AsyncPostgresSaver.create( + await run_on_background( + async_engine, async_engine._ainit_checkpoint_table(table_name=table_name) + ) + checkpointer = await run_on_background( async_engine, - table_name, # serde=JsonPlusSerializer + AsyncPostgresSaver.create( + async_engine, + 
table_name, # serde=JsonPlusSerializer + ), ) yield checkpointer @@ -160,7 +181,9 @@ async def test_checkpoint_async( } } # Verify if updated configuration after storing the checkpoint is correct - next_config = await checkpointer.aput(write_config, checkpoint, {}, {}) + next_config = await run_on_background( + async_engine, checkpointer.aput(write_config, checkpoint, {}, {}) + ) assert dict(next_config) == test_config # Verify if the checkpoint is stored correctly in the database @@ -258,7 +281,9 @@ async def test_checkpoint_aput_writes( ("test_channel1", {}), ("test_channel2", {}), ] - await checkpointer.aput_writes(config, writes, task_id="1") + await run_on_background( + async_engine, checkpointer.aput_writes(config, writes, task_id="1") + ) results = await afetch(async_engine, f'SELECT * FROM "{table_name_writes}"') assert len(results) == 2 @@ -277,9 +302,19 @@ async def test_checkpoint_alist( checkpoints = test_data["checkpoints"] metadata = test_data["metadata"] - await checkpointer.aput(configs[1], checkpoints[1], metadata[0], {}) - await checkpointer.aput(configs[2], checkpoints[2], metadata[1], {}) - await checkpointer.aput(configs[3], checkpoints[3], metadata[2], {}) + await run_on_background( + async_engine, checkpointer.aput(configs[1], checkpoints[1], metadata[0], {}) + ) + await run_on_background( + async_engine, checkpointer.aput(configs[2], checkpoints[2], metadata[1], {}) + ) + await run_on_background( + async_engine, checkpointer.aput(configs[3], checkpoints[3], metadata[2], {}) + ) + + # Helper to consume async iterator on background thread + async def consume_alist(config, filter): + return [c async for c in checkpointer.alist(config, filter=filter)] # call method / assertions query_1 = {"source": "input"} # search by 1 key @@ -290,26 +325,35 @@ async def test_checkpoint_alist( query_3: dict[str, Any] = {} # search by no keys, return all checkpoints query_4 = {"source": "update", "step": 1} # no match - search_results_1 = [c async for c in 
checkpointer.alist(None, filter=query_1)] + search_results_1 = await run_on_background( + async_engine, consume_alist(None, filter=query_1) + ) assert len(search_results_1) == 1 print(metadata[0]) print(search_results_1[0].metadata) assert search_results_1[0].metadata == metadata[0] - search_results_2 = [c async for c in checkpointer.alist(None, filter=query_2)] + search_results_2 = await run_on_background( + async_engine, consume_alist(None, filter=query_2) + ) assert len(search_results_2) == 1 assert search_results_2[0].metadata == metadata[1] - search_results_3 = [c async for c in checkpointer.alist(None, filter=query_3)] + search_results_3 = await run_on_background( + async_engine, consume_alist(None, filter=query_3) + ) assert len(search_results_3) == 3 - search_results_4 = [c async for c in checkpointer.alist(None, filter=query_4)] + search_results_4 = await run_on_background( + async_engine, consume_alist(None, filter=query_4) + ) assert len(search_results_4) == 0 # search by config (defaults to checkpoints across all namespaces) - search_results_5 = [ - c async for c in checkpointer.alist({"configurable": {"thread_id": "thread-2"}}) - ] + search_results_5 = await run_on_background( + async_engine, + consume_alist({"configurable": {"thread_id": "thread-2"}}, filter=None), + ) assert len(search_results_5) == 2 assert { search_results_5[0].config["configurable"]["checkpoint_ns"], @@ -353,6 +397,7 @@ def _llm_type(self) -> str: @pytest.mark.asyncio async def test_checkpoint_with_agent( + async_engine: PostgresEngine, checkpointer: AsyncPostgresSaver, ) -> None: # from the tests in https://github.com/langchain-ai/langgraph/blob/909190cede6a80bb94a2d4cfe7dedc49ef0d4127/libs/langgraph/tests/test_prebuilt.py @@ -360,8 +405,9 @@ async def test_checkpoint_with_agent( agent = create_react_agent(model, [], checkpointer=checkpointer) inputs = [HumanMessage("hi?")] - response = await agent.ainvoke( - {"messages": inputs}, config=thread_agent_config, debug=True + response 
= await run_on_background( + async_engine, + agent.ainvoke({"messages": inputs}, config=thread_agent_config, debug=True), ) expected_response = {"messages": inputs + [AIMessage(content="hi?", id="0")]} assert response == expected_response @@ -372,7 +418,9 @@ def _AnyIdHumanMessage(**kwargs: Any) -> HumanMessage: message.id = AnyStr() return message - saved = await checkpointer.aget_tuple(thread_agent_config) + saved = await run_on_background( + async_engine, checkpointer.aget_tuple(thread_agent_config) + ) assert saved is not None assert ( _AnyIdHumanMessage(content="hi?") @@ -392,6 +440,7 @@ def _AnyIdHumanMessage(**kwargs: Any) -> HumanMessage: @pytest.mark.asyncio async def test_checkpoint_aget_tuple( + async_engine: PostgresEngine, checkpointer: AsyncPostgresSaver, test_data: dict[str, Any], ) -> None: @@ -399,30 +448,48 @@ async def test_checkpoint_aget_tuple( checkpoints = test_data["checkpoints"] metadata = test_data["metadata"] - new_config = await checkpointer.aput(configs[1], checkpoints[1], metadata[0], {}) + new_config = await run_on_background( + async_engine, checkpointer.aput(configs[1], checkpoints[1], metadata[0], {}) + ) # Matching checkpoint - search_results_1 = await checkpointer.aget_tuple(new_config) + search_results_1 = await run_on_background( + async_engine, checkpointer.aget_tuple(new_config) + ) assert search_results_1.metadata == metadata[0] # type: ignore # No matching checkpoint - assert await checkpointer.aget_tuple(configs[0]) is None + assert ( + await run_on_background(async_engine, checkpointer.aget_tuple(configs[0])) + is None + ) @pytest.mark.asyncio async def test_metadata( + async_engine: PostgresEngine, checkpointer: AsyncPostgresSaver, test_data: dict[str, Any], ) -> None: - config = await checkpointer.aput( - test_data["configs"][0], - test_data["checkpoints"][0], - {"my_key": "abc"}, # type: ignore - {}, + # Wrap aput + config = await run_on_background( + async_engine, + checkpointer.aput( + test_data["configs"][0], + 
test_data["checkpoints"][0], + {"my_key": "abc"}, # type: ignore + {}, + ), + ) + tuple_result = await run_on_background( + async_engine, checkpointer.aget_tuple(config) + ) + assert tuple_result.metadata["my_key"] == "abc" # type: ignore + + async def consume_alist(config, filter): + return [c async for c in checkpointer.alist(config, filter=filter)] + + alist_results = await run_on_background( + async_engine, consume_alist(None, filter={"my_key": "abc"}) ) - assert (await checkpointer.aget_tuple(config)).metadata["my_key"] == "abc" # type: ignore - assert [c async for c in checkpointer.alist(None, filter={"my_key": "abc"})][ - 0 - ].metadata[ - "my_key" # type: ignore - ] == "abc" # type: ignore + assert alist_results[0].metadata["my_key"] == "abc" # type: ignore diff --git a/tests/test_async_loader.py b/tests/test_async_loader.py index c29a82f7..61316519 100644 --- a/tests/test_async_loader.py +++ b/tests/test_async_loader.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import asyncio import json import os import uuid +from typing import Any, Coroutine import pytest import pytest_asyncio @@ -34,10 +36,23 @@ table_name = "test-table" + str(uuid.uuid4()) +# Helper to bridge the Main Test Loop and the Engine Background Loop +async def run_on_background(engine: PostgresEngine, coro: Coroutine) -> Any: + """Runs a coroutine on the engine's background loop.""" + if engine._loop: + return await asyncio.wrap_future( + asyncio.run_coroutine_threadsafe(coro, engine._loop) + ) + return await coro + + async def aexecute(engine: PostgresEngine, query: str) -> None: - async with engine._pool.connect() as conn: - await conn.execute(text(query)) - await conn.commit() + async def _action(): + async with engine._pool.connect() as conn: + await conn.execute(text(query)) + await conn.commit() + + await run_on_background(engine, _action()) @pytest.mark.asyncio(scope="class") @@ -45,7 +60,6 @@ class TestLoaderAsync: @pytest_asyncio.fixture(scope="class") async def engine(self): - PostgresEngine._connector = None engine = await PostgresEngine.afrom_instance( project_id=project_id, instance=instance_id, @@ -56,37 +70,50 @@ async def engine(self): await engine.close() - async def _collect_async_items(self, docs_generator): - """Collects items from an async generator.""" - docs = [] - async for doc in docs_generator: - docs.append(doc) - return docs + async def _collect_async_items(self, engine, docs_generator): + """Collects items from an async generator, running on background loop.""" + + async def _consume(): + docs = [] + async for doc in docs_generator: + docs.append(doc) + return docs + + return await run_on_background(engine, _consume()) async def _cleanup_table(self, engine): await aexecute(engine, f'DROP TABLE IF EXISTS "{table_name}"') async def test_create_loader_with_invalid_parameters(self, engine): with pytest.raises(ValueError): - await AsyncPostgresLoader.create( - engine=engine, + await run_on_background( + engine, + 
AsyncPostgresLoader.create( + engine=engine, + ), ) with pytest.raises(ValueError): def fake_formatter(): return None - await AsyncPostgresLoader.create( - engine=engine, - table_name=table_name, - format="text", - formatter=fake_formatter, + await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + table_name=table_name, + format="text", + formatter=fake_formatter, + ), ) with pytest.raises(ValueError): - await AsyncPostgresLoader.create( - engine=engine, - table_name=table_name, - format="fake_format", + await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + table_name=table_name, + format="fake_format", + ), ) async def test_load_from_query_default(self, engine): @@ -110,12 +137,15 @@ async def test_load_from_query_default(self, engine): """ await aexecute(engine, insert_query) - loader = await AsyncPostgresLoader.create( - engine=engine, - table_name=table_name, + loader = await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + table_name=table_name, + ), ) - documents = await self._collect_async_items(loader.alazy_load()) + documents = await self._collect_async_items(engine, loader.alazy_load()) assert documents == [ Document( @@ -153,20 +183,23 @@ async def test_load_from_query_customized_content_customized_metadata(self, engi """ await aexecute(engine, insert_query) - loader = await AsyncPostgresLoader.create( - engine=engine, - query=f'SELECT * FROM "{table_name}";', - content_columns=[ - "fruit_name", - "variety", - "quantity_in_stock", - "price_per_unit", - "organic", - ], - metadata_columns=["fruit_id"], + loader = await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + query=f'SELECT * FROM "{table_name}";', + content_columns=[ + "fruit_name", + "variety", + "quantity_in_stock", + "price_per_unit", + "organic", + ], + metadata_columns=["fruit_id"], + ), ) - documents = await self._collect_async_items(loader.alazy_load()) + documents = await 
self._collect_async_items(engine, loader.alazy_load()) assert documents == [ Document( @@ -205,19 +238,20 @@ async def test_load_from_query_customized_content_default_metadata(self, engine) """ await aexecute(engine, insert_query) - loader = await AsyncPostgresLoader.create( - engine=engine, - query=f'SELECT * FROM "{table_name}";', - content_columns=[ - "variety", - "quantity_in_stock", - "price_per_unit", - ], + loader = await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + query=f'SELECT * FROM "{table_name}";', + content_columns=[ + "variety", + "quantity_in_stock", + "price_per_unit", + ], + ), ) - documents = [] - async for docs in loader.alazy_load(): - documents.append(docs) + documents = await self._collect_async_items(engine, loader.alazy_load()) assert documents == [ Document( @@ -230,18 +264,21 @@ async def test_load_from_query_customized_content_default_metadata(self, engine) ) ] - loader = await AsyncPostgresLoader.create( - engine=engine, - query=f'SELECT * FROM "{table_name}";', - content_columns=[ - "variety", - "quantity_in_stock", - "price_per_unit", - ], - format="JSON", + loader = await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + query=f'SELECT * FROM "{table_name}";', + content_columns=[ + "variety", + "quantity_in_stock", + "price_per_unit", + ], + format="JSON", + ), ) - documents = await self._collect_async_items(loader.alazy_load()) + documents = await self._collect_async_items(engine, loader.alazy_load()) assert documents == [ Document( @@ -280,13 +317,16 @@ async def test_load_from_query_default_content_customized_metadata(self, engine) """ await aexecute(engine, insert_query) - loader = await AsyncPostgresLoader.create( - engine=engine, - query=f'SELECT * FROM "{table_name}";', - metadata_columns=["fruit_name", "organic"], + loader = await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + query=f'SELECT * FROM "{table_name}";', + 
metadata_columns=["fruit_name", "organic"], + ), ) - documents = await self._collect_async_items(loader.alazy_load()) + documents = await self._collect_async_items(engine, loader.alazy_load()) assert documents == [ Document( @@ -317,16 +357,19 @@ async def test_load_from_query_with_langchain_metadata(self, engine): VALUES ('Apple', 'Granny Smith', 150, 1, '{metadata}');""" await aexecute(engine, insert_query) - loader = await AsyncPostgresLoader.create( - engine=engine, - query=f'SELECT * FROM "{table_name}";', - metadata_columns=[ - "fruit_name", - "langchain_metadata", - ], + loader = await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + query=f'SELECT * FROM "{table_name}";', + metadata_columns=[ + "fruit_name", + "langchain_metadata", + ], + ), ) - documents = await self._collect_async_items(loader.alazy_load()) + documents = await self._collect_async_items(engine, loader.alazy_load()) assert documents == [ Document( @@ -362,15 +405,18 @@ async def test_load_from_query_with_json(self, engine): VALUES ('Apple', '{variety}', 150, 1, '{metadata}');""" await aexecute(engine, insert_query) - loader = await AsyncPostgresLoader.create( - engine=engine, - query=f'SELECT * FROM "{table_name}";', - metadata_columns=[ - "variety", - ], + loader = await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + query=f'SELECT * FROM "{table_name}";', + metadata_columns=[ + "variety", + ], + ), ) - documents = await self._collect_async_items(loader.alazy_load()) + documents = await self._collect_async_items(engine, loader.alazy_load()) assert documents == [ Document( @@ -411,18 +457,21 @@ def my_formatter(row, content_columns): str(row[column]) for column in content_columns if column in row ) - loader = await AsyncPostgresLoader.create( - engine=engine, - query=f'SELECT * FROM "{table_name}";', - content_columns=[ - "variety", - "quantity_in_stock", - "price_per_unit", - ], - formatter=my_formatter, + loader = await 
run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + query=f'SELECT * FROM "{table_name}";', + content_columns=[ + "variety", + "quantity_in_stock", + "price_per_unit", + ], + formatter=my_formatter, + ), ) - documents = await self._collect_async_items(loader.alazy_load()) + documents = await self._collect_async_items(engine, loader.alazy_load()) assert documents == [ Document( @@ -458,18 +507,21 @@ async def test_load_from_query_customized_content_default_metadata_custom_page_c """ await aexecute(engine, insert_query) - loader = await AsyncPostgresLoader.create( - engine=engine, - query=f'SELECT * FROM "{table_name}";', - content_columns=[ - "variety", - "quantity_in_stock", - "price_per_unit", - ], - format="YAML", + loader = await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + query=f'SELECT * FROM "{table_name}";', + content_columns=[ + "variety", + "quantity_in_stock", + "price_per_unit", + ], + format="YAML", + ), ) - documents = await self._collect_async_items(loader.alazy_load()) + documents = await self._collect_async_items(engine, loader.alazy_load()) assert documents == [ Document( @@ -487,7 +539,7 @@ async def test_load_from_query_customized_content_default_metadata_custom_page_c async def test_save_doc_with_default_metadata(self, engine): await self._cleanup_table(engine) - await engine._ainit_document_table(table_name) + await run_on_background(engine, engine._ainit_document_table(table_name)) test_docs = [ Document( page_content="Apple Granny Smith 150 0.99 1", @@ -502,16 +554,21 @@ async def test_save_doc_with_default_metadata(self, engine): metadata={"fruit_id": 3}, ), ] - saver = await AsyncPostgresDocumentSaver.create( - engine=engine, table_name=table_name + saver = await run_on_background( + engine, + AsyncPostgresDocumentSaver.create(engine=engine, table_name=table_name), + ) + loader = await run_on_background( + engine, AsyncPostgresLoader.create(engine=engine, table_name=table_name) ) - 
loader = await AsyncPostgresLoader.create(engine=engine, table_name=table_name) - await saver.aadd_documents(test_docs) - docs = await self._collect_async_items(loader.alazy_load()) + await run_on_background(engine, saver.aadd_documents(test_docs)) + docs = await self._collect_async_items(engine, loader.alazy_load()) assert docs == test_docs - assert (await engine._aload_table_schema(table_name)).columns.keys() == [ + + schema = await run_on_background(engine, engine._aload_table_schema(table_name)) + assert schema.columns.keys() == [ "page_content", "langchain_metadata", ] @@ -520,13 +577,16 @@ async def test_save_doc_with_default_metadata(self, engine): @pytest.mark.parametrize("store_metadata", [True, False]) async def test_save_doc_with_customized_metadata(self, engine, store_metadata): table_name = "test-table" + str(uuid.uuid4()) - await engine._ainit_document_table( - table_name, - metadata_columns=[ - Column("fruit_name", "VARCHAR"), - Column("organic", "BOOLEAN"), - ], - store_metadata=store_metadata, + await run_on_background( + engine, + engine._ainit_document_table( + table_name, + metadata_columns=[ + Column("fruit_name", "VARCHAR"), + Column("organic", "BOOLEAN"), + ], + store_metadata=store_metadata, + ), ) test_docs = [ Document( @@ -538,24 +598,30 @@ async def test_save_doc_with_customized_metadata(self, engine, store_metadata): }, ), ] - saver = await AsyncPostgresDocumentSaver.create( - engine=engine, table_name=table_name + saver = await run_on_background( + engine, + AsyncPostgresDocumentSaver.create(engine=engine, table_name=table_name), ) - loader = await AsyncPostgresLoader.create( - engine=engine, - table_name=table_name, - metadata_columns=[ - "fruit_name", - "organic", - ], + loader = await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + table_name=table_name, + metadata_columns=[ + "fruit_name", + "organic", + ], + ), ) - await saver.aadd_documents(test_docs) - docs = await 
self._collect_async_items(loader.alazy_load()) + await run_on_background(engine, saver.aadd_documents(test_docs)) + docs = await self._collect_async_items(engine, loader.alazy_load()) + + schema = await run_on_background(engine, engine._aload_table_schema(table_name)) if store_metadata: docs == test_docs - assert (await engine._aload_table_schema(table_name)).columns.keys() == [ + assert schema.columns.keys() == [ "page_content", "fruit_name", "organic", @@ -568,7 +634,7 @@ async def test_save_doc_with_customized_metadata(self, engine, store_metadata): metadata={"fruit_name": "Apple", "organic": True}, ), ] - assert (await engine._aload_table_schema(table_name)).columns.keys() == [ + assert schema.columns.keys() == [ "page_content", "fruit_name", "organic", @@ -577,7 +643,9 @@ async def test_save_doc_with_customized_metadata(self, engine, store_metadata): async def test_save_doc_without_metadata(self, engine): table_name = "test-table" + str(uuid.uuid4()) - await engine._ainit_document_table(table_name, store_metadata=False) + await run_on_background( + engine, engine._ainit_document_table(table_name, store_metadata=False) + ) test_docs = [ Document( page_content="Granny Smith 150 0.99", @@ -588,17 +656,21 @@ async def test_save_doc_without_metadata(self, engine): }, ), ] - saver = await AsyncPostgresDocumentSaver.create( - engine=engine, table_name=table_name + saver = await run_on_background( + engine, + AsyncPostgresDocumentSaver.create(engine=engine, table_name=table_name), ) - await saver.aadd_documents(test_docs) + await run_on_background(engine, saver.aadd_documents(test_docs)) - loader = await AsyncPostgresLoader.create( - engine=engine, - table_name=table_name, + loader = await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + table_name=table_name, + ), ) - docs = await self._collect_async_items(loader.alazy_load()) + docs = await self._collect_async_items(engine, loader.alazy_load()) assert docs == [ Document( @@ -606,14 
+678,15 @@ async def test_save_doc_without_metadata(self, engine): metadata={}, ), ] - assert (await engine._aload_table_schema(table_name)).columns.keys() == [ + schema = await run_on_background(engine, engine._aload_table_schema(table_name)) + assert schema.columns.keys() == [ "page_content", ] await aexecute(engine, f'DROP TABLE IF EXISTS "{table_name}"') async def test_delete_doc_with_default_metadata(self, engine): table_name = "test-table" + str(uuid.uuid4()) - await engine._ainit_document_table(table_name) + await run_on_background(engine, engine._ainit_document_table(table_name)) test_docs = [ Document( @@ -625,37 +698,43 @@ async def test_delete_doc_with_default_metadata(self, engine): metadata={"fruit_id": 2}, ), ] - saver = await AsyncPostgresDocumentSaver.create( - engine=engine, table_name=table_name + saver = await run_on_background( + engine, + AsyncPostgresDocumentSaver.create(engine=engine, table_name=table_name), + ) + loader = await run_on_background( + engine, AsyncPostgresLoader.create(engine=engine, table_name=table_name) ) - loader = await AsyncPostgresLoader.create(engine=engine, table_name=table_name) - await saver.aadd_documents(test_docs) - docs = await self._collect_async_items(loader.alazy_load()) + await run_on_background(engine, saver.aadd_documents(test_docs)) + docs = await self._collect_async_items(engine, loader.alazy_load()) assert docs == test_docs - await saver.adelete(docs[:1]) - assert len(await self._collect_async_items(loader.alazy_load())) == 1 + await run_on_background(engine, saver.adelete(docs[:1])) + assert len(await self._collect_async_items(engine, loader.alazy_load())) == 1 - await saver.adelete(docs) - assert len(await self._collect_async_items(loader.alazy_load())) == 0 + await run_on_background(engine, saver.adelete(docs)) + assert len(await self._collect_async_items(engine, loader.alazy_load())) == 0 await aexecute(engine, f'DROP TABLE IF EXISTS "{table_name}"') async def test_delete_doc_with_query(self, 
engine): await self._cleanup_table(engine) - await engine._ainit_document_table( - table_name, - metadata_columns=[ - Column( - "fruit_name", - "VARCHAR", - ), - Column( - "organic", - "BOOLEAN", - ), - ], - store_metadata=True, + await run_on_background( + engine, + engine._ainit_document_table( + table_name, + metadata_columns=[ + Column( + "fruit_name", + "VARCHAR", + ), + Column( + "organic", + "BOOLEAN", + ), + ], + store_metadata=True, + ), ) test_docs = [ @@ -684,18 +763,21 @@ async def test_delete_doc_with_query(self, engine): }, ), ] - saver = await AsyncPostgresDocumentSaver.create( - engine=engine, table_name=table_name + saver = await run_on_background( + engine, + AsyncPostgresDocumentSaver.create(engine=engine, table_name=table_name), ) query = f"SELECT * FROM \"{table_name}\" WHERE fruit_name='Apple';" - loader = await AsyncPostgresLoader.create(engine=engine, query=query) + loader = await run_on_background( + engine, AsyncPostgresLoader.create(engine=engine, query=query) + ) - await saver.aadd_documents(test_docs) - docs = await self._collect_async_items(loader.alazy_load()) + await run_on_background(engine, saver.aadd_documents(test_docs)) + docs = await self._collect_async_items(engine, loader.alazy_load()) assert len(docs) == 1 - await saver.adelete(docs) - assert len(await self._collect_async_items(loader.alazy_load())) == 0 + await run_on_background(engine, saver.adelete(docs)) + assert len(await self._collect_async_items(engine, loader.alazy_load())) == 0 await self._cleanup_table(engine) @pytest.mark.parametrize("metadata_json_column", [None, "metadata_col_test"]) @@ -704,14 +786,17 @@ async def test_delete_doc_with_customized_metadata( ): table_name = "test-table" + str(uuid.uuid4()) content_column = "content_col_test" - await engine._ainit_document_table( - table_name, - metadata_columns=[ - Column("fruit_name", "VARCHAR"), - Column("organic", "BOOLEAN"), - ], - content_column=content_column, - metadata_json_column=metadata_json_column, + 
await run_on_background( + engine, + engine._ainit_document_table( + table_name, + metadata_columns=[ + Column("fruit_name", "VARCHAR"), + Column("organic", "BOOLEAN"), + ], + content_column=content_column, + metadata_json_column=metadata_json_column, + ), ) test_docs = [ Document( @@ -731,27 +816,33 @@ async def test_delete_doc_with_customized_metadata( }, ), ] - saver = await AsyncPostgresDocumentSaver.create( - engine=engine, - table_name=table_name, - content_column=content_column, - metadata_json_column=metadata_json_column, + saver = await run_on_background( + engine, + AsyncPostgresDocumentSaver.create( + engine=engine, + table_name=table_name, + content_column=content_column, + metadata_json_column=metadata_json_column, + ), ) - loader = await AsyncPostgresLoader.create( - engine=engine, - table_name=table_name, - content_columns=[content_column], - metadata_json_column=metadata_json_column, + loader = await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + table_name=table_name, + content_columns=[content_column], + metadata_json_column=metadata_json_column, + ), ) - await saver.aadd_documents(test_docs) + await run_on_background(engine, saver.aadd_documents(test_docs)) - docs = await loader.aload() + docs = await run_on_background(engine, loader.aload()) assert len(docs) == 2 - await saver.adelete(docs[:1]) - assert len(await self._collect_async_items(loader.alazy_load())) == 1 + await run_on_background(engine, saver.adelete(docs[:1])) + assert len(await self._collect_async_items(engine, loader.alazy_load())) == 1 - await saver.adelete(docs) - assert len(await self._collect_async_items(loader.alazy_load())) == 0 + await run_on_background(engine, saver.adelete(docs)) + assert len(await self._collect_async_items(engine, loader.alazy_load())) == 0 await aexecute(engine, f'DROP TABLE IF EXISTS "{table_name}"') diff --git a/tests/test_async_vectorstore.py b/tests/test_async_vectorstore.py index d0e85d0b..6bcd58f5 100644 --- 
a/tests/test_async_vectorstore.py +++ b/tests/test_async_vectorstore.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import asyncio import os import uuid -from typing import Sequence +from typing import Any, Coroutine, Sequence import pytest import pytest_asyncio @@ -50,18 +51,35 @@ def get_env_var(key: str, desc: str) -> str: return v +# Helper to bridge the Main Test Loop and the Engine Background Loop +async def run_on_background(engine: PostgresEngine, coro: Coroutine) -> Any: + """Runs a coroutine on the engine's background loop.""" + if engine._loop: + return await asyncio.wrap_future( + asyncio.run_coroutine_threadsafe(coro, engine._loop) + ) + return await coro + + async def aexecute(engine: PostgresEngine, query: str) -> None: - async with engine._pool.connect() as conn: - await conn.execute(text(query)) - await conn.commit() + async def _impl(): + async with engine._pool.connect() as conn: + await conn.execute(text(query)) + await conn.commit() + + # Run on background loop + await run_on_background(engine, _impl()) async def afetch(engine: PostgresEngine, query: str) -> Sequence[RowMapping]: - async with engine._pool.connect() as conn: - result = await conn.execute(text(query)) - result_map = result.mappings() - result_fetch = result_map.fetchall() - return result_fetch + async def _impl(): + async with engine._pool.connect() as conn: + result = await conn.execute(text(query)) + result_map = result.mappings() + return result_map.fetchall() + + # Run on background loop + return await run_on_background(engine, _impl()) @pytest.mark.asyncio(scope="class") @@ -98,34 +116,50 @@ async def engine(self, db_project, db_region, db_instance, db_name): @pytest_asyncio.fixture(scope="class") async def vs(self, engine): - await engine._ainit_vectorstore_table(DEFAULT_TABLE, VECTOR_SIZE) - vs = await AsyncPostgresVectorStore.create( + # Wrap private init method + await run_on_background( + 
engine, engine._ainit_vectorstore_table(DEFAULT_TABLE, VECTOR_SIZE) + ) + # Wrap creation of the async vectorstore + vs = await run_on_background( engine, - embedding_service=embeddings_service, - table_name=DEFAULT_TABLE, + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=DEFAULT_TABLE, + ), ) yield vs @pytest_asyncio.fixture(scope="class") async def vs_custom(self, engine): - await engine._ainit_vectorstore_table( - CUSTOM_TABLE, - VECTOR_SIZE, - id_column="myid", - content_column="mycontent", - embedding_column="myembedding", - metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], - metadata_json_column="mymeta", + # Wrap private init method + await run_on_background( + engine, + engine._ainit_vectorstore_table( + CUSTOM_TABLE, + VECTOR_SIZE, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], + metadata_json_column="mymeta", + ), ) - vs = await AsyncPostgresVectorStore.create( + + # Wrap creation of the async vectorstore + vs = await run_on_background( engine, - embedding_service=embeddings_service, - table_name=CUSTOM_TABLE, - id_column="myid", - content_column="mycontent", - embedding_column="myembedding", - metadata_columns=["page", "source"], - metadata_json_column="mymeta", + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=CUSTOM_TABLE, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=["page", "source"], + metadata_json_column="mymeta", + ), ) yield vs @@ -144,32 +178,44 @@ async def test_init_with_constructor(self, engine): async def test_post_init(self, engine): with pytest.raises(ValueError): - await AsyncPostgresVectorStore.create( + await run_on_background( engine, - embedding_service=embeddings_service, - table_name=CUSTOM_TABLE, - id_column="myid", - content_column="noname", - 
embedding_column="myembedding", - metadata_columns=["page", "source"], - metadata_json_column="mymeta", + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=CUSTOM_TABLE, + id_column="myid", + content_column="noname", + embedding_column="myembedding", + metadata_columns=["page", "source"], + metadata_json_column="mymeta", + ), ) async def test_id_metadata_column(self, engine): table_name = "id_metadata" + str(uuid.uuid4()) - await engine._ainit_vectorstore_table( - table_name, - VECTOR_SIZE, - metadata_columns=[Column("id", "TEXT")], + await run_on_background( + engine, + engine._ainit_vectorstore_table( + table_name, + VECTOR_SIZE, + metadata_columns=[Column("id", "TEXT")], + ), ) - custom_vs = await AsyncPostgresVectorStore.create( + custom_vs = await run_on_background( engine, - embedding_service=embeddings_service, - table_name=table_name, - metadata_columns=["id"], + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=table_name, + metadata_columns=["id"], + ), ) ids = [str(uuid.uuid4()) for i in range(len(texts))] - await custom_vs.aadd_texts(texts, id_column_as_metadata, ids) + # Wrap aadd_texts + await run_on_background( + engine, custom_vs.aadd_texts(texts, id_column_as_metadata, ids) + ) results = await afetch(engine, f'SELECT * FROM "{table_name}"') assert len(results) == 3 @@ -180,12 +226,14 @@ async def test_id_metadata_column(self, engine): async def test_aadd_texts(self, engine, vs): ids = [str(uuid.uuid4()) for i in range(len(texts))] - await vs.aadd_texts(texts, ids=ids) + # Wrap aadd_texts + await run_on_background(engine, vs.aadd_texts(texts, ids=ids)) results = await afetch(engine, f'SELECT * FROM "{DEFAULT_TABLE}"') assert len(results) == 3 ids = [str(uuid.uuid4()) for i in range(len(texts))] - await vs.aadd_texts(texts, metadatas, ids) + # Wrap aadd_texts + await run_on_background(engine, vs.aadd_texts(texts, metadatas, ids)) results = await afetch(engine, 
f'SELECT * FROM "{DEFAULT_TABLE}"') assert len(results) == 6 await aexecute(engine, f'TRUNCATE TABLE "{DEFAULT_TABLE}"') @@ -193,42 +241,43 @@ async def test_aadd_texts(self, engine, vs): async def test_aadd_texts_edge_cases(self, engine, vs): texts = ["Taylor's", '"Swift"', "best-friend"] ids = [str(uuid.uuid4()) for i in range(len(texts))] - await vs.aadd_texts(texts, ids=ids) + # Wrap aadd_texts + await run_on_background(engine, vs.aadd_texts(texts, ids=ids)) results = await afetch(engine, f'SELECT * FROM "{DEFAULT_TABLE}"') assert len(results) == 3 await aexecute(engine, f'TRUNCATE TABLE "{DEFAULT_TABLE}"') async def test_aadd_docs(self, engine, vs): ids = [str(uuid.uuid4()) for i in range(len(texts))] - await vs.aadd_documents(docs, ids=ids) + # Wrap aadd_documents + await run_on_background(engine, vs.aadd_documents(docs, ids=ids)) results = await afetch(engine, f'SELECT * FROM "{DEFAULT_TABLE}"') assert len(results) == 3 await aexecute(engine, f'TRUNCATE TABLE "{DEFAULT_TABLE}"') async def test_aadd_docs_no_ids(self, engine, vs): - await vs.aadd_documents(docs) + # Wrap aadd_documents + await run_on_background(engine, vs.aadd_documents(docs)) results = await afetch(engine, f'SELECT * FROM "{DEFAULT_TABLE}"') assert len(results) == 3 await aexecute(engine, f'TRUNCATE TABLE "{DEFAULT_TABLE}"') async def test_adelete(self, engine, vs): ids = [str(uuid.uuid4()) for i in range(len(texts))] - await vs.aadd_texts(texts, ids=ids) + await run_on_background(engine, vs.aadd_texts(texts, ids=ids)) results = await afetch(engine, f'SELECT * FROM "{DEFAULT_TABLE}"') assert len(results) == 3 - # delete an ID - await vs.adelete([ids[0]]) + await run_on_background(engine, vs.adelete([ids[0]])) results = await afetch(engine, f'SELECT * FROM "{DEFAULT_TABLE}"') assert len(results) == 2 - # delete with no ids - result = await vs.adelete() + result = await run_on_background(engine, vs.adelete()) assert result == False ##### Custom Vector Store ##### async def 
test_aadd_texts_custom(self, engine, vs_custom): ids = [str(uuid.uuid4()) for i in range(len(texts))] - await vs_custom.aadd_texts(texts, ids=ids) + await run_on_background(engine, vs_custom.aadd_texts(texts, ids=ids)) results = await afetch(engine, f'SELECT * FROM "{CUSTOM_TABLE}"') assert len(results) == 3 assert results[0]["mycontent"] == "foo" @@ -237,7 +286,7 @@ async def test_aadd_texts_custom(self, engine, vs_custom): assert results[0]["source"] is None ids = [str(uuid.uuid4()) for i in range(len(texts))] - await vs_custom.aadd_texts(texts, metadatas, ids) + await run_on_background(engine, vs_custom.aadd_texts(texts, metadatas, ids)) results = await afetch(engine, f'SELECT * FROM "{CUSTOM_TABLE}"') assert len(results) == 6 await aexecute(engine, f'TRUNCATE TABLE "{CUSTOM_TABLE}"') @@ -251,7 +300,7 @@ async def test_aadd_docs_custom(self, engine, vs_custom): ) for i in range(len(texts)) ] - await vs_custom.aadd_documents(docs, ids=ids) + await run_on_background(engine, vs_custom.aadd_documents(docs, ids=ids)) results = await afetch(engine, f'SELECT * FROM "{CUSTOM_TABLE}"') assert len(results) == 3 @@ -263,13 +312,12 @@ async def test_aadd_docs_custom(self, engine, vs_custom): async def test_adelete_custom(self, engine, vs_custom): ids = [str(uuid.uuid4()) for i in range(len(texts))] - await vs_custom.aadd_texts(texts, ids=ids) + await run_on_background(engine, vs_custom.aadd_texts(texts, ids=ids)) results = await afetch(engine, f'SELECT * FROM "{CUSTOM_TABLE}"') content = [result["mycontent"] for result in results] assert len(results) == 3 assert "foo" in content - # delete an ID - await vs_custom.adelete([ids[0]]) + await run_on_background(engine, vs_custom.adelete([ids[0]])) results = await afetch(engine, f'SELECT * FROM "{CUSTOM_TABLE}"') content = [result["mycontent"] for result in results] assert len(results) == 2 @@ -277,90 +325,111 @@ async def test_adelete_custom(self, engine, vs_custom): async def test_ignore_metadata_columns(self, engine): 
column_to_ignore = "source" - vs = await AsyncPostgresVectorStore.create( + vs = await run_on_background( engine, - embedding_service=embeddings_service, - table_name=CUSTOM_TABLE, - ignore_metadata_columns=[column_to_ignore], - id_column="myid", - content_column="mycontent", - embedding_column="myembedding", - metadata_json_column="mymeta", - ) - assert column_to_ignore not in vs.metadata_columns - - async def test_create_vectorstore_with_invalid_parameters_1(self, engine): - with pytest.raises(ValueError): - await AsyncPostgresVectorStore.create( + AsyncPostgresVectorStore.create( engine, embedding_service=embeddings_service, table_name=CUSTOM_TABLE, + ignore_metadata_columns=[column_to_ignore], id_column="myid", content_column="mycontent", embedding_column="myembedding", - metadata_columns=["random_column"], # invalid metadata column + metadata_json_column="mymeta", + ), + ) + assert column_to_ignore not in vs.metadata_columns + + async def test_create_vectorstore_with_invalid_parameters_1(self, engine): + with pytest.raises(ValueError): + await run_on_background( + engine, + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=CUSTOM_TABLE, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=["random_column"], # invalid metadata column + ), ) async def test_create_vectorstore_with_invalid_parameters_2(self, engine): with pytest.raises(ValueError): - await AsyncPostgresVectorStore.create( + await run_on_background( engine, - embedding_service=embeddings_service, - table_name=CUSTOM_TABLE, - id_column="myid", - content_column="langchain_id", # invalid content column type - embedding_column="myembedding", - metadata_columns=["random_column"], + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=CUSTOM_TABLE, + id_column="myid", + content_column="langchain_id", # invalid content column type + embedding_column="myembedding", + 
metadata_columns=["random_column"], + ), ) async def test_create_vectorstore_with_invalid_parameters_3(self, engine): with pytest.raises(ValueError): - await AsyncPostgresVectorStore.create( + await run_on_background( engine, - embedding_service=embeddings_service, - table_name=CUSTOM_TABLE, - id_column="myid", - content_column="mycontent", - embedding_column="random_column", # invalid embedding column - metadata_columns=["random_column"], + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=CUSTOM_TABLE, + id_column="myid", + content_column="mycontent", + embedding_column="random_column", # invalid embedding column + metadata_columns=["random_column"], + ), ) async def test_create_vectorstore_with_invalid_parameters_4(self, engine): with pytest.raises(ValueError): - await AsyncPostgresVectorStore.create( + await run_on_background( engine, - embedding_service=embeddings_service, - table_name=CUSTOM_TABLE, - id_column="myid", - content_column="mycontent", - embedding_column="langchain_id", # invalid embedding column data type - metadata_columns=["random_column"], + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=CUSTOM_TABLE, + id_column="myid", + content_column="mycontent", + embedding_column="langchain_id", # invalid embedding column data type + metadata_columns=["random_column"], + ), ) async def test_create_vectorstore_with_invalid_parameters_5(self, engine): with pytest.raises(ValueError): - await AsyncPostgresVectorStore.create( + await run_on_background( engine, - embedding_service=embeddings_service, - table_name=CUSTOM_TABLE, - id_column="myid", - content_column="mycontent", - embedding_column="langchain_id", - metadata_columns=["random_column"], - ignore_metadata_columns=[ - "one", - "two", - ], # invalid use of metadata_columns and ignore columns + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=CUSTOM_TABLE, + 
id_column="myid", + content_column="mycontent", + embedding_column="langchain_id", + metadata_columns=["random_column"], + ignore_metadata_columns=[ + "one", + "two", + ], # invalid use of metadata_columns and ignore columns + ), ) async def test_create_vectorstore_with_init(self, engine): with pytest.raises(Exception): - await AsyncPostgresVectorStore( - engine._pool, - embedding_service=embeddings_service, - table_name=CUSTOM_TABLE, - id_column="myid", - content_column="mycontent", - embedding_column="myembedding", - metadata_columns=["random_column"], # invalid metadata column + await run_on_background( + engine, + AsyncPostgresVectorStore( + engine._pool, + embedding_service=embeddings_service, + table_name=CUSTOM_TABLE, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=["random_column"], # invalid metadata column + ), ) diff --git a/tests/test_async_vectorstore_from_methods.py b/tests/test_async_vectorstore_from_methods.py index 529675c2..aeba3995 100644 --- a/tests/test_async_vectorstore_from_methods.py +++ b/tests/test_async_vectorstore_from_methods.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import asyncio import os import uuid -from typing import Sequence +from typing import Any, Coroutine, Sequence import pytest import pytest_asyncio @@ -51,18 +52,33 @@ def get_env_var(key: str, desc: str) -> str: return v +# Helper to bridge the Main Test Loop and the Engine Background Loop +async def run_on_background(engine: PostgresEngine, coro: Coroutine) -> Any: + """Runs a coroutine on the engine's background loop.""" + if engine._loop: + return await asyncio.wrap_future( + asyncio.run_coroutine_threadsafe(coro, engine._loop) + ) + return await coro + + async def aexecute(engine: PostgresEngine, query: str) -> None: - async with engine._pool.connect() as conn: - await conn.execute(text(query)) - await conn.commit() + async def _impl(): + async with engine._pool.connect() as conn: + await conn.execute(text(query)) + await conn.commit() + + await run_on_background(engine, _impl()) async def afetch(engine: PostgresEngine, query: str) -> Sequence[RowMapping]: - async with engine._pool.connect() as conn: - result = await conn.execute(text(query)) - result_map = result.mappings() - result_fetch = result_map.fetchall() - return result_fetch + async def _impl(): + async with engine._pool.connect() as conn: + result = await conn.execute(text(query)) + result_map = result.mappings() + return result_map.fetchall() + + return await run_on_background(engine, _impl()) @pytest.mark.asyncio @@ -91,24 +107,34 @@ async def engine(self, db_project, db_region, db_instance, db_name): region=db_region, database=db_name, ) - await engine._ainit_vectorstore_table(DEFAULT_TABLE, VECTOR_SIZE) - await engine._ainit_vectorstore_table( - CUSTOM_TABLE, - VECTOR_SIZE, - id_column="myid", - content_column="mycontent", - embedding_column="myembedding", - metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], - store_metadata=False, + await run_on_background( + engine, engine._ainit_vectorstore_table(DEFAULT_TABLE, VECTOR_SIZE) + ) + await run_on_background( + engine, + 
engine._ainit_vectorstore_table( + CUSTOM_TABLE, + VECTOR_SIZE, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], + store_metadata=False, + ), ) - await engine._ainit_vectorstore_table( - CUSTOM_TABLE_WITH_INT_ID, - VECTOR_SIZE, - id_column=Column(name="integer_id", data_type="INTEGER", nullable="False"), - content_column="mycontent", - embedding_column="myembedding", - metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], - store_metadata=False, + await run_on_background( + engine, + engine._ainit_vectorstore_table( + CUSTOM_TABLE_WITH_INT_ID, + VECTOR_SIZE, + id_column=Column( + name="integer_id", data_type="INTEGER", nullable="False" + ), + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], + store_metadata=False, + ), ) yield engine await aexecute(engine, f"DROP TABLE IF EXISTS {DEFAULT_TABLE}") @@ -118,13 +144,16 @@ async def engine(self, db_project, db_region, db_instance, db_name): async def test_afrom_texts(self, engine): ids = [str(uuid.uuid4()) for i in range(len(texts))] - await AsyncPostgresVectorStore.afrom_texts( - texts, - embeddings_service, + await run_on_background( engine, - DEFAULT_TABLE, - metadatas=metadatas, - ids=ids, + AsyncPostgresVectorStore.afrom_texts( + texts, + embeddings_service, + engine, + DEFAULT_TABLE, + metadatas=metadatas, + ids=ids, + ), ) results = await afetch(engine, f"SELECT * FROM {DEFAULT_TABLE}") assert len(results) == 3 @@ -132,12 +161,15 @@ async def test_afrom_texts(self, engine): async def test_afrom_docs(self, engine): ids = [str(uuid.uuid4()) for i in range(len(texts))] - await AsyncPostgresVectorStore.afrom_documents( - docs, - embeddings_service, + await run_on_background( engine, - DEFAULT_TABLE, - ids=ids, + AsyncPostgresVectorStore.afrom_documents( + docs, + embeddings_service, + engine, + DEFAULT_TABLE, + 
ids=ids, + ), ) results = await afetch(engine, f"SELECT * FROM {DEFAULT_TABLE}") assert len(results) == 3 @@ -145,16 +177,19 @@ async def test_afrom_docs(self, engine): async def test_afrom_texts_custom(self, engine): ids = [str(uuid.uuid4()) for i in range(len(texts))] - await AsyncPostgresVectorStore.afrom_texts( - texts, - embeddings_service, + await run_on_background( engine, - CUSTOM_TABLE, - ids=ids, - id_column="myid", - content_column="mycontent", - embedding_column="myembedding", - metadata_columns=["page", "source"], + AsyncPostgresVectorStore.afrom_texts( + texts, + embeddings_service, + engine, + CUSTOM_TABLE, + ids=ids, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=["page", "source"], + ), ) results = await afetch(engine, f"SELECT * FROM {CUSTOM_TABLE}") assert len(results) == 3 @@ -172,16 +207,19 @@ async def test_afrom_docs_custom(self, engine): ) for i in range(len(texts)) ] - await AsyncPostgresVectorStore.afrom_documents( - docs, - embeddings_service, + await run_on_background( engine, - CUSTOM_TABLE, - ids=ids, - id_column="myid", - content_column="mycontent", - embedding_column="myembedding", - metadata_columns=["page", "source"], + AsyncPostgresVectorStore.afrom_documents( + docs, + embeddings_service, + engine, + CUSTOM_TABLE, + ids=ids, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=["page", "source"], + ), ) results = await afetch(engine, f"SELECT * FROM {CUSTOM_TABLE}") @@ -201,16 +239,19 @@ async def test_afrom_docs_custom_with_int_id(self, engine): ) for i in range(len(texts)) ] - await AsyncPostgresVectorStore.afrom_documents( - docs, - embeddings_service, + await run_on_background( engine, - CUSTOM_TABLE_WITH_INT_ID, - ids=ids, - id_column="integer_id", - content_column="mycontent", - embedding_column="myembedding", - metadata_columns=["page", "source"], + AsyncPostgresVectorStore.afrom_documents( + docs, + 
embeddings_service, + engine, + CUSTOM_TABLE_WITH_INT_ID, + ids=ids, + id_column="integer_id", + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=["page", "source"], + ), ) results = await afetch(engine, f"SELECT * FROM {CUSTOM_TABLE_WITH_INT_ID}") diff --git a/tests/test_async_vectorstore_index.py b/tests/test_async_vectorstore_index.py index d45e114f..be61a9fa 100644 --- a/tests/test_async_vectorstore_index.py +++ b/tests/test_async_vectorstore_index.py @@ -13,8 +13,10 @@ # limitations under the License. +import asyncio import os import uuid +from typing import Any, Coroutine import pytest import pytest_asyncio @@ -60,10 +62,23 @@ def get_env_var(key: str, desc: str) -> str: return v +# Helper to bridge the Main Test Loop and the Engine Background Loop +async def run_on_background(engine: PostgresEngine, coro: Coroutine) -> Any: + """Runs a coroutine on the engine's background loop.""" + if engine._loop: + return await asyncio.wrap_future( + asyncio.run_coroutine_threadsafe(coro, engine._loop) + ) + return await coro + + async def aexecute(engine: PostgresEngine, query: str) -> None: - async with engine._pool.connect() as conn: - await conn.execute(text(query)) - await conn.commit() + async def _impl(): + async with engine._pool.connect() as conn: + await conn.execute(text(query)) + await conn.commit() + + await run_on_background(engine, _impl()) @pytest.mark.asyncio(scope="class") @@ -100,74 +115,90 @@ async def engine(self, db_project, db_region, db_instance, db_name): @pytest_asyncio.fixture(scope="class") async def vs(self, engine): - await engine._ainit_vectorstore_table( - DEFAULT_TABLE, VECTOR_SIZE, overwrite_existing=True + await run_on_background( + engine, + engine._ainit_vectorstore_table( + DEFAULT_TABLE, VECTOR_SIZE, overwrite_existing=True + ), ) - vs = await AsyncPostgresVectorStore.create( + vs = await run_on_background( engine, - embedding_service=embeddings_service, - table_name=DEFAULT_TABLE, + 
AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=DEFAULT_TABLE, + ), ) - await vs.aadd_texts(texts, ids=ids) - await vs.adrop_vector_index() + await run_on_background(engine, vs.aadd_texts(texts, ids=ids)) + await run_on_background(engine, vs.adrop_vector_index()) yield vs async def test_apply_default_name_vector_index(self, engine): - await engine._ainit_vectorstore_table( - SIMPLE_TABLE, VECTOR_SIZE, overwrite_existing=True + await run_on_background( + engine, + engine._ainit_vectorstore_table( + SIMPLE_TABLE, VECTOR_SIZE, overwrite_existing=True + ), ) - vs = await AsyncPostgresVectorStore.create( + + vs = await run_on_background( engine, - embedding_service=embeddings_service, - table_name=SIMPLE_TABLE, + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=SIMPLE_TABLE, + ), ) - await vs.aadd_texts(texts, ids=ids) - await vs.adrop_vector_index() + await run_on_background(engine, vs.aadd_texts(texts, ids=ids)) + await run_on_background(engine, vs.adrop_vector_index()) + index = HNSWIndex() - await vs.aapply_vector_index(index) - assert await vs.is_valid_index() - await vs.adrop_vector_index() + await run_on_background(engine, vs.aapply_vector_index(index)) + assert await run_on_background(engine, vs.is_valid_index()) + await run_on_background(engine, vs.adrop_vector_index()) - async def test_aapply_vector_index(self, vs): - await vs.adrop_vector_index(DEFAULT_INDEX_NAME) + async def test_aapply_vector_index(self, engine, vs): + await run_on_background(engine, vs.adrop_vector_index(DEFAULT_INDEX_NAME)) index = HNSWIndex(name=DEFAULT_INDEX_NAME) - await vs.aapply_vector_index(index) - assert await vs.is_valid_index(DEFAULT_INDEX_NAME) - await vs.adrop_vector_index() + await run_on_background(engine, vs.aapply_vector_index(index)) + assert await run_on_background(engine, vs.is_valid_index(DEFAULT_INDEX_NAME)) + await run_on_background(engine, vs.adrop_vector_index()) - async 
def test_areindex(self, vs): - if not await vs.is_valid_index(DEFAULT_INDEX_NAME): + async def test_areindex(self, engine, vs): + if not await run_on_background(engine, vs.is_valid_index(DEFAULT_INDEX_NAME)): index = HNSWIndex() - await vs.aapply_vector_index(index) - await vs.areindex(DEFAULT_INDEX_NAME) - await vs.areindex(DEFAULT_INDEX_NAME) - assert await vs.is_valid_index(DEFAULT_INDEX_NAME) - await vs.adrop_vector_index() - - async def test_dropindex(self, vs): - await vs.adrop_vector_index(DEFAULT_INDEX_NAME) - result = await vs.is_valid_index(DEFAULT_INDEX_NAME) + await run_on_background(engine, vs.aapply_vector_index(index)) + await run_on_background(engine, vs.areindex(DEFAULT_INDEX_NAME)) + await run_on_background(engine, vs.areindex(DEFAULT_INDEX_NAME)) + assert await run_on_background(engine, vs.is_valid_index(DEFAULT_INDEX_NAME)) + await run_on_background(engine, vs.adrop_vector_index()) + + async def test_dropindex(self, engine, vs): + await run_on_background(engine, vs.adrop_vector_index(DEFAULT_INDEX_NAME)) + result = await run_on_background(engine, vs.is_valid_index(DEFAULT_INDEX_NAME)) assert not result - async def test_aapply_vector_index_ivfflat(self, vs): - await vs.adrop_vector_index(DEFAULT_INDEX_NAME) + async def test_aapply_vector_index_ivfflat(self, engine, vs): + await run_on_background(engine, vs.adrop_vector_index(DEFAULT_INDEX_NAME)) index = IVFFlatIndex( name=DEFAULT_INDEX_NAME, distance_strategy=DistanceStrategy.EUCLIDEAN ) - await vs.aapply_vector_index(index, concurrently=True) - assert await vs.is_valid_index(DEFAULT_INDEX_NAME) + await run_on_background( + engine, vs.aapply_vector_index(index, concurrently=True) + ) + assert await run_on_background(engine, vs.is_valid_index(DEFAULT_INDEX_NAME)) index = IVFFlatIndex( name="secondindex", distance_strategy=DistanceStrategy.INNER_PRODUCT, ) - await vs.aapply_vector_index(index) - assert await vs.is_valid_index("secondindex") - await vs.adrop_vector_index("secondindex") - await 
vs.adrop_vector_index(DEFAULT_INDEX_NAME) + await run_on_background(engine, vs.aapply_vector_index(index)) + assert await run_on_background(engine, vs.is_valid_index("secondindex")) + await run_on_background(engine, vs.adrop_vector_index("secondindex")) + await run_on_background(engine, vs.adrop_vector_index(DEFAULT_INDEX_NAME)) - async def test_is_valid_index(self, vs): - is_valid = await vs.is_valid_index("invalid_index") + async def test_is_valid_index(self, engine, vs): + is_valid = await run_on_background(engine, vs.is_valid_index("invalid_index")) assert is_valid == False async def test_aapply_hybrid_search_index_table_without_tsv_column( @@ -175,18 +206,25 @@ async def test_aapply_hybrid_search_index_table_without_tsv_column( ): # overwriting vs to get a hybrid vs tsv_index_name = "index_without_tsv_column_" + UUID_STR - vs = await AsyncPostgresVectorStore.create( + vs = await run_on_background( engine, - embedding_service=embeddings_service, - table_name=DEFAULT_TABLE, - hybrid_search_config=HybridSearchConfig(index_name=tsv_index_name), + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=DEFAULT_TABLE, + hybrid_search_config=HybridSearchConfig(index_name=tsv_index_name), + ), + ) + is_valid_index = await run_on_background( + engine, vs.is_valid_index(tsv_index_name) ) - is_valid_index = await vs.is_valid_index(tsv_index_name) assert is_valid_index == False - await vs.aapply_hybrid_search_index() - assert await vs.is_valid_index(tsv_index_name) - await vs.adrop_vector_index(tsv_index_name) - is_valid_index = await vs.is_valid_index(tsv_index_name) + await run_on_background(engine, vs.aapply_hybrid_search_index()) + assert await run_on_background(engine, vs.is_valid_index(tsv_index_name)) + await run_on_background(engine, vs.adrop_vector_index(tsv_index_name)) + is_valid_index = await run_on_background( + engine, vs.is_valid_index(tsv_index_name) + ) assert is_valid_index == False async def 
test_aapply_hybrid_search_index_table_with_tsv_column(self, engine): @@ -196,23 +234,34 @@ async def test_aapply_hybrid_search_index_table_with_tsv_column(self, engine): tsv_lang="pg_catalog.english", index_name=tsv_index_name, ) - await engine._ainit_vectorstore_table( - DEFAULT_HYBRID_TABLE, - VECTOR_SIZE, - hybrid_search_config=config, + await run_on_background( + engine, + engine._ainit_vectorstore_table( + DEFAULT_HYBRID_TABLE, + VECTOR_SIZE, + hybrid_search_config=config, + ), ) - vs = await AsyncPostgresVectorStore.create( + vs = await run_on_background( engine, - embedding_service=embeddings_service, - table_name=DEFAULT_HYBRID_TABLE, - hybrid_search_config=config, + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=DEFAULT_HYBRID_TABLE, + hybrid_search_config=config, + ), + ) + + is_valid_index = await run_on_background( + engine, vs.is_valid_index(tsv_index_name) ) - is_valid_index = await vs.is_valid_index(tsv_index_name) assert is_valid_index == False - await vs.aapply_hybrid_search_index() - assert await vs.is_valid_index(tsv_index_name) - await vs.areindex(tsv_index_name) - assert await vs.is_valid_index(tsv_index_name) - await vs.adrop_vector_index(tsv_index_name) - is_valid_index = await vs.is_valid_index(tsv_index_name) + await run_on_background(engine, vs.aapply_hybrid_search_index()) + assert await run_on_background(engine, vs.is_valid_index(tsv_index_name)) + await run_on_background(engine, vs.areindex(tsv_index_name)) + assert await run_on_background(engine, vs.is_valid_index(tsv_index_name)) + await run_on_background(engine, vs.adrop_vector_index(tsv_index_name)) + is_valid_index = await run_on_background( + engine, vs.is_valid_index(tsv_index_name) + ) assert is_valid_index == False diff --git a/tests/test_async_vectorstore_search.py b/tests/test_async_vectorstore_search.py index 9f496503..16a63911 100644 --- a/tests/test_async_vectorstore_search.py +++ b/tests/test_async_vectorstore_search.py @@ 
-12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import asyncio import os import uuid +from typing import Any, Coroutine import pytest import pytest_asyncio @@ -73,13 +75,26 @@ def get_env_var(key: str, desc: str) -> str: return v +# Helper to bridge the Main Test Loop and the Engine Background Loop +async def run_on_background(engine: PostgresEngine, coro: Coroutine) -> Any: + """Runs a coroutine on the engine's background loop.""" + if engine._loop: + return await asyncio.wrap_future( + asyncio.run_coroutine_threadsafe(coro, engine._loop) + ) + return await coro + + async def aexecute( engine: PostgresEngine, query: str, ) -> None: - async with engine._pool.connect() as conn: - await conn.execute(text(query)) - await conn.commit() + async def _impl(): + async with engine._pool.connect() as conn: + await conn.execute(text(query)) + await conn.commit() + + await run_on_background(engine, _impl()) @pytest.mark.asyncio(scope="class") @@ -118,78 +133,98 @@ async def engine(self, db_project, db_region, db_instance, db_name): @pytest_asyncio.fixture(scope="class") async def vs(self, engine): - await engine._ainit_vectorstore_table( - DEFAULT_TABLE, VECTOR_SIZE, store_metadata=False + await run_on_background( + engine, + engine._ainit_vectorstore_table( + DEFAULT_TABLE, VECTOR_SIZE, store_metadata=False + ), ) - vs = await AsyncPostgresVectorStore.create( + vs = await run_on_background( engine, - embedding_service=embeddings_service, - table_name=DEFAULT_TABLE, + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=DEFAULT_TABLE, + ), ) - await vs.aadd_documents(docs, ids=ids) + await run_on_background(engine, vs.aadd_documents(docs, ids=ids)) yield vs @pytest_asyncio.fixture(scope="class") async def vs_custom(self, engine): - await engine._ainit_vectorstore_table( - CUSTOM_TABLE, - VECTOR_SIZE, - id_column="myid", - content_column="mycontent", - 
embedding_column="myembedding", - metadata_columns=[ - Column("page", "TEXT"), - Column("source", "TEXT"), - ], - store_metadata=False, - ) - - vs_custom = await AsyncPostgresVectorStore.create( - engine, - embedding_service=embeddings_service, - table_name=CUSTOM_TABLE, - id_column="myid", - content_column="mycontent", - embedding_column="myembedding", - index_query_options=HNSWQueryOptions(ef_search=1), - ) - await vs_custom.aadd_documents(docs, ids=ids) + await run_on_background( + engine, + engine._ainit_vectorstore_table( + CUSTOM_TABLE, + VECTOR_SIZE, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=[ + Column("page", "TEXT"), + Column("source", "TEXT"), + ], + store_metadata=False, + ), + ) + + vs_custom = await run_on_background( + engine, + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=CUSTOM_TABLE, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + index_query_options=HNSWQueryOptions(ef_search=1), + ), + ) + await run_on_background(engine, vs_custom.aadd_documents(docs, ids=ids)) yield vs_custom @pytest_asyncio.fixture(scope="class") async def vs_custom_filter(self, engine): - await engine._ainit_vectorstore_table( - CUSTOM_FILTER_TABLE, - VECTOR_SIZE, - metadata_columns=[ - Column("name", "TEXT"), - Column("code", "TEXT"), - Column("price", "FLOAT"), - Column("is_available", "BOOLEAN"), - Column("tags", "TEXT[]"), - Column("inventory_location", "INTEGER[]"), - Column("available_quantity", "INTEGER", nullable=True), - ], - id_column="langchain_id", - store_metadata=False, - ) - - vs_custom_filter = await AsyncPostgresVectorStore.create( - engine, - embedding_service=embeddings_service, - table_name=CUSTOM_FILTER_TABLE, - metadata_columns=[ - "name", - "code", - "price", - "is_available", - "tags", - "inventory_location", - "available_quantity", - ], - id_column="langchain_id", - ) - await 
vs_custom_filter.aadd_documents(filter_docs, ids=ids) + await run_on_background( + engine, + engine._ainit_vectorstore_table( + CUSTOM_FILTER_TABLE, + VECTOR_SIZE, + metadata_columns=[ + Column("name", "TEXT"), + Column("code", "TEXT"), + Column("price", "FLOAT"), + Column("is_available", "BOOLEAN"), + Column("tags", "TEXT[]"), + Column("inventory_location", "INTEGER[]"), + Column("available_quantity", "INTEGER", nullable=True), + ], + id_column="langchain_id", + store_metadata=False, + ), + ) + + vs_custom_filter = await run_on_background( + engine, + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=CUSTOM_FILTER_TABLE, + metadata_columns=[ + "name", + "code", + "price", + "is_available", + "tags", + "inventory_location", + "available_quantity", + ], + id_column="langchain_id", + ), + ) + await run_on_background( + engine, vs_custom_filter.aadd_documents(filter_docs, ids=ids) + ) yield vs_custom_filter @pytest_asyncio.fixture(scope="class") @@ -204,188 +239,239 @@ async def vs_hybrid_search_with_tsv_column(self, engine): "fetch_top_k": 10, }, ) - await engine._ainit_vectorstore_table( - HYBRID_SEARCH_TABLE1, - VECTOR_SIZE, - id_column=Column("myid", "TEXT"), - content_column="mycontent", - embedding_column="myembedding", - metadata_columns=[ - Column("page", "TEXT"), - Column("source", "TEXT"), - Column("doc_id_key", "TEXT"), - ], - metadata_json_column="mymetadata", # ignored - store_metadata=False, - hybrid_search_config=hybrid_search_config, - ) - - vs_custom = await AsyncPostgresVectorStore.create( - engine, - embedding_service=embeddings_service, - table_name=HYBRID_SEARCH_TABLE1, - id_column="myid", - content_column="mycontent", - embedding_column="myembedding", - metadata_json_column="mymetadata", - metadata_columns=["doc_id_key"], - index_query_options=HNSWQueryOptions(ef_search=1), - hybrid_search_config=hybrid_search_config, - ) - await vs_custom.aadd_documents(hybrid_docs) + await run_on_background( + 
engine, + engine._ainit_vectorstore_table( + HYBRID_SEARCH_TABLE1, + VECTOR_SIZE, + id_column=Column("myid", "TEXT"), + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=[ + Column("page", "TEXT"), + Column("source", "TEXT"), + Column("doc_id_key", "TEXT"), + ], + metadata_json_column="mymetadata", # ignored + store_metadata=False, + hybrid_search_config=hybrid_search_config, + ), + ) + + vs_custom = await run_on_background( + engine, + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=HYBRID_SEARCH_TABLE1, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + metadata_json_column="mymetadata", + metadata_columns=["doc_id_key"], + index_query_options=HNSWQueryOptions(ef_search=1), + hybrid_search_config=hybrid_search_config, + ), + ) + await run_on_background(engine, vs_custom.aadd_documents(hybrid_docs)) yield vs_custom - async def test_asimilarity_search(self, vs): - results = await vs.asimilarity_search("foo", k=1) + async def test_asimilarity_search(self, engine, vs): + results = await run_on_background(engine, vs.asimilarity_search("foo", k=1)) assert len(results) == 1 assert results == [Document(page_content="foo", id=ids[0])] - results = await vs.asimilarity_search("foo", k=1, filter={"content": "bar"}) + results = await run_on_background( + engine, vs.asimilarity_search("foo", k=1, filter={"content": "bar"}) + ) assert results == [Document(page_content="bar", id=ids[1])] - async def test_asimilarity_search_score(self, vs): - results = await vs.asimilarity_search_with_score("foo") + async def test_asimilarity_search_score(self, engine, vs): + results = await run_on_background( + engine, vs.asimilarity_search_with_score("foo") + ) assert len(results) == 4 assert results[0][0] == Document(page_content="foo", id=ids[0]) assert results[0][1] == 0 - async def test_asimilarity_search_by_vector(self, vs): + async def test_asimilarity_search_by_vector(self, 
engine, vs): embedding = embeddings_service.embed_query("foo") - results = await vs.asimilarity_search_by_vector(embedding) + results = await run_on_background( + engine, vs.asimilarity_search_by_vector(embedding) + ) assert len(results) == 4 assert results[0] == Document(page_content="foo", id=ids[0]) - results = await vs.asimilarity_search_with_score_by_vector(embedding) + results = await run_on_background( + engine, vs.asimilarity_search_with_score_by_vector(embedding) + ) assert results[0][0] == Document(page_content="foo", id=ids[0]) assert results[0][1] == 0 - async def test_similarity_search_with_relevance_scores_threshold_cosine(self, vs): + async def test_similarity_search_with_relevance_scores_threshold_cosine( + self, engine, vs + ): score_threshold = {"score_threshold": 0} - results = await vs.asimilarity_search_with_relevance_scores( - "foo", **score_threshold + results = await run_on_background( + engine, + vs.asimilarity_search_with_relevance_scores("foo", **score_threshold), ) # Note: Since tests use FakeEmbeddings which are non-normalized vectors, results might have scores beyond the range [0,1]. # For a normalized embedding service, a threshold of zero will yield all matched documents. 
assert len(results) == 2 score_threshold = {"score_threshold": 0.02} - results = await vs.asimilarity_search_with_relevance_scores( - "foo", **score_threshold + results = await run_on_background( + engine, + vs.asimilarity_search_with_relevance_scores("foo", **score_threshold), ) assert len(results) == 2 score_threshold = {"score_threshold": 0.9} - results = await vs.asimilarity_search_with_relevance_scores( - "foo", **score_threshold + results = await run_on_background( + engine, + vs.asimilarity_search_with_relevance_scores("foo", **score_threshold), ) assert len(results) == 1 assert results[0][0] == Document(page_content="foo", id=ids[0]) score_threshold = {"score_threshold": 0.02} vs.distance_strategy = DistanceStrategy.EUCLIDEAN - results = await vs.asimilarity_search_with_relevance_scores( - "foo", **score_threshold + results = await run_on_background( + engine, + vs.asimilarity_search_with_relevance_scores("foo", **score_threshold), ) assert len(results) == 1 async def test_similarity_search_with_relevance_scores_threshold_euclidean( self, engine ): - vs = await AsyncPostgresVectorStore.create( + vs = await run_on_background( engine, - embedding_service=embeddings_service, - table_name=DEFAULT_TABLE, - distance_strategy=DistanceStrategy.EUCLIDEAN, + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=DEFAULT_TABLE, + distance_strategy=DistanceStrategy.EUCLIDEAN, + ), ) score_threshold = {"score_threshold": 0.9} - results = await vs.asimilarity_search_with_relevance_scores( - "foo", **score_threshold + results = await run_on_background( + engine, + vs.asimilarity_search_with_relevance_scores("foo", **score_threshold), ) assert len(results) == 1 assert results[0][0] == Document(page_content="foo", id=ids[0]) - async def test_amax_marginal_relevance_search(self, vs): - results = await vs.amax_marginal_relevance_search("bar") + async def test_amax_marginal_relevance_search(self, engine, vs): + results = await 
run_on_background( + engine, vs.amax_marginal_relevance_search("bar") + ) assert results[0] == Document(page_content="bar", id=ids[1]) - results = await vs.amax_marginal_relevance_search( - "bar", filter={"content": "boo"} + results = await run_on_background( + engine, vs.amax_marginal_relevance_search("bar", filter={"content": "boo"}) ) assert results[0] == Document(page_content="boo", id=ids[3]) - async def test_amax_marginal_relevance_search_vector(self, vs): + async def test_amax_marginal_relevance_search_vector(self, engine, vs): embedding = embeddings_service.embed_query("bar") - results = await vs.amax_marginal_relevance_search_by_vector(embedding) + results = await run_on_background( + engine, vs.amax_marginal_relevance_search_by_vector(embedding) + ) assert results[0] == Document(page_content="bar", id=ids[1]) - async def test_amax_marginal_relevance_search_vector_score(self, vs): + async def test_amax_marginal_relevance_search_vector_score(self, engine, vs): embedding = embeddings_service.embed_query("bar") - results = await vs.amax_marginal_relevance_search_with_score_by_vector( - embedding + results = await run_on_background( + engine, vs.amax_marginal_relevance_search_with_score_by_vector(embedding) ) assert results[0][0] == Document(page_content="bar", id=ids[1]) - results = await vs.amax_marginal_relevance_search_with_score_by_vector( - embedding, lambda_mult=0.75, fetch_k=10 + results = await run_on_background( + engine, + vs.amax_marginal_relevance_search_with_score_by_vector( + embedding, lambda_mult=0.75, fetch_k=10 + ), ) assert results[0][0] == Document(page_content="bar", id=ids[1]) - async def test_similarity_search(self, vs_custom): - results = await vs_custom.asimilarity_search("foo", k=1) + async def test_similarity_search(self, engine, vs_custom): + results = await run_on_background( + engine, vs_custom.asimilarity_search("foo", k=1) + ) assert len(results) == 1 assert results == [Document(page_content="foo", id=ids[0])] - results = await 
vs_custom.asimilarity_search( - "foo", k=1, filter={"mycontent": "bar"} + results = await run_on_background( + engine, + vs_custom.asimilarity_search("foo", k=1, filter={"mycontent": "bar"}), ) assert results == [Document(page_content="bar", id=ids[1])] - async def test_similarity_search_score(self, vs_custom): - results = await vs_custom.asimilarity_search_with_score("foo") + async def test_similarity_search_score(self, engine, vs_custom): + results = await run_on_background( + engine, vs_custom.asimilarity_search_with_score("foo") + ) assert len(results) == 4 assert results[0][0] == Document(page_content="foo", id=ids[0]) assert results[0][1] == 0 - async def test_similarity_search_by_vector(self, vs_custom): + async def test_similarity_search_by_vector(self, engine, vs_custom): embedding = embeddings_service.embed_query("foo") - results = await vs_custom.asimilarity_search_by_vector(embedding) + results = await run_on_background( + engine, vs_custom.asimilarity_search_by_vector(embedding) + ) assert len(results) == 4 assert results[0] == Document(page_content="foo", id=ids[0]) - results = await vs_custom.asimilarity_search_with_score_by_vector(embedding) + results = await run_on_background( + engine, vs_custom.asimilarity_search_with_score_by_vector(embedding) + ) assert results[0][0] == Document(page_content="foo", id=ids[0]) assert results[0][1] == 0 - async def test_max_marginal_relevance_search(self, vs_custom): - results = await vs_custom.amax_marginal_relevance_search("bar") + async def test_max_marginal_relevance_search(self, engine, vs_custom): + results = await run_on_background( + engine, vs_custom.amax_marginal_relevance_search("bar") + ) assert results[0] == Document(page_content="bar", id=ids[1]) - results = await vs_custom.amax_marginal_relevance_search( - "bar", filter={"mycontent": "boo"} + results = await run_on_background( + engine, + vs_custom.amax_marginal_relevance_search( + "bar", filter={"mycontent": "boo"} + ), ) assert results[0] == 
Document(page_content="boo", id=ids[3]) - async def test_max_marginal_relevance_search_vector(self, vs_custom): + async def test_max_marginal_relevance_search_vector(self, engine, vs_custom): embedding = embeddings_service.embed_query("bar") - results = await vs_custom.amax_marginal_relevance_search_by_vector(embedding) + results = await run_on_background( + engine, vs_custom.amax_marginal_relevance_search_by_vector(embedding) + ) assert results[0] == Document(page_content="bar", id=ids[1]) - async def test_max_marginal_relevance_search_vector_score(self, vs_custom): + async def test_max_marginal_relevance_search_vector_score(self, engine, vs_custom): embedding = embeddings_service.embed_query("bar") - results = await vs_custom.amax_marginal_relevance_search_with_score_by_vector( - embedding + results = await run_on_background( + engine, + vs_custom.amax_marginal_relevance_search_with_score_by_vector(embedding), ) assert results[0][0] == Document(page_content="bar", id=ids[1]) - results = await vs_custom.amax_marginal_relevance_search_with_score_by_vector( - embedding, lambda_mult=0.75, fetch_k=10 + results = await run_on_background( + engine, + vs_custom.amax_marginal_relevance_search_with_score_by_vector( + embedding, lambda_mult=0.75, fetch_k=10 + ), ) assert results[0][0] == Document(page_content="bar", id=ids[1]) - async def test_aget_by_ids(self, vs): + async def test_aget_by_ids(self, engine, vs): test_ids = [ids[0]] - results = await vs.aget_by_ids(ids=test_ids) + results = await run_on_background(engine, vs.aget_by_ids(ids=test_ids)) assert results[0] == Document(page_content="foo", id=ids[0]) - async def test_aget_by_ids_custom_vs(self, vs_custom): + async def test_aget_by_ids_custom_vs(self, engine, vs_custom): test_ids = [ids[0]] - results = await vs_custom.aget_by_ids(ids=test_ids) + results = await run_on_background(engine, vs_custom.aget_by_ids(ids=test_ids)) assert results[0] == Document(page_content="foo", id=ids[0]) @@ -397,45 +483,52 @@ def 
test_get_by_ids(self, vs): @pytest.mark.parametrize("test_filter, expected_ids", FILTERING_TEST_CASES) async def test_vectorstore_with_metadata_filters( self, + engine, vs_custom_filter, test_filter, expected_ids, ): """Test end to end construction and search.""" - docs = await vs_custom_filter.asimilarity_search( - "meow", k=5, filter=test_filter + docs = await run_on_background( + engine, vs_custom_filter.asimilarity_search("meow", k=5, filter=test_filter) ) assert [doc.metadata["code"] for doc in docs] == expected_ids, test_filter - async def test_asimilarity_hybrid_search_rrk(self, vs): - results = await vs.asimilarity_search( - "foo", - k=1, - hybrid_search_config=HybridSearchConfig( - fusion_function=reciprocal_rank_fusion + async def test_asimilarity_hybrid_search_rrk(self, engine, vs): + results = await run_on_background( + engine, + vs.asimilarity_search( + "foo", + k=1, + hybrid_search_config=HybridSearchConfig( + fusion_function=reciprocal_rank_fusion + ), ), ) assert len(results) == 1 assert results == [Document(page_content="foo", id=ids[0])] - results = await vs.asimilarity_search( - "bar", - k=1, - filter={"content": {"$ne": "baz"}}, - hybrid_search_config=HybridSearchConfig( - fusion_function=reciprocal_rank_fusion, - fusion_function_parameters={ - "rrf_k": 100, - "fetch_top_k": 10, - }, - primary_top_k=1, - secondary_top_k=1, + results = await run_on_background( + engine, + vs.asimilarity_search( + "bar", + k=1, + filter={"content": {"$ne": "baz"}}, + hybrid_search_config=HybridSearchConfig( + fusion_function=reciprocal_rank_fusion, + fusion_function_parameters={ + "rrf_k": 100, + "fetch_top_k": 10, + }, + primary_top_k=1, + secondary_top_k=1, + ), ), ) assert results == [Document(page_content="bar", id=ids[1])] async def test_hybrid_search_weighted_sum_default( - self, vs_hybrid_search_with_tsv_column + self, engine, vs_hybrid_search_with_tsv_column ): """Test hybrid search with default weighted sum (0.5 vector, 0.5 FTS).""" query = "apple" # 
Should match "apple" in FTS and vector @@ -443,10 +536,9 @@ async def test_hybrid_search_weighted_sum_default( # The vs_hybrid_search_with_tsv_column instance is already configured for hybrid search. # Default fusion is weighted_sum_ranking with 0.5/0.5 weights. # fts_query will default to the main query. - results_with_scores = ( - await vs_hybrid_search_with_tsv_column.asimilarity_search_with_score( - query, k=3 - ) + results_with_scores = await run_on_background( + engine, + vs_hybrid_search_with_tsv_column.asimilarity_search_with_score(query, k=3), ) assert len(results_with_scores) > 1 @@ -463,7 +555,7 @@ async def test_hybrid_search_weighted_sum_default( assert results_with_scores[0][1] >= results_with_scores[1][1] async def test_hybrid_search_weighted_sum_vector_bias( - self, vs_hybrid_search_with_tsv_column + self, engine, vs_hybrid_search_with_tsv_column ): """Test weighted sum with higher weight for vector results.""" query = "Apple Inc technology" # More specific for vector similarity @@ -476,8 +568,11 @@ async def test_hybrid_search_weighted_sum_vector_bias( }, # fts_query will default to main query ) - results = await vs_hybrid_search_with_tsv_column.asimilarity_search( - query, k=2, hybrid_search_config=config + results = await run_on_background( + engine, + vs_hybrid_search_with_tsv_column.asimilarity_search( + query, k=2, hybrid_search_config=config + ), ) result_ids = [doc.metadata["doc_id_key"] for doc in results] @@ -485,7 +580,7 @@ async def test_hybrid_search_weighted_sum_vector_bias( assert result_ids[0] == "hs_doc_generic_tech" async def test_hybrid_search_weighted_sum_fts_bias( - self, vs_hybrid_search_with_tsv_column + self, engine, vs_hybrid_search_with_tsv_column ): """Test weighted sum with higher weight for FTS results.""" query = "fruit common tasty" # Strong FTS signal for fruit docs @@ -498,8 +593,11 @@ async def test_hybrid_search_weighted_sum_fts_bias( "secondary_results_weight": 0.99, # FTS bias }, ) - results = await 
vs_hybrid_search_with_tsv_column.asimilarity_search( - query, k=2, hybrid_search_config=config + results = await run_on_background( + engine, + vs_hybrid_search_with_tsv_column.asimilarity_search( + query, k=2, hybrid_search_config=config + ), ) result_ids = [doc.metadata["doc_id_key"] for doc in results] @@ -507,7 +605,7 @@ async def test_hybrid_search_weighted_sum_fts_bias( assert "hs_doc_apple_fruit" in result_ids async def test_hybrid_search_reciprocal_rank_fusion( - self, vs_hybrid_search_with_tsv_column + self, engine, vs_hybrid_search_with_tsv_column ): """Test hybrid search with Reciprocal Rank Fusion.""" query = "technology company" @@ -524,10 +622,11 @@ async def test_hybrid_search_reciprocal_rank_fusion( "fetch_top_k": 2, }, # RRF specific params ) - # The `k` in asimilarity_search here is the final desired number of results, - # which should align with fusion_function_parameters.fetch_top_k for RRF. - results = await vs_hybrid_search_with_tsv_column.asimilarity_search( - query, k=2, hybrid_search_config=config + results = await run_on_background( + engine, + vs_hybrid_search_with_tsv_column.asimilarity_search( + query, k=2, hybrid_search_config=config + ), ) result_ids = [doc.metadata["doc_id_key"] for doc in results] @@ -539,7 +638,7 @@ async def test_hybrid_search_reciprocal_rank_fusion( assert result_ids[0] == "hs_doc_apple_tech" # Stronger combined signal async def test_hybrid_search_explicit_fts_query( - self, vs_hybrid_search_with_tsv_column + self, engine, vs_hybrid_search_with_tsv_column ): """Test hybrid search when fts_query in HybridSearchConfig is different from main query.""" main_vector_query = "Apple Inc." 
# For vector search @@ -553,8 +652,11 @@ async def test_hybrid_search_explicit_fts_query( "secondary_results_weight": 0.5, }, ) - results = await vs_hybrid_search_with_tsv_column.asimilarity_search( - main_vector_query, k=2, hybrid_search_config=config + results = await run_on_background( + engine, + vs_hybrid_search_with_tsv_column.asimilarity_search( + main_vector_query, k=2, hybrid_search_config=config + ), ) result_ids = [doc.metadata["doc_id_key"] for doc in results] @@ -569,7 +671,9 @@ async def test_hybrid_search_explicit_fts_query( or "hs_doc_orange_fruit" in result_ids ) - async def test_hybrid_search_with_filter(self, vs_hybrid_search_with_tsv_column): + async def test_hybrid_search_with_filter( + self, engine, vs_hybrid_search_with_tsv_column + ): """Test hybrid search with a metadata filter applied.""" query = "apple" # Filter to only include "tech" related apple docs using metadata @@ -579,8 +683,11 @@ async def test_hybrid_search_with_filter(self, vs_hybrid_search_with_tsv_column) config = HybridSearchConfig( tsv_column="mycontent_tsv", ) - results = await vs_hybrid_search_with_tsv_column.asimilarity_search( - query, k=2, filter=doc_filter, hybrid_search_config=config + results = await run_on_background( + engine, + vs_hybrid_search_with_tsv_column.asimilarity_search( + query, k=2, filter=doc_filter, hybrid_search_config=config + ), ) result_ids = [doc.metadata["doc_id_key"] for doc in results] @@ -588,7 +695,7 @@ async def test_hybrid_search_with_filter(self, vs_hybrid_search_with_tsv_column) assert result_ids[0] == "hs_doc_apple_tech" async def test_hybrid_search_fts_empty_results( - self, vs_hybrid_search_with_tsv_column + self, engine, vs_hybrid_search_with_tsv_column ): """Test when FTS query yields no results, should fall back to vector search.""" vector_query = "apple" @@ -602,8 +709,11 @@ async def test_hybrid_search_fts_empty_results( "secondary_results_weight": 0.4, }, ) - results = await vs_hybrid_search_with_tsv_column.asimilarity_search( 
- vector_query, k=2, hybrid_search_config=config + results = await run_on_background( + engine, + vs_hybrid_search_with_tsv_column.asimilarity_search( + vector_query, k=2, hybrid_search_config=config + ), ) result_ids = [doc.metadata["doc_id_key"] for doc in results] @@ -614,7 +724,7 @@ async def test_hybrid_search_fts_empty_results( assert results[0].metadata["doc_id_key"].startswith("hs_doc_apple_fruit") async def test_hybrid_search_vector_empty_results_effectively( - self, vs_hybrid_search_with_tsv_column + self, engine, vs_hybrid_search_with_tsv_column ): """Test when vector query is very dissimilar to docs, should rely on FTS.""" # This is hard to guarantee with fake embeddings, but we try. @@ -631,8 +741,11 @@ async def test_hybrid_search_vector_empty_results_effectively( "secondary_results_weight": 0.6, }, ) - results = await vs_hybrid_search_with_tsv_column.asimilarity_search( - vector_query_far_off, k=1, hybrid_search_config=config + results = await run_on_background( + engine, + vs_hybrid_search_with_tsv_column.asimilarity_search( + vector_query_far_off, k=1, hybrid_search_config=config + ), ) result_ids = [doc.metadata["doc_id_key"] for doc in results] @@ -656,35 +769,41 @@ async def test_hybrid_search_without_tsv_column(self, engine): "secondary_results_weight": 0.9, }, ) - await engine._ainit_vectorstore_table( - HYBRID_SEARCH_TABLE2, - VECTOR_SIZE, - id_column=Column("myid", "TEXT"), - content_column="mycontent", - embedding_column="myembedding", - metadata_columns=[ - Column("page", "TEXT"), - Column("source", "TEXT"), - Column("doc_id_key", "TEXT"), - ], - store_metadata=False, - hybrid_search_config=config, - ) - - vs_with_tsv_column = await AsyncPostgresVectorStore.create( - engine, - embedding_service=embeddings_service, - table_name=HYBRID_SEARCH_TABLE2, - id_column="myid", - content_column="mycontent", - embedding_column="myembedding", - metadata_columns=["doc_id_key"], - index_query_options=HNSWQueryOptions(ef_search=1), - 
hybrid_search_config=config, - ) - await vs_with_tsv_column.aadd_documents(hybrid_docs) + await run_on_background( + engine, + engine._ainit_vectorstore_table( + HYBRID_SEARCH_TABLE2, + VECTOR_SIZE, + id_column=Column("myid", "TEXT"), + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=[ + Column("page", "TEXT"), + Column("source", "TEXT"), + Column("doc_id_key", "TEXT"), + ], + store_metadata=False, + hybrid_search_config=config, + ), + ) - config = HybridSearchConfig( + vs_with_tsv_column = await run_on_background( + engine, + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=HYBRID_SEARCH_TABLE2, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=["doc_id_key"], + index_query_options=HNSWQueryOptions(ef_search=1), + hybrid_search_config=config, + ), + ) + await run_on_background(engine, vs_with_tsv_column.aadd_documents(hybrid_docs)) + + config_no_tsv = HybridSearchConfig( tsv_column="", # no TSV column fts_query=fts_query_match, fusion_function_parameters={ @@ -692,23 +811,32 @@ async def test_hybrid_search_without_tsv_column(self, engine): "secondary_results_weight": 0.1, }, ) - vs_without_tsv_column = await AsyncPostgresVectorStore.create( + vs_without_tsv_column = await run_on_background( engine, - embedding_service=embeddings_service, - table_name=HYBRID_SEARCH_TABLE2, - id_column="myid", - content_column="mycontent", - embedding_column="myembedding", - metadata_columns=["doc_id_key"], - index_query_options=HNSWQueryOptions(ef_search=1), - hybrid_search_config=config, + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=HYBRID_SEARCH_TABLE2, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=["doc_id_key"], + index_query_options=HNSWQueryOptions(ef_search=1), + hybrid_search_config=config_no_tsv, + ), ) - results_with_tsv_column = 
await vs_with_tsv_column.asimilarity_search( - vector_query_far_off, k=1, hybrid_search_config=config + results_with_tsv_column = await run_on_background( + engine, + vs_with_tsv_column.asimilarity_search( + vector_query_far_off, k=1, hybrid_search_config=config + ), ) - results_without_tsv_column = await vs_without_tsv_column.asimilarity_search( - vector_query_far_off, k=1, hybrid_search_config=config + results_without_tsv_column = await run_on_background( + engine, + vs_without_tsv_column.asimilarity_search( + vector_query_far_off, k=1, hybrid_search_config=config + ), ) result_ids_with_tsv_column = [ doc.metadata["doc_id_key"] for doc in results_with_tsv_column @@ -720,5 +848,5 @@ async def test_hybrid_search_without_tsv_column(self, engine): # Expect results based purely on FTS search for "orange fruit" assert len(result_ids_with_tsv_column) == 1 assert len(result_ids_without_tsv_column) == 1 - assert result_ids_with_tsv_column[0] == "hs_doc_apple_tech" - assert result_ids_without_tsv_column[0] == "hs_doc_apple_tech" + assert result_ids_with_tsv_column[0] == "hs_doc_apple_fruit" + assert result_ids_without_tsv_column[0] == "hs_doc_apple_fruit" diff --git a/tests/test_engine.py b/tests/test_engine.py index 4a34c575..ca26236e 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import asyncio import os import uuid -from typing import Sequence +from typing import Any, Coroutine, Sequence import asyncpg # type: ignore import pytest @@ -52,27 +53,36 @@ def get_env_var(key: str, desc: str) -> str: return v +# Helper to bridge the Main Test Loop and the Engine Background Loop +async def run_on_background(engine: PostgresEngine, coro: Coroutine) -> Any: + """Runs a coroutine on the engine's background loop (if it exists).""" + if engine._loop: + return await asyncio.wrap_future( + asyncio.run_coroutine_threadsafe(coro, engine._loop) + ) + return await coro + + async def aexecute( engine: PostgresEngine, query: str, ) -> None: - async def run(engine, query): + async def _impl(): async with engine._pool.connect() as conn: await conn.execute(text(query)) await conn.commit() - await engine._run_as_async(run(engine, query)) + await run_on_background(engine, _impl()) async def afetch(engine: PostgresEngine, query: str) -> Sequence[RowMapping]: - async def run(engine, query): + async def _impl(): async with engine._pool.connect() as conn: result = await conn.execute(text(query)) result_map = result.mappings() - result_fetch = result_map.fetchall() - return result_fetch + return result_map.fetchall() - return await engine._run_as_async(run(engine, query)) + return await run_on_background(engine, _impl()) @pytest.mark.asyncio(scope="module") @@ -126,10 +136,14 @@ async def engine(self, db_project, db_region, db_instance, db_name): await engine.close() async def test_engine_args(self, engine): + # Accessing engine._pool.pool.status() is synchronous and safe on main loop objects + # assuming SQLAlchemy pool status doesn't strictly require loop context assert "Pool size: 3" in engine._pool.pool.status() async def test_init_table(self, engine): - await engine.ainit_vectorstore_table(DEFAULT_TABLE, VECTOR_SIZE) + await run_on_background( + engine, engine.ainit_vectorstore_table(DEFAULT_TABLE, VECTOR_SIZE) + ) id = str(uuid.uuid4()) content = "coffee" 
embedding = await embeddings_service.aembed_query(content) @@ -139,14 +153,17 @@ async def test_init_table(self, engine): await aexecute(engine, stmt) async def test_init_table_custom(self, engine): - await engine.ainit_vectorstore_table( - CUSTOM_TABLE, - VECTOR_SIZE, - id_column="uuid", - content_column="my-content", - embedding_column="my_embedding", - metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], - store_metadata=True, + await run_on_background( + engine, + engine.ainit_vectorstore_table( + CUSTOM_TABLE, + VECTOR_SIZE, + id_column="uuid", + content_column="my-content", + embedding_column="my_embedding", + metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], + store_metadata=True, + ), ) stmt = f"SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '{CUSTOM_TABLE}';" results = await afetch(engine, stmt) @@ -162,14 +179,19 @@ async def test_init_table_custom(self, engine): assert row in expected async def test_init_table_with_int_id(self, engine): - await engine.ainit_vectorstore_table( - INT_ID_CUSTOM_TABLE, - VECTOR_SIZE, - id_column=Column(name="integer_id", data_type="INTEGER", nullable="False"), - content_column="my-content", - embedding_column="my_embedding", - metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], - store_metadata=True, + await run_on_background( + engine, + engine.ainit_vectorstore_table( + INT_ID_CUSTOM_TABLE, + VECTOR_SIZE, + id_column=Column( + name="integer_id", data_type="INTEGER", nullable="False" + ), + content_column="my-content", + embedding_column="my_embedding", + metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], + store_metadata=True, + ), ) stmt = f"SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '{INT_ID_CUSTOM_TABLE}';" results = await afetch(engine, stmt) @@ -193,7 +215,10 @@ async def test_password( user, password, ): - PostgresEngine._connector = None + # Note: PostgresEngine._connector is 
no longer a class attribute in fixed engine.py + # But for test cleanup safety regarding the OLD code structure, we can ignore this. + # PostgresEngine._connector = None + engine = await PostgresEngine.afrom_instance( project_id=db_project, instance=db_instance, @@ -204,7 +229,6 @@ async def test_password( ) assert engine await aexecute(engine, "SELECT 1") - PostgresEngine._connector = None await engine.close() async def test_from_engine( @@ -216,7 +240,7 @@ async def test_from_engine( user, password, ): - async with Connector() as connector: + async with Connector(loop=asyncio.get_running_loop()) as connector: async def getconn() -> asyncpg.Connection: conn = await connector.connect_async( # type: ignore @@ -230,12 +254,12 @@ async def getconn() -> asyncpg.Connection: ) return conn - engine = create_async_engine( + engine_async = create_async_engine( "postgresql+asyncpg://", async_creator=getconn, ) - engine = PostgresEngine.from_engine(engine) + engine = PostgresEngine.from_engine(engine_async) await aexecute(engine, "SELECT 1") await engine.close() @@ -331,7 +355,11 @@ async def test_iam_account_override( async def test_ainit_checkpoint_writes_table(self, engine): table_name = f"checkpoint{uuid.uuid4()}" table_name_writes = f"{table_name}_writes" - await engine.ainit_checkpoint_table(table_name=table_name) + + await run_on_background( + engine, engine.ainit_checkpoint_table(table_name=table_name) + ) + stmt = f"SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '{table_name_writes}';" results = await afetch(engine, stmt) expected = [ @@ -354,9 +382,9 @@ async def test_ainit_checkpoint_writes_table(self, engine): {"column_name": "checkpoint_ns", "data_type": "text"}, {"column_name": "checkpoint_id", "data_type": "text"}, {"column_name": "parent_checkpoint_id", "data_type": "text"}, + {"column_name": "type", "data_type": "text"}, {"column_name": "checkpoint", "data_type": "bytea"}, {"column_name": "metadata", "data_type": "bytea"}, - 
{"column_name": "type", "data_type": "text"}, ] for row in results: assert row in expected @@ -364,15 +392,18 @@ async def test_ainit_checkpoint_writes_table(self, engine): await aexecute(engine, f'DROP TABLE IF EXISTS "{table_name_writes}"') async def test_init_table_hybrid_search(self, engine): - await engine.ainit_vectorstore_table( - HYBRID_SEARCH_TABLE, - VECTOR_SIZE, - id_column="uuid", - content_column="my-content", - embedding_column="my_embedding", - metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], - store_metadata=True, - hybrid_search_config=HybridSearchConfig(), + await run_on_background( + engine, + engine.ainit_vectorstore_table( + HYBRID_SEARCH_TABLE, + VECTOR_SIZE, + id_column="uuid", + content_column="my-content", + embedding_column="my_embedding", + metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], + store_metadata=True, + hybrid_search_config=HybridSearchConfig(), + ), ) stmt = f"SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '{HYBRID_SEARCH_TABLE}';" results = await afetch(engine, stmt) @@ -435,11 +466,12 @@ async def engine(self, db_project, db_region, db_instance, db_name): await engine.close() async def test_init_table(self, engine): + # Sync method uses _run_as_sync internally -> safe to call on Main Loop engine.init_vectorstore_table(DEFAULT_TABLE_SYNC, VECTOR_SIZE) + id = str(uuid.uuid4()) content = "coffee" embedding = await embeddings_service.aembed_query(content) - # Note: DeterministicFakeEmbedding generates a numpy array, converting to list a list of float values embedding_string = [float(dimension) for dimension in embedding] stmt = f"INSERT INTO {DEFAULT_TABLE_SYNC} (langchain_id, content, embedding) VALUES ('{id}', '{content}','{embedding_string}');" await aexecute(engine, stmt) @@ -499,7 +531,6 @@ async def test_password( user, password, ): - PostgresEngine._connector = None engine = PostgresEngine.from_instance( project_id=db_project, instance=db_instance, 
@@ -511,7 +542,6 @@ async def test_password( ) assert engine await aexecute(engine, "SELECT 1") - PostgresEngine._connector = None await engine.close() async def test_engine_constructor_key( @@ -520,7 +550,7 @@ async def test_engine_constructor_key( ): key = object() with pytest.raises(Exception): - PostgresEngine(key, engine) + PostgresEngine(key, engine, None, None) async def test_iam_account_override( self, @@ -545,7 +575,9 @@ async def test_iam_account_override( async def test_init_checkpoints_table(self, engine): table_name = f"checkpoint{uuid.uuid4()}" table_name_writes = f"{table_name}_writes" + engine.init_checkpoint_table(table_name=table_name) + stmt = f"SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '{table_name}';" results = await afetch(engine, stmt) expected = [ diff --git a/tests/test_vectorstore.py b/tests/test_vectorstore.py index 4e82cab6..ca0c6786 100644 --- a/tests/test_vectorstore.py +++ b/tests/test_vectorstore.py @@ -364,7 +364,7 @@ async def test_from_engine( user, password, ): - async with Connector() as connector: + async with Connector(loop=asyncio.get_running_loop()) as connector: async def getconn(): conn = await connector.connect_async( # type: ignore From b97060e1fd69f1902c370c90218b1e61b72050b8 Mon Sep 17 00:00:00 2001 From: dishaprakash <57954147+dishaprakash@users.noreply.github.com> Date: Fri, 12 Dec 2025 09:02:15 +0000 Subject: [PATCH 48/61] feat: Disable support for python 3.9 and enable support for python3.13 (#378) * chore!: Disable support for python 3.9 and enable support for python3.13 * Update supported Python version to 3.10 * Update Python version requirements to 3.10 * Update requirements.txt to remove old numpy version Removed numpy version constraint for Python <= 3.9. * Update integration.cloudbuild.yaml * Clean up Renovate config by removing disabled rules Removed disabled numpy and isort update configurations for Python versions. 
* trigger cloud build * trigger cloud build --------- Co-authored-by: Averi Kitsch --- .github/renovate.json5 | 20 -------------------- DEVELOPER.md | 6 +++--- README.rst | 2 +- integration.cloudbuild.yaml | 2 +- pyproject.toml | 10 ++++------ requirements.txt | 1 - 6 files changed, 9 insertions(+), 32 deletions(-) diff --git a/.github/renovate.json5 b/.github/renovate.json5 index 93a38cf3..05a3fdf6 100644 --- a/.github/renovate.json5 +++ b/.github/renovate.json5 @@ -48,12 +48,6 @@ "matchCurrentVersion": "<=2.2.6", "enabled": false }, - { - "description": "Disable numpy updates for python <=3.9 in requirements.txt", - "matchPackageNames": ["numpy"], - "matchCurrentVersion": "<=2.0.2", - "enabled": false - }, { "description": "Disable numpy updates for python 3.10 in pyproject.toml", "matchFileNames": ["pyproject.toml"], @@ -61,25 +55,11 @@ "matchCurrentValue": ">=1.24.4, <=2.2.6", "enabled": false }, - { - "description": "Disable numpy updates for python <=3.9 in pyproject.toml", - "matchFileNames": ["pyproject.toml"], - "matchPackageNames": ["numpy"], - "matchCurrentValue": ">=1.24.4, <=2.0.2", - "enabled": false - }, { "description": "Use feat commit type for LangChain Postgres dependency updates", "matchPackageNames": ["langchain-postgres"], "semanticCommitType": "feat", "groupName": "langchain-postgres" }, - { - "description": "Disable isort updates for python <=3.9 in pyproject.toml", - "matchFileNames": ["pyproject.toml"], - "matchPackageNames": ["isort"], - "matchCurrentValue": "==6.1.0", - "enabled": false - } ], } diff --git a/DEVELOPER.md b/DEVELOPER.md index 899f62b6..751df2e7 100644 --- a/DEVELOPER.md +++ b/DEVELOPER.md @@ -42,11 +42,11 @@ These tests are registered as required tests in `.github/sync-repo-settings.yaml #### Trigger Setup -Cloud Build triggers (for Python versions 3.9 to 3.11) were created with the following specs: +Cloud Build triggers (for Python versions 3.10 to 3.13) were created with the following specs: ```YAML name: 
pg-integration-test-pr-py39 -description: Run integration tests on PR for Python 3.9 +description: Run integration tests on PR for Python 3.10 filename: integration.cloudbuild.yaml github: name: langchain-google-cloud-sql-pg-python @@ -64,7 +64,7 @@ substitutions: _DATABASE_ID: _INSTANCE_ID: _REGION: us-central1 - _VERSION: "3.9" + _VERSION: "3.10" ``` Use `gcloud builds triggers import --source=trigger.yaml` to create triggers via the command line diff --git a/README.rst b/README.rst index 9839b661..d1e258c5 100644 --- a/README.rst +++ b/README.rst @@ -56,7 +56,7 @@ dependencies. Supported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^ -Python >= 3.9 +Python >= 3.10 Mac/Linux ^^^^^^^^^ diff --git a/integration.cloudbuild.yaml b/integration.cloudbuild.yaml index 18414b8e..bc0b0d8f 100644 --- a/integration.cloudbuild.yaml +++ b/integration.cloudbuild.yaml @@ -62,7 +62,7 @@ substitutions: _DATABASE_PORT: "5432" _DATABASE_ID: test-database _REGION: us-central1 - _VERSION: "3.9" + _VERSION: "3.10" _IP_ADDRESS: "127.0.0.1" options: diff --git a/pyproject.toml b/pyproject.toml index c8232d20..24282b77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ dynamic = ["version"] description = "LangChain integrations for Google Cloud SQL for PostgreSQL" readme = "README.rst" license = {file = "LICENSE"} -requires-python = ">=3.9" +requires-python = ">=3.10" authors = [ {name = "Google LLC", email = "googleapis-packages@google.com"} ] @@ -13,7 +13,6 @@ dependencies = [ "cloud-sql-python-connector[asyncpg] >= 1.10.0, <2.0.0", "numpy>=1.24.4, <3.0.0; python_version >= '3.11'", "numpy>=1.24.4, <=2.2.6; python_version == '3.10'", - "numpy>=1.24.4, <=2.0.2; python_version <= '3.9'", "langchain-postgres>=0.0.16", ] @@ -22,10 +21,10 @@ classifiers = [ "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming 
Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", ] [tool.setuptools.dynamic] @@ -43,8 +42,7 @@ langgraph = [ ] test = [ "black[jupyter]==25.9.0", - "isort==6.1.0; python_version == '3.9'", - "isort==7.0.0; python_version >= '3.10'", + "isort==7.0.0", "mypy==1.18.2", "pytest-asyncio==0.26.0", "pytest==8.4.2", @@ -64,7 +62,7 @@ target-version = ['py39'] profile = "black" [tool.mypy] -python_version = 3.9 +python_version = "3.10" warn_unused_configs = true disallow_incomplete_defs = true diff --git a/requirements.txt b/requirements.txt index 9eacfe4b..adf7896c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,5 @@ cloud-sql-python-connector[asyncpg]==1.18.5 numpy==2.3.3; python_version >= "3.11" numpy==2.2.6; python_version == "3.10" -numpy==2.0.2; python_version <= "3.9" langgraph==0.6.10 langchain-postgres==0.0.16 From 8ad4f48fc08ec5723aa9c2e35f474c07f680d6bb Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 12 Dec 2025 09:05:18 +0000 Subject: [PATCH 49/61] chore(deps): update github actions (#376) Co-authored-by: dishaprakash <57954147+dishaprakash@users.noreply.github.com> --- .github/workflows/docs.yml | 4 ++-- .github/workflows/lint.yml | 2 +- .github/workflows/schedule_reporter.yml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 36f543dd..686cc004 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6 - name: Setup Python uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6 with: @@ -26,7 +26,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6 + uses: 
actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6 - name: Setup Python uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6 with: diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index afe53954..4344058a 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -31,7 +31,7 @@ jobs: steps: - name: Checkout Repository - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6 - name: Setup Python uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 diff --git a/.github/workflows/schedule_reporter.yml b/.github/workflows/schedule_reporter.yml index d3eed028..e476db7b 100644 --- a/.github/workflows/schedule_reporter.yml +++ b/.github/workflows/schedule_reporter.yml @@ -24,6 +24,6 @@ jobs: issues: 'write' checks: 'read' contents: 'read' - uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@01b66222139586a87699ccccf938d1574ec94f85 + uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@593ae87530a872417f3edf063ef055d5a72034c8 with: trigger_names: "pg-integration-test-nightly,pg-continuous-test-on-merge" From 49e600fb077683c1a0d926a3078ec478fbfdc0c1 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 12 Dec 2025 15:54:49 +0000 Subject: [PATCH 50/61] chore(deps): update dependency langchain-google-vertexai to v3.2.0 (#371) Co-authored-by: dishaprakash <57954147+dishaprakash@users.noreply.github.com> --- samples/index_tuning_sample/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/index_tuning_sample/requirements.txt b/samples/index_tuning_sample/requirements.txt index cb85477a..1f866445 100644 --- a/samples/index_tuning_sample/requirements.txt +++ b/samples/index_tuning_sample/requirements.txt @@ -1,3 +1,3 @@ langchain-community==0.4.1 
langchain-google-cloud-sql-pg==0.14.1 -langchain-google-vertexai==3.0.3 +langchain-google-vertexai==3.2.0 From 5ba551ed10f6b00519337e299688084990e82d04 Mon Sep 17 00:00:00 2001 From: dishaprakash <57954147+dishaprakash@users.noreply.github.com> Date: Mon, 15 Dec 2025 18:55:24 +0000 Subject: [PATCH 51/61] chore: Refactor query in reasoning engine sample to run on background thread (#381) * chore: Refactor query in sample to run on background thread * Update prebuilt_langchain_agent_template.py * Update embedding model name in retriever agent * Update model name for Vertex AI embeddings * Run table deletion logic in background coroutine Refactor delete_tables to run logic in background coroutine. * Update google-cloud-aiplatform version to 1.121.0 * Update create_embeddings.py * Update create_embeddings.py * Update clean_up.py * Update clean_up.py * Update clean_up.py * Update clean_up.py * Update clean_up.py * Update clean_up.py --- samples/langchain_on_vertexai/clean_up.py | 22 +++++++++++++---- .../create_embeddings.py | 24 +++++++++++++++---- .../prebuilt_langchain_agent_template.py | 2 +- .../langchain_on_vertexai/requirements.txt | 2 +- .../retriever_agent_with_history_template.py | 2 +- .../retriever_chain_template.py | 2 +- 6 files changed, 40 insertions(+), 14 deletions(-) diff --git a/samples/langchain_on_vertexai/clean_up.py b/samples/langchain_on_vertexai/clean_up.py index 45e57ae5..42c3866a 100644 --- a/samples/langchain_on_vertexai/clean_up.py +++ b/samples/langchain_on_vertexai/clean_up.py @@ -13,6 +13,7 @@ # limitations under the License. 
import asyncio import os +from typing import Any, Coroutine from config import ( CHAT_TABLE_NAME, @@ -32,6 +33,15 @@ TEST_NAME = os.getenv("DISPLAY_NAME") +async def run_on_background(engine: PostgresEngine, coro: Coroutine) -> Any: + """Runs a coroutine on the engine's background loop.""" + if engine._default_loop: + return await asyncio.wrap_future( + asyncio.run_coroutine_threadsafe(coro, engine._default_loop) + ) + return await coro + + async def delete_tables(): engine = await PostgresEngine.afrom_instance( PROJECT_ID, @@ -42,12 +52,14 @@ async def delete_tables(): password=PASSWORD, ) - async with engine._pool.connect() as conn: - await conn.execute(text("COMMIT")) - await conn.execute(text(f"DROP TABLE IF EXISTS {TABLE_NAME}")) - await conn.execute(text(f"DROP TABLE IF EXISTS {CHAT_TABLE_NAME}")) + async def _logic(): + async with engine._pool.connect() as conn: + await conn.execute(text("COMMIT")) + await conn.execute(text(f"DROP TABLE IF EXISTS {TABLE_NAME}")) + await conn.execute(text(f"DROP TABLE IF EXISTS {CHAT_TABLE_NAME}")) + + await run_on_background(engine, _logic()) await engine.close() - await engine._connector.close_async() def delete_engines(): diff --git a/samples/langchain_on_vertexai/create_embeddings.py b/samples/langchain_on_vertexai/create_embeddings.py index 105a86df..370d8262 100644 --- a/samples/langchain_on_vertexai/create_embeddings.py +++ b/samples/langchain_on_vertexai/create_embeddings.py @@ -13,6 +13,7 @@ # limitations under the License. 
import asyncio import uuid +from typing import Any, Coroutine from config import ( CHAT_TABLE_NAME, @@ -32,6 +33,15 @@ from langchain_google_cloud_sql_pg import PostgresEngine, PostgresVectorStore +async def run_on_background(engine: PostgresEngine, coro: Coroutine) -> Any: + """Runs a coroutine on the engine's background loop.""" + if engine._default_loop: + return await asyncio.wrap_future( + asyncio.run_coroutine_threadsafe(coro, engine._default_loop) + ) + return await coro + + async def create_databases(): engine = await PostgresEngine.afrom_instance( PROJECT_ID, @@ -41,10 +51,14 @@ async def create_databases(): user=USER, password=PASSWORD, ) - async with engine._pool.connect() as conn: - await conn.execute(text("COMMIT")) - await conn.execute(text(f'DROP DATABASE IF EXISTS "{DATABASE}"')) - await conn.execute(text(f'CREATE DATABASE "{DATABASE}"')) + + async def _logic(): + async with engine._pool.connect() as conn: + await conn.execute(text("COMMIT")) + await conn.execute(text(f'DROP DATABASE IF EXISTS "{DATABASE}"')) + await conn.execute(text(f'CREATE DATABASE "{DATABASE}"')) + + await run_on_background(engine, _logic()) await engine.close() @@ -95,7 +109,7 @@ async def grant_select(engine): engine, table_name=TABLE_NAME, embedding_service=VertexAIEmbeddings( - model_name="textembedding-gecko@latest", project=PROJECT_ID + model_name="text-embedding-005", project=PROJECT_ID ), ) diff --git a/samples/langchain_on_vertexai/prebuilt_langchain_agent_template.py b/samples/langchain_on_vertexai/prebuilt_langchain_agent_template.py index 472b9da9..9e492783 100644 --- a/samples/langchain_on_vertexai/prebuilt_langchain_agent_template.py +++ b/samples/langchain_on_vertexai/prebuilt_langchain_agent_template.py @@ -65,7 +65,7 @@ def similarity_search(query: str) -> list[Document]: engine, table_name=TABLE_NAME, embedding_service=VertexAIEmbeddings( - model_name="textembedding-gecko@latest", project=PROJECT_ID + model_name="text-embedding-005", project=PROJECT_ID ), ) 
retriever = vector_store.as_retriever() diff --git a/samples/langchain_on_vertexai/requirements.txt b/samples/langchain_on_vertexai/requirements.txt index f841a4c3..064bf76a 100644 --- a/samples/langchain_on_vertexai/requirements.txt +++ b/samples/langchain_on_vertexai/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-aiplatform[reasoningengine,langchain]==1.120.0 +google-cloud-aiplatform[reasoningengine,langchain]==1.121.0 google-cloud-resource-manager==1.14.2 langchain-community==0.3.31 langchain-google-cloud-sql-pg==0.14.1 diff --git a/samples/langchain_on_vertexai/retriever_agent_with_history_template.py b/samples/langchain_on_vertexai/retriever_agent_with_history_template.py index 7d8a520e..2867d041 100644 --- a/samples/langchain_on_vertexai/retriever_agent_with_history_template.py +++ b/samples/langchain_on_vertexai/retriever_agent_with_history_template.py @@ -91,7 +91,7 @@ def set_up(self): engine, table_name=self.table, embedding_service=VertexAIEmbeddings( - model_name="textembedding-gecko@latest", project=self.project + model_name="text-embedding-005", project=self.project ), ) retriever = vector_store.as_retriever() diff --git a/samples/langchain_on_vertexai/retriever_chain_template.py b/samples/langchain_on_vertexai/retriever_chain_template.py index d05780c3..8abfbb21 100644 --- a/samples/langchain_on_vertexai/retriever_chain_template.py +++ b/samples/langchain_on_vertexai/retriever_chain_template.py @@ -97,7 +97,7 @@ def set_up(self): engine, table_name=self.table, embedding_service=VertexAIEmbeddings( - model_name="textembedding-gecko@latest", project=self.project + model_name="text-embedding-005", project=self.project ), ) retriever = vector_store.as_retriever() From bf1b11976c7fbef568f9e6787e796dd78b478fc0 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 15 Dec 2025 19:09:01 +0000 Subject: [PATCH 52/61] chore(deps): update dependency langchain-tests to v1 (#356) Co-authored-by: dishaprakash <57954147+dishaprakash@users.noreply.github.com> --- 
pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 24282b77..2d456b60 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,7 @@ test = [ "pytest-asyncio==0.26.0", "pytest==8.4.2", "pytest-cov==7.0.0", - "langchain-tests==0.3.22", + "langchain-tests==1.1.0", "langgraph==0.6.10" ] From 7a841b357c998bce7c6aede0e2e5fed8fa48f198 Mon Sep 17 00:00:00 2001 From: dishaprakash <57954147+dishaprakash@users.noreply.github.com> Date: Tue, 16 Dec 2025 18:55:55 +0000 Subject: [PATCH 53/61] feat: Update Langgraph dependency to v1 (#379) * chore: Update Langgraph dependency to v1 * Upgrade langgraph to version 1.0.4 Updated langgraph version to 1.0.4. * Update langgraph dependency version to 1.0.4 * Switch to json for metadata handling Replaced jsonplus_serde with json for metadata serialization and deserialization. * Update async_checkpoint.py * Refactor async functions to synchronous in create_embeddings.py * Convert database and vector store functions to async Refactor create_databases and create_vectorstore functions to be asynchronous. Update database initialization and vector store creation to use async/await syntax. 
* Update async_checkpoint.py --- pyproject.toml | 2 +- requirements.txt | 2 +- src/langchain_google_cloud_sql_pg/async_checkpoint.py | 8 +++++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2d456b60..2b161795 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,7 @@ test = [ "pytest==8.4.2", "pytest-cov==7.0.0", "langchain-tests==1.1.0", - "langgraph==0.6.10" + "langgraph==1.0.4" ] [build-system] diff --git a/requirements.txt b/requirements.txt index adf7896c..bacbe361 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ cloud-sql-python-connector[asyncpg]==1.18.5 numpy==2.3.3; python_version >= "3.11" numpy==2.2.6; python_version == "3.10" -langgraph==0.6.10 +langgraph==1.0.4 langchain-postgres==0.0.16 diff --git a/src/langchain_google_cloud_sql_pg/async_checkpoint.py b/src/langchain_google_cloud_sql_pg/async_checkpoint.py index fc875991..32eef521 100644 --- a/src/langchain_google_cloud_sql_pg/async_checkpoint.py +++ b/src/langchain_google_cloud_sql_pg/async_checkpoint.py @@ -276,7 +276,9 @@ async def aput( async with self.pool.connect() as conn: type_, serialized_checkpoint = self.serde.dumps_typed(checkpoint) - serialized_metadata = self.jsonplus_serde.dumps(metadata) + serialized_metadata = json.dumps(metadata, ensure_ascii=False).encode( + "utf-8", "ignore" + ) await conn.execute( text(query), { @@ -409,7 +411,7 @@ async def alist( (value["type"], value["checkpoint"]) ), metadata=( - self.jsonplus_serde.loads(value["metadata"]) # type: ignore + json.loads(value["metadata"]) # type: ignore if value["metadata"] is not None else {} ), @@ -494,7 +496,7 @@ async def aget_tuple(self, config: RunnableConfig) -> Optional[CheckpointTuple]: }, checkpoint=self.serde.loads_typed((value["type"], value["checkpoint"])), metadata=( - self.jsonplus_serde.loads(value["metadata"]) # type: ignore + json.loads(value["metadata"]) # type: ignore if value["metadata"] is not None else {} ), From 
83db4c2db6a6c40f39889d3c526db40826792244 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 16 Dec 2025 19:36:00 +0000 Subject: [PATCH 54/61] chore(deps): update googleapis/langchain-google-alloydb-pg-python digest to 671ccaf (#382) --- .github/workflows/schedule_reporter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/schedule_reporter.yml b/.github/workflows/schedule_reporter.yml index e476db7b..e2e404fd 100644 --- a/.github/workflows/schedule_reporter.yml +++ b/.github/workflows/schedule_reporter.yml @@ -24,6 +24,6 @@ jobs: issues: 'write' checks: 'read' contents: 'read' - uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@593ae87530a872417f3edf063ef055d5a72034c8 + uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@671ccaff2d9f785634afb734f88755fde4b123c8 with: trigger_names: "pg-integration-test-nightly,pg-continuous-test-on-merge" From 991db4a7950c16e49a296a8b8930ed0ce1f47791 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 17 Dec 2025 08:05:02 +0000 Subject: [PATCH 55/61] chore(deps): update all non-major dependencies (#353) * chore(deps): update all non-major dependencies * Update requirements.txt * Update pyproject.toml * Update requirements.txt * Update prebuilt_langchain_agent_template.py * Update prebuilt_langchain_agent_template.py * Update retriever_agent_with_history_template.py * Update retriever_agent_with_history_template.py * Update retriever_chain_template.py * Update retriever_chain_template.py * Update retriever_agent_with_history_template.py * Update prebuilt_langchain_agent_template.py --------- Co-authored-by: dishaprakash <57954147+dishaprakash@users.noreply.github.com> --- pyproject.toml | 4 ++-- requirements.txt | 4 ++-- .../prebuilt_langchain_agent_template.py | 6 +++--- .../retriever_agent_with_history_template.py | 6 +++--- samples/langchain_on_vertexai/retriever_chain_template.py | 
6 +++--- samples/requirements.txt | 6 +++--- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2b161795..737ff6f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,9 +41,9 @@ langgraph = [ "langgraph-checkpoint>=3.0.0, <3.1.0" ] test = [ - "black[jupyter]==25.9.0", + "black[jupyter]==25.12.0", "isort==7.0.0", - "mypy==1.18.2", + "mypy==1.19.1", "pytest-asyncio==0.26.0", "pytest==8.4.2", "pytest-cov==7.0.0", diff --git a/requirements.txt b/requirements.txt index bacbe361..feb341ca 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -cloud-sql-python-connector[asyncpg]==1.18.5 -numpy==2.3.3; python_version >= "3.11" +cloud-sql-python-connector[asyncpg]==1.19.0 +numpy==2.3.5; python_version >= "3.11" numpy==2.2.6; python_version == "3.10" langgraph==1.0.4 langchain-postgres==0.0.16 diff --git a/samples/langchain_on_vertexai/prebuilt_langchain_agent_template.py b/samples/langchain_on_vertexai/prebuilt_langchain_agent_template.py index 9e492783..efd7fb58 100644 --- a/samples/langchain_on_vertexai/prebuilt_langchain_agent_template.py +++ b/samples/langchain_on_vertexai/prebuilt_langchain_agent_template.py @@ -91,9 +91,9 @@ def similarity_search(query: str) -> list[Document]: DISPLAY_NAME = os.getenv("DISPLAY_NAME") or "PrebuiltAgent" remote_app = reasoning_engines.ReasoningEngine.create( - reasoning_engines.LangchainAgent( + reasoning_engines.LangchainAgent( # type: ignore[arg-type] model="gemini-2.0-flash-001", - tools=[similarity_search], + tools=[similarity_search], # type: ignore[list-item] model_kwargs={ "temperature": 0.1, }, @@ -104,4 +104,4 @@ def similarity_search(query: str) -> list[Document]: extra_packages=["config.py"], ) -print(remote_app.query(input="movies about engineers")) +print(remote_app.query(input="movies about engineers")) # type: ignore[attr-defined] diff --git a/samples/langchain_on_vertexai/retriever_agent_with_history_template.py 
b/samples/langchain_on_vertexai/retriever_agent_with_history_template.py index 2867d041..bba06a16 100644 --- a/samples/langchain_on_vertexai/retriever_agent_with_history_template.py +++ b/samples/langchain_on_vertexai/retriever_agent_with_history_template.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import os -from typing import Optional +from typing import Any, Optional import vertexai # type: ignore from config import ( @@ -132,7 +132,7 @@ def set_up(self): history_messages_key="chat_history", ) - def query(self, input: str, session_id: str) -> str: + def query(self, input: str, session_id: str, **kwargs: Any) -> str: # type: ignore[override] """Query the application. Args: @@ -192,4 +192,4 @@ def query(self, input: str, session_id: str) -> str: extra_packages=["config.py"], ) -print(remote_app.query(input="movies about engineers", session_id="abc123")) +print(remote_app.query(input="movies about engineers", session_id="abc123")) # type: ignore diff --git a/samples/langchain_on_vertexai/retriever_chain_template.py b/samples/langchain_on_vertexai/retriever_chain_template.py index 8abfbb21..0d322ba8 100644 --- a/samples/langchain_on_vertexai/retriever_chain_template.py +++ b/samples/langchain_on_vertexai/retriever_chain_template.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import os -from typing import Optional +from typing import Any, Optional import vertexai # type: ignore from config import ( @@ -106,7 +106,7 @@ def set_up(self): # an LLM to generate a response self.chain = create_retrieval_chain(retriever, combine_docs_chain) - def query(self, input: str) -> str: + def query(self, input: str, **kwargs: Any) -> str: # type: ignore[override] """Query the application. 
Args: @@ -161,4 +161,4 @@ def query(self, input: str) -> str: extra_packages=["config.py"], ) -print(remote_app.query(input="movies about engineers")) +print(remote_app.query(input="movies about engineers")) # type: ignore diff --git a/samples/requirements.txt b/samples/requirements.txt index b6b27ad1..fa739a07 100644 --- a/samples/requirements.txt +++ b/samples/requirements.txt @@ -1,5 +1,5 @@ -google-cloud-aiplatform[reasoningengine,langchain]==1.97.0 -google-cloud-resource-manager==1.14.2 +google-cloud-aiplatform[reasoningengine,langchain]==1.130.0 +google-cloud-resource-manager==1.15.0 langchain-community==0.3.29 langchain-google-cloud-sql-pg==0.14.1 -langchain-google-vertexai==2.0.27 +langchain-google-vertexai==2.1.2 From 83113f9c0f671907eac0b3b76f66109eeecaba6a Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 17 Dec 2025 08:07:31 +0000 Subject: [PATCH 56/61] chore(deps): update googleapis/langchain-google-alloydb-pg-python digest to 9eb64e3 (#384) --- .github/workflows/schedule_reporter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/schedule_reporter.yml b/.github/workflows/schedule_reporter.yml index e2e404fd..685ec598 100644 --- a/.github/workflows/schedule_reporter.yml +++ b/.github/workflows/schedule_reporter.yml @@ -24,6 +24,6 @@ jobs: issues: 'write' checks: 'read' contents: 'read' - uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@671ccaff2d9f785634afb734f88755fde4b123c8 + uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@9eb64e3f8f38ddf07c49c03e57110891311bbdfb with: trigger_names: "pg-integration-test-nightly,pg-continuous-test-on-merge" From 8d204d9342c793131f0ee9c816186ea97a0ab726 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 18 Dec 2025 23:17:51 +0000 Subject: [PATCH 57/61] chore(deps): update googleapis/langchain-google-alloydb-pg-python digest to a1864c1 (#386) --- 
.github/workflows/schedule_reporter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/schedule_reporter.yml b/.github/workflows/schedule_reporter.yml index 685ec598..4b098acb 100644 --- a/.github/workflows/schedule_reporter.yml +++ b/.github/workflows/schedule_reporter.yml @@ -24,6 +24,6 @@ jobs: issues: 'write' checks: 'read' contents: 'read' - uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@9eb64e3f8f38ddf07c49c03e57110891311bbdfb + uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@a1864c1466a227c83ba963c875e08e29dacecf5c with: trigger_names: "pg-integration-test-nightly,pg-continuous-test-on-merge" From f40a975e944e52bd5053f4acbd7a5b9ab5212e7d Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Sat, 20 Dec 2025 08:27:31 +0000 Subject: [PATCH 58/61] chore(deps): update googleapis/langchain-google-alloydb-pg-python digest to 380d297 (#387) --- .github/workflows/schedule_reporter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/schedule_reporter.yml b/.github/workflows/schedule_reporter.yml index 4b098acb..be70fba7 100644 --- a/.github/workflows/schedule_reporter.yml +++ b/.github/workflows/schedule_reporter.yml @@ -24,6 +24,6 @@ jobs: issues: 'write' checks: 'read' contents: 'read' - uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@a1864c1466a227c83ba963c875e08e29dacecf5c + uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@380d297eca7d0f3079200926bba37d89b5708448 with: trigger_names: "pg-integration-test-nightly,pg-continuous-test-on-merge" From 6f76135fcbc13943b95f97a6817edb508ec14003 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 5 Jan 2026 13:49:08 +0000 Subject: [PATCH 59/61] chore(deps): update googleapis/langchain-google-alloydb-pg-python digest to fabff9f (#388) 
--- .github/workflows/schedule_reporter.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/schedule_reporter.yml b/.github/workflows/schedule_reporter.yml index be70fba7..4448dfff 100644 --- a/.github/workflows/schedule_reporter.yml +++ b/.github/workflows/schedule_reporter.yml @@ -24,6 +24,6 @@ jobs: issues: 'write' checks: 'read' contents: 'read' - uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@380d297eca7d0f3079200926bba37d89b5708448 + uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@fabff9f2b5312824e0a3a3723b45dd302a83366c with: trigger_names: "pg-integration-test-nightly,pg-continuous-test-on-merge" From 348d92400ac9a5350c06be466a6ce4e70ec4f77a Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 8 Jan 2026 11:53:47 +0000 Subject: [PATCH 60/61] chore(deps): update dependency langchain-google-vertexai to v3.2.1 (#390) --- samples/index_tuning_sample/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/index_tuning_sample/requirements.txt b/samples/index_tuning_sample/requirements.txt index 1f866445..52a63c3a 100644 --- a/samples/index_tuning_sample/requirements.txt +++ b/samples/index_tuning_sample/requirements.txt @@ -1,3 +1,3 @@ langchain-community==0.4.1 langchain-google-cloud-sql-pg==0.14.1 -langchain-google-vertexai==3.2.0 +langchain-google-vertexai==3.2.1 From 3a15f707ca9afa97a5c674efb41c3e2b48615987 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 13 Jan 2026 14:00:31 +0530 Subject: [PATCH 61/61] chore(main): release 0.15.0 (#322) * chore(main): release 0.15.0 * Update CHANGELOG.md --------- Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Averi Kitsch --- CHANGELOG.md | 23 ++++++++++++++++++++ src/langchain_google_cloud_sql_pg/version.py | 2 
+- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 445b05ea..8765db3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,28 @@ # Changelog +## [0.15.0](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/compare/v0.14.1...v0.15.0) (2026-01-08) + + +### ⚠ BREAKING CHANGES + +* Refactor PostgresVectorStore and PostgresEngine to depend on PGVectorstore and PGEngine respectively ([#316](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/issues/316)) + +### Features + +* **deps:** Update langchain-postgres to v0.0.16 ([#366](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/issues/366)) ([e773505](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/commit/e773505453683dad5681e6155831b710cbc7fcc1)) +* Disable support for python 3.9 and enable support for python3.13 ([#378](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/issues/378)) ([b97060e](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/commit/b97060e1fd69f1902c370c90218b1e61b72050b8)) +* Update Langgraph dependency to v1 ([#379](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/issues/379)) ([7a841b3](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/commit/7a841b357c998bce7c6aede0e2e5fed8fa48f198)) + + +### Documentation + +* Add Hybrid Search documentation ([#329](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/issues/329)) ([14098ca](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/commit/14098ca7a6cf7116e6edbcb7a5c6c3ccbce76b4a)) + + +### Code Refactoring + +* Refactor PostgresVectorStore and PostgresEngine to depend on PGVectorstore and PGEngine respectively ([#316](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/issues/316)) ([7917d62](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/commit/7917d62c3f9ea2c6ca8ab8d6284cfa2c7e535401)) + ## 
[0.14.1](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/compare/v0.14.0...v0.14.1) (2025-07-11) diff --git a/src/langchain_google_cloud_sql_pg/version.py b/src/langchain_google_cloud_sql_pg/version.py index f735a04c..a9b14a39 100644 --- a/src/langchain_google_cloud_sql_pg/version.py +++ b/src/langchain_google_cloud_sql_pg/version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "0.14.1" +__version__ = "0.15.0"