diff --git a/.github/renovate.json5 b/.github/renovate.json5 index 995819bb..05a3fdf6 100644 --- a/.github/renovate.json5 +++ b/.github/renovate.json5 @@ -1,49 +1,65 @@ { - "extends": [ - "config:base", // https://docs.renovatebot.com/presets-config/#configbase - ":semanticCommitTypeAll(chore)", // https://docs.renovatebot.com/presets-default/#semanticcommittypeallarg0 - ":ignoreUnstable", // https://docs.renovatebot.com/presets-default/#ignoreunstable - "group:allNonMajor", // https://docs.renovatebot.com/presets-group/#groupallnonmajor - ":separateMajorReleases", // https://docs.renovatebot.com/presets-default/#separatemajorreleases - ":prConcurrentLimitNone", // View complete backlog as PRs. https://docs.renovatebot.com/presets-default/#prconcurrentlimitnone - ":prHourlyLimitNone", // https://docs.renovatebot.com/presets-default/#prhourlylimitnone - ":preserveSemverRanges", + extends: [ + 'config:recommended', + ':semanticCommitTypeAll(chore)', + ':ignoreUnstable', + 'group:allNonMajor', + ':separateMajorReleases', + ':prConcurrentLimitNone', + ':prHourlyLimitNone', + ':preserveSemverRanges', ], - "ignorePaths": [".kokoro/**"], // Dependabot will make security updates - - // Give ecosystem time to catch up. - // npm allows maintainers to unpublish a release up to 3 days later. - // https://docs.renovatebot.com/configuration-options/#minimumreleaseage - "minimumReleaseAge": "3", - - // Create PRs, but do not update them without manual action. - // Reduces spurious retesting in repositories that have many PRs at a time. - // https://docs.renovatebot.com/configuration-options/#rebasewhen - "rebaseWhen": "never", - - // Organizational processes. 
- // https://docs.renovatebot.com/configuration-options/#dependencydashboardlabels - "dependencyDashboardLabels": [ - "type: process", + ignorePaths: [ + '.kokoro/**', ], - "packageRules": [ - + minimumReleaseAge: '3', + rebaseWhen: 'conflicted', + dependencyDashboardLabels: [ + 'type: process', + ], + packageRules: [ + { + matchFileNames: ["samples/index_tuning_sample/**"], + groupName: "samples-index_tuning_sample", + }, + { + matchFileNames: ["samples/langchain_on_vertexai/**"], + groupName: "samples-langchain_on_vertexai", + }, + { + groupName: 'GitHub Actions', + matchManagers: [ + 'github-actions', + ], + pinDigests: true, + }, + { + matchPackageNames: [ + 'pytest', + ], + matchUpdateTypes: [ + 'minor', + 'major', + ], + }, { - "groupName": "GitHub Actions", - "matchManagers": ["github-actions"], - "pinDigests": true, + "description": "Disable numpy updates for python 3.10 in requirements.txt", + "matchPackageNames": ["numpy"], + "matchCurrentVersion": "<=2.2.6", + "enabled": false }, - - // Python Specific { - "matchPackageNames": ["pytest"], - "matchUpdateTypes": ["minor", "major"] + "description": "Disable numpy updates for python 3.10 in pyproject.toml", + "matchFileNames": ["pyproject.toml"], + "matchPackageNames": ["numpy"], + "matchCurrentValue": ">=1.24.4, <=2.2.6", + "enabled": false }, { - "groupName": "python-nonmajor", - "matchLanguages": ["python"], - "matchUpdateTypes": ["minor", "patch"], + "description": "Use feat commit type for LangChain Postgres dependency updates", + "matchPackageNames": ["langchain-postgres"], + "semanticCommitType": "feat", + "groupName": "langchain-postgres" }, - ], } diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index d6cbeaef..686cc004 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -10,9 +10,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + uses: 
actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6 - name: Setup Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6 with: python-version: "3.10" - name: Install nox @@ -26,9 +26,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6 - name: Setup Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6 with: python-version: "3.10" - name: Install nox diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index b9721507..4344058a 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -31,10 +31,10 @@ jobs: steps: - name: Checkout Repository - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6 - name: Setup Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 + uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0 with: python-version: "3.11" diff --git a/.github/workflows/schedule_reporter.yml b/.github/workflows/schedule_reporter.yml index b449c599..4448dfff 100644 --- a/.github/workflows/schedule_reporter.yml +++ b/.github/workflows/schedule_reporter.yml @@ -24,6 +24,6 @@ jobs: issues: 'write' checks: 'read' contents: 'read' - uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@fb89146037a50ca9d96801be3208bc1c3efcd50d + uses: googleapis/langchain-google-alloydb-pg-python/.github/workflows/cloud_build_failure_reporter.yml@fabff9f2b5312824e0a3a3723b45dd302a83366c with: trigger_names: "pg-integration-test-nightly,pg-continuous-test-on-merge" diff --git a/CHANGELOG.md b/CHANGELOG.md 
index 445b05ea..8765db3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,28 @@ # Changelog +## [0.15.0](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/compare/v0.14.1...v0.15.0) (2026-01-08) + + +### ⚠ BREAKING CHANGES + +* Refactor PostgresVectorStore and PostgresEngine to depend on PGVectorstore and PGEngine respectively ([#316](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/issues/316)) + +### Features + +* **deps:** Update langchain-postgres to v0.0.16 ([#366](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/issues/366)) ([e773505](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/commit/e773505453683dad5681e6155831b710cbc7fcc1)) +* Disable support for python 3.9 and enable support for python3.13 ([#378](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/issues/378)) ([b97060e](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/commit/b97060e1fd69f1902c370c90218b1e61b72050b8)) +* Update Langgraph dependency to v1 ([#379](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/issues/379)) ([7a841b3](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/commit/7a841b357c998bce7c6aede0e2e5fed8fa48f198)) + + +### Documentation + +* Add Hybrid Search documentation ([#329](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/issues/329)) ([14098ca](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/commit/14098ca7a6cf7116e6edbcb7a5c6c3ccbce76b4a)) + + +### Code Refactoring + +* Refactor PostgresVectorStore and PostgresEngine to depend on PGVectorstore and PGEngine respectively ([#316](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/issues/316)) ([7917d62](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/commit/7917d62c3f9ea2c6ca8ab8d6284cfa2c7e535401)) + ## [0.14.1](https://github.com/googleapis/langchain-google-cloud-sql-pg-python/compare/v0.14.0...v0.14.1) 
(2025-07-11) diff --git a/DEVELOPER.md b/DEVELOPER.md index 899f62b6..751df2e7 100644 --- a/DEVELOPER.md +++ b/DEVELOPER.md @@ -42,11 +42,11 @@ These tests are registered as required tests in `.github/sync-repo-settings.yaml #### Trigger Setup -Cloud Build triggers (for Python versions 3.9 to 3.11) were created with the following specs: +Cloud Build triggers (for Python versions 3.10 to 3.13) were created with the following specs: ```YAML name: pg-integration-test-pr-py39 -description: Run integration tests on PR for Python 3.9 +description: Run integration tests on PR for Python 3.10 filename: integration.cloudbuild.yaml github: name: langchain-google-cloud-sql-pg-python @@ -64,7 +64,7 @@ substitutions: _DATABASE_ID: _INSTANCE_ID: _REGION: us-central1 - _VERSION: "3.9" + _VERSION: "3.10" ``` Use `gcloud builds triggers import --source=trigger.yaml` to create triggers via the command line diff --git a/README.rst b/README.rst index 6833433b..d1e258c5 100644 --- a/README.rst +++ b/README.rst @@ -56,7 +56,7 @@ dependencies. Supported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^ -Python >= 3.9 +Python >= 3.10 Mac/Linux ^^^^^^^^^ @@ -111,6 +111,26 @@ Use a Vector Store to store embedded data and perform vector search. embeddings=embedding_service ) +Hybrid search +~~~~~~~~~~~~~ + +The `PostgresVectorStore` supports hybrid search (dense vectors + full text) for more comprehensive and relevant search results. + +.. code-block:: python + + from langchain_google_cloud_sql_pg import HybridSearchConfig, reciprocal_rank_fusion + + vs = PostgresVectorStore.create_sync( + engine=engine, + table_name=TABLE_NAME, + embedding_service=embedding, + hybrid_search_config=HybridSearchConfig( + fusion_function=reciprocal_rank_fusion + ), + ) + hybrid_docs = vector_store.similarity_search("products", k=5) + + See the full `Vector Store`_ tutorial. .. 
_`Vector Store`: https://github.com/googleapis/langchain-google-cloud-sql-pg-python/tree/main/docs/vector_store.ipynb diff --git a/docs/vector_store.ipynb b/docs/vector_store.ipynb index cf2814fe..ddc5ce30 100644 --- a/docs/vector_store.ipynb +++ b/docs/vector_store.ipynb @@ -585,10 +585,49 @@ "all_texts = [\"Apples and oranges\", \"Cars and airplanes\", \"Pineapple\", \"Train\", \"Banana\"]\n", "metadatas = [{\"len\": len(t)} for t in all_texts]\n", "ids = [str(uuid.uuid4()) for _ in all_texts]\n", - "await custom_store.aadd_texts(all_texts, metadatas=metadatas, ids=ids)\n", + "await custom_store.aadd_texts(all_texts, metadatas=metadatas, ids=ids)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### For v0.15.0+\n", "\n", + "**Important Update:** Support for string filters has been deprecated. Please use dictionaries to add filters." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "# Use filter on search\n", - "docs = await custom_store.asimilarity_search_by_vector(query_vector, filter=\"len >= 6\")\n", + "docs = await custom_store.asimilarity_search_by_vector(\n", + " query_vector, filter={\"len\": {\"$gte\": 6}}\n", + ")\n", + "\n", + "print(docs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### For v0.14 and under\n", + "\n", + "You can make use of the string filters to filter on metadata" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Use filter on search\n", + "docs = await custom_store.asimilarity_search(query, filter=\"len >= 6\")\n", "\n", "print(docs)" ] @@ -623,6 +662,357 @@ "docs = await loader.aload()\n", "print(docs)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a Vector Store using existing table\n", + "\n", + "A Vector Store can be built up on an existing table.\n", + "\n", + "Assuming there's a pre-existing table in 
Cloud SQL Pg: `products`, which stores product details for an eComm venture.\n", + "\n", + "
\n", + " Click for Table Schema Details\n", + " \n", + " ### SQL query for table creation\n", + " ```\n", + " CREATE TABLE products (\n", + " product_id SERIAL PRIMARY KEY,\n", + " name VARCHAR(255) NOT NULL,\n", + " description TEXT,\n", + " price_usd DECIMAL(10, 2) NOT NULL,\n", + " category VARCHAR(255),\n", + " quantity INT DEFAULT 0,\n", + " sku VARCHAR(255) UNIQUE NOT NULL,\n", + " image_url VARCHAR(255),\n", + " metadata JSON,\n", + " embed vector(768) DEFAULT NULL --> vector dimensions depends on the embedding model\n", + " );\n", + " ```\n", + " ### Insertion of records\n", + " ```\n", + "INSERT INTO\n", + " products (name,\n", + " description,\n", + " price_usd,\n", + " category,\n", + " quantity,\n", + " sku,\n", + " image_url,\n", + " METADATA,\n", + " embed)\n", + "VALUES\n", + " ('Laptop', 'High-performance gaming laptop', 1200.00, 'Electronics', 10, 'SKU12345', 'https://example.com/laptop.jpg', '{\"category\" : \"Electronics\", \"name\" : \"Laptop\", \"description\" : \"High-performance gaming laptop\"}', 
ARRAY[0.028855365,-0.012488421,0.006031946,0.0041402685,0.058347773,0.034766156,0.0033533745,0.02021188,0.022670388,0.049201276,0.029006215,-0.00986186,-0.052214462,-0.012280585,0.023684537,-0.059519604,0.001378169,-0.04670758,0.020753963,0.0013795564,0.013659675,0.013842887,-0.011299884,-0.03746782,-0.024693582,-0.07013125,0.030126512,-0.028513059,-0.045777187,0.020505989,-0.05952914,0.0015648323,-0.050879195,0.006477519,-0.007886009,-0.02629686,-0.0161126,0.0314275,-0.0328995,0.0265609,-0.01530363,-0.019561788,-0.04535006,0.030131247,0.05462397,-0.0122205755,0.009777537,-0.0049046725,0.02023674,-0.064513534,0.041379478,0.006994005,0.045187026,-0.029661352,0.019398877,-0.02221874,-0.017291287,-0.016321573,-0.033429787,-0.009547383,0.031690586,0.009064364,-0.015285908,0.076494075,0.010917006,-0.016593782,-0.018348552,0.017040739,0.05943369,-0.020822933,0.009285482,0.027736548,0.07029796,-0.0644397,-0.037717465,-0.047550958,-0.0054535423,0.047678974,0.060069297,-0.015072207,-0.04320405,-0.0019738402,-0.061910342,-0.034316592,-0.023359261,0.057676528,0.0054635284,0.042063717,0.020484874,0.005591504,-0.008757174,-0.0153757995,0.04932489,-0.04626516,0.0004756786,0.03749645,0.018522505,-0.015642159,-0.00842546,-0.06284679,-0.006150201,-0.061204597,0.0008340049,0.0040505463,0.014210282,-0.009027461,-0.014203488,0.030791085,-0.022282222,0.0011378798,-0.047313087,-0.008226634,-0.03726029,-0.04307269,0.04519085,-0.021895533,0.019570287,0.08584432,-0.003815025,0.021276724,0.027253378,-0.01660856,0.056772888,0.053538952,0.02739156,0.04655151,0.021516826,0.064367436,-0.021094408,0.0149244,-0.009901731,-0.04166729,-0.0032499651,0.022982895,0.063407354,0.04826923,0.056767307,-0.024418632,0.063300684,0.08071309,-0.054988176,0.01652395,-0.014671885,0.000837919,-0.044569198,0.03651631,-0.016364796,0.0053244857,0.051150765,-0.01878448,0.005112729,-0.0011729974,-0.052268386,0.034706745,0.05072015,0.0052968785,0.021704907,0.045661792,0.002976117,-0.02205154,0.037168674,0.002627892,0.01
8275578,0.032312263,-0.06719407,-0.056915596,-0.019727554,0.0009450171,0.0029568567,0.047435578,0.033826437,-0.009351167,-0.05718618,-0.062166944,-0.005684254,-0.009788955,0.016364967,0.0122847315,-0.016126394,0.012999976,-0.075272575,0.017478324,0.03005914,0.024401167,0.0099941185,-0.043311242,0.032115143,0.0047207233,-0.034337096,0.0054743756,-0.0024234303,0.012045114,0.032277416,-0.019994166,0.012312445,0.021211047,-0.037350595,0.0017910452,0.04450775,0.0054527316,0.03591427,0.029365221,0.0009824947,-0.006488191,0.034008037,0.01649739,0.07955305,-0.035204325,0.0056851353,-0.0086927805,-0.032573096,0.0010878195,-0.061459325,0.027879931,0.015068312,0.032717325,0.03890655,0.01902891,0.016527452,-0.0020142202,0.025338948,-0.0016015576,-0.06429177,-0.0041105347,-0.025726322,0.09078289,-0.03174613,0.015951345,0.009411334,-0.03598392,0.034463316,0.010011217,-0.009883364,-0.008042991,0.040896636,-0.025115138,0.048056312,0.028382989,0.007793395,0.019581616,-0.02584373,0.04317992,0.025689745,0.02035658,-0.05990108,-0.0007803719,-0.06793038,-0.02130707,0.0048890263,0.042799927,-0.009928141,-0.003192067,0.008781545,0.024785394,-0.07565836,-0.043356933,-0.067785084,-0.019649943,-0.024896448,-0.008327102,-0.015189734,-0.0140810255,0.0049958434,-0.015353841,0.020730853,0.028829988,-0.022614283,-0.03751693,0.011577282,0.031927988,-0.024855413,-0.042680055,0.08018929,-0.0021632465,-0.017928878,-0.0030442774,-0.005651566,-0.0010570051,-0.040446285,-0.00189408,0.06388222,0.0024985478,0.004886204,-0.05113467,-0.019480383,0.049765434,0.0077566532,-0.07356923,0.011988718,-0.020965552,-0.04025921,-0.032686763,-0.0053743063,-0.015599607,-0.03576176,0.00907552,-0.044702522,0.038329247,0.046024352,0.02194124,0.01844749,0.004619246,-0.029577129,-0.031205669,0.00896738,0.0115034515,0.013058729,0.01372364,0.03063813,-0.0316296,-0.04826321,-0.049244087,-0.037644744,0.019473651,0.059536345,0.04033204,-0.06602803,0.050612085,-0.027031716,0.04213856,-0.015262794,0.07257449,0.044631373,-0.0151061
565,0.012033797,0.0009732858,-0.014827035,0.046652585,-0.042083394,-0.0436095,-0.035586536,0.026696088,-0.004066648,0.06954644,-0.029623765,-0.020358749,-0.04957031,0.01740737,-0.017026579,0.011162373,0.0487351,-0.031720005,-0.050231773,-0.089686565,-0.014156863,-0.02636994,0.015916161,-0.025308851,0.02081637,-0.02257452,0.021604244,0.10139386,-0.03208752,-0.008580313,-0.008898747,-0.06853021,0.04102758,0.041922912,0.047566738,-0.0341902,-0.07725792,0.005653997,0.00021225312,-0.0104829185,0.001749244,0.011929626,0.078264005,0.036519475,-0.0073295147,0.021337496,-0.008336836,-0.035804152,0.010720447,0.007127837,-0.053885818,-0.009795316,-0.05424524,-0.003111704,0.019710006,-0.012413589,0.02320744,0.024137065,0.023079542,0.0030920266,0.013961592,0.0040291087,0.020265838,0.041183334,-0.0029272675,-0.018539282,-0.011489972,0.017938145,0.025854694,0.033188265,-0.042004097,-0.0106819095,-0.045249976,-0.06986475,0.030204961,-0.032193515,-0.00095170306,-0.0107111735,-0.017970158,-0.02740307,-0.06307846,-0.031544626,0.004178074,0.016592229,-0.032037992,-0.030618787,0.008946463,0.03110429,0.0207187,-0.016861247,-0.08070464,-0.03067543,0.067448415,-0.041909873,-0.0048193526,0.018761802,0.020243261,0.024184326,0.002299031,-0.014152546,-0.035749547,-0.0071563246,0.050069712,-0.027215304,0.049641047,0.02778935,0.070745096,0.023794815,0.0029510225,0.0069351746,-0.034430653,-0.085317925,-0.036851004,0.023848707,0.035138704,-0.017030267,0.041982725,0.014077844,0.012787886,-0.029716792,-0.024732213,-0.059604853,0.024058796,-0.027469097,0.02969232,-0.06889772,-0.034953564,-0.0678685,0.02039748,-0.073483475,-0.04067064,-0.023628144,0.052601792,0.10005532,0.0027910264,-0.00044562414,0.025615653,0.008896907,-0.016369712,-0.030180404,0.026393086,-0.02041892,0.0072918,-0.018448602,0.020845268,0.006290655,-0.010850651,-0.035378493,-0.01083432,0.012116494,-0.045438327,0.05191333,-0.082797736,0.042320468,0.039703712,0.00923727,0.03598509,-0.064069025,0.049349498,0.007205401,-0.0079013845,0.01
5407162,-0.049755134,-0.0335355,-0.033252683,0.025886077,-0.043650113,-0.021745201,-0.046847582,-0.02873071,-0.01435186,0.01642749,-0.030346846,0.00564007,0.0074587157,0.027222605,-0.024691164,0.007528186,-0.04551536,-0.011026097,0.091698915,-0.062147886,0.0013525741,-0.0065618614,-0.030818032,0.024246406,-0.010786434,0.006758053,-0.016815495,0.071824,0.022536254,-0.026362726,-0.066206455,0.011966612,0.06430261,0.021586932,0.032340884,-0.015460002,-0.0963993,-0.0041012894,0.026189657,-0.101343565,0.038662393,0.07043264,-0.0373032,0.0038455573,-0.017408002,0.12948644,-0.056175977,0.02693295,-0.033682294,-0.032874268,0.0016187532,0.023056049,0.06884863,0.04350595,0.02135146,-0.059129357,-0.0055416543,0.0098204445,-0.008596177,-0.04332969,-0.012624592,-0.09298762,0.041691724,-0.014171953,0.004045705,0.009756654,0.059401184,-0.02852561,0.006892971,-0.019445946,-0.013781522,-0.03458903,-0.001079532,-0.008455719,-0.025446072,-0.03641567,-0.034449898,0.004487285,0.07899037,0.031314176,-0.031828023,0.031026838,0.034468375,0.0166286,0.032397788,0.02265452,0.07575427,0.015329588,0.05969185,-0.049144097,-0.043501142,0.031721197,-0.03434621,0.04558533,-0.00039121095,0.00093291467,0.033810064,0.0131731015,-0.0161992,0.039637238,0.0018543458,-0.041811496,-0.01406263,-0.020126836,-0.011859638,0.029031854,0.018889664,0.015262868,-0.03756649,-0.024570176,0.02538295,0.0038968727,-0.06393701,0.00093783275,-0.05943941,-0.062095385,0.08169533,-0.026443593,0.045758378,-0.026765708,-0.023990292,-0.028646782,0.0013627055,0.0022589415,0.009424216,-0.004252787,0.01159273,-0.0393901,-0.02593045,-0.04785985,0.023880653,0.012857186,-0.028907716,-0.05117687,-0.017512657,-0.035777926,0.01183514,0.025101895,0.089760125,-0.009716518,0.012040118,-0.023447596,0.057904292,0.03486462,-0.014875794,0.05191007,0.002385196,0.016686346,-0.052348964,-0.029286617,0.023832947,-0.02915365,0.007727999,-0.012708917,-0.055755604,-0.0073897606,0.032306697,0.02891973,-0.029123511,0.08987496,0.049180396,-0.08122004,-
0.029804248,0.03262262,-0.06680825,0.016717656,0.0038353673,0.021287518,0.0018424556,-0.0041867862,-0.0011719886,-0.044280436,0.02019424,-0.052992586,-0.05063449,0.039644204,-0.0494374,-0.033791043,-0.0041454337,-0.032513123,-0.073564336,-0.04585872,0.0023792102,0.027335508,-0.06999816,0.04888005,0.026423248,0.021874929,0.010904174,0.060097646,-0.034017522,0.05548881,-0.024519302,0.049890403,-0.015645353,-0.060680103,0.017045638,0.019808227,0.025153033,0.0040058065,0.053807795,0.034485374,-0.053428553,-0.0034872151,0.033813756,-0.03047597,0.007858348,0.024711734,0.060215656,0.008143574,-0.0070263194,0.0048007956,0.015641727,0.052094024,-0.049206913,0.016296484,-0.0059813466,0.040864628,0.013278136,-0.012139221,-0.04106141,0.0144868875,0.0013842004,0.021345256,0.04826021,-0.06929805,-0.021199407,0.00090551435,0.009481861,-0.0017141728,0.028452767,-0.019797614,0.038415838,0.056153923,-0.014074272,-0.00823969,-0.00050664565,-0.07698735,-0.025168924,0.057516575,-0.07501726,0.037316702,-0.02765656,-0.011325112,0.058868058,-0.010426108,-0.013318932,-0.0016809561,-0.062076304,0.027063645,-0.020674324,0.06843111,0.018448142,-0.04226709,-0.015164476,-0.008888517,0.040828817,0.048462827,0.00942803,-0.019631634,0.020950766,-0.0003345382,-0.030098192,0.022870619,-0.0017267349,-0.009055838,-0.012781693,0.07583533,0.045031916,-0.02076535,-0.07310905,-0.011597339,-0.00062336307,0.005723161,-0.018269768,0.020560576,0.023111053,-0.00881239,0.0052197427,0.022200806,0.013797317,0.019722437]::vector(768)),\n", + " ('Smartphone', 'Latest model with high-resolution camera', 800.00, 'Electronics', 15, 'SKU12346', 'https://example.com/smartphone.jpg', '{\"category\" : \"Electronics\", \"name\" : \"Smartphone\", \"description\" : \"Latest model with high-resolution camera\"}', 
ARRAY[0.031757303,-0.030950155,-0.058881454,-0.05073203,0.053704526,-0.01064694,0.030361004,0.0036670829,-0.014013894,0.022840602,0.06545107,0.0108244,0.009321064,-0.0236112,0.0098358095,-0.038861487,-0.011348891,-0.011887714,0.011245335,-0.018139482,0.03049321,-0.030338986,-0.001923893,0.011787388,-0.01825618,-0.050398953,0.0036137043,-0.04487695,-0.021582587,0.023590472,-0.051335085,0.08021365,-0.06793676,-0.00514603,0.024418706,-0.054447155,-0.050472837,0.010439093,-0.017847419,0.07124281,0.004419413,-0.028902968,-0.062286377,-0.02737251,0.048311986,-0.029160773,0.0059961462,0.0344943,0.037635062,-0.081315145,0.025175434,-0.0050063017,0.023545247,-0.015210805,-0.035123624,-0.020403884,0.014771475,0.015879042,0.0029214756,0.011768866,0.004276383,-0.009031657,-0.050000243,0.059927624,-0.03906005,-0.027238877,-0.04796615,0.03084268,0.07360646,-0.028875567,0.027232852,0.015592421,0.07156161,-0.059652634,-0.04831314,-0.049740285,-0.017305655,0.10253246,0.016519215,-0.0021727297,-0.0063062175,-0.0015423468,-0.03617129,-0.03982753,-0.059866134,0.082323685,-0.01662162,-0.0048025097,0.011876321,0.08410362,-0.006159452,-0.0008565244,0.04274695,-0.08079417,0.04427687,0.04110836,0.04812812,-0.053979542,-0.004387368,-0.04829328,-0.022975856,-0.015012431,-0.0056774826,-0.03936704,0.023132714,-0.007810687,-0.011018049,0.031620245,-0.02713872,0.0018347959,-0.024968592,0.02253628,-0.00809666,-0.0076680584,0.06435103,-0.020083368,-0.0049473317,0.07430767,0.01915259,0.040656384,0.00998682,-0.014684721,0.026354978,0.032759093,0.037668057,-0.009659323,0.006720873,0.063525185,0.03982695,0.04567435,-0.02619304,-0.030550981,-0.014520635,0.0010599799,0.034034356,0.06294083,0.07422565,0.01973267,0.05249243,0.010003681,-0.034319345,-0.023254821,0.0019625498,0.033209592,-0.015176091,0.056498263,0.0041291295,-0.046049923,0.054690883,-0.021583585,-0.019928787,-0.010311507,-0.03155074,0.038876258,0.055084117,0.0006716143,-0.005959439,0.02702423,-0.0041947966,0.015374709,0.057063535,0.028639654
,0.069971144,0.019529812,-0.026227735,-0.083985895,-0.0041349265,0.009833876,-0.015811538,0.016993256,-0.010458223,0.040068664,0.009195164,-0.03924835,-0.007896623,-0.06261605,0.015779363,-0.018634042,-0.0013783163,0.016493134,-0.041971806,-0.039205268,0.020863583,-0.00169911,0.026609324,-0.07237093,0.07898098,-0.008871385,0.017599586,0.018514562,-0.01763139,0.00015460308,-0.03443664,0.026305566,0.0019577034,0.049758997,-0.014016935,0.01580608,-0.005885855,-0.014773614,0.008331391,0.011858725,0.047954902,0.016360788,0.040261615,-0.014324732,0.062151354,-0.037888777,0.02075746,0.039549813,-0.077434056,0.00096539775,-0.044017132,-0.012209571,0.034755055,-0.020098051,0.008095624,0.031291816,0.04792529,-0.008659437,0.01759492,0.009537845,-0.05313831,-0.010890252,-0.03342564,0.061369378,-0.031681072,-0.053262327,8.374469e-05,-0.027414132,-0.013404388,0.033906803,0.025408141,-0.035230264,0.030235829,-0.0014981066,0.023731904,0.029274339,0.047021322,0.025153603,-0.050763946,0.042003185,0.028869675,0.023947056,-0.045773767,-0.029348088,-0.04498305,0.03974547,0.021556387,0.032411546,-0.028107764,-0.01917967,0.020117322,0.035401057,-0.087708965,0.028180089,-0.07627729,0.010020432,-0.055026818,0.013467507,0.05156387,0.030606749,-0.012557438,0.0075980667,-0.049580842,0.025251655,0.011958476,-0.05784425,-0.00688397,-0.026897762,-0.0073929257,-0.082809925,0.0707716,0.0044888635,-0.023634167,0.00959699,0.027249858,0.009045479,-0.008601681,0.007323367,0.014609572,0.007073427,0.0055342577,-0.047172364,-0.023501316,-0.03593993,-0.022744065,-0.031178312,0.007601522,0.01038201,-0.040641543,-0.02084411,-0.04739785,0.0016813428,-0.022378212,-0.024991153,-0.019224035,0.033300195,0.04363394,-0.0072962623,0.0044990415,0.00530943,-0.0061862995,-0.1226422,-0.0048183375,-0.010383665,-0.043834127,-0.010673082,0.00016926302,0.026351877,-0.03451933,-0.017912712,-0.06287377,-0.00329357,0.056648213,-0.005951308,-0.017310314,0.06057505,0.00529039,0.04522765,0.009986563,0.09290384,0.0046436884,-0.027
085476,-0.0051616537,0.014926508,-0.027059292,0.07819409,0.0018491915,-0.034066174,-0.04200668,0.017987153,-0.054097146,0.0263208,-0.030290576,0.012135319,-0.053635724,0.0040904377,-0.06391213,-0.012962556,0.039401833,0.029892938,-0.010509396,-0.09667328,-0.004525119,-0.0660734,0.005074788,-0.0043580704,0.048569698,-0.029491736,-0.00813117,0.099913284,-0.02152916,-0.0046480033,-0.004279434,-0.022350302,0.07403285,2.6268553e-05,0.024700351,-0.070556544,-0.046257928,0.047623277,0.013440511,0.022684522,0.0105078975,0.029062217,0.036317576,0.012476447,-0.025555858,0.0043436335,0.006260482,-0.030046312,0.012665346,-0.060015686,-0.042867333,-0.043334395,-0.09350731,-0.015882127,-0.023036648,0.0035012013,0.019168707,-0.029792963,0.014690395,-0.03232301,0.04318316,-0.023454774,0.024906443,0.033632547,0.026205529,0.021056164,-0.014863617,0.03884084,0.019737227,0.0643725,-0.015622061,0.010209574,-0.042415053,-0.041623153,0.020822845,-0.020490937,-0.0542278,-0.0033205135,-0.041752372,-0.069488324,-0.016277319,-0.0044792043,-0.02016524,-0.03959827,-0.032634977,-0.0039365673,-0.0132405395,0.0067148125,0.075648956,-0.05606617,-0.06265819,-0.019359354,0.05813966,-0.01447109,-0.010593954,-0.00086784246,0.00957173,0.02843471,0.00845407,0.024766237,-0.017594881,-0.02089351,-0.023622723,-0.033868976,0.01189866,0.04348284,0.017560178,0.0044504236,0.0201572,-0.010445271,0.016996963,-0.063251264,0.036506347,0.014985517,-0.004923813,-0.019643096,0.004065921,-0.03441569,0.02174584,-0.022037273,-0.105745554,-0.017520802,0.024135107,-0.056571614,0.065653384,-0.11961944,-0.019004421,-0.048515763,-0.018267322,-0.02178645,-0.00048087785,-0.042244278,0.041203473,0.039137937,-0.028382456,0.0027469762,-0.035103243,-0.008536376,-0.022003518,0.013834031,0.04035347,-0.05127768,-0.021083988,-0.019288905,0.030957388,0.03837377,-0.0003459004,-0.043197013,0.059090964,0.03584024,-0.009635979,0.049144205,-0.113005035,0.012198436,0.0030250824,-0.0005766731,0.010016404,-0.004630926,0.036304604,0.030682925,-0
.028248072,0.0053004674,-0.028463472,-0.045950726,-0.016214147,0.02234844,-0.024365503,0.0045087263,0.0015641076,-0.046219032,0.019860927,0.011021814,-0.024108216,-0.048900776,0.012885111,-0.0022583513,-0.030102832,-0.016490621,0.024889058,-0.0009473834,0.015075038,-0.040798195,-0.005642347,-0.0029682147,-0.050329093,0.0009567131,0.007919075,0.01719906,-0.018685095,-0.016243592,0.010302834,4.1979074e-06,-0.042400364,0.055864133,0.033395868,-0.017874744,0.0013070442,-0.05331383,-0.10789571,0.0074728676,0.03525642,-0.07436872,0.04979144,0.046753135,0.0027637088,0.014162893,-0.026069263,0.06226656,-0.056384422,0.008216318,-0.02018645,-0.007397228,0.0074180462,0.035483476,-0.01882623,-0.02706421,0.04596009,-0.013163229,-0.021003753,0.037058793,0.052453898,0.013129776,0.015402059,-0.048313417,0.023352273,0.009391176,-0.044023603,-0.0107533,0.054881006,-0.019277383,0.02055352,-0.030710667,-0.02347742,0.0092705265,-0.047558293,-0.024285497,-0.03519891,-0.0038767713,-0.005330039,-0.026968258,0.06881978,0.06537581,-0.023353418,0.01331013,0.045053896,0.032502707,0.065926,0.0009946732,0.051750924,0.005718337,-0.0038732293,-0.029579317,-0.06977859,-0.0048092776,-0.025378013,0.023722455,0.032475006,-0.031788938,-0.00917764,0.0056064464,-0.016738426,0.021969007,0.012666437,-0.046921335,-0.02513667,-0.028311022,-0.009224157,0.05264038,-0.026426777,0.02599612,-0.018745475,-0.015264339,-0.013577108,0.0011754846,0.020499794,0.01423578,-0.015937831,-0.034813095,0.06295408,-0.033208452,0.041733917,0.0022288205,-0.0036853347,-0.015074669,-0.00813031,-0.004992453,0.010502773,0.017247686,0.03162546,-0.006212466,-0.06321386,0.022924462,0.03354761,-0.02742972,-0.018287206,-0.05058406,-0.02762529,0.014693771,-0.009422438,-0.0113650765,0.04500726,-0.009418481,0.023177318,-0.0394831,0.07899207,0.010970399,0.01519068,0.060208563,-0.014248415,-0.027108915,-0.055970594,-0.05615517,0.00082430604,-0.02946103,0.012972071,-0.034580585,-0.092063755,0.023562009,0.09187191,0.03979375,-0.048233856,0.0891
921,0.0054705814,-0.07132956,-0.03294508,0.015985591,-0.06979576,-0.008607954,0.03748406,0.018775256,-0.00055046624,-0.0018972756,0.010640039,-0.039262787,0.045647603,-0.052634962,-0.04485457,0.059673585,0.005487001,0.005677175,-0.040526956,-0.0023886457,-0.051557075,-0.026969707,-0.020169057,0.020184118,-0.06750348,0.014797761,0.043389246,0.022667736,0.012956063,0.056346934,0.038232267,0.02334661,-0.002965094,0.053386245,-0.016282998,-0.08433834,-0.005240998,0.020763554,0.0041468525,0.011248255,0.013354228,0.0062226793,0.01238483,-0.042322755,0.017076539,-0.024617095,-0.03331688,-0.001430632,0.05623171,0.0073584137,0.013339925,-0.0041607106,0.015201854,0.029444456,-0.039367896,0.032675862,0.016636375,0.04101005,0.0073330533,0.03937178,-0.01699229,0.026922127,-0.00465699,0.014691186,0.07985071,-0.045738634,-0.040622048,0.040370528,-0.0070402357,-0.048223954,0.048428483,-0.013764062,0.02645368,0.030109879,-0.01834218,-0.0045400057,0.036011115,-0.010352046,-0.068165384,0.037795525,-0.036501475,0.020713413,-1.2293508e-05,-0.00038850267,0.073334076,0.01821627,0.003559663,0.017506005,-0.02564981,0.039007656,-0.026543219,0.018282859,-0.038226757,-0.04996024,0.01010447,-0.012900636,-0.020180488,0.042488355,0.0135185765,-0.0083626835,-0.019743606,0.025633369,0.035687257,-0.053833067,-0.053783447,0.007418253,-0.04581871,0.032362275,0.050387084,-0.010103674,-0.051880397,0.010476682,0.015898407,0.04970622,-0.04664034,0.036457486,-0.017625386,-0.0058598807,-0.011529857,0.018154921,0.013366902,0.0021690137]::vector(768)),\n", + " ('Coffee Maker', 'Brews coffee in under 5 minutes', 99.99, 'Kitchen Appliances', 20, 'SKU12347', 'https://example.com/coffeemaker.jpg', '{\"category\" : \"Kitchen Appliances\", \"name\" : \"Coffee Maker\", \"description\" : \"Brews coffee in under 5 minutes\"}', 
ARRAY[0.025002815,-0.052869678,-0.010500825,-0.024296444,0.049798742,0.043427017,-0.01307104,0.0077243242,0.022190414,0.037746448,0.029453197,-0.009484218,0.0028156517,-0.03531512,-0.012121426,0.0091221025,0.025652027,-0.009445565,-0.02820549,-0.04105274,-0.0010839493,0.015024874,0.053036522,-0.018628811,0.014746092,-0.049109433,0.026801802,-0.0070828577,-0.02369395,0.010975214,-0.03531074,0.04859645,-0.004710616,-0.018579654,-0.0076328423,-0.030808363,-0.012824788,0.03848257,0.014652247,0.058704656,0.00325119,-0.007205416,-0.04686223,-0.028575234,0.02045449,-0.008556303,-0.009746742,0.018289749,0.00093424425,-0.046003163,0.0039943205,-0.023993168,0.05866197,0.008093339,-0.00565744,-0.008198263,-0.001283407,-0.0007927462,-0.018114842,-0.008134085,-0.00014443924,0.021404255,-0.014830747,0.050932012,-0.032427747,-0.027500387,-0.020814912,0.025367612,0.061494272,-0.028271751,-0.002093295,-0.005629965,0.054627255,-0.062579386,-0.01051155,-0.06421958,-0.012094066,0.06576773,0.05998704,0.10272862,-0.021875817,-0.062225047,0.022178214,0.010618126,-0.05723891,0.040955715,-0.038523626,0.021909224,0.018677043,0.056335997,-0.01599579,0.015702266,0.025712736,-0.024550503,0.041618552,0.031751215,-0.0013378685,-0.042116627,-0.033073347,-0.011056941,0.022297822,-0.052519917,-0.06455736,0.030026494,0.04122688,0.0435459,-0.021909805,0.025392938,-0.05491582,0.022167888,-0.06104317,0.021199005,0.021531114,0.0003258208,0.051008765,-0.0056826724,0.0019850046,0.08186525,0.014742098,0.01913513,-0.026228607,-0.023587128,0.041640177,0.016765678,0.028365733,0.057187237,0.011515794,0.0734812,0.048084594,-0.0028821004,0.00025123838,-0.010272774,0.025670059,-0.049766205,0.0862307,0.07104121,0.008422137,0.026603732,0.06897059,-0.0013259795,-0.003537648,-0.016978277,-0.03289158,0.019160148,-0.030429484,0.03210423,-0.0025404708,-0.052619252,0.0020272017,-0.014941184,0.0026864705,0.012819193,-0.043763664,0.057997666,0.043563023,0.03174006,-0.04444913,0.0060016355,-0.029776296,-0.017748147,0.0361853
95,-0.0014833601,-0.017309692,0.04368944,0.020283954,-0.0160715,0.03019354,0.02680017,0.013467745,0.010598811,-0.009857402,0.0035379697,-0.04074403,-0.015414817,0.016311716,-0.0669727,0.0034562463,-0.024640094,-0.023524309,0.0028607736,-0.06249814,-0.058054965,-0.007223816,0.012088017,0.029124737,-0.030978883,0.07969112,-0.05076358,0.015344627,-0.00898595,-0.0097088795,-0.019155432,-0.035673082,0.027780814,0.006400352,0.055502266,-0.046420977,0.03919276,0.040964182,-0.024075434,-0.014520242,0.07941375,0.023109328,0.030869437,0.06598536,0.00059927267,0.076354064,-0.048273984,0.0025508753,0.0066330666,-0.070879966,0.03704847,-0.0650441,0.01176703,0.033744898,0.0400285,0.024317512,0.028281165,-0.008897873,-0.029537052,-0.0047060223,0.026686963,-0.07052627,0.023556747,-0.056385886,0.0714133,0.007949809,0.011887155,0.0029032454,-0.015065537,0.011513513,0.050219424,0.010533179,-0.009971522,0.03655571,-0.0066924663,-0.012303563,0.016773308,0.013691093,0.025839401,-0.044451136,0.049260832,0.05713467,0.013278825,-0.022100078,-0.0017930771,-0.016181005,0.0217466,-0.02600776,0.046996534,-0.022629611,-0.023503313,0.0074507482,0.0134722935,-0.04945182,0.022608835,-0.026130896,-0.01177188,-0.027667308,0.026118958,0.0025001818,0.021639917,-0.015105975,0.02968347,-0.043928802,0.03762012,0.019912925,-0.004347233,-0.006596504,0.016333994,-0.025137693,-0.01686705,0.04786869,0.034643404,0.011117003,-0.011134983,-0.0074818125,-0.006335571,0.022040822,-0.006491301,0.0054816976,0.038022403,0.016072717,-0.06609374,-0.03203102,-0.059326455,-0.04408214,-0.03787348,0.014894112,-0.02038928,-0.044823527,-0.015866352,-0.047105137,0.002020473,-0.04468357,0.018793538,-0.029475007,0.06967502,0.04684481,0.048074055,-0.0010090554,-0.0027273456,0.047790546,-0.030050496,0.023022242,-0.028264726,0.03571066,-0.0164874,-0.019399788,0.0076415916,-0.0060172956,-0.010469042,-0.045296766,-0.0071801674,0.032818798,0.034934863,-0.0737483,0.0327411,-0.006032433,0.05928009,-0.00927453,0.07627373,0.0050010816,-0.0
48511107,-0.0037969523,-0.007150538,-0.010152546,0.025746513,-0.02757783,-0.049112115,-0.029450508,0.037618097,-0.04765299,0.021502782,0.04031621,-0.021789404,-0.03477437,-0.0029428764,-0.04645585,0.015724704,0.0061205924,0.027327916,-0.016831782,-0.07413835,-0.009106179,-0.005994898,0.0015746661,-0.0066348854,0.08860898,0.026653405,-0.010490873,0.01737892,-0.036203787,0.0019658727,-0.05349199,-0.031604912,0.059320047,-0.0035595773,-0.013159466,-0.043662973,-0.000936755,0.037883844,1.1725969e-05,0.008455511,0.028007427,0.026448535,0.03587197,0.034501214,-0.020195212,-0.036874935,0.008322776,-0.038275808,0.014955824,-0.066956565,-0.03433901,-0.043297864,-0.07503335,-0.037108134,0.032691672,-0.05912909,0.023559488,-0.023238983,0.0012042256,0.0074822125,0.0058873207,-0.021845229,0.0054280413,0.05752058,-0.026954507,0.026175871,0.012301664,0.06307251,0.07353519,0.011740042,-0.012562488,-0.025707787,0.011014364,-0.064245604,-0.018075097,-0.04286179,-0.06992585,-0.031043975,-0.0022823277,-0.05855018,0.015864456,0.00024379989,0.0070141326,-0.00035948,-0.023150876,-0.063177474,0.008194795,0.023019124,0.014603101,-0.06850171,-0.07586402,-0.029384725,0.09732399,-0.023403296,0.00983274,0.00043465907,-0.037277438,0.060318034,-0.010698135,-0.0012939094,-0.015873678,-0.006272459,0.0014064384,-0.041425075,-0.021238888,0.021737115,0.030599548,0.043125883,0.01929081,-0.0011234619,-0.031159677,-0.05745639,0.0146679375,0.046521254,-0.01835481,-0.033141162,0.00036415283,-0.06466151,0.043580752,0.011921412,-0.07292401,-0.047980927,0.02159395,-0.023352068,0.0425091,-0.09635663,0.0060955146,-0.06484201,-0.029811602,-0.026076958,-0.014945281,-0.04334233,-0.00242451,0.047840517,-0.02103297,-0.0191666,-0.0074735563,1.0544848e-05,-0.028074,-0.037163526,0.030064873,-0.02934737,0.050285384,-0.023986174,0.025914317,0.10199452,-0.021887174,-0.0066847154,-0.023618985,0.03283886,0.045797225,0.047762897,-0.07030183,0.026901271,0.008702326,0.017019885,0.033345792,-0.03833666,0.031567782,-0.013102635,
-0.009532979,0.025451964,-0.021708276,-0.023218581,-0.07980661,0.03028782,-0.021675726,0.03096571,-0.018742265,-0.04427001,0.009433704,0.03455316,-0.035231985,0.04002238,0.012793141,0.025124295,-0.04512409,-0.06486318,0.019942157,-0.030111039,-0.0069209165,0.0015545462,0.028818183,0.0014206765,-0.032698274,0.008883163,0.058960456,-0.00906729,0.0298577,-0.0070162034,-0.014469902,-0.0032146918,-0.04448409,0.03293327,0.040138587,0.01842061,0.0055912337,-0.03388838,-0.071546026,0.02821449,0.033089,-0.04839594,0.016159212,0.08211776,-0.08987595,0.036964364,-0.051373526,0.1035708,-0.053108595,-0.01896186,0.01644011,-0.012502358,0.008263514,0.04065409,-0.015298684,0.0011162056,0.04276282,0.0027434586,0.0324373,0.03511016,0.02446925,0.002442109,0.049384676,-0.05747281,-0.0020478321,-0.03639974,0.011938583,-0.031114291,0.03284646,-0.03238849,0.08670559,-0.07415254,-0.036738325,-0.025126172,-0.045095183,-0.015307702,-0.06554373,-0.05546525,0.005472855,-0.006981692,0.04587679,0.111925036,0.013912294,0.014268016,0.058842134,-0.011192024,0.034922387,0.012045642,0.008008024,-0.014226386,0.06913233,-0.04700873,-0.06164794,-0.0024386728,0.043209903,0.051432677,-0.017323477,0.013788927,0.012737198,0.06472892,-0.070449375,0.005222667,0.050599333,0.0015403829,0.015714316,-0.008632714,0.014941663,0.06433311,-0.021354778,-0.0071928906,-0.028242689,0.018915592,0.021451298,0.0063637616,0.0019523413,-0.017883593,0.028570741,-0.016318232,0.053636383,-0.028484613,-0.006531752,0.022900375,0.023723338,-0.024363475,-0.015181002,0.024642847,-0.002409233,-0.0001194501,0.013567875,-0.046026736,-0.016705032,-0.013025837,0.020370122,-0.027258568,-0.04735096,0.011894463,0.0019317217,-0.0031460563,0.040866848,0.00464604,0.03964947,0.027275842,-0.0030081465,-0.008669969,0.0462421,0.010375526,-0.024637504,0.08480695,-0.02768799,-0.005021901,-0.009944692,0.015040328,-0.0051919715,-0.043738216,0.054622557,-0.0116185825,-0.044851393,-0.01769878,0.06967592,-0.026938388,0.0030814619,0.07516173,-0.022243993,-
0.09390373,-0.056307606,0.011178256,-0.058882743,0.016906237,0.010931337,0.011277608,-0.03310829,0.008875099,-0.017342865,-0.049926963,-0.0021014255,-0.019715691,-0.024091842,0.029629463,-0.06452303,0.009643791,-0.025999011,-0.017722748,-0.09347366,-0.019748896,-0.011190205,-0.0044534663,-0.04336357,-0.01312215,0.056558847,-0.022783643,0.0004763564,0.04152026,-0.03813543,0.0038315274,0.021157283,-0.007934057,0.0004752217,-0.057082873,-0.011285772,-0.014152046,0.03181829,0.033805694,0.04453719,-0.02024123,-0.0038247174,-0.0262423,0.007036252,-0.012817323,-0.025822328,0.06599188,0.067939,-0.022174655,-0.022773167,9.6714546e-05,-0.017627345,0.08549309,-0.06266334,-0.00575442,-0.011873023,-0.07250961,0.0056728884,0.017012162,-0.025071641,0.022021066,0.030550413,-0.010627088,0.050028834,-0.01721913,-0.050976366,-0.024867795,0.011782799,-0.075504154,-0.004392594,-0.01807583,0.031157117,0.030725744,-0.014750008,0.005684259,0.047403537,-0.08811708,0.007985649,0.043377616,-0.037903026,0.029741386,-0.0011720062,-0.010578729,0.051289707,-0.024345556,0.017949736,0.02636295,-0.059689533,0.06373776,-0.049072567,0.013506145,-0.040476285,-0.02940512,-0.023568999,-0.00035632766,0.056101788,0.061561547,0.03079068,-0.02166795,0.009211557,0.0030255727,-0.0036865661,0.023821775,0.0015869564,0.0064414316,-0.057368714,0.061502002,0.023947174,0.0046180966,-0.05202509,0.002360597,0.03557417,0.036739,-0.03005605,0.047780115,0.025282156,0.034349978,0.034781702,0.0276351,-0.040908,0.081558466]::vector(768)),\n", + " ('Bluetooth Headphones', 'Noise cancelling, over the ear headphones', 250.00, 'Accessories', 5, 'SKU12348', 'https://example.com/headphones.jpg', '{\"category\" : \"Accessories\", \"name\" : \"Bluetooth Headphones\", \"description\" : \"Noise cancelling, over the ear headphones\"}', 
ARRAY[0.022783848,-0.057248034,-0.047374193,-0.04242414,0.049324054,0.0077371066,0.017048897,0.00500827,0.008471851,0.010170231,0.054357704,0.018568166,-0.024179503,0.026519066,0.026404649,-0.06330503,0.014405935,-0.015520485,0.0052459002,-0.0398403,0.0026082278,-0.026374431,0.020055598,-0.009738811,0.013321584,-0.033184614,0.034118295,-0.0011876881,-0.04513898,0.04878162,-0.0725106,0.018109042,-0.075869314,-0.023766529,0.015067321,-0.019572936,0.024169574,-0.01577634,-0.048197363,0.049358875,-0.030935159,-0.0363981,-0.04534119,-0.044748895,-0.004167742,-0.02121328,-0.052715167,0.0006209187,0.036595955,-0.085123576,0.052309636,-0.01926014,0.00049565616,-0.0057477825,0.010993081,-0.06675727,0.0037074706,-0.033420403,-0.052601676,0.023439946,-0.01880516,-0.009576131,-0.0114066675,0.10504714,0.00022831495,0.029810086,-0.0044366047,0.043377023,0.06093195,-0.004545408,0.013371212,-0.029174658,0.06625106,-0.0077476054,-0.0163617,-0.056035727,-0.024698364,0.06076837,0.020102862,0.038081013,-0.018504761,-0.027918378,0.03942784,0.004596525,-0.057653908,0.034515597,0.010063118,0.04525672,0.023651283,0.03596632,-0.0378574,-0.013078957,0.021554954,-0.0606351,-0.007272484,0.044470455,-0.015513987,-0.018171282,-0.014020262,-0.040379126,-0.032836802,-0.055859733,-0.05644243,-0.001610613,-0.05527219,-0.00052593346,-0.00546389,0.02911079,-0.0037673921,0.036246333,-0.057133533,0.043779045,-0.0028422247,-0.044305976,0.05993566,-0.005543668,-0.0015800337,0.07515586,-0.00020748413,0.03876171,0.026035579,0.012980581,0.056657698,0.020252425,0.029382393,0.011205804,0.039896134,0.04349186,0.08402962,-0.0031059172,-0.022395832,-0.023471512,0.029480197,0.0038065156,0.07106566,0.07560159,0.019708911,0.0063190344,0.06826459,0.05426478,-0.016353253,-0.016603524,0.035430502,0.01285351,-0.044608854,0.06445639,0.027575186,-0.020047447,0.07155171,-0.024042875,0.007684551,-0.057774883,-0.05863421,0.04027459,0.034241315,0.029786138,-0.011771758,-0.008067332,0.005154275,0.017256541,0.012795448,0.036120
6,0.046198364,0.007581977,-0.0643159,-0.032997373,0.025989803,0.039828006,0.00950064,0.043332074,0.016609278,0.034839373,-0.022875424,-0.028605282,-0.017703732,-0.06238004,0.010994231,-0.0007306017,-0.034711856,-0.0440203,-0.025970237,-0.04595589,0.030582627,0.0073314123,-0.017986864,-0.055571377,0.082270294,-0.018736921,-0.0012149982,-0.0060279733,0.0044796504,0.025173035,-0.037219252,0.00027956237,-0.010430433,0.02825617,-0.046855696,0.018841878,0.0435598,0.005803966,0.0019149927,0.092197396,0.022937872,-0.0033373323,0.072473325,-0.014439769,0.047117453,-0.08000118,-0.012863106,0.0260884,-0.04135028,0.0070068296,-0.07510927,0.03672727,0.033531025,0.042364623,-0.019229556,0.0048453975,0.031276144,0.014006409,0.016036421,-0.017694592,-0.036794797,0.014908425,0.030831292,0.03190712,-0.022060342,0.041704472,0.017002491,-0.06408182,0.03923344,-0.02587273,-0.017719302,-0.025430005,0.06814103,-0.009046621,0.033220492,-0.033640996,-0.02523642,0.048086986,-0.035158273,0.048114188,0.043751266,0.01995209,-0.0295469,-0.020247698,-0.053099316,0.032099206,-0.045260355,0.0326798,-0.0043251985,-0.052964494,0.07017924,-0.0037189184,-0.03395965,0.040903587,-0.060891,-0.010537573,-0.030650055,-0.029651405,0.013975478,0.007255845,-0.010439494,-0.011794211,-0.05466926,0.024609366,-0.017408509,-0.05243266,-0.020957882,0.037831362,0.0216147,-0.035116594,0.03829302,-0.016048789,-0.035066966,-0.013764898,0.00042713518,0.030633073,-0.008326726,-0.015224956,0.012373721,0.0844943,0.0245434,-0.046264216,-0.011655971,-0.013199105,-0.05529712,0.006216126,0.038966317,0.04622981,-0.039118554,-0.044550307,-0.009771392,-0.006652356,-0.023040479,0.010476257,-0.004093151,0.008969803,0.010324751,-0.022387082,0.023577597,0.019100022,0.008391375,-0.07391311,-0.02210422,0.021720598,-0.0109519595,-0.0820701,0.022086475,-0.003670014,0.0019491176,-0.053155318,-0.022906458,0.0148452455,0.015515676,0.019605495,-0.02868708,-0.01828674,-0.0005499542,0.06639364,-0.01821442,0.09175476,-0.0016622626,-0.059729476,-
0.019477114,0.025505545,-0.034742665,0.028956799,-0.019135797,-0.016046764,-0.03779796,0.06325585,-0.04046284,-0.0065921973,-0.0019740656,0.053527426,-0.06304376,-0.035805233,-0.04792203,-0.0012729234,0.048093352,0.007456611,-0.058022104,-0.07442454,0.012629627,-0.027595298,0.0021199721,-0.027464667,0.02698153,0.00060683774,0.044545636,0.06083593,-0.0031620082,-0.025901018,-0.034706157,0.013555886,0.042545,0.056980383,0.009854132,-0.06190446,-0.034308147,0.0043845526,0.017239122,-0.031214224,-0.010807414,0.026710719,0.022394834,-0.009421089,-0.04236166,0.022885358,0.01318956,-0.019174583,-0.0026612883,0.010784672,-0.010333064,-0.043234736,-0.054500565,-0.027753199,-0.022639737,-0.03062474,0.008183766,-0.017117208,0.03024305,-0.03615811,-0.01150264,-0.03863528,0.04852956,0.024548976,-0.012997513,-0.0041008275,0.03406041,-0.0070994645,0.072934166,0.02805505,-0.030694276,-0.035828616,-0.017640414,-0.03957751,0.06840472,0.0046152286,-0.020437988,-0.025648775,-0.083415866,-0.04167123,-0.035016168,-0.015291769,0.009293348,0.04628708,-0.014721913,-0.0033228637,0.04403616,0.061276685,0.037830554,-0.041214965,-0.084479295,-0.0012414041,0.030978376,-0.017235488,0.04445431,0.05231969,-0.0008037167,0.045372415,0.02067265,0.024952972,-0.033815585,-0.03739797,0.034983158,-0.016312862,0.017926387,-0.02016297,-0.019343764,0.017820694,-0.011671569,0.02410841,-0.042012513,-0.03900872,0.032663334,0.011938514,-0.029834026,0.047740217,-0.0058686035,-0.046729274,0.05985927,0.007610642,-0.060446266,-0.04216537,0.017497085,-0.06986214,0.076023735,-0.10476386,-0.020937927,-0.073560745,-0.014322972,-0.048601817,-0.0056885225,-0.03637434,0.04715089,0.054749545,0.014689732,0.006048463,0.046543427,-0.017363597,-0.03678888,-0.08802858,0.063708976,0.021423126,0.04030153,-0.036243204,0.036450744,0.024569608,0.016401349,-0.022465378,-0.0034262848,0.060547307,-0.014745138,-0.020591581,-0.0054737274,-0.0074623367,0.06138278,-0.016604895,0.0032445828,0.009028142,0.002864045,0.001341044,-0.03825005,0.0
3237135,-0.009647875,-0.0470159,-0.024240978,0.017859152,0.010279892,-0.014414872,-0.017152937,0.020384172,0.008366546,-0.003495199,-0.024638942,-0.031768326,0.06240018,-0.0067493794,-0.04322142,-0.0030645356,-0.0027114467,-0.0072583677,0.06152745,-0.05525731,0.01201016,0.034348775,-0.032004267,0.027236925,0.05926736,-0.010569189,-0.023563573,0.0018119658,0.04231199,0.01966649,-0.014960187,0.029649874,0.01606933,0.0033748902,0.021692606,0.00794783,-0.113133654,0.012736659,0.03742399,-0.010987754,0.02547777,0.026347551,-0.09020402,0.009588993,-0.043276373,0.106708415,-0.049185734,0.007007848,0.030148245,-0.026434064,-0.017702276,0.0007948643,0.026716268,0.013030543,0.013651695,-0.019745413,-0.0087912055,0.0046337135,0.038207695,0.0059329793,0.016351476,-0.069271706,-0.006662047,0.028512167,0.0038343759,-0.00027066944,0.042824432,-0.038911343,0.012483791,-0.06324616,-0.0023198558,-0.0028892683,-0.043326154,-0.035926916,-0.006348816,-0.025913015,-0.015930604,0.040185526,0.044628017,0.039083507,0.009474702,0.017115341,0.05052131,-0.01357451,0.020331299,0.038159154,0.0349774,0.015846666,0.022699736,-0.022343196,-0.056707054,-0.0010885954,0.020071063,-0.000391925,0.06397024,-0.024627347,-0.005184313,0.05034518,0.009061781,0.034097236,-0.007981921,-0.03801412,-0.0028578758,0.013567372,-0.008190868,0.033633735,-0.053976685,-0.025468381,-0.044378527,0.032747604,0.036202736,0.038062613,0.014995585,0.0036792904,-0.01603846,-0.047275733,0.066113465,0.0045884387,0.08915791,-0.0068142917,-0.0064188805,-0.060516927,-0.016080644,0.041549493,-0.008397882,-0.0071816393,0.0064753946,-0.0017467311,-0.019128935,0.0164788,0.022168875,0.011003241,-0.026863558,-0.05437178,-0.032724023,-0.0042122444,0.010392475,0.0042387135,0.04948556,-0.013747793,0.051330764,-0.0050607547,0.054571416,0.025556272,-0.00022029632,0.047628347,0.01427685,-0.020254403,-0.03590239,0.011610469,0.041846078,-0.02470694,-0.013697807,-0.021193847,-0.04341633,-0.0041078446,0.053439837,-0.021625757,-0.037942924,0.077743
85,0.005912317,-0.0929516,-0.025328774,0.025199909,-0.041145753,0.017296704,-0.0050417483,0.012186051,0.0024183579,-0.025558045,-0.0051468383,-0.07548276,0.028603543,-0.04549798,0.007635448,0.010916566,-0.029269122,0.01546215,-0.024502348,-0.021702306,-0.025917016,-0.016031386,-0.0012059321,0.031981774,-0.056502126,0.025166377,0.04160211,-0.020680273,0.010293909,0.029529357,-0.01568588,0.026115898,-0.01032236,0.02089118,-0.01709118,-0.0597839,-0.029326,0.045068808,0.00761455,0.034416553,0.022160128,-0.025166225,-0.04248117,-0.029465536,0.027829373,0.006342403,-0.05032602,0.040032476,0.07705231,-0.01583979,-0.0049204687,-0.0140532,0.022657644,0.05293866,-0.009608256,-0.005003684,-0.02478457,0.029608795,-0.022698086,0.003895031,-0.0039730286,0.023749523,0.07514458,-0.036099195,0.04289709,-0.02329434,-0.06419562,0.037709154,0.0004289863,-0.02686404,0.0032855049,-0.030955583,0.018836787,0.033646755,0.022193655,-0.028475504,0.00394302,-0.05765806,-0.062945105,0.024937302,-0.06828151,0.016094193,-0.036405172,-0.016450962,0.04907368,-0.05975235,-0.04858766,0.07675482,-0.06289323,0.052024625,0.018600732,0.038572676,-0.0011811757,-0.07612922,0.03844793,-0.015206284,0.05163554,0.046980042,0.023522004,0.00037627618,0.011324654,-0.028600419,6.0430884e-05,0.00431597,-0.023766082,-0.015001608,-0.018692184,0.08730754,0.032889076,-0.018612336,-0.019428827,-0.002722986,-0.020110032,0.04016962,-0.043657966,0.044247244,0.019661218,0.042629678,0.016911589,0.038489193,-0.0036892071,0.015036206]::vector(768)),\n", + " ('Backpack', 'Waterproof backpack with laptop compartment', 59.99, 'Accessories', 30, 'SKU12349', 'https://example.com/backpack.jpg', '{\"category\" : \"Accessories\", \"name\" : \"Backpack\", \"description\" : \"Waterproof backpack with laptop compartment\"}', 
ARRAY[-0.0028279827,-0.02903348,-0.02541054,-0.025740657,0.06572692,-0.01105207,-0.018005589,0.014476618,0.0039552255,0.04976717,0.034852527,0.018194634,-0.010718678,0.012003344,-0.008418802,-0.026018273,0.029329967,-0.016163627,0.009272989,-0.03639675,0.011046671,-0.008078595,0.023365447,-0.0033083789,0.020028763,-0.025491415,0.033595297,-0.0116388025,-0.057485484,0.06268812,-0.05302806,0.033510745,-0.06083909,-0.03115934,-0.014793818,-0.028653687,-0.011399838,0.03950949,-0.03437827,-0.001663737,-0.01088612,-0.01894241,-0.055767413,-0.0044360803,0.043946534,0.012161133,0.03891473,0.001239441,0.009908146,-0.07272227,0.055397917,0.003453955,0.016562339,-0.041937787,0.05197343,-0.026436094,-0.025229415,-0.034988422,-0.02628748,0.022921052,0.013600747,-0.0020118777,-0.033795673,0.06700571,0.016018055,-0.024256106,-0.02621731,0.045516666,0.05339654,0.0040287147,-0.03260985,0.0014520925,0.064204894,-0.07453437,-0.05054596,-0.042698923,-0.010596,0.013536595,0.0057951836,0.02499754,-0.008574824,-0.0074555897,-0.03567392,-0.016175417,-0.048651025,0.051804803,0.032162882,0.015001442,-0.015329716,0.028219966,-0.031235777,-0.011996138,0.001956758,-0.057833184,-0.022306677,0.031238675,-0.006414606,-0.06930158,-0.017475452,-0.027142663,0.020731354,-0.02221535,0.031049741,0.02081393,-0.022421336,0.0264318,-0.009509332,0.03522677,-0.004379289,0.011600757,-0.022017384,0.010730822,-0.010784208,-0.032706123,0.011207074,-0.023580823,0.013793131,0.05083659,0.047280807,0.048402432,0.05347524,-0.01837716,0.005956893,0.038448945,0.056967188,0.0107236095,0.03256511,0.06276655,0.04472847,0.04416061,-0.010116117,-0.048367113,0.029135885,0.010681488,0.036315914,0.056885246,0.03745567,-0.045721106,0.060501557,0.07454113,-0.018330548,-0.0113306865,-0.011580698,0.020741342,0.020118712,0.08663372,-0.009871896,0.0153012,0.05436686,-0.032210644,-0.029824084,0.023739373,-0.024163425,0.025129095,-0.016016128,0.04870382,0.013377057,0.012678613,0.011070294,-0.0072714896,0.042209458,0.029714484,0.042831
365,0.032464053,-0.047759824,-0.032160178,-0.014084912,0.016434442,0.009782443,0.0013573115,-0.015243139,0.007621731,-0.037185922,-0.054615762,-0.008570435,-0.00029953485,-0.012346052,0.00016998274,-0.03163527,-0.0139267165,-0.07079747,-0.007061694,0.020720486,0.0025725542,0.019498186,-0.03700232,0.10145702,-0.004775887,-0.042089477,-0.023965659,-0.04021527,-0.0004672301,0.007410538,-0.0024715534,0.013863051,0.02261263,-0.027591249,0.020157337,0.012993745,-0.0067202765,-0.029478177,0.052134037,0.020799996,0.014809602,0.06626069,0.0069596902,0.063764,-0.04220143,-0.0040134583,0.007221788,0.014255095,0.059271786,-0.04741277,0.014235989,0.067689635,-0.005667792,0.03801926,0.0117749525,0.025480399,0.011015113,0.0037910545,0.00022392142,-0.044315543,0.010447604,0.010668871,0.0779741,-0.08010141,0.04994428,0.0024064495,-0.04755275,-0.0114773,0.014421721,-0.028229935,-0.06231835,0.05197635,-0.00798093,-0.0025467642,0.010583627,-0.017485484,0.048588324,-0.0008222008,0.033517472,0.007129084,0.0010124474,-0.05219366,0.017978905,-0.01833836,0.019664295,-0.008339645,0.013213594,8.404173e-05,-0.058585837,0.06634499,-0.032446846,-0.066239364,0.0011773852,-0.07504017,0.026009388,-0.026110237,-0.00089784985,0.004558591,-0.027107328,0.017480537,-0.0062587988,-0.008309775,0.024417007,0.022020336,-0.025295774,0.0089702625,0.026482984,0.008462929,-0.043885507,0.023143305,-0.012536918,-0.025114551,-0.030675266,-0.030063663,-0.004634334,-0.0024470752,-0.03869859,0.015594325,0.0131572345,0.0029243943,-0.046118148,-0.03834942,-0.022946607,-0.0071579637,-0.042097863,-0.01229437,0.024193348,-0.03535916,-0.05725744,-0.014191351,-0.034702625,-0.03553529,-0.0063754944,0.0024684118,0.042859882,0.013016258,-0.02985961,-0.0020391515,0.030625137,0.016144354,-0.049042817,0.024231678,-0.025589447,-0.05898161,-0.023193993,0.031626217,-0.028190944,0.017940147,-0.049932066,-0.04810013,0.047244985,0.1082508,0.001041191,-0.057233974,-0.006368648,-0.06945289,0.048442855,-0.021192377,0.10568124,0.053165488,
-0.0084766345,0.031292096,-0.009400329,-0.042162478,0.06982496,-0.014560452,-0.0073914286,-0.048956916,0.030368945,0.022202695,-0.0050742053,-0.012722453,-0.011377622,-0.051865157,-0.0070718606,-0.01745792,-0.02462795,0.030636197,0.030104883,-0.04482826,-0.11079195,-0.024324121,-0.002861835,-0.014245193,-0.020608244,0.03153579,-0.009367316,0.014898636,0.033479474,-0.015162162,0.01307384,-0.052216247,-0.025208864,0.014302212,0.023454865,0.030064361,-0.00028293114,-0.05237653,0.02271106,0.0057998034,0.021696828,0.0065965196,0.061783127,0.052609395,0.018527359,-0.012383652,0.036548115,6.0759903e-05,-0.027102679,0.0020538126,-0.026467739,-0.00931995,-0.056754645,-0.059189495,0.022508893,-0.037084196,0.008752761,0.011397571,-0.001640177,0.010061019,0.024978038,0.01750796,0.0017406448,0.0692028,0.042931892,0.008515072,-0.03527143,0.006649334,-0.0015101181,0.09099013,0.0423155,-0.060909722,-0.007118597,-0.0070489836,-0.05583034,0.035233498,-0.008949495,-0.021592604,-0.023997912,-0.030185444,-0.015039309,-0.07469254,-0.05510056,0.029319923,0.01650634,-0.0660325,-0.015404232,0.03715267,0.03294396,0.005133208,-0.071616374,-0.04183193,-0.039515678,0.06556278,-0.006204309,0.018765671,0.0087025305,0.04139539,0.039423864,-0.0096283825,-0.03788884,-0.030308004,0.016888767,0.033892095,-0.0046063373,0.036512673,0.046478424,0.030432703,-0.008351917,0.038958482,0.030963391,-0.0012744869,-0.068324916,0.035514664,0.029101191,0.019952206,-0.035990257,0.05016547,-0.0034300084,0.011099454,-0.01642832,-0.055300374,-0.07178654,0.023697836,-0.02809622,0.054089297,-0.1083301,-0.018408947,-0.075191386,-0.0048826155,-0.042217527,-0.069461025,-0.06703293,0.009000863,0.06276143,-0.0017238993,0.03036515,-0.009982445,0.055421855,-0.027764114,-0.05543302,0.022685751,0.022210898,0.049183954,-0.0047965907,0.055648796,0.011152965,-0.014035957,-0.02337775,-0.01123261,0.052066986,-0.006916061,0.03199984,-0.094863154,0.003547006,0.041498255,0.004490882,0.020994756,-0.07455022,0.036187306,-0.0051827626,-0.0
17956927,-0.00029976605,-0.044009093,0.0028350798,-0.052361596,0.07876513,-0.06365592,0.0017824164,0.017088404,-0.038679466,-0.008001763,-0.0013830748,-0.025812596,-0.0182766,-8.765931e-05,-0.0072022257,-0.046436142,-0.072371304,0.0057044053,-0.03468649,0.056389496,-0.020051511,0.031401794,0.0026272596,-0.045338016,-0.029466175,0.008883405,0.036455907,-0.012484258,0.0015844881,0.036832172,0.023578366,-0.043958467,0.00577308,0.055652507,-0.036696434,0.002894534,-0.032786682,-0.05258521,-0.006260205,0.030400572,-0.061743345,0.021158593,0.028482735,-0.061397683,-0.015825676,0.01941984,0.075950265,-0.11372872,-0.018362995,-0.010228874,-0.009783626,0.023449693,0.027557475,-0.0023083165,-0.0021188299,0.05987247,-0.00944442,-0.020868102,0.03482851,0.039515875,-0.026193311,0.023197955,-0.07931663,0.005395495,0.013140455,-0.061495673,0.0022219154,0.038023517,-0.05545234,0.020771723,-0.0067305462,-0.03169365,-0.021337083,0.019638145,-0.053754907,-0.035756346,-0.036120877,-0.05413345,-0.0077516357,0.03129875,0.016264724,-0.011121187,0.016678393,0.0678958,-0.014889522,-0.019517552,-0.0059457496,0.018003179,-0.0072531863,0.081852585,-0.030259738,-0.05358454,0.020454926,-0.009424692,0.10091245,-0.012819172,-0.011656013,0.031110896,0.08538375,-0.026021762,0.047623295,0.04384129,-0.05093276,0.014624959,0.026958883,-0.004577614,0.02551685,-0.019736024,0.0063903728,-0.024696782,-0.041850932,0.027209712,0.0050771283,-0.028201208,-0.03125501,-0.001541728,-0.06142714,0.054404832,-0.007287412,0.0626698,0.03180891,-0.015927717,-0.04500077,-0.0022995493,0.0124429,-0.015138294,-0.026622217,0.008842311,-0.010787062,0.0010311591,0.0013770667,0.039663706,-0.02192414,-0.019322718,-0.051264115,-0.011981459,-0.03414706,-0.006800422,-0.028382706,0.043155897,-0.007300542,0.02638807,-0.019196216,0.06930381,0.020622948,0.014042502,0.06754253,-0.043790415,0.015294639,-0.040941276,0.028382495,-0.013607999,-0.040120583,0.008768077,-0.0101868035,-0.060808867,-0.013499631,0.059239235,0.035230562,-0.019976
182,0.11870333,0.053272087,-0.08745547,-0.018802922,0.004555603,-0.028306624,0.0020639726,-0.018859716,0.026370116,0.0097041875,-0.0029847843,0.017317675,-0.0533067,0.038994376,-0.03322375,-0.052456018,0.050101582,-0.015041677,-0.03370439,-0.010739062,-0.039727744,-0.045931656,-0.08658831,0.05190126,0.055936754,-0.07664951,0.041408025,0.011245535,-0.012530026,0.024861438,0.016954603,0.017269976,0.06397909,-0.000105038154,0.036761504,0.006065827,-0.02139009,-0.025604198,0.010828613,0.023636553,0.04226646,0.041076142,0.025892248,-0.051934887,0.0029032188,0.040332098,-0.015436589,-0.057878137,0.005353198,0.064739525,-0.006427803,-0.024176747,0.011304507,0.03381613,0.08625095,-0.027353497,-0.039551895,-0.04934357,-0.016709028,0.024133967,0.00441431,-0.048314437,0.040782917,0.026620803,-0.02146332,0.030112874,-0.027528606,-0.016772546,0.005690125,-0.0047134855,-0.036793064,0.04092668,-0.02411072,0.023851473,0.07727627,-0.006492274,-0.0025583038,0.0017014288,-0.0541687,-0.010395329,0.031044465,-0.0536995,0.029957417,-0.040688735,-0.037072316,0.01663893,-0.04231374,-0.030213326,0.0061428403,-0.06634084,0.06036701,0.016658397,0.024410319,-0.03309207,-0.03735754,-0.04359427,-0.013476715,0.00078163255,0.033615876,0.022759296,-0.003551954,0.017715035,-0.0072518513,0.033236742,-0.0070533687,-0.05334901,-0.014660441,0.0025560227,0.03979979,-0.00433087,-0.018232862,-0.017161474,0.008870558,0.021989124,0.078787796,-0.009815632,0.022819351,0.020795409,0.028896132,-0.0061202813,0.012352534,-0.009014175,0.0024110335]::vector(768))\n", + " ```\n", + "
\n", + "\n", + "Here is how this table mapped to `PostgresVectorStore`:\n", + "\n", + "- **`id_column=\"product_id\"`**: ID column uniquely identifies each row in the products table.\n", + "\n", + "- **`content_column=\"description\"`**: The `description` column contains text descriptions of each product. This text is used by the `embedding_service` to create vectors that go in embedding_column and represent the semantic meaning of each description.\n", + "\n", + "- **`embedding_column=\"embed\"`**: The `embed` column stores the vectors created from the product descriptions. These vectors are used to find products with similar descriptions.\n", + "\n", + "- **`metadata_columns=[\"name\", \"category\", \"price_usd\", \"quantity\", \"sku\", \"image_url\"]`**: These columns are treated as metadata for each product. Metadata provides additional information about a product, such as its name, category, price, quantity available, SKU (Stock Keeping Unit), and an image URL. This information is useful for displaying product details in search results or for filtering and categorization.\n", + "\n", + "- **`metadata_json_column=\"metadata\"`**: The `metadata` column can store any additional information about the products in a flexible JSON format. 
This allows for storing varied and complex data that doesn't fit into the standard columns.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Set an existing table name\n", + "TABLE_NAME = \"products\"\n", + "# SCHEMA_NAME = \"my_schema\"\n", + "\n", + "# Initialize PostgresVectorStore\n", + "custom_store = await PostgresVectorStore.create(\n", + " engine=engine,\n", + " table_name=TABLE_NAME,\n", + " # schema_name=SCHEMA_NAME,\n", + " embedding_service=embedding,\n", + " # Connect to existing VectorStore by customizing below column names\n", + " id_column=\"product_id\",\n", + " content_column=\"description\",\n", + " embedding_column=\"embed\",\n", + " metadata_columns=[\"name\", \"category\", \"price_usd\", \"quantity\", \"sku\", \"image_url\"],\n", + " metadata_json_column=\"metadata\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note: \n", + "\n", + "1. Optional: If the `embed` column is newly created or has different dimensions than supported by embedding model, it is required to one-time add the embeddings for the old records, like this: \n", + "\n", + " `ALTER TABLE products ADD COLUMN embed vector(768) DEFAULT NULL`\n", + "\n", + "1. For new records, added via `VectorStore` embeddings are automatically generated." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Hybrid Search with PostgresVectorStore\n", + "\n", + "A Hybrid Search combines multiple lookup strategies to provide more comprehensive and relevant search results. Specifically, it leverages both dense embedding vector search (for semantic similarity) and TSV (Text Search Vector) based keyword search (for lexical matching). 
This approach is particularly powerful for applications requiring efficient searching through customized text and metadata, especially when a specialized embedding model isn't feasible or necessary.\n", + "\n", + "By integrating both semantic and lexical capabilities, hybrid search helps overcome the limitations of each individual method:\n", + "* **Semantic Search**: Excellent for understanding the meaning of a query, even if the exact keywords aren't present. However, it can sometimes miss highly relevant documents that contain the precise keywords but have a slightly different semantic context.\n", + "* **Keyword Search**: Highly effective for finding documents with exact keyword matches and is generally fast. Its weakness lies in its inability to understand synonyms, misspellings, or conceptual relationships." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hybrid Search Config\n", + "\n", + "You can take advantage of hybrid search with PostgresVectorStore using the `HybridSearchConfig`.\n", + "\n", + "With a `HybridSearchConfig` provided, the `PostgresVectorStore` class can efficiently manage a hybrid search vector store using Cloud SQL Postgres as the backend, automatically handling the creation and population of the necessary TSV columns when possible." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Building the config\n", + "\n", + "Here are the parameters to the hybrid search config:\n", + "* **tsv_column:** The column name for TSV column. Default: `_tsv`\n", + "* **tsv_lang:** Value representing a supported language. 
Default: `pg_catalog.english`\n", + "* **fts_query:** If provided, this would be used for secondary retrieval instead of user provided query.\n", + "* **fusion_function:** Determines how the results are to be merged, default is equal weighted sum ranking.\n", + "* **fusion_function_parameters:** Parameters for the fusion function\n", + "* **primary_top_k:** Max results fetched for primary retrieval. Default: `4`\n", + "* **secondary_top_k:** Max results fetched for secondary retrieval. Default: `4`\n", + "* **index_name:** Name of the index built on the `tsv_column`\n", + "* **index_type:** GIN or GIST. Default: `GIN`" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here is an example `HybridSearchConfig`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_google_cloud_sql_pg import (\n", + " HybridSearchConfig,\n", + " reciprocal_rank_fusion,\n", + ")\n", + "\n", + "hybrid_search_config = HybridSearchConfig(\n", + " tsv_column=\"hybrid_description\",\n", + " tsv_lang=\"pg_catalog.english\",\n", + " fusion_function=reciprocal_rank_fusion,\n", + " fusion_function_parameters={\n", + " \"rrf_k\": 60,\n", + " \"fetch_top_k\": 10,\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Note:** In this case, we have mentioned the fusion function to be a `reciprocal rank fusion` but you can also use the `weighted_sum_ranking`.\n", + "\n", + "Make sure to use the right fusion function parameters\n", + "\n", + "`reciprocal_rank_fusion`:\n", + "* rrf_k: The RRF parameter k. Defaults to 60\n", + "* fetch_top_k: The number of documents to fetch after merging the results. Defaults to 4\n", + "\n", + "`weighted_sum_ranking`:\n", + "* primary_results_weight: The weight for the primary source's scores. Defaults to 0.5\n", + "* secondary_results_weight: The weight for the secondary source's scores. 
Defaults to 0.5\n", + "* fetch_top_k: The number of documents to fetch after merging the results. Defaults to 4\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Usage\n", + "\n", + "Let's assume we are using the previously mentioned table [`products`](#create-a-vector-store-using-existing-table), which stores product details for an eComm venture.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### With a new hybrid search table\n", + "To create a new postgres table with the tsv column, specify the hybrid search config during the initialization of the vector store.\n", + "\n", + "In this case, all the similarity searches will make use of hybrid search." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "TABLE_NAME = \"hybrid_search_products\"\n", + "VECTOR_SIZE = 768\n", + "\n", + "await engine.ainit_vectorstore_table(\n", + " table_name=TABLE_NAME,\n", + " # schema_name=SCHEMA_NAME,\n", + " vector_size=VECTOR_SIZE,\n", + " id_column=\"product_id\",\n", + " content_column=\"description\",\n", + " embedding_column=\"embed\",\n", + " metadata_columns=[\"name\", \"category\", \"price_usd\", \"quantity\", \"sku\", \"image_url\"],\n", + " metadata_json_column=\"metadata\",\n", + " hybrid_search_config=hybrid_search_config,\n", + " store_metadata=True,\n", + ")\n", + "\n", + "vs_hybrid = await PostgresVectorStore.create(\n", + " engine,\n", + " table_name=TABLE_NAME,\n", + " # schema_name=SCHEMA_NAME,\n", + " embedding_service=embedding,\n", + " # Connect to existing VectorStore by customizing below column names\n", + " id_column=\"product_id\",\n", + " content_column=\"description\",\n", + " embedding_column=\"embed\",\n", + " metadata_columns=[\"name\", \"category\", \"price_usd\", \"quantity\", \"sku\", \"image_url\"],\n", + " metadata_json_column=\"metadata\",\n", + " hybrid_search_config=hybrid_search_config,\n", + ")\n", + "\n", + "# Fetch 
documents from the previously created store to fetch product documents\n", + "docs = await custom_store.asimilarity_search(\"products\", k=5)\n", + "# Add data normally to the hybrid search vector store, which will also add the tsv values in tsv_column\n", + "await vs_hybrid.aadd_documents(docs)\n", + "\n", + "# Use hybrid search\n", + "hybrid_docs = await vs_hybrid.asimilarity_search(\"products\", k=5)\n", + "print(hybrid_docs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### With a pre-existing table\n", + "\n", + "If a hybrid search config is **NOT** provided during `init_vectorstore_table` while creating a table, the table will not contain a tsv_column. In this case you can still take advantage of hybrid search using the `HybridSearchConfig`.\n", + "\n", + "The specified TSV column is not present but the TSV vectors are created dynamically on-the-go for hybrid search." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Set the existing table name\n", + "TABLE_NAME = \"products\"\n", + "# SCHEMA_NAME = \"my_schema\"\n", + "\n", + "hybrid_search_config = HybridSearchConfig(\n", + " tsv_lang=\"pg_catalog.english\",\n", + " fusion_function=reciprocal_rank_fusion,\n", + " fusion_function_parameters={\n", + " \"rrf_k\": 60,\n", + " \"fetch_top_k\": 10,\n", + " },\n", + ")\n", + "\n", + "# Initialize PostgresVectorStore with the hybrid search config\n", + "custom_hybrid_store = await PostgresVectorStore.create(\n", + " engine,\n", + " table_name=TABLE_NAME,\n", + " # schema_name=SCHEMA_NAME,\n", + " embedding_service=embedding,\n", + " # Connect to existing VectorStore by customizing below column names\n", + " id_column=\"product_id\",\n", + " content_column=\"description\",\n", + " embedding_column=\"embed\",\n", + " metadata_columns=[\"name\", \"category\", \"price_usd\", \"quantity\", \"sku\", \"image_url\"],\n", + " metadata_json_column=\"metadata\",\n", + " 
hybrid_search_config=hybrid_search_config,\n", + ")\n", + "\n", + "# Use hybrid search\n", + "hybrid_docs = await custom_hybrid_store.asimilarity_search(\"products\", k=5)\n", + "print(hybrid_docs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this case, all the similarity searches will make use of hybrid search." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Applying Hybrid Search to Specific Queries\n", + "\n", + "To use hybrid search only for certain queries, omit the configuration during initialization and pass it directly to the search method when needed." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Use hybrid search\n", + "hybrid_docs = await custom_store.asimilarity_search(\n", + " \"products\", k=5, hybrid_search_config=hybrid_search_config\n", + ")\n", + "print(hybrid_docs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Hybrid Search Index\n", + "\n", + "Optionally, if you have created a Cloud SQL PG table with a tsv_column, you can create an index." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "await vs_hybrid.aapply_hybrid_search_index()" + ] } ], "metadata": { diff --git a/integration.cloudbuild.yaml b/integration.cloudbuild.yaml index 18414b8e..bc0b0d8f 100644 --- a/integration.cloudbuild.yaml +++ b/integration.cloudbuild.yaml @@ -62,7 +62,7 @@ substitutions: _DATABASE_PORT: "5432" _DATABASE_ID: test-database _REGION: us-central1 - _VERSION: "3.9" + _VERSION: "3.10" _IP_ADDRESS: "127.0.0.1" options: diff --git a/pyproject.toml b/pyproject.toml index 8f656b2c..737ff6f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,19 +4,16 @@ dynamic = ["version"] description = "LangChain integrations for Google Cloud SQL for PostgreSQL" readme = "README.rst" license = {file = "LICENSE"} -requires-python = ">=3.9" +requires-python = ">=3.10" authors = [ {name = "Google LLC", email = "googleapis-packages@google.com"} ] dependencies = [ "cloud-sql-python-connector[asyncpg] >= 1.10.0, <2.0.0", - "langchain-core>=0.2.36, <1.0.0 ", "numpy>=1.24.4, <3.0.0; python_version >= '3.11'", "numpy>=1.24.4, <=2.2.6; python_version == '3.10'", - "numpy>=1.24.4, <=2.0.2; python_version <= '3.9'", - "pgvector>=0.2.5, <1.0.0", - "SQLAlchemy[asyncio]>=2.0.25, <3.0.0" + "langchain-postgres>=0.0.16", ] classifiers = [ @@ -24,10 +21,10 @@ classifiers = [ "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", ] [tool.setuptools.dynamic] @@ -41,17 +38,17 @@ Changelog = "https://github.com/googleapis/langchain-google-cloud-sql-pg-python/ [project.optional-dependencies] langgraph = [ - "langgraph-checkpoint>=2.0.9, <3.0.0" + "langgraph-checkpoint>=3.0.0, <3.1.0" ] test = [ - 
"black[jupyter]==25.1.0", - "isort==6.0.1", - "mypy==1.15.0", + "black[jupyter]==25.12.0", + "isort==7.0.0", + "mypy==1.19.1", "pytest-asyncio==0.26.0", - "pytest==8.4.1", - "pytest-cov==6.2.1", - "langchain-tests==0.3.20", - "langgraph==0.5.2" + "pytest==8.4.2", + "pytest-cov==7.0.0", + "langchain-tests==1.1.0", + "langgraph==1.0.4" ] [build-system] @@ -65,7 +62,7 @@ target-version = ['py39'] profile = "black" [tool.mypy] -python_version = 3.9 +python_version = "3.10" warn_unused_configs = true disallow_incomplete_defs = true diff --git a/requirements.txt b/requirements.txt index e3045bbc..feb341ca 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,5 @@ -cloud-sql-python-connector[asyncpg]==1.18.2 -langchain-core==0.3.68 -numpy==2.3.1; python_version >= "3.11" +cloud-sql-python-connector[asyncpg]==1.19.0 +numpy==2.3.5; python_version >= "3.11" numpy==2.2.6; python_version == "3.10" -numpy==2.0.2; python_version <= "3.9" -pgvector==0.4.1 -SQLAlchemy[asyncio]==2.0.41 -langgraph==0.5.2 +langgraph==1.0.4 +langchain-postgres==0.0.16 diff --git a/samples/index_tuning_sample/requirements.txt b/samples/index_tuning_sample/requirements.txt index 94fe05d4..52a63c3a 100644 --- a/samples/index_tuning_sample/requirements.txt +++ b/samples/index_tuning_sample/requirements.txt @@ -1,3 +1,3 @@ -langchain-community==0.3.27 -langchain-google-cloud-sql-pg==0.14.0 -langchain-google-vertexai==2.0.27 +langchain-community==0.4.1 +langchain-google-cloud-sql-pg==0.14.1 +langchain-google-vertexai==3.2.1 diff --git a/samples/langchain_on_vertexai/clean_up.py b/samples/langchain_on_vertexai/clean_up.py index 45e57ae5..42c3866a 100644 --- a/samples/langchain_on_vertexai/clean_up.py +++ b/samples/langchain_on_vertexai/clean_up.py @@ -13,6 +13,7 @@ # limitations under the License. 
import asyncio import os +from typing import Any, Coroutine from config import ( CHAT_TABLE_NAME, @@ -32,6 +33,15 @@ TEST_NAME = os.getenv("DISPLAY_NAME") +async def run_on_background(engine: PostgresEngine, coro: Coroutine) -> Any: + """Runs a coroutine on the engine's background loop.""" + if engine._default_loop: + return await asyncio.wrap_future( + asyncio.run_coroutine_threadsafe(coro, engine._default_loop) + ) + return await coro + + async def delete_tables(): engine = await PostgresEngine.afrom_instance( PROJECT_ID, @@ -42,12 +52,14 @@ async def delete_tables(): password=PASSWORD, ) - async with engine._pool.connect() as conn: - await conn.execute(text("COMMIT")) - await conn.execute(text(f"DROP TABLE IF EXISTS {TABLE_NAME}")) - await conn.execute(text(f"DROP TABLE IF EXISTS {CHAT_TABLE_NAME}")) + async def _logic(): + async with engine._pool.connect() as conn: + await conn.execute(text("COMMIT")) + await conn.execute(text(f"DROP TABLE IF EXISTS {TABLE_NAME}")) + await conn.execute(text(f"DROP TABLE IF EXISTS {CHAT_TABLE_NAME}")) + + await run_on_background(engine, _logic()) await engine.close() - await engine._connector.close_async() def delete_engines(): diff --git a/samples/langchain_on_vertexai/create_embeddings.py b/samples/langchain_on_vertexai/create_embeddings.py index 105a86df..370d8262 100644 --- a/samples/langchain_on_vertexai/create_embeddings.py +++ b/samples/langchain_on_vertexai/create_embeddings.py @@ -13,6 +13,7 @@ # limitations under the License. 
import asyncio import uuid +from typing import Any, Coroutine from config import ( CHAT_TABLE_NAME, @@ -32,6 +33,15 @@ from langchain_google_cloud_sql_pg import PostgresEngine, PostgresVectorStore +async def run_on_background(engine: PostgresEngine, coro: Coroutine) -> Any: + """Runs a coroutine on the engine's background loop.""" + if engine._default_loop: + return await asyncio.wrap_future( + asyncio.run_coroutine_threadsafe(coro, engine._default_loop) + ) + return await coro + + async def create_databases(): engine = await PostgresEngine.afrom_instance( PROJECT_ID, @@ -41,10 +51,14 @@ async def create_databases(): user=USER, password=PASSWORD, ) - async with engine._pool.connect() as conn: - await conn.execute(text("COMMIT")) - await conn.execute(text(f'DROP DATABASE IF EXISTS "{DATABASE}"')) - await conn.execute(text(f'CREATE DATABASE "{DATABASE}"')) + + async def _logic(): + async with engine._pool.connect() as conn: + await conn.execute(text("COMMIT")) + await conn.execute(text(f'DROP DATABASE IF EXISTS "{DATABASE}"')) + await conn.execute(text(f'CREATE DATABASE "{DATABASE}"')) + + await run_on_background(engine, _logic()) await engine.close() @@ -95,7 +109,7 @@ async def grant_select(engine): engine, table_name=TABLE_NAME, embedding_service=VertexAIEmbeddings( - model_name="textembedding-gecko@latest", project=PROJECT_ID + model_name="text-embedding-005", project=PROJECT_ID ), ) diff --git a/samples/langchain_on_vertexai/prebuilt_langchain_agent_template.py b/samples/langchain_on_vertexai/prebuilt_langchain_agent_template.py index 472b9da9..efd7fb58 100644 --- a/samples/langchain_on_vertexai/prebuilt_langchain_agent_template.py +++ b/samples/langchain_on_vertexai/prebuilt_langchain_agent_template.py @@ -65,7 +65,7 @@ def similarity_search(query: str) -> list[Document]: engine, table_name=TABLE_NAME, embedding_service=VertexAIEmbeddings( - model_name="textembedding-gecko@latest", project=PROJECT_ID + model_name="text-embedding-005", project=PROJECT_ID ), ) 
retriever = vector_store.as_retriever() @@ -91,9 +91,9 @@ def similarity_search(query: str) -> list[Document]: DISPLAY_NAME = os.getenv("DISPLAY_NAME") or "PrebuiltAgent" remote_app = reasoning_engines.ReasoningEngine.create( - reasoning_engines.LangchainAgent( + reasoning_engines.LangchainAgent( # type: ignore[arg-type] model="gemini-2.0-flash-001", - tools=[similarity_search], + tools=[similarity_search], # type: ignore[list-item] model_kwargs={ "temperature": 0.1, }, @@ -104,4 +104,4 @@ def similarity_search(query: str) -> list[Document]: extra_packages=["config.py"], ) -print(remote_app.query(input="movies about engineers")) +print(remote_app.query(input="movies about engineers")) # type: ignore[attr-defined] diff --git a/samples/langchain_on_vertexai/requirements.txt b/samples/langchain_on_vertexai/requirements.txt index db0df6bc..064bf76a 100644 --- a/samples/langchain_on_vertexai/requirements.txt +++ b/samples/langchain_on_vertexai/requirements.txt @@ -1,5 +1,5 @@ -google-cloud-aiplatform[reasoningengine,langchain]==1.97.0 +google-cloud-aiplatform[reasoningengine,langchain]==1.121.0 google-cloud-resource-manager==1.14.2 -langchain-community==0.3.27 -langchain-google-cloud-sql-pg==0.14.0 -langchain-google-vertexai==2.0.27 +langchain-community==0.3.31 +langchain-google-cloud-sql-pg==0.14.1 +langchain-google-vertexai==2.1.2 diff --git a/samples/langchain_on_vertexai/retriever_agent_with_history_template.py b/samples/langchain_on_vertexai/retriever_agent_with_history_template.py index 7d8a520e..bba06a16 100644 --- a/samples/langchain_on_vertexai/retriever_agent_with_history_template.py +++ b/samples/langchain_on_vertexai/retriever_agent_with_history_template.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import os -from typing import Optional +from typing import Any, Optional import vertexai # type: ignore from config import ( @@ -91,7 +91,7 @@ def set_up(self): engine, table_name=self.table, embedding_service=VertexAIEmbeddings( - model_name="textembedding-gecko@latest", project=self.project + model_name="text-embedding-005", project=self.project ), ) retriever = vector_store.as_retriever() @@ -132,7 +132,7 @@ def set_up(self): history_messages_key="chat_history", ) - def query(self, input: str, session_id: str) -> str: + def query(self, input: str, session_id: str, **kwargs: Any) -> str: # type: ignore[override] """Query the application. Args: @@ -192,4 +192,4 @@ def query(self, input: str, session_id: str) -> str: extra_packages=["config.py"], ) -print(remote_app.query(input="movies about engineers", session_id="abc123")) +print(remote_app.query(input="movies about engineers", session_id="abc123")) # type: ignore diff --git a/samples/langchain_on_vertexai/retriever_chain_template.py b/samples/langchain_on_vertexai/retriever_chain_template.py index d05780c3..0d322ba8 100644 --- a/samples/langchain_on_vertexai/retriever_chain_template.py +++ b/samples/langchain_on_vertexai/retriever_chain_template.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import os -from typing import Optional +from typing import Any, Optional import vertexai # type: ignore from config import ( @@ -97,7 +97,7 @@ def set_up(self): engine, table_name=self.table, embedding_service=VertexAIEmbeddings( - model_name="textembedding-gecko@latest", project=self.project + model_name="text-embedding-005", project=self.project ), ) retriever = vector_store.as_retriever() @@ -106,7 +106,7 @@ def set_up(self): # an LLM to generate a response self.chain = create_retrieval_chain(retriever, combine_docs_chain) - def query(self, input: str) -> str: + def query(self, input: str, **kwargs: Any) -> str: # type: ignore[override] """Query the application. Args: @@ -161,4 +161,4 @@ def query(self, input: str) -> str: extra_packages=["config.py"], ) -print(remote_app.query(input="movies about engineers")) +print(remote_app.query(input="movies about engineers")) # type: ignore diff --git a/samples/requirements.txt b/samples/requirements.txt index db0df6bc..fa739a07 100644 --- a/samples/requirements.txt +++ b/samples/requirements.txt @@ -1,5 +1,5 @@ -google-cloud-aiplatform[reasoningengine,langchain]==1.97.0 -google-cloud-resource-manager==1.14.2 -langchain-community==0.3.27 -langchain-google-cloud-sql-pg==0.14.0 -langchain-google-vertexai==2.0.27 +google-cloud-aiplatform[reasoningengine,langchain]==1.130.0 +google-cloud-resource-manager==1.15.0 +langchain-community==0.3.29 +langchain-google-cloud-sql-pg==0.14.1 +langchain-google-vertexai==2.1.2 diff --git a/src/langchain_google_cloud_sql_pg/__init__.py b/src/langchain_google_cloud_sql_pg/__init__.py index ca8ab9ef..34bf7514 100644 --- a/src/langchain_google_cloud_sql_pg/__init__.py +++ b/src/langchain_google_cloud_sql_pg/__init__.py @@ -12,10 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from langchain_postgres import Column +from langchain_postgres.v2.hybrid_search_config import ( + HybridSearchConfig, + reciprocal_rank_fusion, + weighted_sum_ranking, +) + from . import indexes from .chat_message_history import PostgresChatMessageHistory from .checkpoint import PostgresSaver -from .engine import Column, PostgresEngine +from .engine import PostgresEngine from .loader import PostgresDocumentSaver, PostgresLoader from .vectorstore import PostgresVectorStore from .version import __version__ @@ -29,5 +36,8 @@ "PostgresLoader", "PostgresDocumentSaver", "PostgresSaver", + "HybridSearchConfig", + "reciprocal_rank_fusion", + "weighted_sum_ranking", "__version__", ] diff --git a/src/langchain_google_cloud_sql_pg/async_checkpoint.py b/src/langchain_google_cloud_sql_pg/async_checkpoint.py index fc875991..32eef521 100644 --- a/src/langchain_google_cloud_sql_pg/async_checkpoint.py +++ b/src/langchain_google_cloud_sql_pg/async_checkpoint.py @@ -276,7 +276,9 @@ async def aput( async with self.pool.connect() as conn: type_, serialized_checkpoint = self.serde.dumps_typed(checkpoint) - serialized_metadata = self.jsonplus_serde.dumps(metadata) + serialized_metadata = json.dumps(metadata, ensure_ascii=False).encode( + "utf-8", "ignore" + ) await conn.execute( text(query), { @@ -409,7 +411,7 @@ async def alist( (value["type"], value["checkpoint"]) ), metadata=( - self.jsonplus_serde.loads(value["metadata"]) # type: ignore + json.loads(value["metadata"]) # type: ignore if value["metadata"] is not None else {} ), @@ -494,7 +496,7 @@ async def aget_tuple(self, config: RunnableConfig) -> Optional[CheckpointTuple]: }, checkpoint=self.serde.loads_typed((value["type"], value["checkpoint"])), metadata=( - self.jsonplus_serde.loads(value["metadata"]) # type: ignore + json.loads(value["metadata"]) # type: ignore if value["metadata"] is not None else {} ), diff --git a/src/langchain_google_cloud_sql_pg/async_vectorstore.py b/src/langchain_google_cloud_sql_pg/async_vectorstore.py 
index 0cde1f8d..d40470f3 100644 --- a/src/langchain_google_cloud_sql_pg/async_vectorstore.py +++ b/src/langchain_google_cloud_sql_pg/async_vectorstore.py @@ -15,1187 +15,11 @@ # TODO: Remove below import when minimum supported Python version is 3.10 from __future__ import annotations -import copy -import json -import uuid -from typing import Any, Callable, Iterable, Optional, Sequence +from langchain_postgres.v2.async_vectorstore import AsyncPGVectorStore -import numpy as np -from langchain_core.documents import Document -from langchain_core.embeddings import Embeddings -from langchain_core.vectorstores import VectorStore, utils -from sqlalchemy import text -from sqlalchemy.engine.row import RowMapping -from sqlalchemy.ext.asyncio import AsyncEngine -from .engine import PostgresEngine -from .indexes import ( - DEFAULT_DISTANCE_STRATEGY, - DEFAULT_INDEX_NAME_SUFFIX, - BaseIndex, - DistanceStrategy, - ExactNearestNeighbor, - QueryOptions, -) - -COMPARISONS_TO_NATIVE = { - "$eq": "=", - "$ne": "!=", - "$lt": "<", - "$lte": "<=", - "$gt": ">", - "$gte": ">=", -} - -SPECIAL_CASED_OPERATORS = { - "$in", - "$nin", - "$between", - "$exists", -} - -TEXT_OPERATORS = { - "$like", - "$ilike", -} - -LOGICAL_OPERATORS = {"$and", "$or", "$not"} - -SUPPORTED_OPERATORS = ( - set(COMPARISONS_TO_NATIVE) - .union(TEXT_OPERATORS) - .union(LOGICAL_OPERATORS) - .union(SPECIAL_CASED_OPERATORS) -) - - -class AsyncPostgresVectorStore(VectorStore): +class AsyncPostgresVectorStore(AsyncPGVectorStore): """Google Cloud SQL for PostgreSQL Vector Store class""" - __create_key = object() - - def __init__( - self, - key: object, - pool: AsyncEngine, - embedding_service: Embeddings, - table_name: str, - schema_name: str = "public", - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[str] = [], - id_column: str = "langchain_id", - metadata_json_column: Optional[str] = "langchain_metadata", - distance_strategy: DistanceStrategy = 
DEFAULT_DISTANCE_STRATEGY, - k: int = 4, - fetch_k: int = 20, - lambda_mult: float = 0.5, - index_query_options: Optional[QueryOptions] = None, - ): - """AsyncPostgresVectorStore constructor. - Args: - key (object): Prevent direct constructor usage. - pool (PostgresEngine): Connection pool engine for managing connections to Postgres database. - embedding_service (Embeddings): Text embedding model to use. - table_name (str): Name of the existing table or the table to be created. - schema_name (str, optional): Database schema name of the table. Defaults to "public". - content_column (str): Column that represent a Document's page_content. Defaults to "content". - embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". - metadata_columns (list[str]): Column(s) that represent a document's metadata. - id_column (str): Column that represents the Document's id. Defaults to "langchain_id". - metadata_json_column (str): Column to store metadata as JSON. Defaults to "langchain_metadata". - distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE. - k (int): Number of Documents to return from search. Defaults to 4. - fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. - lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. - index_query_options (QueryOptions): Index query option. - - - Raises: - Exception: If called directly by user. - """ - if key != AsyncPostgresVectorStore.__create_key: - raise Exception( - "Only create class through 'create' or 'create_sync' methods!" 
- ) - - self.pool = pool - self.embedding_service = embedding_service - self.table_name = table_name - self.schema_name = schema_name - self.content_column = content_column - self.embedding_column = embedding_column - self.metadata_columns = metadata_columns - self.id_column = id_column - self.metadata_json_column = metadata_json_column - self.distance_strategy = distance_strategy - self.k = k - self.fetch_k = fetch_k - self.lambda_mult = lambda_mult - self.index_query_options = index_query_options - - @classmethod - async def create( - cls, - engine: PostgresEngine, - embedding_service: Embeddings, - table_name: str, - schema_name: str = "public", - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[str] = [], - ignore_metadata_columns: Optional[list[str]] = None, - id_column: str = "langchain_id", - metadata_json_column: Optional[str] = "langchain_metadata", - distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY, - k: int = 4, - fetch_k: int = 20, - lambda_mult: float = 0.5, - index_query_options: Optional[QueryOptions] = None, - ) -> AsyncPostgresVectorStore: - """Create a new AsyncPostgresVectorStore instance. - - Args: - engine (PostgresEngine): Connection pool engine for managing connections to Cloud SQL for PostgreSQL database. - embedding_service (Embeddings): Text embedding model to use. - table_name (str): Name of an existing table or table to be created. - schema_name (str, optional): Database schema name of the table. Defaults to "public". - content_column (str): Column that represent a Document's page_content. Defaults to "content". - embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". - metadata_columns (list[str]): Column(s) that represent a document's metadata. - ignore_metadata_columns (list[str]): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. 
Defaults to None. - id_column (str): Column that represents the Document's id. Defaults to "langchain_id". - metadata_json_column (str): Column to store metadata as JSON. Defaults to "langchain_metadata". - distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE. - k (int): Number of Documents to return from search. Defaults to 4. - fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. - lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. - index_query_options (QueryOptions): Index query option. - - Returns: - AsyncPostgresVectorStore - """ - if metadata_columns and ignore_metadata_columns: - raise ValueError( - "Can not use both metadata_columns and ignore_metadata_columns." - ) - # Get field type information - async with engine._pool.connect() as conn: - result = await conn.execute( - text( - f"SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '{table_name}'AND table_schema = '{schema_name}'" - ) - ) - result_map = result.mappings() - results = result_map.fetchall() - - columns = {} - for field in results: - columns[field["column_name"]] = field["data_type"] - - # Check columns - if id_column not in columns: - raise ValueError(f"Id column, {id_column}, does not exist.") - if content_column not in columns: - raise ValueError(f"Content column, {content_column}, does not exist.") - content_type = columns[content_column] - if content_type != "text" and "char" not in content_type: - raise ValueError( - f"Content column, {content_column}, is type, {content_type}. It must be a type of character string." 
- ) - if embedding_column not in columns: - raise ValueError(f"Embedding column, {embedding_column}, does not exist.") - if columns[embedding_column] != "USER-DEFINED": - raise ValueError( - f"Embedding column, {embedding_column}, is not type Vector." - ) - - metadata_json_column = ( - None if metadata_json_column not in columns else metadata_json_column - ) - - # If using metadata_columns check to make sure column exists - for column in metadata_columns: - if column not in columns: - raise ValueError(f"Metadata column, {column}, does not exist.") - - # If using ignore_metadata_columns, filter out known columns and set known metadata columns - all_columns = columns - if ignore_metadata_columns: - for column in ignore_metadata_columns: - del all_columns[column] - - del all_columns[id_column] - del all_columns[content_column] - del all_columns[embedding_column] - metadata_columns = [k for k in all_columns.keys()] - - return cls( - cls.__create_key, - engine._pool, - embedding_service, - table_name, - schema_name, - content_column, - embedding_column, - metadata_columns, - id_column, - metadata_json_column, - distance_strategy, - k, - fetch_k, - lambda_mult, - index_query_options, - ) - - @property - def embeddings(self) -> Embeddings: - return self.embedding_service - - async def __aadd_embeddings( - self, - texts: Iterable[str], - embeddings: list[list[float]], - metadatas: Optional[list[dict]] = None, - ids: Optional[list] = None, - **kwargs: Any, - ) -> list[str]: - """Add embeddings to the table. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. 
- """ - if not ids: - ids = [str(uuid.uuid4()) for _ in texts] - else: - # This is done to fill in any missing ids - ids = [id if id is not None else str(uuid.uuid4()) for id in ids] - if not metadatas: - metadatas = [{} for _ in texts] - # Insert embeddings - for id, content, embedding, metadata in zip(ids, texts, embeddings, metadatas): - metadata_col_names = ( - ", " + ", ".join(f'"{col}"' for col in self.metadata_columns) - if len(self.metadata_columns) > 0 - else "" - ) - insert_stmt = f'INSERT INTO "{self.schema_name}"."{self.table_name}"("{self.id_column}", "{self.content_column}", "{self.embedding_column}"{metadata_col_names}' - values = { - "langchain_id": id, - "content": content, - "embedding": str([float(dimension) for dimension in embedding]), - } - values_stmt = "VALUES (:langchain_id, :content, :embedding" - - # Add metadata - extra = copy.deepcopy(metadata) - for metadata_column in self.metadata_columns: - if metadata_column in metadata: - values_stmt += f", :{metadata_column}" - values[metadata_column] = metadata[metadata_column] - del extra[metadata_column] - else: - values_stmt += ",null" - - # Add JSON column and/or close statement - insert_stmt += ( - f""", "{self.metadata_json_column}")""" - if self.metadata_json_column - else ")" - ) - if self.metadata_json_column: - values_stmt += ", :extra)" - values["extra"] = json.dumps(extra) - else: - values_stmt += ")" - - upsert_stmt = f' ON CONFLICT ("{self.id_column}") DO UPDATE SET "{self.content_column}" = EXCLUDED."{self.content_column}", "{self.embedding_column}" = EXCLUDED."{self.embedding_column}"' - - if self.metadata_json_column: - upsert_stmt += f', "{self.metadata_json_column}" = EXCLUDED."{self.metadata_json_column}"' - - for column in self.metadata_columns: - upsert_stmt += f', "{column}" = EXCLUDED."{column}"' - - upsert_stmt += ";" - - query = insert_stmt + values_stmt + upsert_stmt - async with self.pool.connect() as conn: - await conn.execute(text(query), values) - await 
conn.commit() - - return ids - - async def aget_by_ids(self, ids: Sequence[str]) -> list[Document]: - """Get documents by ids.""" - - quoted_ids = [f"'{id_val}'" for id_val in ids] - id_list_str = ", ".join(quoted_ids) - - columns = self.metadata_columns + [ - self.id_column, - self.content_column, - ] - if self.metadata_json_column: - columns.append(self.metadata_json_column) - - column_names = ", ".join(f'"{col}"' for col in columns) - - query = f'SELECT {column_names} FROM "{self.schema_name}"."{self.table_name}" WHERE "{self.id_column}" IN ({id_list_str});' - - async with self.pool.connect() as conn: - result = await conn.execute(text(query)) - result_map = result.mappings() - results = result_map.fetchall() - - documents = [] - for row in results: - metadata = ( - row[self.metadata_json_column] - if self.metadata_json_column and row[self.metadata_json_column] - else {} - ) - for col in self.metadata_columns: - metadata[col] = row[col] - documents.append( - ( - Document( - page_content=row[self.content_column], - metadata=metadata, - id=str(row[self.id_column]), - ) - ) - ) - - return documents - - async def aadd_texts( - self, - texts: Iterable[str], - metadatas: Optional[list[dict]] = None, - ids: Optional[list] = None, - **kwargs: Any, - ) -> list[str]: - """Embed texts and add to the table. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. - """ - embeddings = self.embedding_service.embed_documents(list(texts)) - ids = await self.__aadd_embeddings( - texts, embeddings, metadatas=metadatas, ids=ids, **kwargs - ) - return ids - - async def aadd_documents( - self, - documents: list[Document], - ids: Optional[list] = None, - **kwargs: Any, - ) -> list[str]: - """Embed documents and add to the table. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. 
- """ - texts = [doc.page_content for doc in documents] - metadatas = [doc.metadata for doc in documents] - if not ids: - ids = [doc.id for doc in documents] - ids = await self.aadd_texts(texts, metadatas=metadatas, ids=ids, **kwargs) - return ids - - async def adelete( - self, - ids: Optional[list] = None, - **kwargs: Any, - ) -> Optional[bool]: - """Delete records from the table. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. - """ - if not ids: - return False - - id_list = ", ".join([f"'{id}'" for id in ids]) - query = f'DELETE FROM "{self.schema_name}"."{self.table_name}" WHERE {self.id_column} in ({id_list})' - async with self.pool.connect() as conn: - await conn.execute(text(query)) - await conn.commit() - return True - - @classmethod - async def afrom_texts( # type: ignore[override] - cls: type[AsyncPostgresVectorStore], - texts: list[str], - embedding: Embeddings, - engine: PostgresEngine, - table_name: str, - schema_name: str = "public", - metadatas: Optional[list[dict]] = None, - ids: Optional[list] = None, - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[str] = [], - ignore_metadata_columns: Optional[list[str]] = None, - id_column: str = "langchain_id", - metadata_json_column: str = "langchain_metadata", - distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY, - k: int = 4, - fetch_k: int = 20, - lambda_mult: float = 0.5, - index_query_options: Optional[QueryOptions] = None, - **kwargs: Any, - ) -> AsyncPostgresVectorStore: - """Create an AsyncPostgresVectorStore instance from texts. - - Args: - texts (list[str]): Texts to add to the vector store. - embedding (Embeddings): Text embedding model to use. - engine (PostgresEngine): Connection pool engine for managing connections to Postgres database. - table_name (str): Name of the existing table or the table to be created. 
- schema_name (str, optional): Database schema name of the table. Defaults to "public". - metadatas (Optional[list[dict]]): List of metadatas to add to table records. - ids: (Optional[list[str]]): List of IDs to add to table records. - content_column (str): Column that represent a Document’s page_content. Defaults to "content". - embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". - metadata_columns (list[str]): Column(s) that represent a document's metadata. - ignore_metadata_columns (list[str]): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None. - id_column (str): Column that represents the Document's id. Defaults to "langchain_id". - metadata_json_column (str): Column to store metadata as JSON. Defaults to "langchain_metadata". - distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE. - k (int): Number of Documents to return from search. Defaults to 4. - fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. - lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. - index_query_options (QueryOptions): Index query option. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. 
- - Returns: - AsyncPostgresVectorStore - """ - vs = await cls.create( - engine, - embedding, - table_name, - schema_name, - content_column, - embedding_column, - metadata_columns, - ignore_metadata_columns, - id_column, - metadata_json_column, - distance_strategy, - k, - fetch_k, - lambda_mult, - index_query_options, - ) - await vs.aadd_texts(texts, metadatas=metadatas, ids=ids, **kwargs) - return vs - - @classmethod - async def afrom_documents( # type: ignore[override] - cls: type[AsyncPostgresVectorStore], - documents: list[Document], - embedding: Embeddings, - engine: PostgresEngine, - table_name: str, - schema_name: str = "public", - ids: Optional[list] = None, - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[str] = [], - ignore_metadata_columns: Optional[list[str]] = None, - id_column: str = "langchain_id", - metadata_json_column: str = "langchain_metadata", - distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY, - k: int = 4, - fetch_k: int = 20, - lambda_mult: float = 0.5, - index_query_options: Optional[QueryOptions] = None, - **kwargs: Any, - ) -> AsyncPostgresVectorStore: - """Create an AsyncPostgresVectorStore instance from documents. - - Args: - documents (list[Document]): Documents to add to the vector store. - embedding (Embeddings): Text embedding model to use. - engine (PostgresEngine): Connection pool engine for managing connections to Postgres database. - table_name (str): Name of the existing table or the table to be created. - schema_name (str, optional): Database schema name of the table. Defaults to "public". - metadatas (Optional[list[dict]]): List of metadatas to add to table records. - ids: (Optional[list[str]]): List of IDs to add to table records. - content_column (str): Column that represent a Document's page_content. Defaults to "content". - embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". 
- metadata_columns (list[str]): Column(s) that represent a document's metadata. - ignore_metadata_columns (list[str]): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None. - id_column (str): Column that represents the Document's id. Defaults to "langchain_id". - metadata_json_column (str): Column to store metadata as JSON. Defaults to "langchain_metadata". - distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE. - k (int): Number of Documents to return from search. Defaults to 4. - fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. - lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. - index_query_options (QueryOptions): Index query option. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. 
- - Returns: - AsyncPostgresVectorStore - """ - vs = await cls.create( - engine, - embedding, - table_name, - schema_name, - content_column, - embedding_column, - metadata_columns, - ignore_metadata_columns, - id_column, - metadata_json_column, - distance_strategy, - k, - fetch_k, - lambda_mult, - index_query_options, - ) - texts = [doc.page_content for doc in documents] - metadatas = [doc.metadata for doc in documents] - await vs.aadd_texts(texts, metadatas=metadatas, ids=ids, **kwargs) - return vs - - async def __query_collection( - self, - embedding: list[float], - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> Sequence[RowMapping]: - """Perform similarity search query on the vector store table.""" - k = k if k else self.k - operator = self.distance_strategy.operator - search_function = self.distance_strategy.search_function - - columns = self.metadata_columns + [ - self.id_column, - self.content_column, - self.embedding_column, - ] - if self.metadata_json_column: - columns.append(self.metadata_json_column) - - column_names = ", ".join(f'"{col}"' for col in columns) - - if filter and isinstance(filter, dict): - filter = self._create_filter_clause(filter) - filter = f"WHERE {filter}" if filter else "" - embedding_string = f"'{[float(dimension) for dimension in embedding]}'" - stmt = f'SELECT {column_names}, {search_function}({self.embedding_column}, {embedding_string}) as distance FROM "{self.schema_name}"."{self.table_name}" {filter} ORDER BY {self.embedding_column} {operator} {embedding_string} LIMIT {k};' - if self.index_query_options: - async with self.pool.connect() as conn: - await conn.execute( - text(f"SET LOCAL {self.index_query_options.to_string()};") - ) - result = await conn.execute(text(stmt)) - result_map = result.mappings() - results = result_map.fetchall() - else: - async with self.pool.connect() as conn: - result = await conn.execute(text(stmt)) - result_map = result.mappings() - results = 
result_map.fetchall() - return results - - async def asimilarity_search( - self, - query: str, - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - """Return docs selected by similarity search on query.""" - embedding = self.embedding_service.embed_query(text=query) - - return await self.asimilarity_search_by_vector( - embedding=embedding, k=k, filter=filter, **kwargs - ) - - def _select_relevance_score_fn(self) -> Callable[[float], float]: - """Select a relevance function based on distance strategy.""" - # Calculate distance strategy provided in - # vectorstore constructor - if self.distance_strategy == DistanceStrategy.COSINE_DISTANCE: - return self._cosine_relevance_score_fn - if self.distance_strategy == DistanceStrategy.INNER_PRODUCT: - return self._max_inner_product_relevance_score_fn - elif self.distance_strategy == DistanceStrategy.EUCLIDEAN: - return self._euclidean_relevance_score_fn - - async def asimilarity_search_with_score( - self, - query: str, - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[tuple[Document, float]]: - """Return docs and distance scores selected by similarity search on query.""" - embedding = self.embedding_service.embed_query(query) - docs = await self.asimilarity_search_with_score_by_vector( - embedding=embedding, k=k, filter=filter, **kwargs - ) - return docs - - async def asimilarity_search_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - """Return docs selected by vector similarity search.""" - docs_and_scores = await self.asimilarity_search_with_score_by_vector( - embedding=embedding, k=k, filter=filter, **kwargs - ) - - return [doc for doc, _ in docs_and_scores] - - async def asimilarity_search_with_score_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - filter: 
Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[tuple[Document, float]]: - """Return docs and distance scores selected by vector similarity search.""" - results = await self.__query_collection( - embedding=embedding, k=k, filter=filter, **kwargs - ) - - documents_with_scores = [] - for row in results: - metadata = ( - row[self.metadata_json_column] - if self.metadata_json_column and row[self.metadata_json_column] - else {} - ) - for col in self.metadata_columns: - metadata[col] = row[col] - documents_with_scores.append( - ( - Document( - page_content=row[self.content_column], - metadata=metadata, - id=str(row[self.id_column]), - ), - row["distance"], - ) - ) - - return documents_with_scores - - async def amax_marginal_relevance_search( - self, - query: str, - k: Optional[int] = None, - fetch_k: Optional[int] = None, - lambda_mult: Optional[float] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - """Return docs selected using the maximal marginal relevance.""" - embedding = self.embedding_service.embed_query(text=query) - - return await self.amax_marginal_relevance_search_by_vector( - embedding=embedding, - k=k, - fetch_k=fetch_k, - lambda_mult=lambda_mult, - filter=filter, - **kwargs, - ) - - async def amax_marginal_relevance_search_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - fetch_k: Optional[int] = None, - lambda_mult: Optional[float] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - """Return docs selected using the maximal marginal relevance.""" - docs_and_scores = ( - await self.amax_marginal_relevance_search_with_score_by_vector( - embedding, - k=k, - fetch_k=fetch_k, - lambda_mult=lambda_mult, - filter=filter, - **kwargs, - ) - ) - - return [result[0] for result in docs_and_scores] - - async def amax_marginal_relevance_search_with_score_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - 
fetch_k: Optional[int] = None, - lambda_mult: Optional[float] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[tuple[Document, float]]: - """Return docs and distance scores selected using the maximal marginal relevance.""" - results = await self.__query_collection( - embedding=embedding, k=fetch_k, filter=filter, **kwargs - ) - - k = k if k else self.k - fetch_k = fetch_k if fetch_k else self.fetch_k - lambda_mult = lambda_mult if lambda_mult else self.lambda_mult - embedding_list = [json.loads(row[self.embedding_column]) for row in results] - mmr_selected = utils.maximal_marginal_relevance( - np.array(embedding, dtype=np.float32), - embedding_list, - k=k, - lambda_mult=lambda_mult, - ) - - documents_with_scores = [] - for row in results: - metadata = ( - row[self.metadata_json_column] - if self.metadata_json_column and row[self.metadata_json_column] - else {} - ) - for col in self.metadata_columns: - metadata[col] = row[col] - documents_with_scores.append( - ( - Document( - page_content=row[self.content_column], - metadata=metadata, - id=str(row[self.id_column]), - ), - row["distance"], - ) - ) - - return [r for i, r in enumerate(documents_with_scores) if i in mmr_selected] - - async def aapply_vector_index( - self, - index: BaseIndex, - name: Optional[str] = None, - concurrently: bool = False, - ) -> None: - """Create an index on the vector store table.""" - if isinstance(index, ExactNearestNeighbor): - await self.adrop_vector_index() - return - - filter = f"WHERE ({index.partial_indexes})" if index.partial_indexes else "" - params = "WITH " + index.index_options() - function = index.distance_strategy.index_function - if name is None: - if index.name == None: - index.name = self.table_name + DEFAULT_INDEX_NAME_SUFFIX - name = index.name - stmt = f'CREATE INDEX {"CONCURRENTLY" if concurrently else ""} {name} ON "{self.schema_name}"."{self.table_name}" USING {index.index_type} ({self.embedding_column} {function}) {params} 
{filter};' - if concurrently: - async with self.pool.connect() as conn: - await conn.execute(text("COMMIT")) - await conn.execute(text(stmt)) - else: - async with self.pool.connect() as conn: - await conn.execute(text(stmt)) - await conn.commit() - - async def areindex(self, index_name: Optional[str] = None) -> None: - """Re-index the vector store table.""" - index_name = index_name or self.table_name + DEFAULT_INDEX_NAME_SUFFIX - query = f"REINDEX INDEX {index_name};" - async with self.pool.connect() as conn: - await conn.execute(text(query)) - await conn.commit() - - async def adrop_vector_index( - self, - index_name: Optional[str] = None, - ) -> None: - """Drop the vector index.""" - index_name = index_name or self.table_name + DEFAULT_INDEX_NAME_SUFFIX - query = f"DROP INDEX IF EXISTS {index_name};" - async with self.pool.connect() as conn: - await conn.execute(text(query)) - await conn.commit() - - async def is_valid_index( - self, - index_name: Optional[str] = None, - ) -> bool: - """Check if index exists in the table.""" - index_name = index_name or self.table_name + DEFAULT_INDEX_NAME_SUFFIX - stmt = f""" - SELECT tablename, indexname - FROM pg_indexes - WHERE tablename = '{self.table_name}' AND schemaname = '{self.schema_name}' AND indexname = '{index_name}'; - """ - async with self.pool.connect() as conn: - result = await conn.execute(text(stmt)) - result_map = result.mappings() - results = result_map.fetchall() - - return bool(len(results) == 1) - - def _handle_field_filter( - self, - field: str, - value: Any, - ) -> str: - """Create a filter for a specific field. 
- Args: - field: name of field - value: value to filter - If provided as is then this will be an equality filter - If provided as a dictionary then this will be a filter, the key - will be the operator and the value will be the value to filter by - Returns: - sql where query as a string - """ - if not isinstance(field, str): - raise ValueError( - f"field should be a string but got: {type(field)} with value: {field}" - ) - - if field.startswith("$"): - raise ValueError( - f"Invalid filter condition. Expected a field but got an operator: " - f"{field}" - ) - - # Allow [a-zA-Z0-9_], disallow $ for now until we support escape characters - if not field.isidentifier(): - raise ValueError( - f"Invalid field name: {field}. Expected a valid identifier." - ) - - if isinstance(value, dict): - # This is a filter specification - if len(value) != 1: - raise ValueError( - "Invalid filter condition. Expected a value which " - "is a dictionary with a single key that corresponds to an operator " - f"but got a dictionary with {len(value)} keys. The first few " - f"keys are: {list(value.keys())[:3]}" - ) - operator, filter_value = list(value.items())[0] - # Verify that that operator is an operator - if operator not in SUPPORTED_OPERATORS: - raise ValueError( - f"Invalid operator: {operator}. 
" - f"Expected one of {SUPPORTED_OPERATORS}" - ) - else: # Then we assume an equality operator - operator = "$eq" - filter_value = value - - if operator in COMPARISONS_TO_NATIVE: - # Then we implement an equality filter - # native is trusted input - if isinstance(filter_value, str): - filter_value = f"'{filter_value}'" - native = COMPARISONS_TO_NATIVE[operator] - return f"({field} {native} {filter_value})" - elif operator == "$between": - # Use AND with two comparisons - low, high = filter_value - - return f"({field} BETWEEN {low} AND {high})" - elif operator in {"$in", "$nin", "$like", "$ilike"}: - # We'll do force coercion to text - if operator in {"$in", "$nin"}: - for val in filter_value: - if not isinstance(val, (str, int, float)): - raise NotImplementedError( - f"Unsupported type: {type(val)} for value: {val}" - ) - - if isinstance(val, bool): # b/c bool is an instance of int - raise NotImplementedError( - f"Unsupported type: {type(val)} for value: {val}" - ) - - if operator in {"$in"}: - values = str(tuple(val for val in filter_value)) - return f"({field} IN {values})" - elif operator in {"$nin"}: - values = str(tuple(val for val in filter_value)) - return f"({field} NOT IN {values})" - elif operator in {"$like"}: - return f"({field} LIKE '{filter_value}')" - elif operator in {"$ilike"}: - return f"({field} ILIKE '{filter_value}')" - else: - raise NotImplementedError() - elif operator == "$exists": - if not isinstance(filter_value, bool): - raise ValueError( - "Expected a boolean value for $exists " - f"operator, but got: {filter_value}" - ) - else: - if filter_value: - return f"({field} IS NOT NULL)" - else: - return f"({field} IS NULL)" - else: - raise NotImplementedError() - - def _create_filter_clause(self, filters: Any) -> str: - """Create LangChain filter representation to matching SQL where clauses - Args: - filters: Dictionary of filters to apply to the query. - Returns: - String containing the sql where query. 
- """ - - if not isinstance(filters, dict): - raise ValueError( - f"Invalid type: Expected a dictionary but got type: {type(filters)}" - ) - if len(filters) == 1: - # The only operators allowed at the top level are $AND, $OR, and $NOT - # First check if an operator or a field - key, value = list(filters.items())[0] - if key.startswith("$"): - # Then it's an operator - if key.lower() not in ["$and", "$or", "$not"]: - raise ValueError( - f"Invalid filter condition. Expected $and, $or or $not " - f"but got: {key}" - ) - else: - # Then it's a field - return self._handle_field_filter(key, filters[key]) - - if key.lower() == "$and" or key.lower() == "$or": - if not isinstance(value, list): - raise ValueError( - f"Expected a list, but got {type(value)} for value: {value}" - ) - op = key[1:].upper() # Extract the operator - filter_clause = [self._create_filter_clause(el) for el in value] - if len(filter_clause) > 1: - return f"({f' {op} '.join(filter_clause)})" - elif len(filter_clause) == 1: - return filter_clause[0] - else: - raise ValueError( - "Invalid filter condition. Expected a dictionary " - "but got an empty dictionary" - ) - elif key.lower() == "$not": - if isinstance(value, list): - not_conditions = [ - self._create_filter_clause(item) for item in value - ] - not_stmts = [f"NOT {condition}" for condition in not_conditions] - return f"({' AND '.join(not_stmts)})" - elif isinstance(value, dict): - not_ = self._create_filter_clause(value) - return f"(NOT {not_})" - else: - raise ValueError( - f"Invalid filter condition. Expected a dictionary " - f"or a list but got: {type(value)}" - ) - else: - raise ValueError( - f"Invalid filter condition. Expected $and, $or or $not " - f"but got: {key}" - ) - elif len(filters) > 1: - # Then all keys have to be fields (they cannot be operators) - for key in filters.keys(): - if key.startswith("$"): - raise ValueError( - f"Invalid filter condition. 
Expected a field but got: {key}" - ) - # These should all be fields and combined using an $and operator - and_ = [self._handle_field_filter(k, v) for k, v in filters.items()] - if len(and_) > 1: - return f"({' AND '.join(and_)})" - elif len(and_) == 1: - return and_[0] - else: - raise ValueError( - "Invalid filter condition. Expected a dictionary " - "but got an empty dictionary" - ) - else: - return "" - - def get_by_ids(self, ids: Sequence[str]) -> list[Document]: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." - ) - - def similarity_search( - self, - query: str, - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." - ) - - def add_texts( - self, - texts: Iterable[str], - metadatas: Optional[list[dict]] = None, - ids: Optional[list] = None, - **kwargs: Any, - ) -> list[str]: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." - ) - - def add_documents( - self, - documents: list[Document], - ids: Optional[list] = None, - **kwargs: Any, - ) -> list[str]: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." - ) - - def delete( - self, - ids: Optional[list] = None, - **kwargs: Any, - ) -> Optional[bool]: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." 
- ) - - @classmethod - def from_texts( # type: ignore[override] - cls: type[AsyncPostgresVectorStore], - texts: list[str], - embedding: Embeddings, - engine: PostgresEngine, - table_name: str, - metadatas: Optional[list[dict]] = None, - ids: Optional[list] = None, - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[str] = [], - ignore_metadata_columns: Optional[list[str]] = None, - id_column: str = "langchain_id", - metadata_json_column: str = "langchain_metadata", - **kwargs: Any, - ) -> AsyncPostgresVectorStore: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." - ) - - @classmethod - def from_documents( # type: ignore[override] - cls: type[AsyncPostgresVectorStore], - documents: list[Document], - embedding: Embeddings, - engine: PostgresEngine, - table_name: str, - ids: Optional[list] = None, - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[str] = [], - ignore_metadata_columns: Optional[list[str]] = None, - id_column: str = "langchain_id", - metadata_json_column: str = "langchain_metadata", - **kwargs: Any, - ) -> AsyncPostgresVectorStore: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." - ) - - def similarity_search_with_score( - self, - query: str, - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[tuple[Document, float]]: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." 
- ) - - def similarity_search_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." - ) - - def similarity_search_with_score_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[tuple[Document, float]]: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." - ) - - def max_marginal_relevance_search( - self, - query: str, - k: Optional[int] = None, - fetch_k: Optional[int] = None, - lambda_mult: Optional[float] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." - ) - - def max_marginal_relevance_search_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - fetch_k: Optional[int] = None, - lambda_mult: Optional[float] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." - ) - - def max_marginal_relevance_search_with_score_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - fetch_k: Optional[int] = None, - lambda_mult: Optional[float] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[tuple[Document, float]]: - raise NotImplementedError( - "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." 
- ) + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) diff --git a/src/langchain_google_cloud_sql_pg/engine.py b/src/langchain_google_cloud_sql_pg/engine.py index c40462b5..102467bd 100644 --- a/src/langchain_google_cloud_sql_pg/engine.py +++ b/src/langchain_google_cloud_sql_pg/engine.py @@ -24,6 +24,7 @@ import google.auth # type: ignore import google.auth.transport.requests # type: ignore from google.cloud.sql.connector import Connector, IPTypes, RefreshStrategy +from langchain_postgres import Column, PGEngine from sqlalchemy import MetaData, Table, text from sqlalchemy.engine import URL from sqlalchemy.exc import InvalidRequestError @@ -78,58 +79,10 @@ async def _get_iam_principal_email( return email.replace(".gserviceaccount.com", "") -@dataclass -class Column: - name: str - data_type: str - nullable: bool = True - - def __post_init__(self): - """Check if initialization parameters are valid. - - Raises: - ValueError: Raises error if Column name is not string. - ValueError: Raises error if data_type is not type string. - """ - if not isinstance(self.name, str): - raise ValueError("Column name must be type string") - if not isinstance(self.data_type, str): - raise ValueError("Column data_type must be type string") - - -class PostgresEngine: +class PostgresEngine(PGEngine): """A class for managing connections to a Cloud SQL for Postgres database.""" _connector: Optional[Connector] = None - _default_loop: Optional[asyncio.AbstractEventLoop] = None - _default_thread: Optional[Thread] = None - __create_key = object() - - def __init__( - self, - key: object, - pool: AsyncEngine, - loop: Optional[asyncio.AbstractEventLoop], - thread: Optional[Thread], - ): - """PostgresEngine constructor. - - Args: - key (object): Prevent direct constructor usage. - pool (AsyncEngine): Async engine connection pool. - loop (Optional[asyncio.AbstractEventLoop]): Async event loop used to create the engine. 
- thread (Optional[Thread]): Thread used to create the engine async. - - Raises: - Exception: If the constructor is called directly by the user. - """ - if key != PostgresEngine.__create_key: - raise Exception( - "Only create class through 'create' or 'create_sync' methods!" - ) - self._pool = pool - self._loop = loop - self._thread = thread @classmethod async def _create( @@ -219,7 +172,7 @@ async def getconn() -> asyncpg.Connection: async_creator=getconn, **engine_args, ) - return cls(cls.__create_key, engine, loop, thread) + return cls(PGEngine._PGEngine__create_key, engine, loop, thread) # type: ignore @classmethod def __start_background_loop( @@ -354,13 +307,22 @@ async def afrom_instance( return await asyncio.wrap_future(future) @classmethod - def from_engine( + def from_connection_string( cls, - engine: AsyncEngine, - loop: Optional[asyncio.AbstractEventLoop] = None, + url: str | URL, + **kwargs: Any, ) -> PostgresEngine: - """Create an PostgresEngine instance from an AsyncEngine.""" - return cls(cls.__create_key, engine, loop, None) + """Create an PostgresEngine instance from arguments. These parameters are pass directly into sqlalchemy's create_async_engine function. 
+ Args: + url (Union[str | URL]): the URL used to connect to a database + **kwargs (Any, optional): sqlalchemy `create_async_engine` arguments + Raises: + ValueError: If `postgresql+asyncpg` is not specified as the PG driver + Returns: + PostgresEngine + """ + + return PostgresEngine.from_engine_args(url=url, **kwargs) @classmethod def from_engine_args( @@ -396,198 +358,7 @@ def from_engine_args( raise ValueError("Driver must be type 'postgresql+asyncpg'") engine = create_async_engine(url, **kwargs) - return cls(cls.__create_key, engine, cls._default_loop, cls._default_thread) - - async def _run_as_async(self, coro: Awaitable[T]) -> T: - """Run an async coroutine asynchronously""" - # If a loop has not been provided, attempt to run in current thread - if not self._loop: - return await coro - # Otherwise, run in the background thread - return await asyncio.wrap_future( - asyncio.run_coroutine_threadsafe(coro, self._loop) - ) - - def _run_as_sync(self, coro: Awaitable[T]) -> T: - """Run an async coroutine synchronously""" - if not self._loop: - raise Exception( - "Engine was initialized without a background loop and cannot call sync methods." - ) - return asyncio.run_coroutine_threadsafe(coro, self._loop).result() - - async def close(self) -> None: - """Dispose of connection pool""" - await self._run_as_async(self._pool.dispose()) - - async def _ainit_vectorstore_table( - self, - table_name: str, - vector_size: int, - schema_name: str = "public", - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[Column] = [], - metadata_json_column: str = "langchain_metadata", - id_column: Union[str, Column] = "langchain_id", - overwrite_existing: bool = False, - store_metadata: bool = True, - ) -> None: - """ - Create a table for saving of vectors to be used with PostgresVectorStore. - - Args: - table_name (str): The Postgres database table name. - vector_size (int): Vector size for the embedding model to be used. 
- schema_name (str): The schema name to store Postgres database table. - Default: "public". - content_column (str): Name of the column to store document content. - Default: "page_content". - embedding_column (str) : Name of the column to store vector embeddings. - Default: "embedding". - metadata_columns (list[Column]): A list of Columns to create for custom - metadata. Default: []. Optional. - metadata_json_column (str): The column to store extra metadata in JSON format. - Default: "langchain_metadata". Optional. - id_column (Union[str, Column]) : Column to store ids. - Default: "langchain_id" column name with data type UUID. Optional. - overwrite_existing (bool): Whether to drop existing table. Default: False. - store_metadata (bool): Whether to store metadata in the table. - Default: True. - Raises: - :class:`DuplicateTableError `: if table already exists and overwrite flag is not set. - :class:`UndefinedObjectError `: if the data type of the id column is not a postgreSQL data type. - """ - async with self._pool.connect() as conn: - await conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector")) - await conn.commit() - - if overwrite_existing: - async with self._pool.connect() as conn: - await conn.execute( - text(f'DROP TABLE IF EXISTS "{schema_name}"."{table_name}"') - ) - await conn.commit() - - id_data_type = "UUID" if isinstance(id_column, str) else id_column.data_type - id_column_name = id_column if isinstance(id_column, str) else id_column.name - - query = f"""CREATE TABLE "{schema_name}"."{table_name}"( - "{id_column_name}" {id_data_type} PRIMARY KEY, - "{content_column}" TEXT NOT NULL, - "{embedding_column}" vector({vector_size}) NOT NULL""" - for column in metadata_columns: - nullable = "NOT NULL" if not column.nullable else "" - query += f',\n"{column.name}" {column.data_type} {nullable}' - if store_metadata: - query += f""",\n"{metadata_json_column}" JSON""" - query += "\n);" - - async with self._pool.connect() as conn: - await 
conn.execute(text(query)) - await conn.commit() - - async def ainit_vectorstore_table( - self, - table_name: str, - vector_size: int, - schema_name: str = "public", - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[Column] = [], - metadata_json_column: str = "langchain_metadata", - id_column: Union[str, Column] = "langchain_id", - overwrite_existing: bool = False, - store_metadata: bool = True, - ) -> None: - """ - Create a table for saving of vectors to be used with PostgresVectorStore. - - Args: - table_name (str): The Postgres database table name. - vector_size (int): Vector size for the embedding model to be used. - schema_name (str): The schema name to store Postgres database table. - Default: "public". - content_column (str): Name of the column to store document content. - Default: "page_content". - embedding_column (str) : Name of the column to store vector embeddings. - Default: "embedding". - metadata_columns (list[Column]): A list of Columns to create for custom - metadata. Default: []. Optional. - metadata_json_column (str): The column to store extra metadata in JSON format. - Default: "langchain_metadata". Optional. - id_column (Union[str, Column]) : Column to store ids. - Default: "langchain_id" column name with data type UUID. Optional. - overwrite_existing (bool): Whether to drop existing table. Default: False. - store_metadata (bool): Whether to store metadata in the table. - Default: True. 
- """ - await self._run_as_async( - self._ainit_vectorstore_table( - table_name, - vector_size, - schema_name, - content_column, - embedding_column, - metadata_columns, - metadata_json_column, - id_column, - overwrite_existing, - store_metadata, - ) - ) - - def init_vectorstore_table( - self, - table_name: str, - vector_size: int, - schema_name: str = "public", - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[Column] = [], - metadata_json_column: str = "langchain_metadata", - id_column: Union[str, Column] = "langchain_id", - overwrite_existing: bool = False, - store_metadata: bool = True, - ) -> None: - """ - Create a table for saving of vectors to be used with PostgresVectorStore. - - Args: - table_name (str): The Postgres database table name. - vector_size (int): Vector size for the embedding model to be used. - schema_name (str): The schema name to store Postgres database table. - Default: "public". - content_column (str): Name of the column to store document content. - Default: "page_content". - embedding_column (str) : Name of the column to store vector embeddings. - Default: "embedding". - metadata_columns (list[Column]): A list of Columns to create for custom - metadata. Default: []. Optional. - metadata_json_column (str): The column to store extra metadata in JSON format. - Default: "langchain_metadata". Optional. - id_column (Union[str, Column]) : Column to store ids. - Default: "langchain_id" column name with data type UUID. Optional. - overwrite_existing (bool): Whether to drop existing table. Default: False. - store_metadata (bool): Whether to store metadata in the table. - Default: True. - Raises: - :class:`UndefinedObjectError `: if the `ids` data type does not match that of the `id_column`. 
- """ - self._run_as_sync( - self._ainit_vectorstore_table( - table_name, - vector_size, - schema_name, - content_column, - embedding_column, - metadata_columns, - metadata_json_column, - id_column, - overwrite_existing, - store_metadata, - ) - ) + return cls(PGEngine._PGEngine__create_key, engine, cls._default_loop, cls._default_thread) # type: ignore async def _ainit_chat_history_table( self, table_name: str, schema_name: str = "public" diff --git a/src/langchain_google_cloud_sql_pg/indexes.py b/src/langchain_google_cloud_sql_pg/indexes.py index 18d7a740..7f5dd187 100644 --- a/src/langchain_google_cloud_sql_pg/indexes.py +++ b/src/langchain_google_cloud_sql_pg/indexes.py @@ -12,94 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -import enum -from abc import ABC, abstractmethod -from dataclasses import dataclass, field -from typing import Optional - - -@dataclass -class StrategyMixin: - operator: str - search_function: str - index_function: str - - -class DistanceStrategy(StrategyMixin, enum.Enum): - """Enumerator of the Distance strategies.""" - - EUCLIDEAN = "<->", "l2_distance", "vector_l2_ops" - COSINE_DISTANCE = "<=>", "cosine_distance", "vector_cosine_ops" - INNER_PRODUCT = "<#>", "inner_product", "vector_ip_ops" - - -DEFAULT_DISTANCE_STRATEGY = DistanceStrategy.COSINE_DISTANCE -DEFAULT_INDEX_NAME_SUFFIX: str = "langchainvectorindex" - - -@dataclass -class BaseIndex(ABC): - name: Optional[str] = None - index_type: str = "base" - distance_strategy: DistanceStrategy = field( - default_factory=lambda: DistanceStrategy.COSINE_DISTANCE - ) - partial_indexes: Optional[list[str]] = None - - @abstractmethod - def index_options(self) -> str: - """Set index query options for vector store initialization.""" - raise NotImplementedError( - "index_options method must be implemented by subclass" - ) - - -@dataclass -class ExactNearestNeighbor(BaseIndex): - index_type: str = "exactnearestneighbor" - - 
-@dataclass -class HNSWIndex(BaseIndex): - index_type: str = "hnsw" - m: int = 16 - ef_construction: int = 64 - - def index_options(self) -> str: - """Set index query options for vector store initialization.""" - return f"(m = {self.m}, ef_construction = {self.ef_construction})" - - -@dataclass -class QueryOptions(ABC): - def to_string(self) -> str: - """Convert index attributes to string.""" - raise NotImplementedError("to_string method must be implemented by subclass") - - -@dataclass -class HNSWQueryOptions(QueryOptions): - ef_search: int = 40 - - def to_string(self): - """Convert index attributes to string.""" - return f"hnsw.ef_search = {self.ef_search}" - - -@dataclass -class IVFFlatIndex(BaseIndex): - index_type: str = "ivfflat" - lists: int = 100 - - def index_options(self) -> str: - """Set index query options for vector store initialization.""" - return f"(lists = {self.lists})" - - -@dataclass -class IVFFlatQueryOptions(QueryOptions): - probes: int = 1 - - def to_string(self): - """Convert index attributes to string.""" - return f"ivflfat.probes = {self.probes}" +from langchain_postgres.v2.indexes import ( + DEFAULT_DISTANCE_STRATEGY, + DEFAULT_INDEX_NAME_SUFFIX, + BaseIndex, + DistanceStrategy, + ExactNearestNeighbor, + HNSWIndex, + HNSWQueryOptions, + IVFFlatIndex, + IVFFlatQueryOptions, + QueryOptions, + StrategyMixin, +) diff --git a/src/langchain_google_cloud_sql_pg/vectorstore.py b/src/langchain_google_cloud_sql_pg/vectorstore.py index f5333fd6..75598b85 100644 --- a/src/langchain_google_cloud_sql_pg/vectorstore.py +++ b/src/langchain_google_cloud_sql_pg/vectorstore.py @@ -15,12 +15,12 @@ # TODO: Remove below import when minimum supported Python version is 3.10 from __future__ import annotations -from typing import Any, Callable, Iterable, Optional, Sequence +from typing import Optional -import numpy as np -from langchain_core.documents import Document from langchain_core.embeddings import Embeddings -from langchain_core.vectorstores import 
VectorStore +from langchain_postgres import PGVectorStore + +from langchain_google_cloud_sql_pg import HybridSearchConfig from .async_vectorstore import AsyncPostgresVectorStore from .engine import PostgresEngine @@ -32,41 +32,22 @@ ) -class PostgresVectorStore(VectorStore): +class PostgresVectorStore(PGVectorStore): """Google Cloud SQL for PostgreSQL Vector Store class""" - __create_key = object() - - def __init__( - self, key: object, engine: PostgresEngine, vs: AsyncPostgresVectorStore - ): - """PostgresVectorStore constructor. - Args: - key (object): Prevent direct constructor usage. - engine (PostgresEngine): Connection pool engine for managing connections to Postgres database. - vs (AsyncPostgresVectorstore): The async only VectorStore implementation - - Raises: - Exception: If called directly by user. - """ - if key != PostgresVectorStore.__create_key: - raise Exception( - "Only create class through 'create' or 'create_sync' methods!" - ) - - self._engine = engine - self.__vs = vs + _engine: PostgresEngine + __vs: AsyncPostgresVectorStore @classmethod async def create( cls, - engine: PostgresEngine, + engine: PostgresEngine, # type: ignore embedding_service: Embeddings, table_name: str, schema_name: str = "public", content_column: str = "content", embedding_column: str = "embedding", - metadata_columns: list[str] = [], + metadata_columns: Optional[list[str]] = None, ignore_metadata_columns: Optional[list[str]] = None, id_column: str = "langchain_id", metadata_json_column: Optional[str] = "langchain_metadata", @@ -75,6 +56,7 @@ async def create( fetch_k: int = 20, lambda_mult: float = 0.5, index_query_options: Optional[QueryOptions] = None, + hybrid_search_config: Optional[HybridSearchConfig] = None, ) -> PostgresVectorStore: """Create a new PostgresVectorStore instance. @@ -94,6 +76,7 @@ async def create( fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. 
lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. index_query_options (QueryOptions): Index query option. + hybrid_search_config (HybridSearchConfig): Hybrid search configuration. Defaults to None. Returns: PostgresVectorStore @@ -102,32 +85,33 @@ async def create( engine, embedding_service, table_name, - schema_name, - content_column, - embedding_column, - metadata_columns, - ignore_metadata_columns, - id_column, - metadata_json_column, - distance_strategy, - k, - fetch_k, - lambda_mult, - index_query_options, + schema_name=schema_name, + content_column=content_column, + embedding_column=embedding_column, + metadata_columns=metadata_columns, + ignore_metadata_columns=ignore_metadata_columns, + metadata_json_column=metadata_json_column, + id_column=id_column, + distance_strategy=distance_strategy, + k=k, + fetch_k=fetch_k, + lambda_mult=lambda_mult, + index_query_options=index_query_options, + hybrid_search_config=hybrid_search_config, ) vs = await engine._run_as_async(coro) - return cls(cls.__create_key, engine, vs) + return cls(cls._PGVectorStore__create_key, engine, vs) # type: ignore @classmethod def create_sync( cls, - engine: PostgresEngine, + engine: PostgresEngine, # type: ignore embedding_service: Embeddings, table_name: str, schema_name: str = "public", content_column: str = "content", embedding_column: str = "embedding", - metadata_columns: list[str] = [], + metadata_columns: Optional[list[str]] = None, ignore_metadata_columns: Optional[list[str]] = None, id_column: str = "langchain_id", metadata_json_column: str = "langchain_metadata", @@ -136,6 +120,7 @@ def create_sync( fetch_k: int = 20, lambda_mult: float = 0.5, index_query_options: Optional[QueryOptions] = None, + hybrid_search_config: Optional[HybridSearchConfig] = None, ) -> PostgresVectorStore: """Create a new PostgresVectorStore instance. 
@@ -155,6 +140,7 @@ def create_sync( fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. index_query_options (QueryOptions): Index query option. + hybrid_search_config (HybridSearchConfig): Hybrid search configuration. Defaults to None. Returns: PostgresVectorStore @@ -163,661 +149,19 @@ def create_sync( engine, embedding_service, table_name, - schema_name, - content_column, - embedding_column, - metadata_columns, - ignore_metadata_columns, - id_column, - metadata_json_column, - distance_strategy, - k, - fetch_k, - lambda_mult, - index_query_options, + schema_name=schema_name, + content_column=content_column, + embedding_column=embedding_column, + metadata_columns=metadata_columns, + ignore_metadata_columns=ignore_metadata_columns, + metadata_json_column=metadata_json_column, + id_column=id_column, + distance_strategy=distance_strategy, + k=k, + fetch_k=fetch_k, + lambda_mult=lambda_mult, + index_query_options=index_query_options, + hybrid_search_config=hybrid_search_config, ) vs = engine._run_as_sync(coro) - return cls(cls.__create_key, engine, vs) - - @property - def embeddings(self) -> Embeddings: - return self.__vs.embedding_service - - async def aadd_texts( - self, - texts: Iterable[str], - metadatas: Optional[list[dict]] = None, - ids: Optional[list] = None, - **kwargs: Any, - ) -> list[str]: - """Embed texts and add to the table. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. - """ - return await self._engine._run_as_async( - self.__vs.aadd_texts(texts, metadatas, ids, **kwargs) - ) - - def add_texts( - self, - texts: Iterable[str], - metadatas: Optional[list[dict]] = None, - ids: Optional[list] = None, - **kwargs: Any, - ) -> list[str]: - """Embed texts and add to the table. 
- - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. - """ - return self._engine._run_as_sync( - self.__vs.aadd_texts(texts, metadatas, ids, **kwargs) - ) - - async def aadd_documents( - self, - documents: list[Document], - ids: Optional[list] = None, - **kwargs: Any, - ) -> list[str]: - """Embed documents and add to the table. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. - """ - return await self._engine._run_as_async( - self.__vs.aadd_documents(documents, ids, **kwargs) - ) - - def add_documents( - self, - documents: list[Document], - ids: Optional[list] = None, - **kwargs: Any, - ) -> list[str]: - """Embed documents and add to the table. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. - """ - return self._engine._run_as_sync( - self.__vs.aadd_documents(documents, ids, **kwargs) - ) - - async def adelete( - self, - ids: Optional[list] = None, - **kwargs: Any, - ) -> Optional[bool]: - """Delete records from the table. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. - """ - return await self._engine._run_as_async(self.__vs.adelete(ids, **kwargs)) - - def delete( - self, - ids: Optional[list] = None, - **kwargs: Any, - ) -> Optional[bool]: - """Delete records from the table. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. 
- """ - return self._engine._run_as_sync(self.__vs.adelete(ids, **kwargs)) - - @classmethod - async def afrom_texts( # type: ignore[override] - cls: type[PostgresVectorStore], - texts: list[str], - embedding: Embeddings, - engine: PostgresEngine, - table_name: str, - schema_name: str = "public", - metadatas: Optional[list[dict]] = None, - ids: Optional[list] = None, - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[str] = [], - ignore_metadata_columns: Optional[list[str]] = None, - id_column: str = "langchain_id", - metadata_json_column: str = "langchain_metadata", - distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY, - k: int = 4, - fetch_k: int = 20, - lambda_mult: float = 0.5, - index_query_options: Optional[QueryOptions] = None, - ) -> PostgresVectorStore: - """Create an PostgresVectorStore instance from texts. - - Args: - texts (list[str]): Texts to add to the vector store. - embedding (Embeddings): Text embedding model to use. - engine (PostgresEngine): Connection pool engine for managing connections to Postgres database. - table_name (str): Name of the existing table or the table to be created. - schema_name (str, optional): Database schema name of the table. Defaults to "public". - metadatas (Optional[list[dict]]): List of metadatas to add to table records. - ids: (Optional[list]): List of IDs to add to table records. - content_column (str): Column that represent a Document’s page_content. Defaults to "content". - embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". - metadata_columns (list[str]): Column(s) that represent a document's metadata. - ignore_metadata_columns (list[str]): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None. - id_column (str): Column that represents the Document's id. Defaults to "langchain_id". 
- metadata_json_column (str): Column to store metadata as JSON. Defaults to "langchain_metadata". - distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE. - k (int): Number of Documents to return from search. Defaults to 4. - fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. - lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. - index_query_options (QueryOptions): Index query option. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. - - Returns: - PostgresVectorStore - """ - vs = await cls.create( - engine, - embedding, - table_name, - schema_name, - content_column, - embedding_column, - metadata_columns, - ignore_metadata_columns, - id_column, - metadata_json_column, - distance_strategy, - k, - fetch_k, - lambda_mult, - index_query_options, - ) - await vs.aadd_texts(texts, metadatas=metadatas, ids=ids) - return vs - - @classmethod - async def afrom_documents( # type: ignore[override] - cls: type[PostgresVectorStore], - documents: list[Document], - embedding: Embeddings, - engine: PostgresEngine, - table_name: str, - schema_name: str = "public", - ids: Optional[list] = None, - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[str] = [], - ignore_metadata_columns: Optional[list[str]] = None, - id_column: str = "langchain_id", - metadata_json_column: str = "langchain_metadata", - distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY, - k: int = 4, - fetch_k: int = 20, - lambda_mult: float = 0.5, - index_query_options: Optional[QueryOptions] = None, - ) -> PostgresVectorStore: - """Create an PostgresVectorStore instance from documents. - - Args: - documents (list[Document]): Documents to add to the vector store. 
- embedding (Embeddings): Text embedding model to use. - engine (PostgresEngine): Connection pool engine for managing connections to Postgres database. - table_name (str): Name of the existing table or the table to be created. - schema_name (str, optional): Database schema name of the table. Defaults to "public". - metadatas (Optional[list[dict]]): List of metadatas to add to table records. - ids: (Optional[list]): List of IDs to add to table records. - content_column (str): Column that represent a Document’s page_content. Defaults to "content". - embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". - metadata_columns (list[str]): Column(s) that represent a document's metadata. - ignore_metadata_columns (list[str]): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None. - id_column (str): Column that represents the Document's id. Defaults to "langchain_id". - metadata_json_column (str): Column to store metadata as JSON. Defaults to "langchain_metadata". - distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE. - k (int): Number of Documents to return from search. Defaults to 4. - fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. - lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. - index_query_options (QueryOptions): Index query option. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. 
- - Returns: - PostgresVectorStore - """ - vs = await cls.create( - engine, - embedding, - table_name, - schema_name, - content_column, - embedding_column, - metadata_columns, - ignore_metadata_columns, - id_column, - metadata_json_column, - distance_strategy, - k, - fetch_k, - lambda_mult, - index_query_options, - ) - await vs.aadd_documents(documents, ids=ids) - return vs - - @classmethod - def from_texts( # type: ignore[override] - cls: type[PostgresVectorStore], - texts: list[str], - embedding: Embeddings, - engine: PostgresEngine, - table_name: str, - schema_name: str = "public", - metadatas: Optional[list[dict]] = None, - ids: Optional[list] = None, - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[str] = [], - ignore_metadata_columns: Optional[list[str]] = None, - id_column: str = "langchain_id", - metadata_json_column: str = "langchain_metadata", - distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY, - k: int = 4, - fetch_k: int = 20, - lambda_mult: float = 0.5, - index_query_options: Optional[QueryOptions] = None, - ) -> PostgresVectorStore: - """Create an PostgresVectorStore instance from texts. - - Args: - texts (list[str]): Texts to add to the vector store. - embedding (Embeddings): Text embedding model to use. - engine (PostgresEngine): Connection pool engine for managing connections to Postgres database. - table_name (str): Name of the existing table or the table to be created. - schema_name (str, optional): Database schema name of the table. Defaults to "public". - metadatas (Optional[list[dict]]): List of metadatas to add to table records. - ids: (Optional[list]): List of IDs to add to table records. - content_column (str): Column that represent a Document’s page_content. Defaults to "content". - embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". 
- metadata_columns (list[str]): Column(s) that represent a document's metadata. - ignore_metadata_columns (list[str]): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None. - id_column (str): Column that represents the Document's id. Defaults to "langchain_id". - metadata_json_column (str): Column to store metadata as JSON. Defaults to "langchain_metadata". - distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE. - k (int): Number of Documents to return from search. Defaults to 4. - fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. - lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. - index_query_options (QueryOptions): Index query option. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. 
- - Returns: - PostgresVectorStore - """ - vs = cls.create_sync( - engine, - embedding, - table_name, - schema_name, - content_column, - embedding_column, - metadata_columns, - ignore_metadata_columns, - id_column, - metadata_json_column, - distance_strategy, - k, - fetch_k, - lambda_mult, - index_query_options, - ) - vs.add_texts(texts, metadatas=metadatas, ids=ids) - return vs - - @classmethod - def from_documents( # type: ignore[override] - cls: type[PostgresVectorStore], - documents: list[Document], - embedding: Embeddings, - engine: PostgresEngine, - table_name: str, - schema_name: str = "public", - ids: Optional[list] = None, - content_column: str = "content", - embedding_column: str = "embedding", - metadata_columns: list[str] = [], - ignore_metadata_columns: Optional[list[str]] = None, - id_column: str = "langchain_id", - metadata_json_column: str = "langchain_metadata", - distance_strategy: DistanceStrategy = DEFAULT_DISTANCE_STRATEGY, - k: int = 4, - fetch_k: int = 20, - lambda_mult: float = 0.5, - index_query_options: Optional[QueryOptions] = None, - ) -> PostgresVectorStore: - """Create an PostgresVectorStore instance from documents. - - Args: - documents (list[Document]): Documents to add to the vector store. - embedding (Embeddings): Text embedding model to use. - engine (PostgresEngine): Connection pool engine for managing connections to Postgres database. - table_name (str): Name of the existing table or the table to be created. - schema_name (str, optional): Database schema name of the table. Defaults to "public". - metadatas (Optional[list[dict]]): List of metadatas to add to table records. - ids: (Optional[list]): List of IDs to add to table records. - content_column (str): Column that represent a Document’s page_content. Defaults to "content". - embedding_column (str): Column for embedding vectors. The embedding is generated from the document value. Defaults to "embedding". 
- metadata_columns (list[str]): Column(s) that represent a document's metadata. - ignore_metadata_columns (list[str]): Column(s) to ignore in pre-existing tables for a document's metadata. Can not be used with metadata_columns. Defaults to None. - id_column (str): Column that represents the Document's id. Defaults to "langchain_id". - metadata_json_column (str): Column to store metadata as JSON. Defaults to "langchain_metadata". - distance_strategy (DistanceStrategy): Distance strategy to use for vector similarity search. Defaults to COSINE_DISTANCE. - k (int): Number of Documents to return from search. Defaults to 4. - fetch_k (int): Number of Documents to fetch to pass to MMR algorithm. - lambda_mult (float): Number between 0 and 1 that determines the degree of diversity among the results with 0 corresponding to maximum diversity and 1 to minimum diversity. Defaults to 0.5. - index_query_options (QueryOptions): Index query option. - - Raises: - :class:`InvalidTextRepresentationError `: if the `ids` data type does not match that of the `id_column`. 
- - Returns: - PostgresVectorStore - """ - vs = cls.create_sync( - engine, - embedding, - table_name, - schema_name, - content_column, - embedding_column, - metadata_columns, - ignore_metadata_columns, - id_column, - metadata_json_column, - distance_strategy, - k, - fetch_k, - lambda_mult, - index_query_options, - ) - vs.add_documents(documents, ids=ids) - return vs - - async def asimilarity_search( - self, - query: str, - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - """Return docs selected by similarity search on query.""" - return await self._engine._run_as_async( - self.__vs.asimilarity_search(query, k, filter, **kwargs) - ) - - def similarity_search( - self, - query: str, - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - """Return docs selected by similarity search on query.""" - return self._engine._run_as_sync( - self.__vs.asimilarity_search(query, k, filter, **kwargs) - ) - - # Required for (a)similarity_search_with_relevance_scores - def _select_relevance_score_fn(self) -> Callable[[float], float]: - """Select a relevance function based on distance strategy.""" - # Calculate distance strategy provided in vectorstore constructor - if self.__vs.distance_strategy == DistanceStrategy.COSINE_DISTANCE: - return self._cosine_relevance_score_fn - if self.__vs.distance_strategy == DistanceStrategy.INNER_PRODUCT: - return self._max_inner_product_relevance_score_fn - elif self.__vs.distance_strategy == DistanceStrategy.EUCLIDEAN: - return self._euclidean_relevance_score_fn - - async def asimilarity_search_with_score( - self, - query: str, - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[tuple[Document, float]]: - """Return docs and distance scores selected by similarity search on query.""" - return await self._engine._run_as_async( - 
self.__vs.asimilarity_search_with_score(query, k, filter, **kwargs) - ) - - def similarity_search_with_score( - self, - query: str, - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[tuple[Document, float]]: - """Return docs and distance scores selected by similarity search on query.""" - return self._engine._run_as_sync( - self.__vs.asimilarity_search_with_score(query, k, filter, **kwargs) - ) - - async def asimilarity_search_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - """Return docs selected by vector similarity search.""" - return await self._engine._run_as_async( - self.__vs.asimilarity_search_by_vector(embedding, k, filter, **kwargs) - ) - - def similarity_search_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - """Return docs selected by vector similarity search.""" - return self._engine._run_as_sync( - self.__vs.asimilarity_search_by_vector(embedding, k, filter, **kwargs) - ) - - async def asimilarity_search_with_score_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[tuple[Document, float]]: - """Return docs and distance scores selected by vector similarity search.""" - return await self._engine._run_as_async( - self.__vs.asimilarity_search_with_score_by_vector( - embedding, k, filter, **kwargs - ) - ) - - def similarity_search_with_score_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[tuple[Document, float]]: - """Return docs and distance scores selected by similarity search on vector.""" - return self._engine._run_as_sync( - self.__vs.asimilarity_search_with_score_by_vector( - 
embedding, k, filter, **kwargs - ) - ) - - async def amax_marginal_relevance_search( - self, - query: str, - k: Optional[int] = None, - fetch_k: Optional[int] = None, - lambda_mult: Optional[float] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - """Return docs selected using the maximal marginal relevance.""" - return await self._engine._run_as_async( - self.__vs.amax_marginal_relevance_search( - query, k, fetch_k, lambda_mult, filter, **kwargs - ) - ) - - def max_marginal_relevance_search( - self, - query: str, - k: Optional[int] = None, - fetch_k: Optional[int] = None, - lambda_mult: Optional[float] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - """Return docs selected using the maximal marginal relevance.""" - return self._engine._run_as_sync( - self.__vs.amax_marginal_relevance_search( - query, k, fetch_k, lambda_mult, filter, **kwargs - ) - ) - - async def amax_marginal_relevance_search_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - fetch_k: Optional[int] = None, - lambda_mult: Optional[float] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - """Return docs selected using the maximal marginal relevance.""" - return await self._engine._run_as_async( - self.__vs.amax_marginal_relevance_search_by_vector( - embedding, k, fetch_k, lambda_mult, filter, **kwargs - ) - ) - - def max_marginal_relevance_search_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - fetch_k: Optional[int] = None, - lambda_mult: Optional[float] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[Document]: - """Return docs selected using the maximal marginal relevance.""" - return self._engine._run_as_sync( - self.__vs.amax_marginal_relevance_search_by_vector( - embedding, k, fetch_k, lambda_mult, filter, **kwargs - ) - ) - - async def 
amax_marginal_relevance_search_with_score_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - fetch_k: Optional[int] = None, - lambda_mult: Optional[float] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[tuple[Document, float]]: - """Return docs and distance scores selected using the maximal marginal relevance.""" - return await self._engine._run_as_async( - self.__vs.amax_marginal_relevance_search_with_score_by_vector( - embedding, k, fetch_k, lambda_mult, filter, **kwargs - ) - ) - - def max_marginal_relevance_search_with_score_by_vector( - self, - embedding: list[float], - k: Optional[int] = None, - fetch_k: Optional[int] = None, - lambda_mult: Optional[float] = None, - filter: Optional[dict] | Optional[str] = None, - **kwargs: Any, - ) -> list[tuple[Document, float]]: - """Return docs and distance scores selected using the maximal marginal relevance.""" - return self._engine._run_as_sync( - self.__vs.amax_marginal_relevance_search_with_score_by_vector( - embedding, k, fetch_k, lambda_mult, filter, **kwargs - ) - ) - - async def aapply_vector_index( - self, - index: BaseIndex, - name: Optional[str] = None, - concurrently: bool = False, - ) -> None: - """Create an index on the vector store table.""" - return await self._engine._run_as_async( - self.__vs.aapply_vector_index(index, name, concurrently) - ) - - def apply_vector_index( - self, - index: BaseIndex, - name: Optional[str] = None, - concurrently: bool = False, - ) -> None: - """Create an index on the vector store table.""" - return self._engine._run_as_sync( - self.__vs.aapply_vector_index(index, name, concurrently) - ) - - async def areindex(self, index_name: Optional[str] = None) -> None: - """Re-index the vector store table.""" - return await self._engine._run_as_async(self.__vs.areindex(index_name)) - - def reindex(self, index_name: Optional[str] = None) -> None: - """Re-index the vector store table.""" - return 
self._engine._run_as_sync(self.__vs.areindex(index_name)) - - async def adrop_vector_index( - self, - index_name: Optional[str] = None, - ) -> None: - """Drop the vector index.""" - return await self._engine._run_as_async( - self.__vs.adrop_vector_index(index_name) - ) - - def drop_vector_index( - self, - index_name: Optional[str] = None, - ) -> None: - """Drop the vector index.""" - return self._engine._run_as_sync(self.__vs.adrop_vector_index(index_name)) - - async def ais_valid_index( - self, - index_name: Optional[str] = None, - ) -> bool: - """Check if index exists in the table.""" - return await self._engine._run_as_async(self.__vs.is_valid_index(index_name)) - - def is_valid_index( - self, - index_name: Optional[str] = None, - ) -> bool: - """Check if index exists in the table.""" - return self._engine._run_as_sync(self.__vs.is_valid_index(index_name)) - - async def aget_by_ids(self, ids: Sequence[str]) -> list[Document]: - """Get documents by ids.""" - return await self._engine._run_as_async(self.__vs.aget_by_ids(ids=ids)) - - def get_by_ids(self, ids: Sequence[str]) -> list[Document]: - """Get documents by ids.""" - return self._engine._run_as_sync(self.__vs.aget_by_ids(ids=ids)) + return cls(cls._PGVectorStore__create_key, engine, vs) # type: ignore diff --git a/src/langchain_google_cloud_sql_pg/version.py b/src/langchain_google_cloud_sql_pg/version.py index f735a04c..a9b14a39 100644 --- a/src/langchain_google_cloud_sql_pg/version.py +++ b/src/langchain_google_cloud_sql_pg/version.py @@ -13,4 +13,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -__version__ = "0.14.1" +__version__ = "0.15.0" diff --git a/tests/test_async_chatmessagehistory.py b/tests/test_async_chatmessagehistory.py index e5443b11..585661a1 100644 --- a/tests/test_async_chatmessagehistory.py +++ b/tests/test_async_chatmessagehistory.py @@ -11,8 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. +import asyncio import os import uuid +from typing import Any, Coroutine import pytest import pytest_asyncio @@ -33,10 +35,23 @@ table_name_async = "message_store" + str(uuid.uuid4()) +# Helper to bridge the Main Test Loop and the Engine Background Loop +async def run_on_background(engine: PostgresEngine, coro: Coroutine) -> Any: + """Runs a coroutine on the engine's background loop.""" + if engine._loop: + return await asyncio.wrap_future( + asyncio.run_coroutine_threadsafe(coro, engine._loop) + ) + return await coro + + async def aexecute(engine: PostgresEngine, query: str) -> None: - async with engine._pool.connect() as conn: - await conn.execute(text(query)) - await conn.commit() + async def _impl(): + async with engine._pool.connect() as conn: + await conn.execute(text(query)) + await conn.commit() + + await run_on_background(engine, _impl()) @pytest_asyncio.fixture @@ -47,7 +62,10 @@ async def async_engine(): instance=instance_id, database=db_name, ) - await async_engine._ainit_chat_history_table(table_name=table_name_async) + await run_on_background( + async_engine, + async_engine._ainit_chat_history_table(table_name=table_name_async), + ) yield async_engine # use default table for AsyncPostgresChatMessageHistory query = f'DROP TABLE IF EXISTS "{table_name_async}"' @@ -59,14 +77,19 @@ async def async_engine(): async def test_chat_message_history_async( async_engine: PostgresEngine, ) -> None: - history = await AsyncPostgresChatMessageHistory.create( - engine=async_engine, session_id="test", table_name=table_name_async + history = await run_on_background( + async_engine, + AsyncPostgresChatMessageHistory.create( + engine=async_engine, session_id="test", table_name=table_name_async + ), ) msg1 = HumanMessage(content="hi!") msg2 = AIMessage(content="whats up?") - await history.aadd_message(msg1) - await history.aadd_message(msg2) - messages = await 
history._aget_messages() + + await run_on_background(async_engine, history.aadd_message(msg1)) + await run_on_background(async_engine, history.aadd_message(msg2)) + + messages = await run_on_background(async_engine, history._aget_messages()) # verify messages are correct assert messages[0].content == "hi!" @@ -75,48 +98,71 @@ async def test_chat_message_history_async( assert type(messages[1]) is AIMessage # verify clear() clears message history - await history.aclear() - assert len(await history._aget_messages()) == 0 + await run_on_background(async_engine, history.aclear()) + messages_after_clear = await run_on_background( + async_engine, history._aget_messages() + ) + assert len(messages_after_clear) == 0 @pytest.mark.asyncio async def test_chat_message_history_sync_messages( async_engine: PostgresEngine, ) -> None: - history1 = await AsyncPostgresChatMessageHistory.create( - engine=async_engine, session_id="test", table_name=table_name_async + history1 = await run_on_background( + async_engine, + AsyncPostgresChatMessageHistory.create( + engine=async_engine, session_id="test", table_name=table_name_async + ), ) - history2 = await AsyncPostgresChatMessageHistory.create( - engine=async_engine, session_id="test", table_name=table_name_async + history2 = await run_on_background( + async_engine, + AsyncPostgresChatMessageHistory.create( + engine=async_engine, session_id="test", table_name=table_name_async + ), ) msg1 = HumanMessage(content="hi!") msg2 = AIMessage(content="whats up?") - await history1.aadd_message(msg1) - await history2.aadd_message(msg2) + await run_on_background(async_engine, history1.aadd_message(msg1)) + await run_on_background(async_engine, history2.aadd_message(msg2)) + + len_history1 = len(await run_on_background(async_engine, history1._aget_messages())) + len_history2 = len(await run_on_background(async_engine, history2._aget_messages())) - assert len(await history1._aget_messages()) == 2 - assert len(await history2._aget_messages()) == 2 + 
assert len_history1 == 2 + assert len_history2 == 2 # verify clear() clears message history - await history2.aclear() - assert len(await history2._aget_messages()) == 0 + await run_on_background(async_engine, history2.aclear()) + len_history2_after_clear = len( + await run_on_background(async_engine, history2._aget_messages()) + ) + assert len_history2_after_clear == 0 @pytest.mark.asyncio async def test_chat_table_async(async_engine): with pytest.raises(ValueError): - await AsyncPostgresChatMessageHistory.create( - engine=async_engine, session_id="test", table_name="doesnotexist" + await run_on_background( + async_engine, + AsyncPostgresChatMessageHistory.create( + engine=async_engine, session_id="test", table_name="doesnotexist" + ), ) @pytest.mark.asyncio async def test_chat_schema_async(async_engine): table_name = "test_table" + str(uuid.uuid4()) - await async_engine._ainit_document_table(table_name=table_name) + await run_on_background( + async_engine, async_engine._ainit_document_table(table_name=table_name) + ) with pytest.raises(IndexError): - await AsyncPostgresChatMessageHistory.create( - engine=async_engine, session_id="test", table_name=table_name + await run_on_background( + async_engine, + AsyncPostgresChatMessageHistory.create( + engine=async_engine, session_id="test", table_name=table_name + ), ) query = f'DROP TABLE IF EXISTS "{table_name}"' diff --git a/tests/test_async_checkpoint.py b/tests/test_async_checkpoint.py index f3d8b5ed..00d26b29 100644 --- a/tests/test_async_checkpoint.py +++ b/tests/test_async_checkpoint.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import asyncio import os import re import uuid -from typing import Any, List, Literal, Optional, Sequence, Tuple, Union +from typing import Any, Coroutine, List, Literal, Optional, Sequence, Tuple, Union import pytest import pytest_asyncio @@ -39,7 +40,7 @@ empty_checkpoint, ) from langgraph.checkpoint.serde.jsonplus import JsonPlusSerializer -from langgraph.prebuilt import ( # type: ignore[import-not-found] +from langgraph.prebuilt import ( # type: ignore ToolNode, ValidationNode, create_react_agent, @@ -78,6 +79,7 @@ "__start__": {"__start__": 1}, "node": {"start:node": 2}, }, + "updated_channels": [], } @@ -106,18 +108,33 @@ def _AnyIdToolMessage(**kwargs: Any) -> ToolMessage: return message +# Helper to bridge the Main Test Loop and the Engine Background Loop +async def run_on_background(engine: PostgresEngine, coro: Coroutine) -> Any: + """Runs a coroutine on the engine's background loop.""" + if engine._loop: + return await asyncio.wrap_future( + asyncio.run_coroutine_threadsafe(coro, engine._loop) + ) + return await coro + + async def aexecute(engine: PostgresEngine, query: str) -> None: - async with engine._pool.connect() as conn: - await conn.execute(text(query)) - await conn.commit() + async def _impl(): + async with engine._pool.connect() as conn: + await conn.execute(text(query)) + await conn.commit() + + await run_on_background(engine, _impl()) async def afetch(engine: PostgresEngine, query: str) -> Sequence[RowMapping]: - async with engine._pool.connect() as conn: - result = await conn.execute(text(query)) - result_map = result.mappings() - result_fetch = result_map.fetchall() - return result_fetch + async def _impl(): + async with engine._pool.connect() as conn: + result = await conn.execute(text(query)) + result_map = result.mappings() + return result_map.fetchall() + + return await run_on_background(engine, _impl()) @pytest_asyncio.fixture @@ -138,10 +155,15 @@ async def async_engine(): @pytest_asyncio.fixture async def checkpointer(async_engine): 
- await async_engine._ainit_checkpoint_table(table_name=table_name) - checkpointer = await AsyncPostgresSaver.create( + await run_on_background( + async_engine, async_engine._ainit_checkpoint_table(table_name=table_name) + ) + checkpointer = await run_on_background( async_engine, - table_name, # serde=JsonPlusSerializer + AsyncPostgresSaver.create( + async_engine, + table_name, # serde=JsonPlusSerializer + ), ) yield checkpointer @@ -159,7 +181,9 @@ async def test_checkpoint_async( } } # Verify if updated configuration after storing the checkpoint is correct - next_config = await checkpointer.aput(write_config, checkpoint, {}, {}) + next_config = await run_on_background( + async_engine, checkpointer.aput(write_config, checkpoint, {}, {}) + ) assert dict(next_config) == test_config # Verify if the checkpoint is stored correctly in the database @@ -212,6 +236,7 @@ def test_data() -> dict[str, Any]: "__start__": {"__start__": 1}, "node": {"start:node": 2}, }, + "updated_channels": [], } chkpnt_1: Checkpoint = empty_checkpoint() chkpnt_2: Checkpoint = create_checkpoint(chkpnt_1, {}, 1) @@ -256,7 +281,9 @@ async def test_checkpoint_aput_writes( ("test_channel1", {}), ("test_channel2", {}), ] - await checkpointer.aput_writes(config, writes, task_id="1") + await run_on_background( + async_engine, checkpointer.aput_writes(config, writes, task_id="1") + ) results = await afetch(async_engine, f'SELECT * FROM "{table_name_writes}"') assert len(results) == 2 @@ -275,9 +302,19 @@ async def test_checkpoint_alist( checkpoints = test_data["checkpoints"] metadata = test_data["metadata"] - await checkpointer.aput(configs[1], checkpoints[1], metadata[0], {}) - await checkpointer.aput(configs[2], checkpoints[2], metadata[1], {}) - await checkpointer.aput(configs[3], checkpoints[3], metadata[2], {}) + await run_on_background( + async_engine, checkpointer.aput(configs[1], checkpoints[1], metadata[0], {}) + ) + await run_on_background( + async_engine, checkpointer.aput(configs[2], 
checkpoints[2], metadata[1], {}) + ) + await run_on_background( + async_engine, checkpointer.aput(configs[3], checkpoints[3], metadata[2], {}) + ) + + # Helper to consume async iterator on background thread + async def consume_alist(config, filter): + return [c async for c in checkpointer.alist(config, filter=filter)] # call method / assertions query_1 = {"source": "input"} # search by 1 key @@ -288,26 +325,35 @@ async def test_checkpoint_alist( query_3: dict[str, Any] = {} # search by no keys, return all checkpoints query_4 = {"source": "update", "step": 1} # no match - search_results_1 = [c async for c in checkpointer.alist(None, filter=query_1)] + search_results_1 = await run_on_background( + async_engine, consume_alist(None, filter=query_1) + ) assert len(search_results_1) == 1 print(metadata[0]) print(search_results_1[0].metadata) assert search_results_1[0].metadata == metadata[0] - search_results_2 = [c async for c in checkpointer.alist(None, filter=query_2)] + search_results_2 = await run_on_background( + async_engine, consume_alist(None, filter=query_2) + ) assert len(search_results_2) == 1 assert search_results_2[0].metadata == metadata[1] - search_results_3 = [c async for c in checkpointer.alist(None, filter=query_3)] + search_results_3 = await run_on_background( + async_engine, consume_alist(None, filter=query_3) + ) assert len(search_results_3) == 3 - search_results_4 = [c async for c in checkpointer.alist(None, filter=query_4)] + search_results_4 = await run_on_background( + async_engine, consume_alist(None, filter=query_4) + ) assert len(search_results_4) == 0 # search by config (defaults to checkpoints across all namespaces) - search_results_5 = [ - c async for c in checkpointer.alist({"configurable": {"thread_id": "thread-2"}}) - ] + search_results_5 = await run_on_background( + async_engine, + consume_alist({"configurable": {"thread_id": "thread-2"}}, filter=None), + ) assert len(search_results_5) == 2 assert { 
search_results_5[0].config["configurable"]["checkpoint_ns"], @@ -351,6 +397,7 @@ def _llm_type(self) -> str: @pytest.mark.asyncio async def test_checkpoint_with_agent( + async_engine: PostgresEngine, checkpointer: AsyncPostgresSaver, ) -> None: # from the tests in https://github.com/langchain-ai/langgraph/blob/909190cede6a80bb94a2d4cfe7dedc49ef0d4127/libs/langgraph/tests/test_prebuilt.py @@ -358,8 +405,9 @@ async def test_checkpoint_with_agent( agent = create_react_agent(model, [], checkpointer=checkpointer) inputs = [HumanMessage("hi?")] - response = await agent.ainvoke( - {"messages": inputs}, config=thread_agent_config, debug=True + response = await run_on_background( + async_engine, + agent.ainvoke({"messages": inputs}, config=thread_agent_config, debug=True), ) expected_response = {"messages": inputs + [AIMessage(content="hi?", id="0")]} assert response == expected_response @@ -370,7 +418,9 @@ def _AnyIdHumanMessage(**kwargs: Any) -> HumanMessage: message.id = AnyStr() return message - saved = await checkpointer.aget_tuple(thread_agent_config) + saved = await run_on_background( + async_engine, checkpointer.aget_tuple(thread_agent_config) + ) assert saved is not None assert ( _AnyIdHumanMessage(content="hi?") @@ -390,6 +440,7 @@ def _AnyIdHumanMessage(**kwargs: Any) -> HumanMessage: @pytest.mark.asyncio async def test_checkpoint_aget_tuple( + async_engine: PostgresEngine, checkpointer: AsyncPostgresSaver, test_data: dict[str, Any], ) -> None: @@ -397,30 +448,48 @@ async def test_checkpoint_aget_tuple( checkpoints = test_data["checkpoints"] metadata = test_data["metadata"] - new_config = await checkpointer.aput(configs[1], checkpoints[1], metadata[0], {}) + new_config = await run_on_background( + async_engine, checkpointer.aput(configs[1], checkpoints[1], metadata[0], {}) + ) # Matching checkpoint - search_results_1 = await checkpointer.aget_tuple(new_config) + search_results_1 = await run_on_background( + async_engine, checkpointer.aget_tuple(new_config) + ) 
assert search_results_1.metadata == metadata[0] # type: ignore # No matching checkpoint - assert await checkpointer.aget_tuple(configs[0]) is None + assert ( + await run_on_background(async_engine, checkpointer.aget_tuple(configs[0])) + is None + ) @pytest.mark.asyncio async def test_metadata( + async_engine: PostgresEngine, checkpointer: AsyncPostgresSaver, test_data: dict[str, Any], ) -> None: - config = await checkpointer.aput( - test_data["configs"][0], - test_data["checkpoints"][0], - {"my_key": "abc"}, # type: ignore - {}, + # Wrap aput + config = await run_on_background( + async_engine, + checkpointer.aput( + test_data["configs"][0], + test_data["checkpoints"][0], + {"my_key": "abc"}, # type: ignore + {}, + ), + ) + tuple_result = await run_on_background( + async_engine, checkpointer.aget_tuple(config) + ) + assert tuple_result.metadata["my_key"] == "abc" # type: ignore + + async def consume_alist(config, filter): + return [c async for c in checkpointer.alist(config, filter=filter)] + + alist_results = await run_on_background( + async_engine, consume_alist(None, filter={"my_key": "abc"}) ) - assert (await checkpointer.aget_tuple(config)).metadata["my_key"] == "abc" # type: ignore - assert [c async for c in checkpointer.alist(None, filter={"my_key": "abc"})][ - 0 - ].metadata[ - "my_key" # type: ignore - ] == "abc" # type: ignore + assert alist_results[0].metadata["my_key"] == "abc" # type: ignore diff --git a/tests/test_async_loader.py b/tests/test_async_loader.py index c29a82f7..61316519 100644 --- a/tests/test_async_loader.py +++ b/tests/test_async_loader.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import asyncio import json import os import uuid +from typing import Any, Coroutine import pytest import pytest_asyncio @@ -34,10 +36,23 @@ table_name = "test-table" + str(uuid.uuid4()) +# Helper to bridge the Main Test Loop and the Engine Background Loop +async def run_on_background(engine: PostgresEngine, coro: Coroutine) -> Any: + """Runs a coroutine on the engine's background loop.""" + if engine._loop: + return await asyncio.wrap_future( + asyncio.run_coroutine_threadsafe(coro, engine._loop) + ) + return await coro + + async def aexecute(engine: PostgresEngine, query: str) -> None: - async with engine._pool.connect() as conn: - await conn.execute(text(query)) - await conn.commit() + async def _action(): + async with engine._pool.connect() as conn: + await conn.execute(text(query)) + await conn.commit() + + await run_on_background(engine, _action()) @pytest.mark.asyncio(scope="class") @@ -45,7 +60,6 @@ class TestLoaderAsync: @pytest_asyncio.fixture(scope="class") async def engine(self): - PostgresEngine._connector = None engine = await PostgresEngine.afrom_instance( project_id=project_id, instance=instance_id, @@ -56,37 +70,50 @@ async def engine(self): await engine.close() - async def _collect_async_items(self, docs_generator): - """Collects items from an async generator.""" - docs = [] - async for doc in docs_generator: - docs.append(doc) - return docs + async def _collect_async_items(self, engine, docs_generator): + """Collects items from an async generator, running on background loop.""" + + async def _consume(): + docs = [] + async for doc in docs_generator: + docs.append(doc) + return docs + + return await run_on_background(engine, _consume()) async def _cleanup_table(self, engine): await aexecute(engine, f'DROP TABLE IF EXISTS "{table_name}"') async def test_create_loader_with_invalid_parameters(self, engine): with pytest.raises(ValueError): - await AsyncPostgresLoader.create( - engine=engine, + await run_on_background( + engine, + 
AsyncPostgresLoader.create( + engine=engine, + ), ) with pytest.raises(ValueError): def fake_formatter(): return None - await AsyncPostgresLoader.create( - engine=engine, - table_name=table_name, - format="text", - formatter=fake_formatter, + await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + table_name=table_name, + format="text", + formatter=fake_formatter, + ), ) with pytest.raises(ValueError): - await AsyncPostgresLoader.create( - engine=engine, - table_name=table_name, - format="fake_format", + await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + table_name=table_name, + format="fake_format", + ), ) async def test_load_from_query_default(self, engine): @@ -110,12 +137,15 @@ async def test_load_from_query_default(self, engine): """ await aexecute(engine, insert_query) - loader = await AsyncPostgresLoader.create( - engine=engine, - table_name=table_name, + loader = await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + table_name=table_name, + ), ) - documents = await self._collect_async_items(loader.alazy_load()) + documents = await self._collect_async_items(engine, loader.alazy_load()) assert documents == [ Document( @@ -153,20 +183,23 @@ async def test_load_from_query_customized_content_customized_metadata(self, engi """ await aexecute(engine, insert_query) - loader = await AsyncPostgresLoader.create( - engine=engine, - query=f'SELECT * FROM "{table_name}";', - content_columns=[ - "fruit_name", - "variety", - "quantity_in_stock", - "price_per_unit", - "organic", - ], - metadata_columns=["fruit_id"], + loader = await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + query=f'SELECT * FROM "{table_name}";', + content_columns=[ + "fruit_name", + "variety", + "quantity_in_stock", + "price_per_unit", + "organic", + ], + metadata_columns=["fruit_id"], + ), ) - documents = await self._collect_async_items(loader.alazy_load()) + documents = await 
self._collect_async_items(engine, loader.alazy_load()) assert documents == [ Document( @@ -205,19 +238,20 @@ async def test_load_from_query_customized_content_default_metadata(self, engine) """ await aexecute(engine, insert_query) - loader = await AsyncPostgresLoader.create( - engine=engine, - query=f'SELECT * FROM "{table_name}";', - content_columns=[ - "variety", - "quantity_in_stock", - "price_per_unit", - ], + loader = await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + query=f'SELECT * FROM "{table_name}";', + content_columns=[ + "variety", + "quantity_in_stock", + "price_per_unit", + ], + ), ) - documents = [] - async for docs in loader.alazy_load(): - documents.append(docs) + documents = await self._collect_async_items(engine, loader.alazy_load()) assert documents == [ Document( @@ -230,18 +264,21 @@ async def test_load_from_query_customized_content_default_metadata(self, engine) ) ] - loader = await AsyncPostgresLoader.create( - engine=engine, - query=f'SELECT * FROM "{table_name}";', - content_columns=[ - "variety", - "quantity_in_stock", - "price_per_unit", - ], - format="JSON", + loader = await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + query=f'SELECT * FROM "{table_name}";', + content_columns=[ + "variety", + "quantity_in_stock", + "price_per_unit", + ], + format="JSON", + ), ) - documents = await self._collect_async_items(loader.alazy_load()) + documents = await self._collect_async_items(engine, loader.alazy_load()) assert documents == [ Document( @@ -280,13 +317,16 @@ async def test_load_from_query_default_content_customized_metadata(self, engine) """ await aexecute(engine, insert_query) - loader = await AsyncPostgresLoader.create( - engine=engine, - query=f'SELECT * FROM "{table_name}";', - metadata_columns=["fruit_name", "organic"], + loader = await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + query=f'SELECT * FROM "{table_name}";', + 
metadata_columns=["fruit_name", "organic"], + ), ) - documents = await self._collect_async_items(loader.alazy_load()) + documents = await self._collect_async_items(engine, loader.alazy_load()) assert documents == [ Document( @@ -317,16 +357,19 @@ async def test_load_from_query_with_langchain_metadata(self, engine): VALUES ('Apple', 'Granny Smith', 150, 1, '{metadata}');""" await aexecute(engine, insert_query) - loader = await AsyncPostgresLoader.create( - engine=engine, - query=f'SELECT * FROM "{table_name}";', - metadata_columns=[ - "fruit_name", - "langchain_metadata", - ], + loader = await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + query=f'SELECT * FROM "{table_name}";', + metadata_columns=[ + "fruit_name", + "langchain_metadata", + ], + ), ) - documents = await self._collect_async_items(loader.alazy_load()) + documents = await self._collect_async_items(engine, loader.alazy_load()) assert documents == [ Document( @@ -362,15 +405,18 @@ async def test_load_from_query_with_json(self, engine): VALUES ('Apple', '{variety}', 150, 1, '{metadata}');""" await aexecute(engine, insert_query) - loader = await AsyncPostgresLoader.create( - engine=engine, - query=f'SELECT * FROM "{table_name}";', - metadata_columns=[ - "variety", - ], + loader = await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + query=f'SELECT * FROM "{table_name}";', + metadata_columns=[ + "variety", + ], + ), ) - documents = await self._collect_async_items(loader.alazy_load()) + documents = await self._collect_async_items(engine, loader.alazy_load()) assert documents == [ Document( @@ -411,18 +457,21 @@ def my_formatter(row, content_columns): str(row[column]) for column in content_columns if column in row ) - loader = await AsyncPostgresLoader.create( - engine=engine, - query=f'SELECT * FROM "{table_name}";', - content_columns=[ - "variety", - "quantity_in_stock", - "price_per_unit", - ], - formatter=my_formatter, + loader = await 
run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + query=f'SELECT * FROM "{table_name}";', + content_columns=[ + "variety", + "quantity_in_stock", + "price_per_unit", + ], + formatter=my_formatter, + ), ) - documents = await self._collect_async_items(loader.alazy_load()) + documents = await self._collect_async_items(engine, loader.alazy_load()) assert documents == [ Document( @@ -458,18 +507,21 @@ async def test_load_from_query_customized_content_default_metadata_custom_page_c """ await aexecute(engine, insert_query) - loader = await AsyncPostgresLoader.create( - engine=engine, - query=f'SELECT * FROM "{table_name}";', - content_columns=[ - "variety", - "quantity_in_stock", - "price_per_unit", - ], - format="YAML", + loader = await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + query=f'SELECT * FROM "{table_name}";', + content_columns=[ + "variety", + "quantity_in_stock", + "price_per_unit", + ], + format="YAML", + ), ) - documents = await self._collect_async_items(loader.alazy_load()) + documents = await self._collect_async_items(engine, loader.alazy_load()) assert documents == [ Document( @@ -487,7 +539,7 @@ async def test_load_from_query_customized_content_default_metadata_custom_page_c async def test_save_doc_with_default_metadata(self, engine): await self._cleanup_table(engine) - await engine._ainit_document_table(table_name) + await run_on_background(engine, engine._ainit_document_table(table_name)) test_docs = [ Document( page_content="Apple Granny Smith 150 0.99 1", @@ -502,16 +554,21 @@ async def test_save_doc_with_default_metadata(self, engine): metadata={"fruit_id": 3}, ), ] - saver = await AsyncPostgresDocumentSaver.create( - engine=engine, table_name=table_name + saver = await run_on_background( + engine, + AsyncPostgresDocumentSaver.create(engine=engine, table_name=table_name), + ) + loader = await run_on_background( + engine, AsyncPostgresLoader.create(engine=engine, table_name=table_name) ) - 
loader = await AsyncPostgresLoader.create(engine=engine, table_name=table_name) - await saver.aadd_documents(test_docs) - docs = await self._collect_async_items(loader.alazy_load()) + await run_on_background(engine, saver.aadd_documents(test_docs)) + docs = await self._collect_async_items(engine, loader.alazy_load()) assert docs == test_docs - assert (await engine._aload_table_schema(table_name)).columns.keys() == [ + + schema = await run_on_background(engine, engine._aload_table_schema(table_name)) + assert schema.columns.keys() == [ "page_content", "langchain_metadata", ] @@ -520,13 +577,16 @@ async def test_save_doc_with_default_metadata(self, engine): @pytest.mark.parametrize("store_metadata", [True, False]) async def test_save_doc_with_customized_metadata(self, engine, store_metadata): table_name = "test-table" + str(uuid.uuid4()) - await engine._ainit_document_table( - table_name, - metadata_columns=[ - Column("fruit_name", "VARCHAR"), - Column("organic", "BOOLEAN"), - ], - store_metadata=store_metadata, + await run_on_background( + engine, + engine._ainit_document_table( + table_name, + metadata_columns=[ + Column("fruit_name", "VARCHAR"), + Column("organic", "BOOLEAN"), + ], + store_metadata=store_metadata, + ), ) test_docs = [ Document( @@ -538,24 +598,30 @@ async def test_save_doc_with_customized_metadata(self, engine, store_metadata): }, ), ] - saver = await AsyncPostgresDocumentSaver.create( - engine=engine, table_name=table_name + saver = await run_on_background( + engine, + AsyncPostgresDocumentSaver.create(engine=engine, table_name=table_name), ) - loader = await AsyncPostgresLoader.create( - engine=engine, - table_name=table_name, - metadata_columns=[ - "fruit_name", - "organic", - ], + loader = await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + table_name=table_name, + metadata_columns=[ + "fruit_name", + "organic", + ], + ), ) - await saver.aadd_documents(test_docs) - docs = await 
self._collect_async_items(loader.alazy_load()) + await run_on_background(engine, saver.aadd_documents(test_docs)) + docs = await self._collect_async_items(engine, loader.alazy_load()) + + schema = await run_on_background(engine, engine._aload_table_schema(table_name)) if store_metadata: docs == test_docs - assert (await engine._aload_table_schema(table_name)).columns.keys() == [ + assert schema.columns.keys() == [ "page_content", "fruit_name", "organic", @@ -568,7 +634,7 @@ async def test_save_doc_with_customized_metadata(self, engine, store_metadata): metadata={"fruit_name": "Apple", "organic": True}, ), ] - assert (await engine._aload_table_schema(table_name)).columns.keys() == [ + assert schema.columns.keys() == [ "page_content", "fruit_name", "organic", @@ -577,7 +643,9 @@ async def test_save_doc_with_customized_metadata(self, engine, store_metadata): async def test_save_doc_without_metadata(self, engine): table_name = "test-table" + str(uuid.uuid4()) - await engine._ainit_document_table(table_name, store_metadata=False) + await run_on_background( + engine, engine._ainit_document_table(table_name, store_metadata=False) + ) test_docs = [ Document( page_content="Granny Smith 150 0.99", @@ -588,17 +656,21 @@ async def test_save_doc_without_metadata(self, engine): }, ), ] - saver = await AsyncPostgresDocumentSaver.create( - engine=engine, table_name=table_name + saver = await run_on_background( + engine, + AsyncPostgresDocumentSaver.create(engine=engine, table_name=table_name), ) - await saver.aadd_documents(test_docs) + await run_on_background(engine, saver.aadd_documents(test_docs)) - loader = await AsyncPostgresLoader.create( - engine=engine, - table_name=table_name, + loader = await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + table_name=table_name, + ), ) - docs = await self._collect_async_items(loader.alazy_load()) + docs = await self._collect_async_items(engine, loader.alazy_load()) assert docs == [ Document( @@ -606,14 
+678,15 @@ async def test_save_doc_without_metadata(self, engine): metadata={}, ), ] - assert (await engine._aload_table_schema(table_name)).columns.keys() == [ + schema = await run_on_background(engine, engine._aload_table_schema(table_name)) + assert schema.columns.keys() == [ "page_content", ] await aexecute(engine, f'DROP TABLE IF EXISTS "{table_name}"') async def test_delete_doc_with_default_metadata(self, engine): table_name = "test-table" + str(uuid.uuid4()) - await engine._ainit_document_table(table_name) + await run_on_background(engine, engine._ainit_document_table(table_name)) test_docs = [ Document( @@ -625,37 +698,43 @@ async def test_delete_doc_with_default_metadata(self, engine): metadata={"fruit_id": 2}, ), ] - saver = await AsyncPostgresDocumentSaver.create( - engine=engine, table_name=table_name + saver = await run_on_background( + engine, + AsyncPostgresDocumentSaver.create(engine=engine, table_name=table_name), + ) + loader = await run_on_background( + engine, AsyncPostgresLoader.create(engine=engine, table_name=table_name) ) - loader = await AsyncPostgresLoader.create(engine=engine, table_name=table_name) - await saver.aadd_documents(test_docs) - docs = await self._collect_async_items(loader.alazy_load()) + await run_on_background(engine, saver.aadd_documents(test_docs)) + docs = await self._collect_async_items(engine, loader.alazy_load()) assert docs == test_docs - await saver.adelete(docs[:1]) - assert len(await self._collect_async_items(loader.alazy_load())) == 1 + await run_on_background(engine, saver.adelete(docs[:1])) + assert len(await self._collect_async_items(engine, loader.alazy_load())) == 1 - await saver.adelete(docs) - assert len(await self._collect_async_items(loader.alazy_load())) == 0 + await run_on_background(engine, saver.adelete(docs)) + assert len(await self._collect_async_items(engine, loader.alazy_load())) == 0 await aexecute(engine, f'DROP TABLE IF EXISTS "{table_name}"') async def test_delete_doc_with_query(self, 
engine): await self._cleanup_table(engine) - await engine._ainit_document_table( - table_name, - metadata_columns=[ - Column( - "fruit_name", - "VARCHAR", - ), - Column( - "organic", - "BOOLEAN", - ), - ], - store_metadata=True, + await run_on_background( + engine, + engine._ainit_document_table( + table_name, + metadata_columns=[ + Column( + "fruit_name", + "VARCHAR", + ), + Column( + "organic", + "BOOLEAN", + ), + ], + store_metadata=True, + ), ) test_docs = [ @@ -684,18 +763,21 @@ async def test_delete_doc_with_query(self, engine): }, ), ] - saver = await AsyncPostgresDocumentSaver.create( - engine=engine, table_name=table_name + saver = await run_on_background( + engine, + AsyncPostgresDocumentSaver.create(engine=engine, table_name=table_name), ) query = f"SELECT * FROM \"{table_name}\" WHERE fruit_name='Apple';" - loader = await AsyncPostgresLoader.create(engine=engine, query=query) + loader = await run_on_background( + engine, AsyncPostgresLoader.create(engine=engine, query=query) + ) - await saver.aadd_documents(test_docs) - docs = await self._collect_async_items(loader.alazy_load()) + await run_on_background(engine, saver.aadd_documents(test_docs)) + docs = await self._collect_async_items(engine, loader.alazy_load()) assert len(docs) == 1 - await saver.adelete(docs) - assert len(await self._collect_async_items(loader.alazy_load())) == 0 + await run_on_background(engine, saver.adelete(docs)) + assert len(await self._collect_async_items(engine, loader.alazy_load())) == 0 await self._cleanup_table(engine) @pytest.mark.parametrize("metadata_json_column", [None, "metadata_col_test"]) @@ -704,14 +786,17 @@ async def test_delete_doc_with_customized_metadata( ): table_name = "test-table" + str(uuid.uuid4()) content_column = "content_col_test" - await engine._ainit_document_table( - table_name, - metadata_columns=[ - Column("fruit_name", "VARCHAR"), - Column("organic", "BOOLEAN"), - ], - content_column=content_column, - metadata_json_column=metadata_json_column, + 
await run_on_background( + engine, + engine._ainit_document_table( + table_name, + metadata_columns=[ + Column("fruit_name", "VARCHAR"), + Column("organic", "BOOLEAN"), + ], + content_column=content_column, + metadata_json_column=metadata_json_column, + ), ) test_docs = [ Document( @@ -731,27 +816,33 @@ async def test_delete_doc_with_customized_metadata( }, ), ] - saver = await AsyncPostgresDocumentSaver.create( - engine=engine, - table_name=table_name, - content_column=content_column, - metadata_json_column=metadata_json_column, + saver = await run_on_background( + engine, + AsyncPostgresDocumentSaver.create( + engine=engine, + table_name=table_name, + content_column=content_column, + metadata_json_column=metadata_json_column, + ), ) - loader = await AsyncPostgresLoader.create( - engine=engine, - table_name=table_name, - content_columns=[content_column], - metadata_json_column=metadata_json_column, + loader = await run_on_background( + engine, + AsyncPostgresLoader.create( + engine=engine, + table_name=table_name, + content_columns=[content_column], + metadata_json_column=metadata_json_column, + ), ) - await saver.aadd_documents(test_docs) + await run_on_background(engine, saver.aadd_documents(test_docs)) - docs = await loader.aload() + docs = await run_on_background(engine, loader.aload()) assert len(docs) == 2 - await saver.adelete(docs[:1]) - assert len(await self._collect_async_items(loader.alazy_load())) == 1 + await run_on_background(engine, saver.adelete(docs[:1])) + assert len(await self._collect_async_items(engine, loader.alazy_load())) == 1 - await saver.adelete(docs) - assert len(await self._collect_async_items(loader.alazy_load())) == 0 + await run_on_background(engine, saver.adelete(docs)) + assert len(await self._collect_async_items(engine, loader.alazy_load())) == 0 await aexecute(engine, f'DROP TABLE IF EXISTS "{table_name}"') diff --git a/tests/test_async_vectorstore.py b/tests/test_async_vectorstore.py index 12fb6506..6bcd58f5 100644 --- 
a/tests/test_async_vectorstore.py +++ b/tests/test_async_vectorstore.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import asyncio import os import uuid -from typing import Sequence +from typing import Any, Coroutine, Sequence import pytest import pytest_asyncio @@ -28,7 +29,7 @@ DEFAULT_TABLE = "test_table" + str(uuid.uuid4()) DEFAULT_TABLE_SYNC = "test_table_sync" + str(uuid.uuid4()) -CUSTOM_TABLE = "test-table-custom" + str(uuid.uuid4()) +CUSTOM_TABLE = "table-custom" + str(uuid.uuid4()) VECTOR_SIZE = 768 embeddings_service = DeterministicFakeEmbedding(size=VECTOR_SIZE) @@ -50,18 +51,35 @@ def get_env_var(key: str, desc: str) -> str: return v +# Helper to bridge the Main Test Loop and the Engine Background Loop +async def run_on_background(engine: PostgresEngine, coro: Coroutine) -> Any: + """Runs a coroutine on the engine's background loop.""" + if engine._loop: + return await asyncio.wrap_future( + asyncio.run_coroutine_threadsafe(coro, engine._loop) + ) + return await coro + + async def aexecute(engine: PostgresEngine, query: str) -> None: - async with engine._pool.connect() as conn: - await conn.execute(text(query)) - await conn.commit() + async def _impl(): + async with engine._pool.connect() as conn: + await conn.execute(text(query)) + await conn.commit() + + # Run on background loop + await run_on_background(engine, _impl()) async def afetch(engine: PostgresEngine, query: str) -> Sequence[RowMapping]: - async with engine._pool.connect() as conn: - result = await conn.execute(text(query)) - result_map = result.mappings() - result_fetch = result_map.fetchall() - return result_fetch + async def _impl(): + async with engine._pool.connect() as conn: + result = await conn.execute(text(query)) + result_map = result.mappings() + return result_map.fetchall() + + # Run on background loop + return await run_on_background(engine, _impl()) @pytest.mark.asyncio(scope="class") @@ -98,34 
+116,50 @@ async def engine(self, db_project, db_region, db_instance, db_name): @pytest_asyncio.fixture(scope="class") async def vs(self, engine): - await engine._ainit_vectorstore_table(DEFAULT_TABLE, VECTOR_SIZE) - vs = await AsyncPostgresVectorStore.create( + # Wrap private init method + await run_on_background( + engine, engine._ainit_vectorstore_table(DEFAULT_TABLE, VECTOR_SIZE) + ) + # Wrap creation of the async vectorstore + vs = await run_on_background( engine, - embedding_service=embeddings_service, - table_name=DEFAULT_TABLE, + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=DEFAULT_TABLE, + ), ) yield vs @pytest_asyncio.fixture(scope="class") async def vs_custom(self, engine): - await engine._ainit_vectorstore_table( - CUSTOM_TABLE, - VECTOR_SIZE, - id_column="myid", - content_column="mycontent", - embedding_column="myembedding", - metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], - metadata_json_column="mymeta", + # Wrap private init method + await run_on_background( + engine, + engine._ainit_vectorstore_table( + CUSTOM_TABLE, + VECTOR_SIZE, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], + metadata_json_column="mymeta", + ), ) - vs = await AsyncPostgresVectorStore.create( + + # Wrap creation of the async vectorstore + vs = await run_on_background( engine, - embedding_service=embeddings_service, - table_name=CUSTOM_TABLE, - id_column="myid", - content_column="mycontent", - embedding_column="myembedding", - metadata_columns=["page", "source"], - metadata_json_column="mymeta", + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=CUSTOM_TABLE, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=["page", "source"], + metadata_json_column="mymeta", + ), ) yield vs @@ -144,32 +178,44 @@ async 
def test_init_with_constructor(self, engine): async def test_post_init(self, engine): with pytest.raises(ValueError): - await AsyncPostgresVectorStore.create( + await run_on_background( engine, - embedding_service=embeddings_service, - table_name=CUSTOM_TABLE, - id_column="myid", - content_column="noname", - embedding_column="myembedding", - metadata_columns=["page", "source"], - metadata_json_column="mymeta", + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=CUSTOM_TABLE, + id_column="myid", + content_column="noname", + embedding_column="myembedding", + metadata_columns=["page", "source"], + metadata_json_column="mymeta", + ), ) async def test_id_metadata_column(self, engine): table_name = "id_metadata" + str(uuid.uuid4()) - await engine._ainit_vectorstore_table( - table_name, - VECTOR_SIZE, - metadata_columns=[Column("id", "TEXT")], + await run_on_background( + engine, + engine._ainit_vectorstore_table( + table_name, + VECTOR_SIZE, + metadata_columns=[Column("id", "TEXT")], + ), ) - custom_vs = await AsyncPostgresVectorStore.create( + custom_vs = await run_on_background( engine, - embedding_service=embeddings_service, - table_name=table_name, - metadata_columns=["id"], + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=table_name, + metadata_columns=["id"], + ), ) ids = [str(uuid.uuid4()) for i in range(len(texts))] - await custom_vs.aadd_texts(texts, id_column_as_metadata, ids) + # Wrap aadd_texts + await run_on_background( + engine, custom_vs.aadd_texts(texts, id_column_as_metadata, ids) + ) results = await afetch(engine, f'SELECT * FROM "{table_name}"') assert len(results) == 3 @@ -180,12 +226,14 @@ async def test_id_metadata_column(self, engine): async def test_aadd_texts(self, engine, vs): ids = [str(uuid.uuid4()) for i in range(len(texts))] - await vs.aadd_texts(texts, ids=ids) + # Wrap aadd_texts + await run_on_background(engine, vs.aadd_texts(texts, ids=ids)) 
results = await afetch(engine, f'SELECT * FROM "{DEFAULT_TABLE}"') assert len(results) == 3 ids = [str(uuid.uuid4()) for i in range(len(texts))] - await vs.aadd_texts(texts, metadatas, ids) + # Wrap aadd_texts + await run_on_background(engine, vs.aadd_texts(texts, metadatas, ids)) results = await afetch(engine, f'SELECT * FROM "{DEFAULT_TABLE}"') assert len(results) == 6 await aexecute(engine, f'TRUNCATE TABLE "{DEFAULT_TABLE}"') @@ -193,42 +241,43 @@ async def test_aadd_texts(self, engine, vs): async def test_aadd_texts_edge_cases(self, engine, vs): texts = ["Taylor's", '"Swift"', "best-friend"] ids = [str(uuid.uuid4()) for i in range(len(texts))] - await vs.aadd_texts(texts, ids=ids) + # Wrap aadd_texts + await run_on_background(engine, vs.aadd_texts(texts, ids=ids)) results = await afetch(engine, f'SELECT * FROM "{DEFAULT_TABLE}"') assert len(results) == 3 await aexecute(engine, f'TRUNCATE TABLE "{DEFAULT_TABLE}"') async def test_aadd_docs(self, engine, vs): ids = [str(uuid.uuid4()) for i in range(len(texts))] - await vs.aadd_documents(docs, ids=ids) + # Wrap aadd_documents + await run_on_background(engine, vs.aadd_documents(docs, ids=ids)) results = await afetch(engine, f'SELECT * FROM "{DEFAULT_TABLE}"') assert len(results) == 3 await aexecute(engine, f'TRUNCATE TABLE "{DEFAULT_TABLE}"') async def test_aadd_docs_no_ids(self, engine, vs): - await vs.aadd_documents(docs) + # Wrap aadd_documents + await run_on_background(engine, vs.aadd_documents(docs)) results = await afetch(engine, f'SELECT * FROM "{DEFAULT_TABLE}"') assert len(results) == 3 await aexecute(engine, f'TRUNCATE TABLE "{DEFAULT_TABLE}"') async def test_adelete(self, engine, vs): ids = [str(uuid.uuid4()) for i in range(len(texts))] - await vs.aadd_texts(texts, ids=ids) + await run_on_background(engine, vs.aadd_texts(texts, ids=ids)) results = await afetch(engine, f'SELECT * FROM "{DEFAULT_TABLE}"') assert len(results) == 3 - # delete an ID - await vs.adelete([ids[0]]) + await 
run_on_background(engine, vs.adelete([ids[0]])) results = await afetch(engine, f'SELECT * FROM "{DEFAULT_TABLE}"') assert len(results) == 2 - # delete with no ids - result = await vs.adelete() + result = await run_on_background(engine, vs.adelete()) assert result == False ##### Custom Vector Store ##### async def test_aadd_texts_custom(self, engine, vs_custom): ids = [str(uuid.uuid4()) for i in range(len(texts))] - await vs_custom.aadd_texts(texts, ids=ids) + await run_on_background(engine, vs_custom.aadd_texts(texts, ids=ids)) results = await afetch(engine, f'SELECT * FROM "{CUSTOM_TABLE}"') assert len(results) == 3 assert results[0]["mycontent"] == "foo" @@ -237,7 +286,7 @@ async def test_aadd_texts_custom(self, engine, vs_custom): assert results[0]["source"] is None ids = [str(uuid.uuid4()) for i in range(len(texts))] - await vs_custom.aadd_texts(texts, metadatas, ids) + await run_on_background(engine, vs_custom.aadd_texts(texts, metadatas, ids)) results = await afetch(engine, f'SELECT * FROM "{CUSTOM_TABLE}"') assert len(results) == 6 await aexecute(engine, f'TRUNCATE TABLE "{CUSTOM_TABLE}"') @@ -251,7 +300,7 @@ async def test_aadd_docs_custom(self, engine, vs_custom): ) for i in range(len(texts)) ] - await vs_custom.aadd_documents(docs, ids=ids) + await run_on_background(engine, vs_custom.aadd_documents(docs, ids=ids)) results = await afetch(engine, f'SELECT * FROM "{CUSTOM_TABLE}"') assert len(results) == 3 @@ -263,13 +312,12 @@ async def test_aadd_docs_custom(self, engine, vs_custom): async def test_adelete_custom(self, engine, vs_custom): ids = [str(uuid.uuid4()) for i in range(len(texts))] - await vs_custom.aadd_texts(texts, ids=ids) + await run_on_background(engine, vs_custom.aadd_texts(texts, ids=ids)) results = await afetch(engine, f'SELECT * FROM "{CUSTOM_TABLE}"') content = [result["mycontent"] for result in results] assert len(results) == 3 assert "foo" in content - # delete an ID - await vs_custom.adelete([ids[0]]) + await run_on_background(engine, 
vs_custom.adelete([ids[0]])) results = await afetch(engine, f'SELECT * FROM "{CUSTOM_TABLE}"') content = [result["mycontent"] for result in results] assert len(results) == 2 @@ -277,90 +325,111 @@ async def test_adelete_custom(self, engine, vs_custom): async def test_ignore_metadata_columns(self, engine): column_to_ignore = "source" - vs = await AsyncPostgresVectorStore.create( + vs = await run_on_background( engine, - embedding_service=embeddings_service, - table_name=CUSTOM_TABLE, - ignore_metadata_columns=[column_to_ignore], - id_column="myid", - content_column="mycontent", - embedding_column="myembedding", - metadata_json_column="mymeta", - ) - assert column_to_ignore not in vs.metadata_columns - - async def test_create_vectorstore_with_invalid_parameters_1(self, engine): - with pytest.raises(ValueError): - await AsyncPostgresVectorStore.create( + AsyncPostgresVectorStore.create( engine, embedding_service=embeddings_service, table_name=CUSTOM_TABLE, + ignore_metadata_columns=[column_to_ignore], id_column="myid", content_column="mycontent", embedding_column="myembedding", - metadata_columns=["random_column"], # invalid metadata column + metadata_json_column="mymeta", + ), + ) + assert column_to_ignore not in vs.metadata_columns + + async def test_create_vectorstore_with_invalid_parameters_1(self, engine): + with pytest.raises(ValueError): + await run_on_background( + engine, + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=CUSTOM_TABLE, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=["random_column"], # invalid metadata column + ), ) async def test_create_vectorstore_with_invalid_parameters_2(self, engine): with pytest.raises(ValueError): - await AsyncPostgresVectorStore.create( + await run_on_background( engine, - embedding_service=embeddings_service, - table_name=CUSTOM_TABLE, - id_column="myid", - content_column="langchain_id", # invalid content column 
type - embedding_column="myembedding", - metadata_columns=["random_column"], + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=CUSTOM_TABLE, + id_column="myid", + content_column="langchain_id", # invalid content column type + embedding_column="myembedding", + metadata_columns=["random_column"], + ), ) async def test_create_vectorstore_with_invalid_parameters_3(self, engine): with pytest.raises(ValueError): - await AsyncPostgresVectorStore.create( + await run_on_background( engine, - embedding_service=embeddings_service, - table_name=CUSTOM_TABLE, - id_column="myid", - content_column="mycontent", - embedding_column="random_column", # invalid embedding column - metadata_columns=["random_column"], + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=CUSTOM_TABLE, + id_column="myid", + content_column="mycontent", + embedding_column="random_column", # invalid embedding column + metadata_columns=["random_column"], + ), ) async def test_create_vectorstore_with_invalid_parameters_4(self, engine): with pytest.raises(ValueError): - await AsyncPostgresVectorStore.create( + await run_on_background( engine, - embedding_service=embeddings_service, - table_name=CUSTOM_TABLE, - id_column="myid", - content_column="mycontent", - embedding_column="langchain_id", # invalid embedding column data type - metadata_columns=["random_column"], + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=CUSTOM_TABLE, + id_column="myid", + content_column="mycontent", + embedding_column="langchain_id", # invalid embedding column data type + metadata_columns=["random_column"], + ), ) async def test_create_vectorstore_with_invalid_parameters_5(self, engine): with pytest.raises(ValueError): - await AsyncPostgresVectorStore.create( + await run_on_background( engine, - embedding_service=embeddings_service, - table_name=CUSTOM_TABLE, - id_column="myid", - 
content_column="mycontent", - embedding_column="langchain_id", - metadata_columns=["random_column"], - ignore_metadata_columns=[ - "one", - "two", - ], # invalid use of metadata_columns and ignore columns + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=CUSTOM_TABLE, + id_column="myid", + content_column="mycontent", + embedding_column="langchain_id", + metadata_columns=["random_column"], + ignore_metadata_columns=[ + "one", + "two", + ], # invalid use of metadata_columns and ignore columns + ), ) async def test_create_vectorstore_with_init(self, engine): with pytest.raises(Exception): - await AsyncPostgresVectorStore( - engine._pool, - embedding_service=embeddings_service, - table_name=CUSTOM_TABLE, - id_column="myid", - content_column="mycontent", - embedding_column="myembedding", - metadata_columns=["random_column"], # invalid metadata column + await run_on_background( + engine, + AsyncPostgresVectorStore( + engine._pool, + embedding_service=embeddings_service, + table_name=CUSTOM_TABLE, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=["random_column"], # invalid metadata column + ), ) diff --git a/tests/test_async_vectorstore_from_methods.py b/tests/test_async_vectorstore_from_methods.py index 59274f6a..aeba3995 100644 --- a/tests/test_async_vectorstore_from_methods.py +++ b/tests/test_async_vectorstore_from_methods.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import asyncio import os import uuid -from typing import Sequence +from typing import Any, Coroutine, Sequence import pytest import pytest_asyncio @@ -29,9 +30,7 @@ DEFAULT_TABLE = "test_table" + str(uuid.uuid4()).replace("-", "_") DEFAULT_TABLE_SYNC = "test_table_sync" + str(uuid.uuid4()).replace("-", "_") CUSTOM_TABLE = "test_table_custom" + str(uuid.uuid4()).replace("-", "_") -CUSTOM_TABLE_WITH_INT_ID = "test_table_custom_with_int_it" + str(uuid.uuid4()).replace( - "-", "_" -) +CUSTOM_TABLE_WITH_INT_ID = "custom_int" + str(uuid.uuid4()).replace("-", "_") VECTOR_SIZE = 768 @@ -53,18 +52,33 @@ def get_env_var(key: str, desc: str) -> str: return v +# Helper to bridge the Main Test Loop and the Engine Background Loop +async def run_on_background(engine: PostgresEngine, coro: Coroutine) -> Any: + """Runs a coroutine on the engine's background loop.""" + if engine._loop: + return await asyncio.wrap_future( + asyncio.run_coroutine_threadsafe(coro, engine._loop) + ) + return await coro + + async def aexecute(engine: PostgresEngine, query: str) -> None: - async with engine._pool.connect() as conn: - await conn.execute(text(query)) - await conn.commit() + async def _impl(): + async with engine._pool.connect() as conn: + await conn.execute(text(query)) + await conn.commit() + + await run_on_background(engine, _impl()) async def afetch(engine: PostgresEngine, query: str) -> Sequence[RowMapping]: - async with engine._pool.connect() as conn: - result = await conn.execute(text(query)) - result_map = result.mappings() - result_fetch = result_map.fetchall() - return result_fetch + async def _impl(): + async with engine._pool.connect() as conn: + result = await conn.execute(text(query)) + result_map = result.mappings() + return result_map.fetchall() + + return await run_on_background(engine, _impl()) @pytest.mark.asyncio @@ -93,24 +107,34 @@ async def engine(self, db_project, db_region, db_instance, db_name): region=db_region, database=db_name, ) - await 
engine._ainit_vectorstore_table(DEFAULT_TABLE, VECTOR_SIZE) - await engine._ainit_vectorstore_table( - CUSTOM_TABLE, - VECTOR_SIZE, - id_column="myid", - content_column="mycontent", - embedding_column="myembedding", - metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], - store_metadata=False, + await run_on_background( + engine, engine._ainit_vectorstore_table(DEFAULT_TABLE, VECTOR_SIZE) + ) + await run_on_background( + engine, + engine._ainit_vectorstore_table( + CUSTOM_TABLE, + VECTOR_SIZE, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], + store_metadata=False, + ), ) - await engine._ainit_vectorstore_table( - CUSTOM_TABLE_WITH_INT_ID, - VECTOR_SIZE, - id_column=Column(name="integer_id", data_type="INTEGER", nullable="False"), - content_column="mycontent", - embedding_column="myembedding", - metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], - store_metadata=False, + await run_on_background( + engine, + engine._ainit_vectorstore_table( + CUSTOM_TABLE_WITH_INT_ID, + VECTOR_SIZE, + id_column=Column( + name="integer_id", data_type="INTEGER", nullable="False" + ), + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], + store_metadata=False, + ), ) yield engine await aexecute(engine, f"DROP TABLE IF EXISTS {DEFAULT_TABLE}") @@ -120,13 +144,16 @@ async def engine(self, db_project, db_region, db_instance, db_name): async def test_afrom_texts(self, engine): ids = [str(uuid.uuid4()) for i in range(len(texts))] - await AsyncPostgresVectorStore.afrom_texts( - texts, - embeddings_service, + await run_on_background( engine, - DEFAULT_TABLE, - metadatas=metadatas, - ids=ids, + AsyncPostgresVectorStore.afrom_texts( + texts, + embeddings_service, + engine, + DEFAULT_TABLE, + metadatas=metadatas, + ids=ids, + ), ) results = await afetch(engine, f"SELECT * FROM 
{DEFAULT_TABLE}") assert len(results) == 3 @@ -134,12 +161,15 @@ async def test_afrom_texts(self, engine): async def test_afrom_docs(self, engine): ids = [str(uuid.uuid4()) for i in range(len(texts))] - await AsyncPostgresVectorStore.afrom_documents( - docs, - embeddings_service, + await run_on_background( engine, - DEFAULT_TABLE, - ids=ids, + AsyncPostgresVectorStore.afrom_documents( + docs, + embeddings_service, + engine, + DEFAULT_TABLE, + ids=ids, + ), ) results = await afetch(engine, f"SELECT * FROM {DEFAULT_TABLE}") assert len(results) == 3 @@ -147,16 +177,19 @@ async def test_afrom_docs(self, engine): async def test_afrom_texts_custom(self, engine): ids = [str(uuid.uuid4()) for i in range(len(texts))] - await AsyncPostgresVectorStore.afrom_texts( - texts, - embeddings_service, + await run_on_background( engine, - CUSTOM_TABLE, - ids=ids, - id_column="myid", - content_column="mycontent", - embedding_column="myembedding", - metadata_columns=["page", "source"], + AsyncPostgresVectorStore.afrom_texts( + texts, + embeddings_service, + engine, + CUSTOM_TABLE, + ids=ids, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=["page", "source"], + ), ) results = await afetch(engine, f"SELECT * FROM {CUSTOM_TABLE}") assert len(results) == 3 @@ -174,16 +207,19 @@ async def test_afrom_docs_custom(self, engine): ) for i in range(len(texts)) ] - await AsyncPostgresVectorStore.afrom_documents( - docs, - embeddings_service, + await run_on_background( engine, - CUSTOM_TABLE, - ids=ids, - id_column="myid", - content_column="mycontent", - embedding_column="myembedding", - metadata_columns=["page", "source"], + AsyncPostgresVectorStore.afrom_documents( + docs, + embeddings_service, + engine, + CUSTOM_TABLE, + ids=ids, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=["page", "source"], + ), ) results = await afetch(engine, f"SELECT * FROM {CUSTOM_TABLE}") @@ -203,16 +239,19 @@ 
async def test_afrom_docs_custom_with_int_id(self, engine): ) for i in range(len(texts)) ] - await AsyncPostgresVectorStore.afrom_documents( - docs, - embeddings_service, + await run_on_background( engine, - CUSTOM_TABLE_WITH_INT_ID, - ids=ids, - id_column="integer_id", - content_column="mycontent", - embedding_column="myembedding", - metadata_columns=["page", "source"], + AsyncPostgresVectorStore.afrom_documents( + docs, + embeddings_service, + engine, + CUSTOM_TABLE_WITH_INT_ID, + ids=ids, + id_column="integer_id", + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=["page", "source"], + ), ) results = await afetch(engine, f"SELECT * FROM {CUSTOM_TABLE_WITH_INT_ID}") diff --git a/tests/test_async_vectorstore_index.py b/tests/test_async_vectorstore_index.py index 68bc4e72..be61a9fa 100644 --- a/tests/test_async_vectorstore_index.py +++ b/tests/test_async_vectorstore_index.py @@ -13,9 +13,10 @@ # limitations under the License. +import asyncio import os -import sys import uuid +from typing import Any, Coroutine import pytest import pytest_asyncio @@ -23,7 +24,10 @@ from langchain_core.embeddings import DeterministicFakeEmbedding from sqlalchemy import text -from langchain_google_cloud_sql_pg import PostgresEngine +from langchain_google_cloud_sql_pg import ( # type: ignore + HybridSearchConfig, + PostgresEngine, +) from langchain_google_cloud_sql_pg.async_vectorstore import AsyncPostgresVectorStore from langchain_google_cloud_sql_pg.indexes import ( DEFAULT_INDEX_NAME_SUFFIX, @@ -32,9 +36,11 @@ IVFFlatIndex, ) -DEFAULT_TABLE = "test_table" + str(uuid.uuid4()).replace("-", "_") -CUSTOM_TABLE = "test_table_custom" + str(uuid.uuid4()).replace("-", "_") -DEFAULT_INDEX_NAME = DEFAULT_TABLE + DEFAULT_INDEX_NAME_SUFFIX +UUID_STR = str(uuid.uuid4()).replace("-", "_") +DEFAULT_TABLE = "table" + UUID_STR +SIMPLE_TABLE = "simple" + UUID_STR +DEFAULT_HYBRID_TABLE = "hybrid" + UUID_STR +DEFAULT_INDEX_NAME = DEFAULT_INDEX_NAME_SUFFIX + UUID_STR 
VECTOR_SIZE = 768 embeddings_service = DeterministicFakeEmbedding(size=VECTOR_SIZE) @@ -56,10 +62,23 @@ def get_env_var(key: str, desc: str) -> str: return v +# Helper to bridge the Main Test Loop and the Engine Background Loop +async def run_on_background(engine: PostgresEngine, coro: Coroutine) -> Any: + """Runs a coroutine on the engine's background loop.""" + if engine._loop: + return await asyncio.wrap_future( + asyncio.run_coroutine_threadsafe(coro, engine._loop) + ) + return await coro + + async def aexecute(engine: PostgresEngine, query: str) -> None: - async with engine._pool.connect() as conn: - await conn.execute(text(query)) - await conn.commit() + async def _impl(): + async with engine._pool.connect() as conn: + await conn.execute(text(query)) + await conn.commit() + + await run_on_background(engine, _impl()) @pytest.mark.asyncio(scope="class") @@ -90,54 +109,159 @@ async def engine(self, db_project, db_region, db_instance, db_name): ) yield engine await aexecute(engine, f"DROP TABLE IF EXISTS {DEFAULT_TABLE}") + await aexecute(engine, f"DROP TABLE IF EXISTS {DEFAULT_HYBRID_TABLE}") + await aexecute(engine, f"DROP TABLE IF EXISTS {SIMPLE_TABLE}") await engine.close() @pytest_asyncio.fixture(scope="class") async def vs(self, engine): - await engine._ainit_vectorstore_table(DEFAULT_TABLE, VECTOR_SIZE) - vs = await AsyncPostgresVectorStore.create( + await run_on_background( + engine, + engine._ainit_vectorstore_table( + DEFAULT_TABLE, VECTOR_SIZE, overwrite_existing=True + ), + ) + vs = await run_on_background( engine, - embedding_service=embeddings_service, - table_name=DEFAULT_TABLE, + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=DEFAULT_TABLE, + ), ) - await vs.aadd_texts(texts, ids=ids) - await vs.adrop_vector_index() + await run_on_background(engine, vs.aadd_texts(texts, ids=ids)) + await run_on_background(engine, vs.adrop_vector_index()) yield vs - async def test_aapply_vector_index(self, vs): + 
async def test_apply_default_name_vector_index(self, engine): + await run_on_background( + engine, + engine._ainit_vectorstore_table( + SIMPLE_TABLE, VECTOR_SIZE, overwrite_existing=True + ), + ) + + vs = await run_on_background( + engine, + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=SIMPLE_TABLE, + ), + ) + await run_on_background(engine, vs.aadd_texts(texts, ids=ids)) + await run_on_background(engine, vs.adrop_vector_index()) + index = HNSWIndex() - await vs.aapply_vector_index(index) - assert await vs.is_valid_index(DEFAULT_INDEX_NAME) - await vs.adrop_vector_index() + await run_on_background(engine, vs.aapply_vector_index(index)) + assert await run_on_background(engine, vs.is_valid_index()) + await run_on_background(engine, vs.adrop_vector_index()) + + async def test_aapply_vector_index(self, engine, vs): + await run_on_background(engine, vs.adrop_vector_index(DEFAULT_INDEX_NAME)) + index = HNSWIndex(name=DEFAULT_INDEX_NAME) + await run_on_background(engine, vs.aapply_vector_index(index)) + assert await run_on_background(engine, vs.is_valid_index(DEFAULT_INDEX_NAME)) + await run_on_background(engine, vs.adrop_vector_index()) - async def test_areindex(self, vs): - if not await vs.is_valid_index(DEFAULT_INDEX_NAME): + async def test_areindex(self, engine, vs): + if not await run_on_background(engine, vs.is_valid_index(DEFAULT_INDEX_NAME)): index = HNSWIndex() - await vs.aapply_vector_index(index) - await vs.areindex() - await vs.areindex(DEFAULT_INDEX_NAME) - assert await vs.is_valid_index(DEFAULT_INDEX_NAME) - await vs.adrop_vector_index() - - async def test_dropindex(self, vs): - await vs.adrop_vector_index() - result = await vs.is_valid_index(DEFAULT_INDEX_NAME) + await run_on_background(engine, vs.aapply_vector_index(index)) + await run_on_background(engine, vs.areindex(DEFAULT_INDEX_NAME)) + await run_on_background(engine, vs.areindex(DEFAULT_INDEX_NAME)) + assert await run_on_background(engine, 
vs.is_valid_index(DEFAULT_INDEX_NAME)) + await run_on_background(engine, vs.adrop_vector_index()) + + async def test_dropindex(self, engine, vs): + await run_on_background(engine, vs.adrop_vector_index(DEFAULT_INDEX_NAME)) + result = await run_on_background(engine, vs.is_valid_index(DEFAULT_INDEX_NAME)) assert not result - async def test_aapply_vector_index_ivfflat(self, vs): - index = IVFFlatIndex(distance_strategy=DistanceStrategy.EUCLIDEAN) - await vs.aapply_vector_index(index, concurrently=True) - assert await vs.is_valid_index(DEFAULT_INDEX_NAME) + async def test_aapply_vector_index_ivfflat(self, engine, vs): + await run_on_background(engine, vs.adrop_vector_index(DEFAULT_INDEX_NAME)) + index = IVFFlatIndex( + name=DEFAULT_INDEX_NAME, distance_strategy=DistanceStrategy.EUCLIDEAN + ) + await run_on_background( + engine, vs.aapply_vector_index(index, concurrently=True) + ) + assert await run_on_background(engine, vs.is_valid_index(DEFAULT_INDEX_NAME)) index = IVFFlatIndex( name="secondindex", distance_strategy=DistanceStrategy.INNER_PRODUCT, ) - await vs.aapply_vector_index(index) - assert await vs.is_valid_index("secondindex") - await vs.adrop_vector_index("secondindex") - await vs.adrop_vector_index() + await run_on_background(engine, vs.aapply_vector_index(index)) + assert await run_on_background(engine, vs.is_valid_index("secondindex")) + await run_on_background(engine, vs.adrop_vector_index("secondindex")) + await run_on_background(engine, vs.adrop_vector_index(DEFAULT_INDEX_NAME)) - async def test_is_valid_index(self, vs): - is_valid = await vs.is_valid_index("invalid_index") + async def test_is_valid_index(self, engine, vs): + is_valid = await run_on_background(engine, vs.is_valid_index("invalid_index")) assert is_valid == False + + async def test_aapply_hybrid_search_index_table_without_tsv_column( + self, engine, vs + ): + # overwriting vs to get a hybrid vs + tsv_index_name = "index_without_tsv_column_" + UUID_STR + vs = await run_on_background( + 
engine, + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=DEFAULT_TABLE, + hybrid_search_config=HybridSearchConfig(index_name=tsv_index_name), + ), + ) + is_valid_index = await run_on_background( + engine, vs.is_valid_index(tsv_index_name) + ) + assert is_valid_index == False + await run_on_background(engine, vs.aapply_hybrid_search_index()) + assert await run_on_background(engine, vs.is_valid_index(tsv_index_name)) + await run_on_background(engine, vs.adrop_vector_index(tsv_index_name)) + is_valid_index = await run_on_background( + engine, vs.is_valid_index(tsv_index_name) + ) + assert is_valid_index == False + + async def test_aapply_hybrid_search_index_table_with_tsv_column(self, engine): + tsv_index_name = "index_with_tsv_column_" + UUID_STR + config = HybridSearchConfig( + tsv_column="tsv_column", + tsv_lang="pg_catalog.english", + index_name=tsv_index_name, + ) + await run_on_background( + engine, + engine._ainit_vectorstore_table( + DEFAULT_HYBRID_TABLE, + VECTOR_SIZE, + hybrid_search_config=config, + ), + ) + vs = await run_on_background( + engine, + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=DEFAULT_HYBRID_TABLE, + hybrid_search_config=config, + ), + ) + + is_valid_index = await run_on_background( + engine, vs.is_valid_index(tsv_index_name) + ) + assert is_valid_index == False + await run_on_background(engine, vs.aapply_hybrid_search_index()) + assert await run_on_background(engine, vs.is_valid_index(tsv_index_name)) + await run_on_background(engine, vs.areindex(tsv_index_name)) + assert await run_on_background(engine, vs.is_valid_index(tsv_index_name)) + await run_on_background(engine, vs.adrop_vector_index(tsv_index_name)) + is_valid_index = await run_on_background( + engine, vs.is_valid_index(tsv_index_name) + ) + assert is_valid_index == False diff --git a/tests/test_async_vectorstore_search.py b/tests/test_async_vectorstore_search.py index 
418dbbad..16a63911 100644 --- a/tests/test_async_vectorstore_search.py +++ b/tests/test_async_vectorstore_search.py @@ -12,8 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import asyncio import os import uuid +from typing import Any, Coroutine import pytest import pytest_asyncio @@ -22,15 +24,22 @@ from metadata_filtering_data import FILTERING_TEST_CASES, METADATAS from sqlalchemy import text -from langchain_google_cloud_sql_pg import Column, PostgresEngine +from langchain_google_cloud_sql_pg import ( # type: ignore + Column, + HybridSearchConfig, + PostgresEngine, + reciprocal_rank_fusion, + weighted_sum_ranking, +) from langchain_google_cloud_sql_pg.async_vectorstore import AsyncPostgresVectorStore from langchain_google_cloud_sql_pg.indexes import DistanceStrategy, HNSWQueryOptions DEFAULT_TABLE = "test_table" + str(uuid.uuid4()).replace("-", "_") CUSTOM_TABLE = "test_table_custom" + str(uuid.uuid4()).replace("-", "_") -CUSTOM_FILTER_TABLE = "test_table_custom_filter" + str(uuid.uuid4()).replace("-", "_") +CUSTOM_FILTER_TABLE = "custom_filter" + str(uuid.uuid4()).replace("-", "_") +HYBRID_SEARCH_TABLE1 = "hybrid1" + str(uuid.uuid4()).replace("-", "_") +HYBRID_SEARCH_TABLE2 = "hybrid2" + str(uuid.uuid4()).replace("-", "_") VECTOR_SIZE = 768 -sync_method_exception_str = "Sync methods are not implemented for AsyncPostgresVectorStore. Use PostgresVectorStore interface instead." embeddings_service = DeterministicFakeEmbedding(size=VECTOR_SIZE) @@ -45,6 +54,19 @@ ] embeddings = [embeddings_service.embed_query("foo") for i in range(len(texts))] +# Documents designed for hybrid search testing +hybrid_docs_content = { + "hs_doc_apple_fruit": "An apple is a sweet and edible fruit produced by an apple tree. Apples are very common.", + "hs_doc_apple_tech": "Apple Inc. is a multinational technology company. 
Their latest tech is amazing.", + "hs_doc_orange_fruit": "The orange is the fruit of various citrus species. Oranges are tasty.", + "hs_doc_generic_tech": "Technology drives innovation in the modern world. Tech is evolving.", + "hs_doc_unrelated_cat": "A fluffy cat sat on a mat quietly observing a mouse.", +} +hybrid_docs = [ + Document(page_content=content, metadata={"doc_id_key": key}) + for key, content in hybrid_docs_content.items() +] + def get_env_var(key: str, desc: str) -> str: v = os.environ.get(key) @@ -53,13 +75,26 @@ def get_env_var(key: str, desc: str) -> str: return v +# Helper to bridge the Main Test Loop and the Engine Background Loop +async def run_on_background(engine: PostgresEngine, coro: Coroutine) -> Any: + """Runs a coroutine on the engine's background loop.""" + if engine._loop: + return await asyncio.wrap_future( + asyncio.run_coroutine_threadsafe(coro, engine._loop) + ) + return await coro + + async def aexecute( engine: PostgresEngine, query: str, ) -> None: - async with engine._pool.connect() as conn: - await conn.execute(text(query)) - await conn.commit() + async def _impl(): + async with engine._pool.connect() as conn: + await conn.execute(text(query)) + await conn.commit() + + await run_on_background(engine, _impl()) @pytest.mark.asyncio(scope="class") @@ -92,252 +127,726 @@ async def engine(self, db_project, db_region, db_instance, db_name): await aexecute(engine, f"DROP TABLE IF EXISTS {DEFAULT_TABLE}") await aexecute(engine, f"DROP TABLE IF EXISTS {CUSTOM_TABLE}") await aexecute(engine, f"DROP TABLE IF EXISTS {CUSTOM_FILTER_TABLE}") + await aexecute(engine, f"DROP TABLE IF EXISTS {HYBRID_SEARCH_TABLE1}") + await aexecute(engine, f"DROP TABLE IF EXISTS {HYBRID_SEARCH_TABLE2}") await engine.close() @pytest_asyncio.fixture(scope="class") async def vs(self, engine): - await engine._ainit_vectorstore_table( - DEFAULT_TABLE, VECTOR_SIZE, store_metadata=False + await run_on_background( + engine, + engine._ainit_vectorstore_table( + 
DEFAULT_TABLE, VECTOR_SIZE, store_metadata=False + ), ) - vs = await AsyncPostgresVectorStore.create( + vs = await run_on_background( engine, - embedding_service=embeddings_service, - table_name=DEFAULT_TABLE, + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=DEFAULT_TABLE, + ), ) - await vs.aadd_documents(docs, ids=ids) + await run_on_background(engine, vs.aadd_documents(docs, ids=ids)) yield vs @pytest_asyncio.fixture(scope="class") async def vs_custom(self, engine): - await engine._ainit_vectorstore_table( - CUSTOM_TABLE, - VECTOR_SIZE, - id_column="myid", - content_column="mycontent", - embedding_column="myembedding", - metadata_columns=[ - Column("page", "TEXT"), - Column("source", "TEXT"), - ], - store_metadata=False, - ) - - vs_custom = await AsyncPostgresVectorStore.create( - engine, - embedding_service=embeddings_service, - table_name=CUSTOM_TABLE, - id_column="myid", - content_column="mycontent", - embedding_column="myembedding", - index_query_options=HNSWQueryOptions(ef_search=1), - ) - await vs_custom.aadd_documents(docs, ids=ids) + await run_on_background( + engine, + engine._ainit_vectorstore_table( + CUSTOM_TABLE, + VECTOR_SIZE, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=[ + Column("page", "TEXT"), + Column("source", "TEXT"), + ], + store_metadata=False, + ), + ) + + vs_custom = await run_on_background( + engine, + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=CUSTOM_TABLE, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + index_query_options=HNSWQueryOptions(ef_search=1), + ), + ) + await run_on_background(engine, vs_custom.aadd_documents(docs, ids=ids)) yield vs_custom @pytest_asyncio.fixture(scope="class") async def vs_custom_filter(self, engine): - await engine._ainit_vectorstore_table( - CUSTOM_FILTER_TABLE, - VECTOR_SIZE, - metadata_columns=[ - 
Column("name", "TEXT"), - Column("code", "TEXT"), - Column("price", "FLOAT"), - Column("is_available", "BOOLEAN"), - Column("tags", "TEXT[]"), - Column("inventory_location", "INTEGER[]"), - Column("available_quantity", "INTEGER", nullable=True), - ], - id_column="langchain_id", - store_metadata=False, - ) - - vs_custom_filter = await AsyncPostgresVectorStore.create( - engine, - embedding_service=embeddings_service, - table_name=CUSTOM_FILTER_TABLE, - metadata_columns=[ - "name", - "code", - "price", - "is_available", - "tags", - "inventory_location", - "available_quantity", - ], - id_column="langchain_id", - ) - await vs_custom_filter.aadd_documents(filter_docs, ids=ids) + await run_on_background( + engine, + engine._ainit_vectorstore_table( + CUSTOM_FILTER_TABLE, + VECTOR_SIZE, + metadata_columns=[ + Column("name", "TEXT"), + Column("code", "TEXT"), + Column("price", "FLOAT"), + Column("is_available", "BOOLEAN"), + Column("tags", "TEXT[]"), + Column("inventory_location", "INTEGER[]"), + Column("available_quantity", "INTEGER", nullable=True), + ], + id_column="langchain_id", + store_metadata=False, + ), + ) + + vs_custom_filter = await run_on_background( + engine, + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=CUSTOM_FILTER_TABLE, + metadata_columns=[ + "name", + "code", + "price", + "is_available", + "tags", + "inventory_location", + "available_quantity", + ], + id_column="langchain_id", + ), + ) + await run_on_background( + engine, vs_custom_filter.aadd_documents(filter_docs, ids=ids) + ) yield vs_custom_filter - async def test_asimilarity_search(self, vs): - results = await vs.asimilarity_search("foo", k=1) + @pytest_asyncio.fixture(scope="class") + async def vs_hybrid_search_with_tsv_column(self, engine): + hybrid_search_config = HybridSearchConfig( + tsv_column="mycontent_tsv", + tsv_lang="pg_catalog.english", + fts_query="my_fts_query", + fusion_function=reciprocal_rank_fusion, + fusion_function_parameters={ 
+ "rrf_k": 60, + "fetch_top_k": 10, + }, + ) + await run_on_background( + engine, + engine._ainit_vectorstore_table( + HYBRID_SEARCH_TABLE1, + VECTOR_SIZE, + id_column=Column("myid", "TEXT"), + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=[ + Column("page", "TEXT"), + Column("source", "TEXT"), + Column("doc_id_key", "TEXT"), + ], + metadata_json_column="mymetadata", # ignored + store_metadata=False, + hybrid_search_config=hybrid_search_config, + ), + ) + + vs_custom = await run_on_background( + engine, + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=HYBRID_SEARCH_TABLE1, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + metadata_json_column="mymetadata", + metadata_columns=["doc_id_key"], + index_query_options=HNSWQueryOptions(ef_search=1), + hybrid_search_config=hybrid_search_config, + ), + ) + await run_on_background(engine, vs_custom.aadd_documents(hybrid_docs)) + yield vs_custom + + async def test_asimilarity_search(self, engine, vs): + results = await run_on_background(engine, vs.asimilarity_search("foo", k=1)) assert len(results) == 1 assert results == [Document(page_content="foo", id=ids[0])] - results = await vs.asimilarity_search("foo", k=1, filter="content = 'bar'") + results = await run_on_background( + engine, vs.asimilarity_search("foo", k=1, filter={"content": "bar"}) + ) assert results == [Document(page_content="bar", id=ids[1])] - async def test_asimilarity_search_score(self, vs): - results = await vs.asimilarity_search_with_score("foo") + async def test_asimilarity_search_score(self, engine, vs): + results = await run_on_background( + engine, vs.asimilarity_search_with_score("foo") + ) assert len(results) == 4 assert results[0][0] == Document(page_content="foo", id=ids[0]) assert results[0][1] == 0 - async def test_asimilarity_search_by_vector(self, vs): + async def test_asimilarity_search_by_vector(self, engine, vs): embedding 
= embeddings_service.embed_query("foo") - results = await vs.asimilarity_search_by_vector(embedding) + results = await run_on_background( + engine, vs.asimilarity_search_by_vector(embedding) + ) assert len(results) == 4 assert results[0] == Document(page_content="foo", id=ids[0]) - results = await vs.asimilarity_search_with_score_by_vector(embedding) + results = await run_on_background( + engine, vs.asimilarity_search_with_score_by_vector(embedding) + ) assert results[0][0] == Document(page_content="foo", id=ids[0]) assert results[0][1] == 0 - async def test_similarity_search_with_relevance_scores_threshold_cosine(self, vs): + async def test_similarity_search_with_relevance_scores_threshold_cosine( + self, engine, vs + ): score_threshold = {"score_threshold": 0} - results = await vs.asimilarity_search_with_relevance_scores( - "foo", **score_threshold + results = await run_on_background( + engine, + vs.asimilarity_search_with_relevance_scores("foo", **score_threshold), ) # Note: Since tests use FakeEmbeddings which are non-normalized vectors, results might have scores beyond the range [0,1]. # For a normalized embedding service, a threshold of zero will yield all matched documents. 
assert len(results) == 2 score_threshold = {"score_threshold": 0.02} - results = await vs.asimilarity_search_with_relevance_scores( - "foo", **score_threshold + results = await run_on_background( + engine, + vs.asimilarity_search_with_relevance_scores("foo", **score_threshold), ) assert len(results) == 2 score_threshold = {"score_threshold": 0.9} - results = await vs.asimilarity_search_with_relevance_scores( - "foo", **score_threshold + results = await run_on_background( + engine, + vs.asimilarity_search_with_relevance_scores("foo", **score_threshold), ) assert len(results) == 1 assert results[0][0] == Document(page_content="foo", id=ids[0]) score_threshold = {"score_threshold": 0.02} vs.distance_strategy = DistanceStrategy.EUCLIDEAN - results = await vs.asimilarity_search_with_relevance_scores( - "foo", **score_threshold + results = await run_on_background( + engine, + vs.asimilarity_search_with_relevance_scores("foo", **score_threshold), ) assert len(results) == 1 async def test_similarity_search_with_relevance_scores_threshold_euclidean( self, engine ): - vs = await AsyncPostgresVectorStore.create( + vs = await run_on_background( engine, - embedding_service=embeddings_service, - table_name=DEFAULT_TABLE, - distance_strategy=DistanceStrategy.EUCLIDEAN, + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=DEFAULT_TABLE, + distance_strategy=DistanceStrategy.EUCLIDEAN, + ), ) score_threshold = {"score_threshold": 0.9} - results = await vs.asimilarity_search_with_relevance_scores( - "foo", **score_threshold + results = await run_on_background( + engine, + vs.asimilarity_search_with_relevance_scores("foo", **score_threshold), ) assert len(results) == 1 assert results[0][0] == Document(page_content="foo", id=ids[0]) - async def test_amax_marginal_relevance_search(self, vs): - results = await vs.amax_marginal_relevance_search("bar") + async def test_amax_marginal_relevance_search(self, engine, vs): + results = await 
run_on_background( + engine, vs.amax_marginal_relevance_search("bar") + ) assert results[0] == Document(page_content="bar", id=ids[1]) - results = await vs.amax_marginal_relevance_search( - "bar", filter="content = 'boo'" + results = await run_on_background( + engine, vs.amax_marginal_relevance_search("bar", filter={"content": "boo"}) ) assert results[0] == Document(page_content="boo", id=ids[3]) - async def test_amax_marginal_relevance_search_vector(self, vs): + async def test_amax_marginal_relevance_search_vector(self, engine, vs): embedding = embeddings_service.embed_query("bar") - results = await vs.amax_marginal_relevance_search_by_vector(embedding) + results = await run_on_background( + engine, vs.amax_marginal_relevance_search_by_vector(embedding) + ) assert results[0] == Document(page_content="bar", id=ids[1]) - async def test_amax_marginal_relevance_search_vector_score(self, vs): + async def test_amax_marginal_relevance_search_vector_score(self, engine, vs): embedding = embeddings_service.embed_query("bar") - results = await vs.amax_marginal_relevance_search_with_score_by_vector( - embedding + results = await run_on_background( + engine, vs.amax_marginal_relevance_search_with_score_by_vector(embedding) ) assert results[0][0] == Document(page_content="bar", id=ids[1]) - results = await vs.amax_marginal_relevance_search_with_score_by_vector( - embedding, lambda_mult=0.75, fetch_k=10 + results = await run_on_background( + engine, + vs.amax_marginal_relevance_search_with_score_by_vector( + embedding, lambda_mult=0.75, fetch_k=10 + ), ) assert results[0][0] == Document(page_content="bar", id=ids[1]) - async def test_similarity_search(self, vs_custom): - results = await vs_custom.asimilarity_search("foo", k=1) + async def test_similarity_search(self, engine, vs_custom): + results = await run_on_background( + engine, vs_custom.asimilarity_search("foo", k=1) + ) assert len(results) == 1 assert results == [Document(page_content="foo", id=ids[0])] - results = await 
vs_custom.asimilarity_search( - "foo", k=1, filter="mycontent = 'bar'" + results = await run_on_background( + engine, + vs_custom.asimilarity_search("foo", k=1, filter={"mycontent": "bar"}), ) assert results == [Document(page_content="bar", id=ids[1])] - async def test_similarity_search_score(self, vs_custom): - results = await vs_custom.asimilarity_search_with_score("foo") + async def test_similarity_search_score(self, engine, vs_custom): + results = await run_on_background( + engine, vs_custom.asimilarity_search_with_score("foo") + ) assert len(results) == 4 assert results[0][0] == Document(page_content="foo", id=ids[0]) assert results[0][1] == 0 - async def test_similarity_search_by_vector(self, vs_custom): + async def test_similarity_search_by_vector(self, engine, vs_custom): embedding = embeddings_service.embed_query("foo") - results = await vs_custom.asimilarity_search_by_vector(embedding) + results = await run_on_background( + engine, vs_custom.asimilarity_search_by_vector(embedding) + ) assert len(results) == 4 assert results[0] == Document(page_content="foo", id=ids[0]) - results = await vs_custom.asimilarity_search_with_score_by_vector(embedding) + results = await run_on_background( + engine, vs_custom.asimilarity_search_with_score_by_vector(embedding) + ) assert results[0][0] == Document(page_content="foo", id=ids[0]) assert results[0][1] == 0 - async def test_max_marginal_relevance_search(self, vs_custom): - results = await vs_custom.amax_marginal_relevance_search("bar") + async def test_max_marginal_relevance_search(self, engine, vs_custom): + results = await run_on_background( + engine, vs_custom.amax_marginal_relevance_search("bar") + ) assert results[0] == Document(page_content="bar", id=ids[1]) - results = await vs_custom.amax_marginal_relevance_search( - "bar", filter="mycontent = 'boo'" + results = await run_on_background( + engine, + vs_custom.amax_marginal_relevance_search( + "bar", filter={"mycontent": "boo"} + ), ) assert results[0] == 
Document(page_content="boo", id=ids[3]) - async def test_max_marginal_relevance_search_vector(self, vs_custom): + async def test_max_marginal_relevance_search_vector(self, engine, vs_custom): embedding = embeddings_service.embed_query("bar") - results = await vs_custom.amax_marginal_relevance_search_by_vector(embedding) + results = await run_on_background( + engine, vs_custom.amax_marginal_relevance_search_by_vector(embedding) + ) assert results[0] == Document(page_content="bar", id=ids[1]) - async def test_max_marginal_relevance_search_vector_score(self, vs_custom): + async def test_max_marginal_relevance_search_vector_score(self, engine, vs_custom): embedding = embeddings_service.embed_query("bar") - results = await vs_custom.amax_marginal_relevance_search_with_score_by_vector( - embedding + results = await run_on_background( + engine, + vs_custom.amax_marginal_relevance_search_with_score_by_vector(embedding), ) assert results[0][0] == Document(page_content="bar", id=ids[1]) - results = await vs_custom.amax_marginal_relevance_search_with_score_by_vector( - embedding, lambda_mult=0.75, fetch_k=10 + results = await run_on_background( + engine, + vs_custom.amax_marginal_relevance_search_with_score_by_vector( + embedding, lambda_mult=0.75, fetch_k=10 + ), ) assert results[0][0] == Document(page_content="bar", id=ids[1]) - async def test_aget_by_ids(self, vs): + async def test_aget_by_ids(self, engine, vs): test_ids = [ids[0]] - results = await vs.aget_by_ids(ids=test_ids) + results = await run_on_background(engine, vs.aget_by_ids(ids=test_ids)) assert results[0] == Document(page_content="foo", id=ids[0]) - async def test_aget_by_ids_custom_vs(self, vs_custom): + async def test_aget_by_ids_custom_vs(self, engine, vs_custom): test_ids = [ids[0]] - results = await vs_custom.aget_by_ids(ids=test_ids) + results = await run_on_background(engine, vs_custom.aget_by_ids(ids=test_ids)) assert results[0] == Document(page_content="foo", id=ids[0]) def test_get_by_ids(self, vs): 
test_ids = [ids[0]] - with pytest.raises(Exception, match=sync_method_exception_str): + with pytest.raises(Exception): vs.get_by_ids(ids=test_ids) @pytest.mark.parametrize("test_filter, expected_ids", FILTERING_TEST_CASES) async def test_vectorstore_with_metadata_filters( self, + engine, vs_custom_filter, test_filter, expected_ids, ): """Test end to end construction and search.""" - docs = await vs_custom_filter.asimilarity_search( - "meow", k=5, filter=test_filter + docs = await run_on_background( + engine, vs_custom_filter.asimilarity_search("meow", k=5, filter=test_filter) ) assert [doc.metadata["code"] for doc in docs] == expected_ids, test_filter + + async def test_asimilarity_hybrid_search_rrk(self, engine, vs): + results = await run_on_background( + engine, + vs.asimilarity_search( + "foo", + k=1, + hybrid_search_config=HybridSearchConfig( + fusion_function=reciprocal_rank_fusion + ), + ), + ) + assert len(results) == 1 + assert results == [Document(page_content="foo", id=ids[0])] + + results = await run_on_background( + engine, + vs.asimilarity_search( + "bar", + k=1, + filter={"content": {"$ne": "baz"}}, + hybrid_search_config=HybridSearchConfig( + fusion_function=reciprocal_rank_fusion, + fusion_function_parameters={ + "rrf_k": 100, + "fetch_top_k": 10, + }, + primary_top_k=1, + secondary_top_k=1, + ), + ), + ) + assert results == [Document(page_content="bar", id=ids[1])] + + async def test_hybrid_search_weighted_sum_default( + self, engine, vs_hybrid_search_with_tsv_column + ): + """Test hybrid search with default weighted sum (0.5 vector, 0.5 FTS).""" + query = "apple" # Should match "apple" in FTS and vector + + # The vs_hybrid_search_with_tsv_column instance is already configured for hybrid search. + # Default fusion is weighted_sum_ranking with 0.5/0.5 weights. + # fts_query will default to the main query. 
+ results_with_scores = await run_on_background( + engine, + vs_hybrid_search_with_tsv_column.asimilarity_search_with_score(query, k=3), + ) + + assert len(results_with_scores) > 1 + result_ids = [doc.metadata["doc_id_key"] for doc, score in results_with_scores] + + # Expect "hs_doc_apple_fruit" and "hs_doc_apple_tech" to be highly ranked. + assert "hs_doc_apple_fruit" in result_ids + + # Scores should be floats (fused scores) + for doc, score in results_with_scores: + assert isinstance(score, float) + + # Check if sorted by score (descending for weighted_sum_ranking with positive scores) + assert results_with_scores[0][1] >= results_with_scores[1][1] + + async def test_hybrid_search_weighted_sum_vector_bias( + self, engine, vs_hybrid_search_with_tsv_column + ): + """Test weighted sum with higher weight for vector results.""" + query = "Apple Inc technology" # More specific for vector similarity + + config = HybridSearchConfig( + tsv_column="mycontent_tsv", # Must match table setup + fusion_function_parameters={ + "primary_results_weight": 0.8, # Vector bias + "secondary_results_weight": 0.2, + }, + # fts_query will default to main query + ) + results = await run_on_background( + engine, + vs_hybrid_search_with_tsv_column.asimilarity_search( + query, k=2, hybrid_search_config=config + ), + ) + result_ids = [doc.metadata["doc_id_key"] for doc in results] + + assert len(result_ids) > 0 + assert result_ids[0] == "hs_doc_generic_tech" + + async def test_hybrid_search_weighted_sum_fts_bias( + self, engine, vs_hybrid_search_with_tsv_column + ): + """Test weighted sum with higher weight for FTS results.""" + query = "fruit common tasty" # Strong FTS signal for fruit docs + + config = HybridSearchConfig( + tsv_column="mycontent_tsv", + fusion_function=weighted_sum_ranking, + fusion_function_parameters={ + "primary_results_weight": 0.01, + "secondary_results_weight": 0.99, # FTS bias + }, + ) + results = await run_on_background( + engine, + 
vs_hybrid_search_with_tsv_column.asimilarity_search( + query, k=2, hybrid_search_config=config + ), + ) + result_ids = [doc.metadata["doc_id_key"] for doc in results] + + assert len(result_ids) == 2 + assert "hs_doc_apple_fruit" in result_ids + + async def test_hybrid_search_reciprocal_rank_fusion( + self, engine, vs_hybrid_search_with_tsv_column + ): + """Test hybrid search with Reciprocal Rank Fusion.""" + query = "technology company" + + # Configure RRF. primary_top_k and secondary_top_k control inputs to fusion. + # fusion_function_parameters.fetch_top_k controls output count from RRF. + config = HybridSearchConfig( + tsv_column="mycontent_tsv", + fusion_function=reciprocal_rank_fusion, + primary_top_k=3, # How many dense results to consider + secondary_top_k=3, # How many sparse results to consider + fusion_function_parameters={ + "rrf_k": 60, + "fetch_top_k": 2, + }, # RRF specific params + ) + results = await run_on_background( + engine, + vs_hybrid_search_with_tsv_column.asimilarity_search( + query, k=2, hybrid_search_config=config + ), + ) + result_ids = [doc.metadata["doc_id_key"] for doc in results] + + assert len(result_ids) == 2 + # "hs_doc_apple_tech" (FTS: technology, company; Vector: Apple Inc technology) + # "hs_doc_generic_tech" (FTS: technology; Vector: Technology drives innovation) + # RRF should combine these ranks. "hs_doc_apple_tech" is likely higher. + assert "hs_doc_apple_tech" in result_ids + assert result_ids[0] == "hs_doc_apple_tech" # Stronger combined signal + + async def test_hybrid_search_explicit_fts_query( + self, engine, vs_hybrid_search_with_tsv_column + ): + """Test hybrid search when fts_query in HybridSearchConfig is different from main query.""" + main_vector_query = "Apple Inc." 
# For vector search + fts_specific_query = "fruit" # For FTS + + config = HybridSearchConfig( + tsv_column="mycontent_tsv", + fts_query=fts_specific_query, # Override FTS query + fusion_function_parameters={ # Using default weighted_sum_ranking + "primary_results_weight": 0.5, + "secondary_results_weight": 0.5, + }, + ) + results = await run_on_background( + engine, + vs_hybrid_search_with_tsv_column.asimilarity_search( + main_vector_query, k=2, hybrid_search_config=config + ), + ) + result_ids = [doc.metadata["doc_id_key"] for doc in results] + + # Vector search for "Apple Inc.": hs_doc_apple_tech + # FTS search for "fruit": hs_doc_apple_fruit, hs_doc_orange_fruit + # Combined: hs_doc_apple_fruit (strong FTS) and hs_doc_apple_tech (strong vector) are candidates. + # "hs_doc_apple_fruit" might get a boost if "Apple Inc." vector has some similarity to "apple fruit" doc. + assert len(result_ids) > 0 + assert ( + "hs_doc_apple_fruit" in result_ids + or "hs_doc_apple_tech" in result_ids + or "hs_doc_orange_fruit" in result_ids + ) + + async def test_hybrid_search_with_filter( + self, engine, vs_hybrid_search_with_tsv_column + ): + """Test hybrid search with a metadata filter applied.""" + query = "apple" + # Filter to only include "tech" related apple docs using metadata + # Assuming metadata_columns=["doc_id_key"] was set up for vs_hybrid_search_with_tsv_column + doc_filter = {"doc_id_key": {"$eq": "hs_doc_apple_tech"}} + + config = HybridSearchConfig( + tsv_column="mycontent_tsv", + ) + results = await run_on_background( + engine, + vs_hybrid_search_with_tsv_column.asimilarity_search( + query, k=2, filter=doc_filter, hybrid_search_config=config + ), + ) + result_ids = [doc.metadata["doc_id_key"] for doc in results] + + assert len(results) == 1 + assert result_ids[0] == "hs_doc_apple_tech" + + async def test_hybrid_search_fts_empty_results( + self, engine, vs_hybrid_search_with_tsv_column + ): + """Test when FTS query yields no results, should fall back to vector 
search.""" + vector_query = "apple" + no_match_fts_query = "zzyyxx_gibberish_term_for_fts_nomatch" + + config = HybridSearchConfig( + tsv_column="mycontent_tsv", + fts_query=no_match_fts_query, + fusion_function_parameters={ + "primary_results_weight": 0.6, + "secondary_results_weight": 0.4, + }, + ) + results = await run_on_background( + engine, + vs_hybrid_search_with_tsv_column.asimilarity_search( + vector_query, k=2, hybrid_search_config=config + ), + ) + result_ids = [doc.metadata["doc_id_key"] for doc in results] + + # Expect results based purely on vector search for "apple" + assert len(result_ids) > 0 + assert "hs_doc_apple_fruit" in result_ids or "hs_doc_apple_tech" in result_ids + # The top result should be one of the apple documents based on vector search + assert results[0].metadata["doc_id_key"].startswith("hs_doc_apple_fruit") + + async def test_hybrid_search_vector_empty_results_effectively( + self, engine, vs_hybrid_search_with_tsv_column + ): + """Test when vector query is very dissimilar to docs, should rely on FTS.""" + # This is hard to guarantee with fake embeddings, but we try. + # A better way might be to use a filter that excludes all docs for the vector part, + # but filters are applied to both. 
+ vector_query_far_off = "supercalifragilisticexpialidocious_vector_nomatch" + fts_query_match = "orange fruit" # Should match hs_doc_orange_fruit + + config = HybridSearchConfig( + tsv_column="mycontent_tsv", + fts_query=fts_query_match, + fusion_function_parameters={ + "primary_results_weight": 0.4, + "secondary_results_weight": 0.6, + }, + ) + results = await run_on_background( + engine, + vs_hybrid_search_with_tsv_column.asimilarity_search( + vector_query_far_off, k=1, hybrid_search_config=config + ), + ) + result_ids = [doc.metadata["doc_id_key"] for doc in results] + + # Expect results based purely on FTS search for "orange fruit" + assert len(result_ids) == 1 + assert result_ids[0] == "hs_doc_orange_fruit" + + async def test_hybrid_search_without_tsv_column(self, engine): + """Test hybrid search without a TSV column.""" + # This is hard to guarantee with fake embeddings, but we try. + # A better way might be to use a filter that excludes all docs for the vector part, + # but filters are applied to both. 
+ vector_query_far_off = "apple iphone tech is better designed than macs" + fts_query_match = "apple fruit" + + config = HybridSearchConfig( + tsv_column="mycontent_tsv", + fts_query=fts_query_match, + fusion_function_parameters={ + "primary_results_weight": 0.1, + "secondary_results_weight": 0.9, + }, + ) + await run_on_background( + engine, + engine._ainit_vectorstore_table( + HYBRID_SEARCH_TABLE2, + VECTOR_SIZE, + id_column=Column("myid", "TEXT"), + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=[ + Column("page", "TEXT"), + Column("source", "TEXT"), + Column("doc_id_key", "TEXT"), + ], + store_metadata=False, + hybrid_search_config=config, + ), + ) + + vs_with_tsv_column = await run_on_background( + engine, + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=HYBRID_SEARCH_TABLE2, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=["doc_id_key"], + index_query_options=HNSWQueryOptions(ef_search=1), + hybrid_search_config=config, + ), + ) + await run_on_background(engine, vs_with_tsv_column.aadd_documents(hybrid_docs)) + + config_no_tsv = HybridSearchConfig( + tsv_column="", # no TSV column + fts_query=fts_query_match, + fusion_function_parameters={ + "primary_results_weight": 0.9, + "secondary_results_weight": 0.1, + }, + ) + vs_without_tsv_column = await run_on_background( + engine, + AsyncPostgresVectorStore.create( + engine, + embedding_service=embeddings_service, + table_name=HYBRID_SEARCH_TABLE2, + id_column="myid", + content_column="mycontent", + embedding_column="myembedding", + metadata_columns=["doc_id_key"], + index_query_options=HNSWQueryOptions(ef_search=1), + hybrid_search_config=config_no_tsv, + ), + ) + + results_with_tsv_column = await run_on_background( + engine, + vs_with_tsv_column.asimilarity_search( + vector_query_far_off, k=1, hybrid_search_config=config + ), + ) + results_without_tsv_column = await 
run_on_background( + engine, + vs_without_tsv_column.asimilarity_search( + vector_query_far_off, k=1, hybrid_search_config=config + ), + ) + result_ids_with_tsv_column = [ + doc.metadata["doc_id_key"] for doc in results_with_tsv_column + ] + result_ids_without_tsv_column = [ + doc.metadata["doc_id_key"] for doc in results_without_tsv_column + ] + + # Expect results based purely on FTS search for "orange fruit" + assert len(result_ids_with_tsv_column) == 1 + assert len(result_ids_without_tsv_column) == 1 + assert result_ids_with_tsv_column[0] == "hs_doc_apple_fruit" + assert result_ids_without_tsv_column[0] == "hs_doc_apple_fruit" diff --git a/tests/test_checkpoint.py b/tests/test_checkpoint.py index 05545168..a8b0f0d0 100644 --- a/tests/test_checkpoint.py +++ b/tests/test_checkpoint.py @@ -59,6 +59,7 @@ "__start__": {"__start__": 1}, "node": {"start:node": 2}, }, + "updated_channels": [], } @@ -235,6 +236,7 @@ def test_data() -> dict[str, Any]: "__start__": {"__start__": 1}, "node": {"start:node": 2}, }, + "updated_channels": [], } chkpnt_1: Checkpoint = empty_checkpoint() chkpnt_2: Checkpoint = create_checkpoint(chkpnt_1, {}, 1) diff --git a/tests/test_engine.py b/tests/test_engine.py index 7883cf4b..ca26236e 100644 --- a/tests/test_engine.py +++ b/tests/test_engine.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import asyncio import os import uuid -from typing import Sequence +from typing import Any, Coroutine, Sequence import asyncpg # type: ignore import pytest @@ -27,16 +28,18 @@ from sqlalchemy.ext.asyncio import create_async_engine from sqlalchemy.pool import NullPool -from langchain_google_cloud_sql_pg import Column, PostgresEngine +from langchain_google_cloud_sql_pg import Column, HybridSearchConfig, PostgresEngine DEFAULT_TABLE = "test_table" + str(uuid.uuid4()).replace("-", "_") CUSTOM_TABLE = "test_table_custom" + str(uuid.uuid4()).replace("-", "_") INT_ID_CUSTOM_TABLE = "test_table_custom_int_id" + str(uuid.uuid4()).replace("-", "_") +HYBRID_SEARCH_TABLE = "hybrid" + str(uuid.uuid4()).replace("-", "_") DEFAULT_TABLE_SYNC = "test_table" + str(uuid.uuid4()).replace("-", "_") CUSTOM_TABLE_SYNC = "test_table_custom" + str(uuid.uuid4()).replace("-", "_") INT_ID_CUSTOM_TABLE_SYNC = "test_table_custom_int_id" + str(uuid.uuid4()).replace( "-", "_" ) +HYBRID_SEARCH_TABLE_SYNC = "hybrid_sync" + str(uuid.uuid4()).replace("-", "_") VECTOR_SIZE = 768 embeddings_service = DeterministicFakeEmbedding(size=VECTOR_SIZE) @@ -50,27 +53,36 @@ def get_env_var(key: str, desc: str) -> str: return v +# Helper to bridge the Main Test Loop and the Engine Background Loop +async def run_on_background(engine: PostgresEngine, coro: Coroutine) -> Any: + """Runs a coroutine on the engine's background loop (if it exists).""" + if engine._loop: + return await asyncio.wrap_future( + asyncio.run_coroutine_threadsafe(coro, engine._loop) + ) + return await coro + + async def aexecute( engine: PostgresEngine, query: str, ) -> None: - async def run(engine, query): + async def _impl(): async with engine._pool.connect() as conn: await conn.execute(text(query)) await conn.commit() - await engine._run_as_async(run(engine, query)) + await run_on_background(engine, _impl()) async def afetch(engine: PostgresEngine, query: str) -> Sequence[RowMapping]: - async def run(engine, query): + async def _impl(): 
async with engine._pool.connect() as conn: result = await conn.execute(text(query)) result_map = result.mappings() - result_fetch = result_map.fetchall() - return result_fetch + return result_map.fetchall() - return await engine._run_as_async(run(engine, query)) + return await run_on_background(engine, _impl()) @pytest.mark.asyncio(scope="module") @@ -120,13 +132,18 @@ async def engine(self, db_project, db_region, db_instance, db_name): await aexecute(engine, f'DROP TABLE "{CUSTOM_TABLE}"') await aexecute(engine, f'DROP TABLE "{DEFAULT_TABLE}"') await aexecute(engine, f'DROP TABLE "{INT_ID_CUSTOM_TABLE}"') + await aexecute(engine, f'DROP TABLE "{HYBRID_SEARCH_TABLE}"') await engine.close() async def test_engine_args(self, engine): + # Accessing engine._pool.pool.status() is synchronous and safe on main loop objects + # assuming SQLAlchemy pool status doesn't strictly require loop context assert "Pool size: 3" in engine._pool.pool.status() async def test_init_table(self, engine): - await engine.ainit_vectorstore_table(DEFAULT_TABLE, VECTOR_SIZE) + await run_on_background( + engine, engine.ainit_vectorstore_table(DEFAULT_TABLE, VECTOR_SIZE) + ) id = str(uuid.uuid4()) content = "coffee" embedding = await embeddings_service.aembed_query(content) @@ -136,14 +153,17 @@ async def test_init_table(self, engine): await aexecute(engine, stmt) async def test_init_table_custom(self, engine): - await engine.ainit_vectorstore_table( - CUSTOM_TABLE, - VECTOR_SIZE, - id_column="uuid", - content_column="my-content", - embedding_column="my_embedding", - metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], - store_metadata=True, + await run_on_background( + engine, + engine.ainit_vectorstore_table( + CUSTOM_TABLE, + VECTOR_SIZE, + id_column="uuid", + content_column="my-content", + embedding_column="my_embedding", + metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], + store_metadata=True, + ), ) stmt = f"SELECT column_name, data_type FROM 
information_schema.columns WHERE table_name = '{CUSTOM_TABLE}';" results = await afetch(engine, stmt) @@ -159,14 +179,19 @@ async def test_init_table_custom(self, engine): assert row in expected async def test_init_table_with_int_id(self, engine): - await engine.ainit_vectorstore_table( - INT_ID_CUSTOM_TABLE, - VECTOR_SIZE, - id_column=Column(name="integer_id", data_type="INTEGER", nullable="False"), - content_column="my-content", - embedding_column="my_embedding", - metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], - store_metadata=True, + await run_on_background( + engine, + engine.ainit_vectorstore_table( + INT_ID_CUSTOM_TABLE, + VECTOR_SIZE, + id_column=Column( + name="integer_id", data_type="INTEGER", nullable="False" + ), + content_column="my-content", + embedding_column="my_embedding", + metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], + store_metadata=True, + ), ) stmt = f"SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '{INT_ID_CUSTOM_TABLE}';" results = await afetch(engine, stmt) @@ -190,7 +215,10 @@ async def test_password( user, password, ): - PostgresEngine._connector = None + # Note: PostgresEngine._connector is no longer a class attribute in fixed engine.py + # But for test cleanup safety regarding the OLD code structure, we can ignore this. 
+ # PostgresEngine._connector = None + engine = await PostgresEngine.afrom_instance( project_id=db_project, instance=db_instance, @@ -201,7 +229,6 @@ async def test_password( ) assert engine await aexecute(engine, "SELECT 1") - PostgresEngine._connector = None await engine.close() async def test_from_engine( @@ -213,7 +240,7 @@ async def test_from_engine( user, password, ): - async with Connector() as connector: + async with Connector(loop=asyncio.get_running_loop()) as connector: async def getconn() -> asyncpg.Connection: conn = await connector.connect_async( # type: ignore @@ -227,15 +254,37 @@ async def getconn() -> asyncpg.Connection: ) return conn - engine = create_async_engine( + engine_async = create_async_engine( "postgresql+asyncpg://", async_creator=getconn, ) - engine = PostgresEngine.from_engine(engine) + engine = PostgresEngine.from_engine(engine_async) await aexecute(engine, "SELECT 1") await engine.close() + async def test_from_connection_string( + self, + db_name, + user, + password, + ): + port = "5432" + url = f"postgresql+asyncpg://{user}:{password}@{host}:{port}/{db_name}" + engine = PostgresEngine.from_connection_string( + url, + echo=True, + poolclass=NullPool, + ) + await aexecute(engine, "SELECT 1") + await engine.close() + + engine = PostgresEngine.from_connection_string( + URL.create("postgresql+asyncpg", user, password, host, port, db_name) + ) + await aexecute(engine, "SELECT 1") + await engine.close() + async def test_from_engine_args_url( self, db_name, @@ -306,7 +355,11 @@ async def test_iam_account_override( async def test_ainit_checkpoint_writes_table(self, engine): table_name = f"checkpoint{uuid.uuid4()}" table_name_writes = f"{table_name}_writes" - await engine.ainit_checkpoint_table(table_name=table_name) + + await run_on_background( + engine, engine.ainit_checkpoint_table(table_name=table_name) + ) + stmt = f"SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '{table_name_writes}';" results = await 
afetch(engine, stmt) expected = [ @@ -329,15 +382,43 @@ async def test_ainit_checkpoint_writes_table(self, engine): {"column_name": "checkpoint_ns", "data_type": "text"}, {"column_name": "checkpoint_id", "data_type": "text"}, {"column_name": "parent_checkpoint_id", "data_type": "text"}, + {"column_name": "type", "data_type": "text"}, {"column_name": "checkpoint", "data_type": "bytea"}, {"column_name": "metadata", "data_type": "bytea"}, - {"column_name": "type", "data_type": "text"}, ] for row in results: assert row in expected await aexecute(engine, f'DROP TABLE IF EXISTS "{table_name}"') await aexecute(engine, f'DROP TABLE IF EXISTS "{table_name_writes}"') + async def test_init_table_hybrid_search(self, engine): + await run_on_background( + engine, + engine.ainit_vectorstore_table( + HYBRID_SEARCH_TABLE, + VECTOR_SIZE, + id_column="uuid", + content_column="my-content", + embedding_column="my_embedding", + metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], + store_metadata=True, + hybrid_search_config=HybridSearchConfig(), + ), + ) + stmt = f"SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '{HYBRID_SEARCH_TABLE}';" + results = await afetch(engine, stmt) + expected = [ + {"column_name": "uuid", "data_type": "uuid"}, + {"column_name": "my_embedding", "data_type": "USER-DEFINED"}, + {"column_name": "langchain_metadata", "data_type": "json"}, + {"column_name": "my-content", "data_type": "text"}, + {"column_name": "my-content_tsv", "data_type": "tsvector"}, + {"column_name": "page", "data_type": "text"}, + {"column_name": "source", "data_type": "text"}, + ] + for row in results: + assert row in expected + @pytest.mark.asyncio(scope="module") class TestEngineSync: @@ -381,14 +462,16 @@ async def engine(self, db_project, db_region, db_instance, db_name): await aexecute(engine, f'DROP TABLE "{CUSTOM_TABLE_SYNC}"') await aexecute(engine, f'DROP TABLE "{DEFAULT_TABLE_SYNC}"') await aexecute(engine, f'DROP TABLE 
"{INT_ID_CUSTOM_TABLE_SYNC}"') + await aexecute(engine, f'DROP TABLE "{HYBRID_SEARCH_TABLE_SYNC}"') await engine.close() async def test_init_table(self, engine): + # Sync method uses _run_as_sync internally -> safe to call on Main Loop engine.init_vectorstore_table(DEFAULT_TABLE_SYNC, VECTOR_SIZE) + id = str(uuid.uuid4()) content = "coffee" embedding = await embeddings_service.aembed_query(content) - # Note: DeterministicFakeEmbedding generates a numpy array, converting to list a list of float values embedding_string = [float(dimension) for dimension in embedding] stmt = f"INSERT INTO {DEFAULT_TABLE_SYNC} (langchain_id, content, embedding) VALUES ('{id}', '{content}','{embedding_string}');" await aexecute(engine, stmt) @@ -448,7 +531,6 @@ async def test_password( user, password, ): - PostgresEngine._connector = None engine = PostgresEngine.from_instance( project_id=db_project, instance=db_instance, @@ -460,7 +542,6 @@ async def test_password( ) assert engine await aexecute(engine, "SELECT 1") - PostgresEngine._connector = None await engine.close() async def test_engine_constructor_key( @@ -469,7 +550,7 @@ async def test_engine_constructor_key( ): key = object() with pytest.raises(Exception): - PostgresEngine(key, engine) + PostgresEngine(key, engine, None, None) async def test_iam_account_override( self, @@ -494,7 +575,9 @@ async def test_iam_account_override( async def test_init_checkpoints_table(self, engine): table_name = f"checkpoint{uuid.uuid4()}" table_name_writes = f"{table_name}_writes" + engine.init_checkpoint_table(table_name=table_name) + stmt = f"SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '{table_name}';" results = await afetch(engine, stmt) expected = [ @@ -525,3 +608,28 @@ async def test_init_checkpoints_table(self, engine): assert row in expected await aexecute(engine, f'DROP TABLE IF EXISTS "{table_name}"') await aexecute(engine, f'DROP TABLE IF EXISTS "{table_name_writes}"') + + async def 
test_init_table_hybrid_search(self, engine): + engine.init_vectorstore_table( + HYBRID_SEARCH_TABLE_SYNC, + VECTOR_SIZE, + id_column="uuid", + content_column="my-content", + embedding_column="my_embedding", + metadata_columns=[Column("page", "TEXT"), Column("source", "TEXT")], + store_metadata=True, + hybrid_search_config=HybridSearchConfig(), + ) + stmt = f"SELECT column_name, data_type FROM information_schema.columns WHERE table_name = '{HYBRID_SEARCH_TABLE_SYNC}';" + results = await afetch(engine, stmt) + expected = [ + {"column_name": "uuid", "data_type": "uuid"}, + {"column_name": "my_embedding", "data_type": "USER-DEFINED"}, + {"column_name": "langchain_metadata", "data_type": "json"}, + {"column_name": "my-content", "data_type": "text"}, + {"column_name": "my-content_tsv", "data_type": "tsvector"}, + {"column_name": "page", "data_type": "text"}, + {"column_name": "source", "data_type": "text"}, + ] + for row in results: + assert row in expected diff --git a/tests/test_standard_test_suite.py b/tests/test_standard_test_suite.py index 19c77128..2a853bdd 100644 --- a/tests/test_standard_test_suite.py +++ b/tests/test_standard_test_suite.py @@ -23,8 +23,8 @@ from langchain_google_cloud_sql_pg import Column, PostgresEngine, PostgresVectorStore -DEFAULT_TABLE = "test_table_standard_test_suite" + str(uuid.uuid4()) -DEFAULT_TABLE_SYNC = "test_table_sync_standard_test_suite" + str(uuid.uuid4()) +DEFAULT_TABLE = "test_table" + str(uuid.uuid4()) +DEFAULT_TABLE_SYNC = "test_table_sync" + str(uuid.uuid4()) def get_env_var(key: str, desc: str) -> str: diff --git a/tests/test_vectorstore.py b/tests/test_vectorstore.py index 7995cd63..ca0c6786 100644 --- a/tests/test_vectorstore.py +++ b/tests/test_vectorstore.py @@ -31,7 +31,7 @@ DEFAULT_TABLE = "test_table" + str(uuid.uuid4()) DEFAULT_TABLE_SYNC = "test_table_sync" + str(uuid.uuid4()) -CUSTOM_TABLE = "test-table-custom" + str(uuid.uuid4()) +CUSTOM_TABLE = "custom" + str(uuid.uuid4()) VECTOR_SIZE = 768 embeddings_service = 
DeterministicFakeEmbedding(size=VECTOR_SIZE) @@ -364,7 +364,7 @@ async def test_from_engine( user, password, ): - async with Connector() as connector: + async with Connector(loop=asyncio.get_running_loop()) as connector: async def getconn(): conn = await connector.connect_async( # type: ignore diff --git a/tests/test_vectorstore_from_methods.py b/tests/test_vectorstore_from_methods.py index fadf8fc1..5d054dfb 100644 --- a/tests/test_vectorstore_from_methods.py +++ b/tests/test_vectorstore_from_methods.py @@ -29,10 +29,8 @@ DEFAULT_TABLE = "test_table" + str(uuid.uuid4()).replace("-", "_") DEFAULT_TABLE_SYNC = "test_table_sync" + str(uuid.uuid4()).replace("-", "_") CUSTOM_TABLE = "test_table_custom" + str(uuid.uuid4()).replace("-", "_") -CUSTOM_TABLE_WITH_INT_ID = "test_table_with_int_id" + str(uuid.uuid4()).replace( - "-", "_" -) -CUSTOM_TABLE_WITH_INT_ID_SYNC = "test_table_with_int_id" + str(uuid.uuid4()).replace( +CUSTOM_TABLE_WITH_INT_ID = "test_table_int_id" + str(uuid.uuid4()).replace("-", "_") +CUSTOM_TABLE_WITH_INT_ID_SYNC = "test_table_int_id" + str(uuid.uuid4()).replace( "-", "_" ) VECTOR_SIZE = 768 diff --git a/tests/test_vectorstore_index.py b/tests/test_vectorstore_index.py index cb797219..72a99b00 100644 --- a/tests/test_vectorstore_index.py +++ b/tests/test_vectorstore_index.py @@ -31,8 +31,8 @@ IVFFlatIndex, ) -DEFAULT_TABLE = "test_table" + str(uuid.uuid4()).replace("-", "_") -CUSTOM_TABLE = "test_table_custom" + str(uuid.uuid4()).replace("-", "_") +DEFAULT_TABLE = "table" + str(uuid.uuid4()).replace("-", "_") +CUSTOM_TABLE = "custom" + str(uuid.uuid4()).replace("-", "_") DEFAULT_INDEX_NAME = DEFAULT_TABLE + DEFAULT_INDEX_NAME_SUFFIX VECTOR_SIZE = 768 @@ -120,7 +120,7 @@ async def test_areindex(self, vs): if not vs.is_valid_index(DEFAULT_INDEX_NAME): index = HNSWIndex() vs.apply_vector_index(index) - vs.reindex() + vs.reindex(DEFAULT_INDEX_NAME) vs.reindex(DEFAULT_INDEX_NAME) assert vs.is_valid_index(DEFAULT_INDEX_NAME) 
vs.drop_vector_index(DEFAULT_INDEX_NAME) @@ -201,7 +201,7 @@ async def test_areindex(self, vs): if not await vs.ais_valid_index(DEFAULT_INDEX_NAME): index = HNSWIndex() await vs.aapply_vector_index(index) - await vs.areindex() + await vs.areindex(DEFAULT_INDEX_NAME) await vs.areindex(DEFAULT_INDEX_NAME) assert await vs.ais_valid_index(DEFAULT_INDEX_NAME) await vs.adrop_vector_index(DEFAULT_INDEX_NAME) diff --git a/tests/test_vectorstore_search.py b/tests/test_vectorstore_search.py index ae1341ed..963bc41b 100644 --- a/tests/test_vectorstore_search.py +++ b/tests/test_vectorstore_search.py @@ -22,16 +22,21 @@ from metadata_filtering_data import FILTERING_TEST_CASES, METADATAS, NEGATIVE_TEST_CASES from sqlalchemy import text -from langchain_google_cloud_sql_pg import Column, PostgresEngine, PostgresVectorStore +from langchain_google_cloud_sql_pg import ( # type: ignore + Column, + HybridSearchConfig, + PostgresEngine, + PostgresVectorStore, + reciprocal_rank_fusion, + weighted_sum_ranking, +) from langchain_google_cloud_sql_pg.indexes import DistanceStrategy, HNSWQueryOptions -DEFAULT_TABLE = "test_table" + str(uuid.uuid4()).replace("-", "_") -CUSTOM_TABLE = "test_table_custom" + str(uuid.uuid4()).replace("-", "_") -CUSTOM_TABLE_SYNC = "test_table_sync" + str(uuid.uuid4()).replace("-", "_") -CUSTOM_FILTER_TABLE = "test_table_custom_filter" + str(uuid.uuid4()).replace("-", "_") -CUSTOM_FILTER_TABLE_SYNC = "test_table_custom_filter_sync" + str(uuid.uuid4()).replace( - "-", "_" -) +DEFAULT_TABLE = "default" + str(uuid.uuid4()).replace("-", "_") +CUSTOM_TABLE = "custom" + str(uuid.uuid4()).replace("-", "_") +CUSTOM_TABLE_SYNC = "custom_sync" + str(uuid.uuid4()).replace("-", "_") +CUSTOM_FILTER_TABLE = "custom_filter" + str(uuid.uuid4()).replace("-", "_") +CUSTOM_FILTER_TABLE_SYNC = "custom_filter_sync" + str(uuid.uuid4()).replace("-", "_") VECTOR_SIZE = 768 embeddings_service = DeterministicFakeEmbedding(size=VECTOR_SIZE) @@ -191,7 +196,7 @@ async def 
test_asimilarity_search(self, vs): results = await vs.asimilarity_search("foo", k=1) assert len(results) == 1 assert results == [Document(page_content="foo", id=ids[0])] - results = await vs.asimilarity_search("foo", k=1, filter="content = 'bar'") + results = await vs.asimilarity_search("foo", k=1, filter={"content": "bar"}) assert results == [Document(page_content="bar", id=ids[1])] async def test_asimilarity_search_score(self, vs): @@ -252,7 +257,7 @@ async def test_amax_marginal_relevance_search(self, vs): results = await vs.amax_marginal_relevance_search("bar") assert results[0] == Document(page_content="bar", id=ids[1]) results = await vs.amax_marginal_relevance_search( - "bar", filter="content = 'boo'" + "bar", filter={"content": "boo"} ) assert results[0] == Document(page_content="boo", id=ids[3]) @@ -298,6 +303,37 @@ async def test_vectorstore_with_metadata_filters( ) assert [doc.metadata["code"] for doc in docs] == expected_ids, test_filter + async def test_asimilarity_hybrid_search(self, vs): + results = await vs.asimilarity_search( + "foo", k=1, hybrid_search_config=HybridSearchConfig() + ) + assert len(results) == 1 + assert results == [Document(page_content="foo", id=ids[0])] + + results = await vs.asimilarity_search( + "bar", + k=1, + hybrid_search_config=HybridSearchConfig(), + ) + assert results[0] == Document(page_content="bar", id=ids[1]) + + results = await vs.asimilarity_search( + "foo", + k=1, + filter={"content": {"$ne": "baz"}}, + hybrid_search_config=HybridSearchConfig( + fusion_function=weighted_sum_ranking, + fusion_function_parameters={ + "primary_results_weight": 0.1, + "secondary_results_weight": 0.9, + "fetch_top_k": 10, + }, + primary_top_k=1, + secondary_top_k=1, + ), + ) + assert results == [Document(page_content="foo", id=ids[0])] + class TestVectorStoreSearchSync: @pytest.fixture(scope="module") @@ -398,7 +434,7 @@ def test_similarity_search(self, vs_custom): results = vs_custom.similarity_search("foo", k=1) assert len(results) == 
1 assert results == [Document(page_content="foo", id=ids[0])] - results = vs_custom.similarity_search("foo", k=1, filter="mycontent = 'bar'") + results = vs_custom.similarity_search("foo", k=1, filter={"mycontent": "bar"}) assert results == [Document(page_content="bar", id=ids[1])] def test_similarity_search_score(self, vs_custom): @@ -420,7 +456,7 @@ def test_max_marginal_relevance_search(self, vs_custom): results = vs_custom.max_marginal_relevance_search("bar") assert results[0] == Document(page_content="bar", id=ids[1]) results = vs_custom.max_marginal_relevance_search( - "bar", filter="mycontent = 'boo'" + "bar", filter={"mycontent": "boo"} ) assert results[0] == Document(page_content="boo", id=ids[3]) @@ -465,3 +501,27 @@ def test_metadata_filter_negative_tests(self, vs_custom_filter_sync, test_filter docs = vs_custom_filter_sync.similarity_search( "meow", k=5, filter=test_filter ) + + def test_similarity_hybrid_search(self, vs_custom): + results = vs_custom.similarity_search( + "foo", k=1, hybrid_search_config=HybridSearchConfig() + ) + assert len(results) == 1 + assert results == [Document(page_content="foo", id=ids[0])] + + results = vs_custom.similarity_search( + "bar", + k=1, + hybrid_search_config=HybridSearchConfig(), + ) + assert results == [Document(page_content="bar", id=ids[1])] + + results = vs_custom.similarity_search( + "foo", + k=1, + filter={"mycontent": {"$ne": "baz"}}, + hybrid_search_config=HybridSearchConfig( + fusion_function=reciprocal_rank_fusion + ), + ) + assert results == [Document(page_content="foo", id=ids[0])]