|
| 1 | +# Run: |
| 2 | +# ollama pull llama3.2 |
| 3 | +# ollama pull nomic-embed-text |
| 4 | +# ollama serve |
| 5 | + |
| 6 | +import numpy as np |
| 7 | +import ollama |
| 8 | +from pathlib import Path |
| 9 | +from pgvector.psycopg import register_vector |
| 10 | +import psycopg |
| 11 | +import urllib.request |
| 12 | + |
# Question to answer via retrieval-augmented generation below.
query = 'What index types are supported?'
# Set to False to reuse a previously populated `chunks` table and skip ingest.
load_data = True

# autocommit so DDL below (CREATE EXTENSION / CREATE TABLE) is applied immediately
conn = psycopg.connect(dbname='pgvector_example', autocommit=True)
# the extension must exist before register_vector can look up the vector type
conn.execute('CREATE EXTENSION IF NOT EXISTS vector')
register_vector(conn)
| 19 | + |
if load_data:
    # get data: download the pgvector README as the source document (cached on disk)
    url = 'https://raw.githubusercontent.com/pgvector/pgvector/refs/heads/master/README.md'
    dest = Path(__file__).parent / 'README.md'
    if not dest.exists():
        urllib.request.urlretrieve(url, dest)

    doc = dest.read_text(encoding='utf-8')

    # generate chunks by splitting on second-level headings
    # TODO improve chunking
    # TODO remove markdown
    chunks = doc.split('\n## ')

    # embed chunks
    # nomic-embed-text has task instruction prefix
    # (renamed from `input` to avoid shadowing the builtin)
    chunk_inputs = ['search_document: ' + chunk for chunk in chunks]
    embeddings = ollama.embed(model='nomic-embed-text', input=chunk_inputs).embeddings

    # create table; vector(768) matches nomic-embed-text's embedding dimension
    conn.execute('DROP TABLE IF EXISTS chunks')
    conn.execute('CREATE TABLE chunks (id bigserial PRIMARY KEY, content text, embedding vector(768))')

    # store chunks via binary COPY for efficient bulk insert;
    # `with` closes the cursor deterministically (original leaked it)
    with conn.cursor() as cur:
        with cur.copy('COPY chunks (content, embedding) FROM STDIN WITH (FORMAT BINARY)') as copy:
            copy.set_types(['text', 'vector'])

            for content, embedding in zip(chunks, embeddings):
                copy.write_row([content, embedding])
| 51 | + |
# embed query
# nomic-embed-text has task instruction prefix
# (renamed from `input` to avoid shadowing the builtin)
query_input = 'search_query: ' + query
embedding = ollama.embed(model='nomic-embed-text', input=query_input).embeddings[0]

# retrieve the 5 nearest chunks by cosine distance (pgvector's <=> operator);
# parameter is passed as a numpy array so register_vector adapts it
result = conn.execute('SELECT content FROM chunks ORDER BY embedding <=> %s LIMIT 5', (np.array(embedding),)).fetchall()
context = '\n\n'.join([row[0] for row in result])
| 60 | + |
# Build the RAG prompt (question followed by the retrieved context) and
# have the chat model produce the final answer.
# TODO improve prompt
prompt = f'Answer this question: {query}\n\n{context}'
answer = ollama.generate(model='llama3.2', prompt=prompt)
print(answer.response)
0 commit comments