Skip to content

Commit e19df46

Browse files
committed
Added basic RAG example [skip ci]
1 parent a8f2a5f commit e19df46

File tree

4 files changed

+70
-0
lines changed

4 files changed

+70
-0
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ venv/
66
*.pyc
77
__pycache__
88
.pytest_cache/
9+
examples/rag/README.md

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ And follow the instructions for your database library:
2727

2828
Or check out some examples:
2929

30+
- [Retrieval-augmented generation](https://github.com/pgvector/pgvector-python/blob/master/examples/rag/example.py) with Ollama
3031
- [Embeddings](https://github.com/pgvector/pgvector-python/blob/master/examples/openai/example.py) with OpenAI
3132
- [Binary embeddings](https://github.com/pgvector/pgvector-python/blob/master/examples/cohere/example.py) with Cohere
3233
- [Sentence embeddings](https://github.com/pgvector/pgvector-python/blob/master/examples/sentence_transformers/example.py) with SentenceTransformers

examples/rag/example.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# Basic retrieval-augmented generation (RAG) example: embed chunks of the
# pgvector README with Ollama, store them in Postgres via pgvector, retrieve
# the nearest chunks for a question, and ask a local LLM to answer from them.
#
# Run:
# ollama pull llama3.2
# ollama pull nomic-embed-text
# ollama serve

import numpy as np
import ollama
from pathlib import Path
from pgvector.psycopg import register_vector
import psycopg
import urllib.request

query = 'What index types are supported?'
# set to False to skip re-downloading/re-embedding and reuse the existing table
load_data = True

# assumes the pgvector_example database already exists
conn = psycopg.connect(dbname='pgvector_example', autocommit=True)
conn.execute('CREATE EXTENSION IF NOT EXISTS vector')
register_vector(conn)

if load_data:
    # get data: download the pgvector README next to this script (cached after first run)
    url = 'https://raw.githubusercontent.com/pgvector/pgvector/refs/heads/master/README.md'
    dest = Path(__file__).parent / 'README.md'
    if not dest.exists():
        urllib.request.urlretrieve(url, dest)

    doc = dest.read_text(encoding='utf-8')

    # generate chunks
    # TODO improve chunking
    # TODO remove markdown
    chunks = doc.split('\n## ')

    # embed chunks
    # nomic-embed-text has task instruction prefix
    # (named doc_inputs rather than `input` to avoid shadowing the builtin)
    doc_inputs = ['search_document: ' + chunk for chunk in chunks]
    embeddings = ollama.embed(model='nomic-embed-text', input=doc_inputs).embeddings

    # create table; vector(768) matches the nomic-embed-text embedding size
    conn.execute('DROP TABLE IF EXISTS chunks')
    conn.execute('CREATE TABLE chunks (id bigserial PRIMARY KEY, content text, embedding vector(768))')

    # store chunks with binary COPY for fast bulk insert
    cur = conn.cursor()
    with cur.copy('COPY chunks (content, embedding) FROM STDIN WITH (FORMAT BINARY)') as copy:
        copy.set_types(['text', 'vector'])

        for content, embedding in zip(chunks, embeddings):
            copy.write_row([content, embedding])

# embed query
# nomic-embed-text has task instruction prefix
query_input = 'search_query: ' + query
embedding = ollama.embed(model='nomic-embed-text', input=query_input).embeddings[0]

# retrieve the 5 nearest chunks by cosine distance (<=>)
result = conn.execute('SELECT content FROM chunks ORDER BY embedding <=> %s LIMIT 5', (np.array(embedding),)).fetchall()
context = '\n\n'.join([row[0] for row in result])

# get answer
# TODO improve prompt
prompt = f'Answer this question: {query}\n\n{context}'
response = ollama.generate(model='llama3.2', prompt=prompt).response
print(response)

examples/rag/requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
ollama
2+
pgvector
3+
psycopg[binary]

0 commit comments

Comments
 (0)