|
| 1 | +# Run: |
| 2 | +# ollama pull llama3.2 |
| 3 | +# ollama pull nomic-embed-text |
| 4 | +# ollama serve |
| 5 | + |
| 6 | +import numpy as np |
| 7 | +import ollama |
| 8 | +from pathlib import Path |
| 9 | +from pgvector.psycopg import register_vector |
| 10 | +import psycopg |
| 11 | +import urllib.request |
| 12 | + |
# Question to answer via retrieval-augmented generation below.
query = 'What index types are supported?'
# Set to False to reuse a previously populated `chunks` table and skip ingest.
load_data = True

# autocommit so DDL below (CREATE EXTENSION / CREATE TABLE) is applied immediately
conn = psycopg.connect(dbname='pgvector_example', autocommit=True)
# the extension must exist before register_vector can look up the vector type
conn.execute('CREATE EXTENSION IF NOT EXISTS vector')
register_vector(conn)
| 19 | + |
if load_data:
    # get data: download the pgvector README as the source document (cached on disk)
    url = 'https://raw.githubusercontent.com/pgvector/pgvector/refs/heads/master/README.md'
    dest = Path(__file__).parent / 'README.md'
    if not dest.exists():
        urllib.request.urlretrieve(url, dest)

    doc = dest.read_text(encoding='utf-8')

    # generate chunks by splitting on second-level headings
    # TODO improve chunking
    # TODO remove markdown
    chunks = doc.split('\n## ')

    # embed chunks
    # nomic-embed-text has task instruction prefix
    # (renamed from `input` to avoid shadowing the builtin)
    chunk_inputs = ['search_document: ' + chunk for chunk in chunks]
    embeddings = ollama.embed(model='nomic-embed-text', input=chunk_inputs).embeddings

    # create table; vector(768) matches nomic-embed-text's embedding dimension
    conn.execute('DROP TABLE IF EXISTS chunks')
    conn.execute('CREATE TABLE chunks (id bigserial PRIMARY KEY, content text, embedding vector(768))')

    # store chunks via binary COPY for efficient bulk insert;
    # `with` closes the cursor deterministically (original leaked it)
    with conn.cursor() as cur:
        with cur.copy('COPY chunks (content, embedding) FROM STDIN WITH (FORMAT BINARY)') as copy:
            copy.set_types(['text', 'vector'])

            for content, embedding in zip(chunks, embeddings):
                copy.write_row([content, embedding])
| 51 | + |
# embed query
# nomic-embed-text has task instruction prefix
# (renamed from `input` to avoid shadowing the builtin)
query_input = 'search_query: ' + query
embedding = ollama.embed(model='nomic-embed-text', input=query_input).embeddings[0]

# retrieve the 5 nearest chunks by cosine distance (pgvector's <=> operator);
# parameter is passed as a numpy array so register_vector adapts it
result = conn.execute('SELECT content FROM chunks ORDER BY embedding <=> %s LIMIT 5', (np.array(embedding),)).fetchall()
context = '\n\n'.join([row[0] for row in result])
| 60 | + |
# Build the RAG prompt (question followed by the retrieved context) and
# have the chat model produce the final answer.
# TODO improve prompt
prompt = f'Answer this question: {query}\n\n{context}'
answer = ollama.generate(model='llama3.2', prompt=prompt)
print(answer.response)
0 commit comments