forked from pgvector/pgvector-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathopenai_embeddings.py
More file actions
41 lines (30 loc) · 1.31 KB
/
openai_embeddings.py
File metadata and controls
41 lines (30 loc) · 1.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import openai
from pgvector.sqlalchemy import Vector
from sentence_transformers import SentenceTransformer
from sqlalchemy import create_engine, insert, select, text, Integer, String, Text
from sqlalchemy.orm import declarative_base, mapped_column, Session
# Engine for the local example database; reused by everything further down the script.
engine = create_engine('postgresql+psycopg://localhost/pgvector_example')

# Make sure the pgvector extension exists before any table with a Vector
# column is created. engine.begin() opens a transaction and commits it when
# the block exits without an error.
with engine.begin() as connection:
    connection.execute(text('CREATE EXTENSION IF NOT EXISTS vector'))
# Declarative base whose metadata is used to drop and create the tables.
Base = declarative_base()


class Document(Base):
    """ORM model for the `document` table: a text and its embedding vector."""

    __tablename__ = 'document'

    # Integer primary key.
    id = mapped_column(Integer, primary_key=True)
    # The text that was embedded.
    content = mapped_column(Text)
    # pgvector column declared with 1536 dimensions; this must match the
    # length of the vectors that get inserted.
    embedding = mapped_column(Vector(1536))
# Recreate the schema from scratch on every run so the table starts empty.
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)

# Named `sentences` rather than `input` so the builtin is not shadowed.
sentences = [
    'The dog is barking',
    'The cat is purring',
    'The bear is growling',
]

# A single request embeds the whole batch.
response = openai.Embedding.create(input=sentences, model='text-embedding-ada-002')
embeddings = [item['embedding'] for item in response['data']]

# Pair each sentence with its vector; assumes response['data'] is ordered
# like the input list.
documents = [
    dict(content=content, embedding=embedding)
    for content, embedding in zip(sentences, embeddings)
]

# The context manager closes the session (and returns its connection to the
# pool) even if one of the statements below raises.
with Session(engine) as session:
    session.execute(insert(Document), documents)
    # Persist the rows; without a commit they are rolled back when the
    # session is closed.
    session.commit()

    # Use the row with primary key 1 as the query document
    # (presumably the first sentence, since the table was just recreated).
    doc = session.get(Document, 1)

    # Up to five other documents, ordered by pgvector's max_inner_product
    # distance to the query document's embedding.
    query = (
        select(Document)
        .filter(Document.id != doc.id)
        .order_by(Document.embedding.max_inner_product(doc.embedding))
        .limit(5)
    )
    for neighbor in session.scalars(query):
        print(neighbor.content)