forked from pgvector/pgvector-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsentence_embeddings.py
More file actions
42 lines (30 loc) · 1.27 KB
/
sentence_embeddings.py
File metadata and controls
42 lines (30 loc) · 1.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
from pgvector.sqlalchemy import Vector
from sentence_transformers import SentenceTransformer
from sqlalchemy import create_engine, insert, select, text, Integer, String, Text
from sqlalchemy.orm import declarative_base, mapped_column, Session
# Engine for the local example database, reached through the psycopg driver.
engine = create_engine('postgresql+psycopg://localhost/pgvector_example')

# Enable the pgvector extension before any table with a Vector column is
# created.  engine.begin() commits the transaction when the block exits
# without an error, so no explicit commit call is needed.
with engine.begin() as conn:
    conn.execute(text('CREATE EXTENSION IF NOT EXISTS vector'))
# Declarative base class that the ORM model in this script inherits from.
Base = declarative_base()


class Document(Base):
    """A piece of text stored together with its embedding vector."""

    __tablename__ = 'document'

    # Integer primary key.
    id = mapped_column(Integer, primary_key=True)
    # Text of the stored sentence.
    content = mapped_column(Text)
    # pgvector column declared with 384 dimensions.
    embedding = mapped_column(Vector(384))
# Start from a clean slate on every run: drop the tables registered on
# Base.metadata (destroying any existing rows), then create them again.
Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)
# Example sentences to embed and store.
sentences = [
    'The dog is barking',
    'The cat is purring',
    'The bear is growling'
]

# Encode every sentence in a single call to the embedding model.
model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(sentences)

# One row mapping per sentence, pairing each text with its embedding; the
# keys match the Document column names so the list can be bulk-inserted.
documents = [
    {'content': sentence, 'embedding': vector}
    for sentence, vector in zip(sentences, embeddings)
]
# Run the bulk insert and the similarity query inside one session.  The
# context manager closes the session (releasing its connection) even if a
# statement raises, which the bare Session(engine) call never did.
# NOTE: nothing is committed here, so the inserted rows are discarded when
# the session closes; call session.commit() after the insert to keep them.
with Session(engine) as session:
    # Bulk insert: one INSERT executed with the list of row mappings.
    session.execute(insert(Document), documents)

    # Query document: the row with primary key 1 (presumably the first
    # sentence, since the table was just recreated -- verify if ids matter).
    doc = session.get(Document, 1)

    # Every other document, ordered by cosine distance to the query
    # document's embedding (closest first), capped at five results.
    nearest = (
        select(Document)
        .where(Document.id != doc.id)
        .order_by(Document.embedding.cosine_distance(doc.embedding))
        .limit(5)
    )
    neighbors = session.scalars(nearest)
    for neighbor in neighbors:
        print(neighbor.content)