-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvector.py
More file actions
74 lines (61 loc) · 2.16 KB
/
vector.py
File metadata and controls
74 lines (61 loc) · 2.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from langchain_community.vectorstores.pgvector import PGVector
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
import pandas as pd
import requests
import os
# Custom remote embedding using Ollama
class RemoteOllamaEmbeddings(Embeddings):
    """Embeddings backend that asks an Ollama server for vectors over HTTP.

    Every text is sent in its own POST request to ``{url}/api/embeddings``
    and the ``"embedding"`` field of the JSON reply is used as its vector.

    Args:
        model: Model name placed in the request payload.
        url: Base URL of the Ollama server. A trailing slash is tolerated.
        timeout: Value forwarded to ``requests.post(timeout=...)`` in seconds,
            so a stalled or unreachable server raises instead of blocking
            forever. Pass ``None`` to wait without limit (the behaviour
            before this parameter existed).
    """

    def __init__(self, model="mxbai-embed-large", url="http://localhost:11434", timeout=60):
        self.model = model
        self.url = url
        self.timeout = timeout

    def embed_documents(self, texts):
        """Return one embedding per entry of ``texts``, in the same order.

        Raises:
            RuntimeError: If the server answers with a status other than 200;
                the message carries the response body.
            requests.exceptions.RequestException: Propagated unchanged from
                ``requests`` on connection failures or timeouts.
        """
        # Strip a trailing "/" so "http://host:11434/" does not turn into
        # "http://host:11434//api/embeddings".
        endpoint = f"{self.url.rstrip('/')}/api/embeddings"
        embeddings = []
        for text in texts:
            res = requests.post(
                endpoint,
                json={"model": self.model, "prompt": text},
                timeout=self.timeout,
            )
            if res.status_code != 200:
                raise RuntimeError(f"Embedding failed: {res.text}")
            embeddings.append(res.json()["embedding"])
        return embeddings

    def embed_query(self, text):
        """Return the embedding of a single query string."""
        return self.embed_documents([text])[0]
# Load the review data. The loop below reads the columns
# "Title", "Review", "Rating" and "Date" from every row.
df = pd.read_csv("realistic_restaurant_reviews.csv")

# Instantiate embedding function (used for indexing and for queries)
embedding_function = RemoteOllamaEmbeddings()

# Postgres connection config. When the PGVECTOR_CONNECTION_STRING environment
# variable is set it takes precedence, so real credentials do not have to be
# committed; otherwise the local development default is used.
CONNECTION_STRING = os.environ.get(
    "PGVECTOR_CONNECTION_STRING",
    "postgresql+psycopg2://postgres:postgres@localhost:5432/mydb",
)

# Unique collection name
COLLECTION_NAME = "restaurant_reviews_pg"

# Check if you need to populate the DB
# NOTE(review): this is hard-coded to True, so every import of this module
# re-embeds the whole CSV and writes it again; presumably this should become
# a real "is the collection already populated?" check -- confirm.
add_documents = True  # or make it smarter using a table check

if add_documents:
    documents = []
    ids = []
    for i, row in df.iterrows():
        # The DataFrame index label doubles as the document id.
        doc_id = str(i)
        documents.append(
            Document(
                page_content=row["Title"] + " " + row["Review"],
                metadata={"rating": row["Rating"], "date": row["Date"]},
                id=doc_id,
            )
        )
        ids.append(doc_id)

    # Save documents to PGVector. `ids` was previously built but never handed
    # to the store, so the explicit ids were silently dropped.
    vectorstore = PGVector.from_documents(
        embedding=embedding_function,
        documents=documents,
        collection_name=COLLECTION_NAME,
        connection_string=CONNECTION_STRING,
        ids=ids,
    )
else:
    # Load existing vectorstore
    vectorstore = PGVector(
        embedding_function=embedding_function,
        collection_name=COLLECTION_NAME,
        connection_string=CONNECTION_STRING,
    )

# Retriever returning the 5 nearest documents per query.
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})