-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvector_store.py
More file actions
58 lines (48 loc) · 1.69 KB
/
vector_store.py
File metadata and controls
58 lines (48 loc) · 1.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from typing import List
from langchain_core.documents import Document
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from config import settings
def get_embeddings() -> OpenAIEmbeddings:
    """Build the OpenAI embedding client from the project settings.

    The model name and API key are read from ``settings`` on every call,
    and a new ``OpenAIEmbeddings`` instance is returned each time.
    """
    embedding_kwargs = {
        "model": settings.embedding_model,
        "openai_api_key": settings.openai_api_key,
    }
    return OpenAIEmbeddings(**embedding_kwargs)
def build_vector_store(chunks: List[Document]) -> Chroma:
    """
    Embed chunks and persist them in ChromaDB.

    Safe to call multiple times — any existing collection with the
    configured name is dropped first, so re-indexing replaces the stored
    chunks instead of appending duplicates.

    Args:
        chunks: Documents to embed and store. When the list is empty the
            existing collection is left untouched (nothing is dropped).

    Returns:
        The ``Chroma`` vector store returned by ``Chroma.from_documents``,
        bound to ``settings.chroma_collection`` under
        ``settings.chroma_persist_dir``.
    """
    embeddings = get_embeddings()

    if chunks:
        # Chroma.from_documents opens the collection with get-or-create
        # semantics and then *adds* to it; without this reset every
        # re-index would store a second copy of each chunk. Skipped for an
        # empty input so a bad run cannot wipe an existing index.
        Chroma(
            collection_name=settings.chroma_collection,
            embedding_function=embeddings,
            persist_directory=settings.chroma_persist_dir,
        ).delete_collection()

    vector_store = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        collection_name=settings.chroma_collection,
        persist_directory=settings.chroma_persist_dir,
    )
    print(f"✅ Stored {len(chunks)} chunks in ChromaDB "
          f"→ {settings.chroma_persist_dir}")
    return vector_store
def load_vector_store() -> Chroma:
    """
    Open the persisted ChromaDB collection named in the settings.

    Only the existence of ``settings.chroma_persist_dir`` on disk is
    checked before opening; the collection itself is not inspected here.

    Raises:
        RuntimeError: If the persist directory does not exist yet.
    """
    import os

    if os.path.exists(settings.chroma_persist_dir):
        # Embeddings are only built once the directory check has passed.
        embedding_fn = get_embeddings()
        return Chroma(
            collection_name=settings.chroma_collection,
            embedding_function=embedding_fn,
            persist_directory=settings.chroma_persist_dir,
        )

    raise RuntimeError(
        "ChromaDB not found. Run indexing first: "
        "python main.py --index"
    )
def get_retriever(vector_store: Chroma):
    """Wrap ``vector_store`` in a similarity-search retriever for the RAG chain.

    The number of results per query is taken from ``settings.retrieval_k``.
    """
    top_k_kwargs = {"k": settings.retrieval_k}
    return vector_store.as_retriever(
        search_type="similarity",
        search_kwargs=top_k_kwargs,
    )