ChromaDB keeps returning old embeddings instead of new document vectors in RAG system

I’m working on a RAG chatbot using gradio and having trouble with vector storage. When I upload the first document everything works fine and I get correct answers. But when I upload a second document the system creates new embeddings properly but ChromaDB still returns vectors from the old document instead of the new one.

from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain.embeddings import HuggingFaceEmbeddings
import gradio as gr
import os

Here’s my document processing function:

def process_document(uploaded_file):
    """Load a PDF, chunk it, embed it, and return a fresh Chroma store.

    Each call builds its own uniquely-named collection so vectors from a
    previously uploaded document can never leak into the new retriever.
    (langchain's Chroma otherwise reuses its default collection name,
    "langchain", which is what caused the stale results described above.)

    Returns the Chroma vector store, or None on any failure.
    """
    import uuid

    try:
        loader = PyPDFLoader(uploaded_file)
        docs = loader.load_and_split()

        splitter = CharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=150
        )
        document_chunks = splitter.split_documents(docs)

        embedding_model = HuggingFaceEmbeddings()

        # BUG FIX: give every upload its own collection. Without this,
        # from_documents() targets the same default collection each time,
        # so the second document's chunks are mixed with the first's.
        db = Chroma.from_documents(
            documents=document_chunks,
            embedding=embedding_model,
            collection_name=f"doc_{uuid.uuid4().hex}"
        )

        return db
    except Exception as error:
        print(f"Error processing document: {error}")
        return None

def setup_qa_chain(file_path):
    """Build a RetrievalQA chain over the embedded contents of *file_path*.

    Returns the configured chain, or None when document processing or
    chain construction fails.
    """
    try:
        prompt_text = "Based on this context: {context}\n\nAnswer the question: {question}"

        qa_prompt = PromptTemplate(
            input_variables=["context", "question"],
            template=prompt_text
        )

        model = ChatGroq(
            temperature=0.1,
            model_name="llama3-groq-8b-8192-tool-use-preview"
        )

        # Guard: process_document() returns None on failure; the original
        # called .as_retriever() on the result unconditionally, which
        # raised AttributeError (swallowed by the broad except below).
        db = process_document(file_path)
        if db is None:
            return None

        retrieval_chain = RetrievalQA.from_chain_type(
            llm=model,
            chain_type="stuff",
            retriever=db.as_retriever(
                search_type='similarity',
                search_kwargs={"k": 5}
            ),
            # BUG FIX: qa_prompt was constructed but never passed to the
            # chain, so the custom template had no effect. Wire it in.
            chain_type_kwargs={"prompt": qa_prompt},
            return_source_documents=True
        )
        return retrieval_chain
    except Exception as error:
        print(f"Error setting up QA chain: {error}")
        return None

def chat_with_document(file_input, user_question):
    """Answer *user_question* against the uploaded document.

    Returns the model's answer string, or a human-readable message when
    no file was supplied or the QA chain could not be built.
    """
    if not file_input:
        return "Please upload a document first"

    # BUG FIX: setup_qa_chain's parameter is named `file_path`, so the
    # original keyword call `setup_qa_chain(file=file_input)` raised
    # TypeError. Pass the argument positionally.
    qa_system = setup_qa_chain(file_input)
    if qa_system is None:
        # setup_qa_chain returns None on failure; calling None would crash.
        return "Sorry, the document could not be processed."

    answer = qa_system({"query": user_question})["result"]
    return answer

The problem seems to be here:

# Demonstration of the symptom: no collection_name / persist_directory is
# given, so this lands in Chroma's defaults — presumably why a "new"
# instance still surfaces the previous document's vectors (see answer below).
db = Chroma.from_documents(documents=document_chunks, embedding=embedding_model)
retriever = db.as_retriever(search_type='similarity', search_kwargs={"k": 5})
test_result = retriever.invoke("tell me about this document")

Even though I create a new Chroma instance it keeps pulling results from the previous document. How can I make sure ChromaDB uses only the new document vectors?

I’ve hit this exact problem building document processing pipelines. ChromaDB persists your vectors to disk by default, even when you think you’re creating a fresh instance.

The issue: Chroma.from_documents() writes into a default storage location and, unless you override collection_name, reuses the same default collection ("langchain") every time. When you upload a second document, its chunks are added to that existing collection instead of replacing it — so similarity search returns a mix of old and new vectors.

Here’s my fix:

def process_document(uploaded_file):
    """Load, chunk and embed *uploaded_file* into a brand-new Chroma store.

    A fresh temporary persist directory is created per call, so the new
    retriever can only ever see this document's vectors.

    Returns the Chroma store, or None on any failure.

    NOTE: the temp directories are never deleted, so they accumulate for
    the lifetime of the process; clean them with shutil.rmtree() when the
    session ends if disk usage matters.
    """
    import tempfile  # hoisted; the original also imported shutil but never used it

    try:
        loader = PyPDFLoader(uploaded_file)
        docs = loader.load_and_split()

        splitter = CharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=150
        )
        document_chunks = splitter.split_documents(docs)

        embedding_model = HuggingFaceEmbeddings()

        # Unique directory per call -> guaranteed-empty database, so no
        # mixing with a previously uploaded document's vectors.
        temp_dir = tempfile.mkdtemp()
        db = Chroma.from_documents(
            documents=document_chunks,
            embedding=embedding_model,
            persist_directory=temp_dir
        )

        return db
    except Exception as error:
        print(f"Error processing document: {error}")
        return None

Or force an in-memory database:

# Alternative: explicitly request no on-disk persistence, so nothing
# survives this call to contaminate the next upload.
db = Chroma.from_documents(
    documents=document_chunks, 
    embedding=embedding_model,
    persist_directory=None
)

Learned this the hard way after hours debugging why my system kept mixing content from different documents. The temp directory approach works best when you need persistence during the session but want a clean slate each time.