I’m working on developing a social media bot that analyzes user posts and creates similar content based on their style. I want to combine RAG functionality with the general knowledge that LLMs already have built in. Right now my bot can answer questions about stored posts from my vector database, but when I ask basic questions like “How tall is the Eiffel Tower?” it says it doesn’t know the answer.
from dotenv import load_dotenv
import streamlit as st
from langchain_openai import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.schema import HumanMessage, AIMessage
load_dotenv()
# Setup OpenAI model
ai_model = ChatOpenAI(
model="gpt-3.5-turbo",
temperature=0.7
)
if 'current_user' not in st.session_state:
st.session_state['current_user'] = ''
st.text_input("Please enter your username:", key='current_user')
if not st.session_state['current_user']:
st.error("Username is required to continue")
st.stop()
st.info(f"Hello {st.session_state['current_user']}!")
# Setup embeddings
embedding_model = OpenAIEmbeddings()
# Vector database configuration
db_directory = "vector_storage"
user_collection = "social_posts"
# Initialize vector database
post_vectorstore = Chroma(
embedding_function=embedding_model,
persist_directory=db_directory,
collection_name=user_collection
)
def create_user_retriever(username):
user_retriever = post_vectorstore.as_retriever(
search_type="similarity",
search_kwargs={
"k": 3,
"filter": {"username": {"$eq": username}}
}
)
return user_retriever
current_retriever = create_user_retriever(st.session_state['current_user'])
if 'conversation_memory' not in st.session_state:
st.session_state.conversation_memory = ConversationBufferMemory(
memory_key="history",
return_messages=True,
output_key='answer'
)
bot_chain = ConversationalRetrievalChain.from_llm(
llm=ai_model,
retriever=current_retriever,
memory=st.session_state.conversation_memory,
return_source_documents=True
)
st.title("Social Media Content Bot")
for msg in st.session_state.conversation_memory.chat_memory.messages:
if isinstance(msg, HumanMessage):
with st.chat_message("user"):
st.write(msg.content)
elif isinstance(msg, AIMessage):
with st.chat_message("bot"):
st.write(msg.content)
if user_query := st.chat_input("Ask me anything..."):
with st.chat_message("user"):
st.write(user_query)
bot_response = bot_chain({"question": user_query})
reply_text = bot_response.get('answer', "Sorry, I couldn't process that.")
retrieved_docs = bot_response.get('source_documents', [])
with st.chat_message("bot"):
st.write(reply_text)
if retrieved_docs:
st.subheader("Related Posts Found:")
for i, document in enumerate(retrieved_docs, 1):
with st.expander(f"Reference {i}"):
st.write(f"**Text:** {document.page_content}")
st.write(f"**Details:** {document.metadata}")
What would be the best way to make this work for both types of questions? I’m currently using ConversationalRetrievalChain but maybe there’s a better approach.