I built a Streamlit application that has a chatbot powered by Langchain, OpenAI, and LangSmith. The bot works fine and I can see all the conversation runs in the LangSmith dashboard. However, I added a feedback system with thumbs up/down buttons after each bot response, but these feedback scores never show up in LangSmith even though the button clicks work properly. The runs are being tracked correctly; just the user feedback part is not getting recorded. What am I doing wrong?
import os
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferWindowMemory
from langchain.document_loaders import DirectoryLoader
from langchain.callbacks import collect_runs
from langsmith import Client
from streamlit_feedback import streamlit_feedback
from dotenv import load_dotenv
import uuid
load_dotenv()
@st.cache_resource
def build_qa_system():
model = ChatOpenAI(model='gpt-3.5-turbo', temperature=0.1, openai_api_key=os.environ['OPENAI_API_KEY'])
loader = DirectoryLoader('docs/')
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunks = splitter.split_documents(loader.load())
vector_db = FAISS.from_documents(documents=chunks, embedding=OpenAIEmbeddings(chunk_size=1000))
memory = ConversationBufferWindowMemory(memory_key="history", return_messages=True, k=5)
retrieval_chain = RetrievalQA.from_chain_type(
llm=model,
chain_type="stuff",
retriever=vector_db.as_retriever(),
memory=memory
)
return retrieval_chain
def process_feedback(user_rating, emoji_icon=None):
st.toast(f"Rating received: {user_rating}", icon=emoji_icon)
return user_rating.update({"extra_data": 456})
def save_user_feedback():
rating_data = st.session_state.get("user_rating")
current_run_id = st.session_state.current_run
rating_scale = {
"š": 1.0,
"š": 0.0,
"š": 0.8,
"š": 0.2
}
final_score = rating_scale.get(rating_data.get("score"))
if final_score is not None:
feedback_category = f"thumbs {rating_data.get('score')}"
try:
saved_feedback = langsmith_client.create_feedback(
run_id=current_run_id,
feedback_type=feedback_category,
score=final_score
)
st.session_state.saved_rating = {
"rating_id": str(saved_feedback.id),
"final_score": final_score
}
st.success(f"Feedback saved: {saved_feedback.id}")
except Exception as error:
st.error(f"Could not save feedback: {error}")
else:
st.warning("Please provide valid feedback")
langsmith_client = Client()
qa_system = build_qa_system()
if "chat_history" not in st.session_state:
st.session_state["chat_history"] = []
for msg in st.session_state.chat_history:
with st.chat_message(msg["role"]):
st.markdown(msg["content"])
user_question = st.chat_input("Ask me anything...")
if user_question:
st.session_state.chat_history.append({"role": "user", "content": user_question})
with st.chat_message("user"):
st.markdown(user_question)
with collect_runs() as run_collector:
bot_response = qa_system({"query": user_question})
if run_collector.traced_runs:
current_run = run_collector.traced_runs[0].id
st.session_state.current_run = current_run
else:
st.error("Failed to collect run data")
current_run = None
bot_answer = bot_response['result']
st.session_state.chat_history.append({"role": "assistant", "content": bot_answer})
with st.chat_message("assistant"):
st.markdown(bot_answer)
if bot_answer:
user_rating = streamlit_feedback(
feedback_type="thumbs",
key=f"rating_{current_run}"
)
if user_rating:
st.session_state["user_rating"] = user_rating
save_user_feedback()