Hey everyone, I’m pretty new to Langchain and need some guidance.
I’m building an AI assistant for a movie theater app called CinemaHub. The theater has two screening rooms (Room X and Room Y) and shows movies twice daily - matinee and evening shows for specific date periods.
I want the assistant to help users with movie info, pricing, showtimes, seat availability, and age ratings. Eventually I’d like it to handle bookings and cancellations too, but right now I’m stuck on basic retrieval.
I thought about using function calling, but it seems like most of the models that support it well are paid hosted APIs. Am I wrong about this?
I’m using Mistral 7B as my chat model and built this setup:
# Deterministic generation pipeline for query rewriting: greedy decoding keeps
# the rewritten search query stable for identical inputs.
# FIX: removed `temperature=0.0` — with `do_sample=False` transformers ignores
# temperature (and recent versions warn or raise on a non-positive value);
# greedy decoding alone already gives deterministic output.
query_pipeline = pipeline(
    model=mistral_llm,
    tokenizer=token_processor,
    task="text-generation",
    do_sample=False,           # greedy decoding — deterministic
    repetition_penalty=1.1,
    return_full_text=False,    # return only the generated continuation
    max_new_tokens=800,
)
query_llm = HuggingFacePipeline(pipeline=query_pipeline)
# Sampling pipeline for the final user-facing answer.
# NOTE(review): do_sample=True with no temperature/top_p set means sampling runs
# at the library defaults (presumably temperature 1.0) — confirm that much
# randomness is intended for factual RAG answers.
answer_pipeline = pipeline(
model=mistral_llm,
tokenizer=token_processor,
task="text-generation",
do_sample=True,
repetition_penalty=1.1,
return_full_text=False,
max_new_tokens=4000,
)
answer_llm = HuggingFacePipeline(pipeline=answer_pipeline)
Then I chain everything together with memory:
# Template used to render each retrieved Document (its page_content only).
DOC_TEMPLATE = PromptTemplate.from_template(template="{page_content}")


def merge_docs(documents, doc_template=DOC_TEMPLATE, separator="\n\n"):
    """Render every retrieved document with *doc_template* and join them
    into a single context string, separated by blank lines."""
    return separator.join(
        format_document(doc, doc_template) for doc in documents
    )
# Conversation memory: each turn is saved under input key "user_query" and
# output key "response"; return_messages=True yields history as message
# objects rather than one flat string.
chat_memory = ConversationBufferMemory(
return_messages=True, output_key="response", input_key="user_query"
)
# Injects the running chat history into the chain payload under "history"
# while passing the rest of the input through unchanged.
with_memory = RunnablePassthrough.assign(
history=RunnableLambda(chat_memory.load_memory_variables) | itemgetter("history"),
)
# Step 1: rewrite the question into a standalone search query using history.
# BUG FIX: a plain-dict step replaces the ENTIRE payload with its own keys, so
# the original "user_query" was dropped here and downstream steps had to alias
# the rewritten LLM text as the user question. Carry the original through.
rewrite_query = {
    "rewritten_query": {
        "user_query": itemgetter("user_query"),
        "history": lambda x: get_buffer_string(x["history"]),
    }
    | QUERY_REWRITE_PROMPT
    | query_llm
    | (lambda text: text.strip()),  # LLM output often starts with a newline
    "user_query": itemgetter("user_query"),
}
# Step 2: retrieve with the standalone query; keep the ORIGINAL question for
# the answer prompt.
get_docs = {
    "documents": itemgetter("rewritten_query") | doc_retriever,
    "user_query": itemgetter("user_query"),
}
# Step 3: flatten the retrieved docs into one context string.
format_inputs = {
    "context": lambda x: merge_docs(x["documents"]),
    "user_query": itemgetter("user_query"),
}
# Step 4: generate the answer; also surface the question and the context so the
# caller can log/inspect them.
generate_response = {
    "response": format_inputs | RESPONSE_PROMPT | answer_llm,
    "user_query": itemgetter("user_query"),
    "context": format_inputs["context"],
}
rag_chain = with_memory | rewrite_query | get_docs | generate_response
I call it with this function:
def ask_rag_system(user_question, chain, memory):
    """Run one conversational RAG turn.

    Invokes *chain* with the question, prints the raw chain output for
    inspection, saves the question/answer pair into *memory*, and returns
    the full chain output dict.
    """
    payload = {"user_query": user_question}
    result = chain.invoke(payload)
    print(result)
    memory.save_context(payload, {"response": result["response"]})
    return result
For my data, I have a JSON file with 21 movies that looks like this:
[
{
"title": "Inception Dreams",
"tagline": "Reality Is Just The Beginning",
"plot": "A mind-bending thriller about a team of specialists who enter people's dreams to steal secrets. When they're tasked with the impossible mission of planting an idea instead of stealing one, reality and dreams blur together.",
"actors": [
"Leonardo DiCaprio",
"Marion Cotillard",
"Tom Hardy"
],
"category": "Sci-Fi Thriller",
"duration": "148 minutes",
"showDates": {
"from": "2024-07-01",
"to": "2024-07-15"
},
"matinee": {
"startTime": "15:30",
"regularPrice": 28,
"discountPrice": 15
},
"evening": {
"startTime": "20:30",
"regularPrice": 38,
"discountPrice": 20
},
"standardSeats": {
"open": 180,
"booked": 145
},
"accessibleSeats": {
"open": 9,
"booked": 6
},
"allSeats": {
"open": 189,
"booked": 151
},
"screeningRoom": "Room X",
"minAge": "16+"
}
]
I convert each movie to text like this and create documents:
def create_movie_text(movie_data):
    """Serialize one movie record (a dict parsed from the JSON file) into the
    plain-text form stored in the vector DB.

    The wording is kept byte-identical to the original template so existing
    embeddings remain comparable.
    """
    matinee = movie_data['matinee']
    evening = movie_data['evening']
    dates = movie_data['showDates']
    standard = movie_data['standardSeats']
    accessible = movie_data['accessibleSeats']
    lines = [
        f"Movie Information: {movie_data['title']}",
        f"Tagline: {movie_data['tagline']}",
        f"Plot Summary: {movie_data['plot']}",
        f"Starring: {', '.join(movie_data['actors'])}",
        f"Genre: {movie_data['category']}",
        f"Runtime: {movie_data['duration']}",
        f"Showing from {dates['from']} to {dates['to']}",
        "Screening Times:",
        f"- Matinee at {matinee['startTime']}: Regular tickets {matinee['regularPrice']}€, Discount tickets {matinee['discountPrice']}€",
        f"- Evening at {evening['startTime']}: Regular tickets {evening['regularPrice']}€, Discount tickets {evening['discountPrice']}€",
        "Seating Availability:",
        f"Standard seats: {standard['open']} available, {standard['booked']} sold",
        f"Accessible seats: {accessible['open']} available, {accessible['booked']} sold",
        f"Screening in {movie_data['screeningRoom']}",
        f"Age restriction: {movie_data['minAge']}",
    ]
    return "\n".join(lines)
# NOTE(review): `chunker` is never used in this snippet — each movie document
# is short enough to fit in one chunk anyway; confirm whether splitting is
# needed at all before keeping this line.
chunker = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5")
vector_db = Chroma.from_documents(movie_docs, embedding_model)
# BUG FIX: as_retriever(k=4) silently ignores `k` — retriever search options
# must be passed inside search_kwargs, otherwise the default result count is
# used, which is a likely cause of "retrieval doesn't find the right docs".
doc_retriever = vector_db.as_retriever(search_kwargs={"k": 4})
My main issues:
- The retrieval doesn’t seem to find the right documents
- Sometimes the model gets confused with long prompts
- I’m not sure if I’m doing something fundamentally wrong
I’ve seen others do this with more complex data and it works fine. What am I missing here? Any help would be awesome!