How to pass session context data as conversation history in LangChain with AWS Lex integration

I’m working on a chatbot that uses AWS Lex with LangChain. I store the chat history in session_attributes['sessionContext'], and the logs confirm the conversation history is saved there correctly. However, the {history} parameter in my prompt template only receives the current message from Lex instead of the full conversation history. How do I pass the session_attributes data as the history parameter in my prompt?

import json
import logging
from langchain.llms import SagemakerEndpoint
from langchain.llms.sagemaker_endpoint import ContentHandlerBase
from langchain.retrievers import KendraIndexRetriever
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory

logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

def finish_conversation(session_attrs, context_data, state, user_intent, response_msg):
    result = {
        'sessionState': {
            'activeContexts':[{
                'name': 'mainContext',
                'contextAttributes': context_data,
                'timeToLive': {
                    'timeToLiveInSeconds': 500,
                    'turnsToLive': 1
                }
            }],
            'sessionAttributes': session_attrs,
            'dialogAction': {
                'type': 'Close',
            },
            'intent': user_intent,
        },
        'messages': [{'contentType': 'PlainText', 'content': response_msg}]
    }
    return result

def continue_dialog(session_attrs, context_data, user_intent, response_msg):
    return {
        'sessionState': {
            'activeContexts':[{
                'name': 'mainContext',
                'contextAttributes': context_data,
                'timeToLive': {
                    'timeToLiveInSeconds': 500,
                    'turnsToLive': 1
                }
            }],
            'sessionAttributes': session_attrs,
            'dialogAction': {
                'type': 'Delegate',
            },
            'intent': user_intent,
        },
        'messages': [{'contentType': 'PlainText', 'content': response_msg}]
    }

def get_slot_value(slot_data):
    if slot_data is not None:
        return slot_data['value']['interpretedValue']
    else:
        return None

def call_ai_model(user_question, chat_history):
    model_endpoint = 'my-sagemaker-endpoint'
    aws_region = 'us-west-2'
    search_index = 'my-kendra-index'
    
    print("AI model chat history: ", chat_history)
    
    class ResponseHandler(ContentHandlerBase):
        content_type = "application/json"
        accepts = "application/json"
        
        def transform_input(self, input_text: str, params: dict) -> bytes:
            payload = json.dumps({"text_inputs": input_text, **params})
            return payload.encode('utf-8')
            
        def transform_output(self, model_output: bytes) -> str:
            result = json.loads(model_output.read().decode("utf-8"))
            return result["generated_texts"][0]
    
    handler = ResponseHandler()
    language_model = SagemakerEndpoint(
        endpoint_name=model_endpoint,
        region_name=aws_region,
        model_kwargs={"temperature": 1e-10, "max_length": 400},
        content_handler=handler
    )
    
    document_retriever = KendraIndexRetriever(
        kendraindex=search_index,
        awsregion=aws_region,
        return_source_documents=True
    )
    
    prompt_template = """
    Based on the provided context (in <context></context> tags) and previous conversation (in <chat></chat> tags), answer the user's question:
    ------
    <context>
    {context}
    </context>
    ------
    <chat>
    {history}
    </chat>
    ------
    Question: {question}
    Response:
    """
    
    qa_prompt = PromptTemplate(
        input_variables=["history", "context", "question"],
        template=prompt_template
    )
    
    qa_chain = RetrievalQA.from_chain_type(
        llm=language_model,
        chain_type='stuff',
        retriever=document_retriever,
        verbose=True,
        chain_type_kwargs={
            "verbose": True,
            "prompt": qa_prompt,
            "memory": ConversationBufferMemory(
                memory_key="history",
                input_key="question",
                return_messages=True
            ),
        }
    )
    
    final_result = qa_chain({'query': user_question, 'history': chat_history})
    ai_response = qa_chain(user_question)
    
    print("Final answer: ", ai_response['result'])
    return ai_response

def main_handler(lex_request, lambda_context):
    print("Received request: ", lex_request)
    logger.debug(lex_request)
    
    current_intent = lex_request['sessionState']['intent']
    session_attrs = lex_request['sessionState']['sessionAttributes']
    
    if 'sessionContext' not in session_attrs.keys():
        print("Starting new session")
        session_attrs['sessionContext'] = ''
    
    context_info = {}
    
    if current_intent['name'] == 'FallbackIntent':
        user_input = lex_request['inputTranscript'] + session_attrs['sessionContext']
        ai_result = call_ai_model(user_input, session_attrs['sessionContext'])
        
        result_data = json.dumps({
            'Response': ai_result['result'],
        })
        
        context_info['UserQuery'] = result_data
        logger.debug('AI Response={}'.format(result_data))
        
        current_intent['confirmationState'] = "Confirmed"
        current_intent['state'] = "Fulfilled"
        session_attrs['sessionContext'] = session_attrs['sessionContext'] + ' ' + lex_request['inputTranscript'] + ' ' + ai_result['result']
        
        print("Updated history: ", session_attrs['sessionContext'])
        return finish_conversation(session_attrs, context_info, 'Fulfilled', current_intent, ai_result['result'])
    
    user_query = get_slot_value(lex_request['sessionState']['intent']['slots']['Query'])
    print("User's question: ", user_query)
    
    if user_query or current_intent['name'] == 'FallbackIntent':
        ai_result = call_ai_model(user_query, session_attrs['sessionContext'])
        
        result_data = json.dumps({
            'Response': ai_result['result'],
        })
        
        logger.debug('AI Response={}'.format(result_data))
        current_intent['confirmationState'] = "Confirmed"
        current_intent['state'] = "Fulfilled"
        session_attrs['sessionContext'] = session_attrs['sessionContext'] + ' ' + lex_request['inputTranscript'] + ' ' + ai_result['result']
        
        print("Updated history: ", session_attrs['sessionContext'])
        return finish_conversation(session_attrs, context_info, 'Fulfilled', current_intent, ai_result['result'])

Your issue is that you’re passing session_attrs['sessionContext'] as history, but ConversationBufferMemory expects a different format, and RetrievalQA chains don’t actually use the history you pass in the call: they only take history from their memory object. I hit this same problem.

You have two options. Either populate the memory manually before calling the chain: instead of passing history directly in the chain call, initialize your ConversationBufferMemory and use memory.save_context() to replay the previous exchanges from sessionContext before running the chain.

Or skip the built-in memory completely: remove the memory parameter from chain_type_kwargs, handle the history formatting yourself when you prepare the input, and inject your sessionContext directly into the prompt.
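For the second option, here’s a minimal sketch. It reuses the qa_prompt, language_model, and document_retriever objects from your code; PromptTemplate.partial() pre-fills the {history} slot so the chain only has to supply {context} and {question}:

# Pre-fill {history} so the chain no longer needs a memory object
prompt_with_history = qa_prompt.partial(history=chat_history)

qa_chain = RetrievalQA.from_chain_type(
    llm=language_model,
    chain_type='stuff',
    retriever=document_retriever,
    chain_type_kwargs={
        "prompt": prompt_with_history,
        # no "memory" entry -- history is already baked into the prompt
    }
)

ai_response = qa_chain({'query': user_question})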

You’re wiring ConversationBufferMemory into the chain but never populating it. The memory needs to be loaded with your session context before you call the chain.

I hit this same issue last year building a customer support bot. Here’s what fixed it:

def call_ai_model(user_question, chat_history):
    # ... your existing setup code ...
    
    # Create memory and load it with session context
    memory = ConversationBufferMemory(
        memory_key="history",
        input_key="question",
        return_messages=False  # Keep as string format
    )
    
    # Parse and add the chat history to memory.
    # NOTE: assumes each message in sessionContext is separated by a known
    # delimiter (here '||'); splitting on plain whitespace would treat every
    # individual word as its own message
    if chat_history and chat_history.strip():
        history_parts = [p.strip() for p in chat_history.split('||') if p.strip()]
        # Treat the parts as alternating user/AI messages
        for i in range(0, len(history_parts) - 1, 2):
            memory.chat_memory.add_user_message(history_parts[i])
            memory.chat_memory.add_ai_message(history_parts[i + 1])
    
    qa_chain = RetrievalQA.from_chain_type(
        llm=language_model,
        chain_type='stuff',
        retriever=document_retriever,
        verbose=True,
        chain_type_kwargs={
            "verbose": True,
            "prompt": qa_prompt,
            "memory": memory,
        }
    )
    
    # Now the history will be properly injected
    ai_response = qa_chain({"query": user_question})
    return ai_response

The fix is manually loading the memory object with your session context before creating the chain. This way {history} gets the full conversation instead of just the current message.

Also consider formatting your session context better when you store it. Instead of concatenating strings, use a delimiter or JSON structure to make parsing cleaner.
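For example, something like this (a sketch with illustrative helper names, not code from the question; Lex session attribute values must be strings, so json.dumps/json.loads round-trips cleanly):

import json

# Hypothetical helpers: store the history as a JSON list of turns
# instead of a concatenated string
def append_turn(session_attrs, user_msg, ai_msg):
    turns = json.loads(session_attrs.get('sessionContext') or '[]')
    turns.append({'user': user_msg, 'ai': ai_msg})
    session_attrs['sessionContext'] = json.dumps(turns)

def load_turns(session_attrs):
    return json.loads(session_attrs.get('sessionContext') or '[]')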

Your ConversationBufferMemory isn’t connecting to your session_attributes data - that’s why you’re only getting the current exchange instead of the full conversation history.

Don’t rely on the memory parameter in chain_type_kwargs. Instead, pass your session_attributes['sessionContext'] directly as the history value when calling the chain. Change your qa_chain call to: qa_chain({'query': user_question, 'history': session_attrs['sessionContext']}).

Or you can populate the ConversationBufferMemory with your existing session context first. Parse your session_attrs['sessionContext'] string and use memory.save_context() to add the previous exchanges. Either way, your prompt template will get the complete conversation history instead of just the current interaction.
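The save_context() route looks roughly like this (a sketch: it assumes the history has been stored as a JSON list of turns rather than the space-concatenated string from the question):

import json
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(memory_key="history", input_key="question")

# Replay each stored exchange into the memory before building the chain;
# assumes sessionContext holds JSON like [{"user": "...", "ai": "..."}]
for turn in json.loads(session_attrs.get('sessionContext') or '[]'):
    memory.save_context({"question": turn["user"]}, {"output": turn["ai"]})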

I’ve been down this exact rabbit hole before. You’re mixing two different approaches for handling conversation history and they’re fighting each other.

Your RetrievalQA chain expects the memory object to handle history injection, but you’re also manually passing history through query parameters. Pick one and stick with it.

Here’s what I’d do - ditch ConversationBufferMemory completely and handle history manually since you already have it working in session_attributes:

# Remove memory from chain_type_kwargs entirely
qa_chain = RetrievalQA.from_chain_type(
    llm=language_model,
    chain_type='stuff',
    retriever=document_retriever,
    verbose=True,
    chain_type_kwargs={
        "verbose": True,
        "prompt": qa_prompt,
        # No memory parameter here
    }
)

# Then call it like this:
ai_response = qa_chain({
    "query": user_question,
    "history": session_attrs['sessionContext']
})

The memory object was overriding your manual history parameter. Remove it and your prompt template gets the full sessionContext as expected.


Also, consider formatting your sessionContext better when you store it. Right now you’re just concatenating strings, which makes parsing messy later if you need structured data.

You’re passing session_attrs['sessionContext'] as history, but the memory object isn’t picking it up. Initialize your ConversationBufferMemory with the existing chat history from session context before you create the chain: use memory.chat_memory.add_user_message() and memory.chat_memory.add_ai_message() to rebuild the conversation from your stored sessionContext string.
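A rough sketch of that (assuming, as the other answers suggest, that sessionContext has been switched to a JSON list of turns so it can be parsed reliably):

import json
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(memory_key="history", input_key="question")

# Rebuild the conversation from the stored session context;
# assumes sessionContext holds JSON like [{"user": "...", "ai": "..."}]
for turn in json.loads(session_attrs.get('sessionContext') or '[]'):
    memory.chat_memory.add_user_message(turn["user"])
    memory.chat_memory.add_ai_message(turn["ai"])

# then pass this memory object into chain_type_kwargs as before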