I need help adding streaming to an Azure Function that calls Azure OpenAI. My setup is a Python-based Azure Function that talks to Azure OpenAI, a .NET Core API acting as middleware, and an Angular frontend. The function works today, but it only returns the complete response once generation has finished; I want to stream the response in real time so users can watch the text appear as it is generated. What changes do I need to make to enable this streaming behavior? Here is my current function:
import azure.functions as func
from azure.identity import DefaultAzureCredential
from openai import AzureOpenAI
import json

def main(request: func.HttpRequest) -> func.HttpResponse:
    try:
        request_data = request.get_json()
        deployment_name = request_data['model_deployment']
        endpoint_url = request_data['endpoint']

        # Acquire an Entra ID token for Azure OpenAI.
        credential = DefaultAzureCredential()
        access_token = credential.get_token("https://cognitiveservices.azure.com/.default")

        openai_client = AzureOpenAI(
            api_version="2023-07-01-preview",
            azure_endpoint=endpoint_url,
            azure_ad_token=access_token.token  # AAD tokens belong in azure_ad_token, not api_key
        )
        chat_response = openai_client.chat.completions.create(
            model=deployment_name,
            messages=request_data['conversation'],
            temperature=0.1,
            max_tokens=800,
            top_p=0.9,
            frequency_penalty=0.2,
            presence_penalty=0.1,
            stop=None
        )

        # The full completion is assembled here before anything is returned,
        # which is exactly the behavior I want to change.
        result_text = chat_response.choices[0].message.content
        result_json = json.dumps({"response": result_text})
        return func.HttpResponse(
            result_json,
            mimetype="application/json",
            status_code=200
        )
    except Exception as error:
        error_json = json.dumps({"error": str(error)})
        return func.HttpResponse(
            error_json,
            mimetype="application/json",
            status_code=500
        )
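
For what it's worth, I understand the OpenAI side of this is just stream=True, which makes the SDK return an iterator of chunks instead of one response object. Below is a minimal sketch of what I have in mind, assuming the same openai_client and request_data as above; the sse_events generator and its "data: ..." framing are my own guess at the shape the .NET middleware and Angular would want to consume, not something from the docs:

        # Sketch only: same client and inputs as my function above. stream=True is
        # real openai SDK behavior; sse_events() and its "data:" framing are my guess.
        chat_stream = openai_client.chat.completions.create(
            model=deployment_name,
            messages=request_data['conversation'],
            temperature=0.1,
            max_tokens=800,
            stream=True  # returns an iterator of ChatCompletionChunk objects
        )

        def sse_events(stream):
            # Frame each token delta as a server-sent event so the browser could
            # render the text incrementally instead of waiting for the full reply.
            for chunk in stream:
                # Azure sometimes sends a first chunk with empty choices, so guard it.
                if chunk.choices and chunk.choices[0].delta.content:
                    yield f"data: {json.dumps({'delta': chunk.choices[0].delta.content})}\n\n"
            yield "data: [DONE]\n\n"

What I can't figure out is the hosting part: func.HttpResponse seems to want the complete body up front, so I don't see how to return this generator from the function without buffering it, or how the .NET Core API should relay the stream on to Angular.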