I’m building a skillset in Azure AI Search and need help with skill ordering. My pipeline uses document extraction, OCR processing, image analysis, text merging, content splitting, and OpenAI embeddings.
Currently my flow works like this: extract document content and images, run OCR on images, analyze images for descriptions, merge OCR text with original content, then merge image descriptions, split into chunks, and finally create embeddings. Does this sequence make sense?
Here’s my current configuration:
{
"@odata.etag": "\"pipeline-v1\"",
"name": "content-processing-skillset",
"skills": [
{
"@odata.type": "#Microsoft.Skills.Util.DocumentExtractionSkill",
"name": "doc-extractor",
"description": "Pulls text and images from uploaded files",
"context": "/document",
"parsingMode": "default",
"dataToExtract": "contentAndMetadata",
"inputs": [
{
"name": "file_data",
"source": "/document/file_data"
}
],
"outputs": [
{
"name": "content",
"targetName": "document_content"
},
{
"name": "normalized_images",
"targetName": "processed_images"
}
],
"configuration": {
"imageAction": "generateNormalizedImages",
"normalizedImageMaxWidth": 1800,
"normalizedImageMaxHeight": 1800
}
},
{
"@odata.type": "#Microsoft.Skills.Vision.OcrSkill",
"description": "Reads text from images",
"context": "/document/processed_images/*",
"defaultLanguageCode": "en",
"detectOrientation": true,
"inputs": [
{
"name": "image",
"source": "/document/processed_images/*"
}
],
"outputs": [
{
"name": "text",
"targetName": "image_text"
}
]
},
{
"@odata.type": "#Microsoft.Skills.Vision.ImageAnalysisSkill",
"context": "/document/processed_images/*",
"visualFeatures": ["tags", "description"],
"inputs": [
{
"name": "image",
"source": "/document/processed_images/*"
}
],
"outputs": [
{
"name": "description",
"targetName": "image_description"
},
{
"name": "tags",
"targetName": "image_tags"
}
]
},
{
"@odata.type": "#Microsoft.Skills.Text.MergeSkill",
"description": "Combines document text with OCR results",
"context": "/document",
"insertPreTag": " ",
"insertPostTag": " ",
"inputs": [
{
"name": "text",
"source": "/document/document_content"
},
{
"name": "itemsToInsert",
"source": "/document/processed_images/*/image_text"
}
],
"outputs": [
{
"name": "mergedText",
"targetName": "content_with_ocr"
}
]
},
{
"@odata.type": "#Microsoft.Skills.Text.MergeSkill",
"description": "Adds image descriptions to merged content",
"context": "/document",
"inputs": [
{
"name": "text",
"source": "/document/content_with_ocr"
},
{
"name": "itemsToInsert",
"source": "/document/processed_images/*/image_description"
}
],
"outputs": [
{
"name": "mergedText",
"targetName": "complete_content"
}
]
},
{
"@odata.type": "#Microsoft.Skills.Text.SplitSkill",
"description": "Breaks content into chunks for embedding",
"context": "/document",
"defaultLanguageCode": "en",
"textSplitMode": "pages",
"maximumPageLength": 2500,
"pageOverlapLength": 150,
"inputs": [
{
"name": "text",
"source": "/document/complete_content"
}
],
"outputs": [
{
"name": "textItems",
"targetName": "text_chunks"
}
]
},
{
"@odata.type": "#Microsoft.Skills.Text.AzureOpenAIEmbeddingSkill",
"description": "Creates vector embeddings for search",
"context": "/document/text_chunks/*",
"resourceUri": "https://my-openai-service.openai.azure.com",
"deploymentId": "text-embedding-ada-002",
"inputs": [
{
"name": "text",
"source": "/document/text_chunks/*"
}
],
"outputs": [
{
"name": "embedding",
"targetName": "vector_embedding"
}
]
}
],
"cognitiveServices": {
"@odata.type": "#Microsoft.Azure.Search.CognitiveServicesByKey",
"description": "Attached Azure AI services resource required to bill OCR and Image Analysis beyond the free limit",
"key": "<your-azure-ai-services-key>"
}
}
Any feedback on this approach would be helpful!