Skip to content

Instantly share code, notes, and snippets.

@yorek
Created November 9, 2024 01:02
Show Gist options
  • Save yorek/01d03d8db46df0f463f0b8af1b0d4fa2 to your computer and use it in GitHub Desktop.
Vectorize a document and ingest it into Azure SQL using Azure Logic Apps
{
"definition": {
"$schema": "https://schema.management.azure.com/providers/Microsoft.Logic/schemas/2016-06-01/workflowdefinition.json#",
"actions": {
"Chunk_text": {
"type": "ChunkText",
"inputs": {
"chunkingStrategy": "TokenSize",
"text": "@body('Parse_a_document')?['text']",
"EncodingModel": "cl100k_base",
"PageOverlapLength": 0,
"TokenSize": 5000
},
"runAfter": {
"Parse_a_document": [
"SUCCEEDED"
]
}
},
"Azure_OpenAI_-_Get_multiple_embeddings": {
"type": "ServiceProvider",
"inputs": {
"parameters": {
"deploymentId": "@parameters('OpenAI_TextEmbedding_Deployment_Identifier_ingesttosql')",
"input": "@body('Chunk_text')?['value']"
},
"serviceProviderConfiguration": {
"connectionName": "openai",
"operationId": "getArrayEmbeddings",
"serviceProviderId": "/serviceProviders/openai"
}
},
"runAfter": {
"Chunk_text": [
"SUCCEEDED"
]
}
},
"Parse_a_document": {
"type": "ParseDocument",
"inputs": {
"content": "@body('Azure_Blob_-_Read_blob_content')?['content']"
},
"runAfter": {
"Azure_Blob_-_Read_blob_content": [
"SUCCEEDED"
]
}
},
"Azure_Blob_-_Read_blob_content": {
"type": "ServiceProvider",
"inputs": {
"parameters": {
"containerName": "@parameters('BlobPath')",
"blobName": "@triggerBody()?['name']"
},
"serviceProviderConfiguration": {
"connectionName": "AzureBlob",
"operationId": "readBlob",
"serviceProviderId": "/serviceProviders/AzureBlob"
}
},
"runAfter": {}
},
"Select_embeddings_and_map_to_AI_Search_schema": {
"type": "Select",
"inputs": {
"from": "@range(0, length(body('Chunk_text')['value']))",
"select": {
"chunk": "@body('Chunk_text')['value'][item()]",
"filename": "@triggerBody()?['name']",
"embedding": "@body('Azure_OpenAI_-_Get_multiple_embeddings')['embeddings'][item()]",
"chunkid": "@concat(triggerBody()?['name'], item())"
}
},
"runAfter": {
"Azure_OpenAI_-_Get_multiple_embeddings": [
"SUCCEEDED"
]
}
},
"For_each": {
"type": "Foreach",
"foreach": "@body('Select_embeddings_and_map_to_AI_Search_schema')",
"actions": {
"SQL_-_Insert_vectors_in_table": {
"type": "ServiceProvider",
"description": "This step inserts the vector embeddings, content and other metadata into the SQL vector database. Update the Query and Query parameters based on your SQL table schema",
"inputs": {
"parameters": {
"query": "INSERT INTO resumedocs (chunkid, filename, chunk, embedding)\n VALUES (@chunkid, @filename, @chunk, CAST(@embedding as VECTOR(1536)))",
"queryParameters": {
"chunkid": "@{item()['chunkid']}",
"filename": "@{item()['filename']}",
"chunk": "@{item()['chunk']}",
"embedding": "@{item()['embedding']}"
}
},
"serviceProviderConfiguration": {
"connectionName": "sql",
"operationId": "executeQuery",
"serviceProviderId": "/serviceProviders/sql"
}
}
}
},
"runAfter": {
"Select_embeddings_and_map_to_AI_Search_schema": [
"SUCCEEDED"
]
}
}
},
"contentVersion": "1.0.0.0",
"outputs": {},
"triggers": {
"Azure_Blob_-_When_a_blob_is_added_or_updated": {
"type": "ServiceProvider",
"inputs": {
"parameters": {
"path": "@parameters('BlobPath')"
},
"serviceProviderConfiguration": {
"connectionName": "AzureBlob",
"operationId": "whenABlobIsAddedOrModified",
"serviceProviderId": "/serviceProviders/AzureBlob"
}
}
}
}
},
"kind": "stateful"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment