Created
March 31, 2025 16:44
-
-
Save pamelafox/f4e42ec03b47889045fb73b8c146ff08 to your computer and use it in GitHub Desktop.
MCP Server for azure-search-openai-demo (Basic)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io | |
import os | |
import traceback | |
from enum import Enum | |
from azure.identity.aio import ( | |
AzureDeveloperCliCredential, | |
) | |
from azure.search.documents.aio import SearchClient | |
from azure.search.documents.models import ( | |
QueryType, | |
) | |
from azure.storage.blob.aio import BlobServiceClient | |
from mcp.server.fastmcp import FastMCP | |
from load_azd_env import load_azd_env | |
from prepdocs import ( | |
clean_key_if_exists, | |
setup_embeddings_service, | |
setup_file_processors, | |
setup_search_info, | |
) | |
from prepdocslib.filestrategy import UploadUserFileStrategy | |
from prepdocslib.listfilestrategy import File | |
# Run the azd environment loader first: the os.environ lookups below (and inside
# the tools) presumably depend on the values it provides.
load_azd_env()

# Server instance on which the @mcp.tool() functions in this module are registered.
mcp = FastMCP("Search my data")

# One credential object shared by every Azure client created in this module.
AZURE_TENANT_ID = os.getenv("AZURE_TENANT_ID")
azure_credential = AzureDeveloperCliCredential(
    tenant_id=AZURE_TENANT_ID,
    process_timeout=60,
)
class DocumentationTopic(str, Enum):
    """Enum for documentation topics."""

    # Each member's value is the human-readable topic name that tool callers pass in;
    # mixing in ``str`` lets members compare equal to those plain strings.
    MODELCONTEXTPROTOCOL = "Model Context Protocol"
    FLASK = "Flask"
    ESLINT = "ESLint"
# Lookup table: documentation topic -> name of the Azure AI Search index that the
# search and upload tools use for that topic.
search_indices: dict[DocumentationTopic, str] = {
    DocumentationTopic.MODELCONTEXTPROTOCOL: "gptkbindex",
    DocumentationTopic.FLASK: "gptkbindex-flask",
    DocumentationTopic.ESLINT: "gptkbindex-eslint",
}
@mcp.tool()
async def search_my_documentation(search_query: str, search_topic: DocumentationTopic) -> str:
    """Search the Azure Search index for documentation about the given search_query.

    Args:
        search_query: The search query to use for the search
        search_topic: The topic to search for, which should be either "Model Context Protocol", "Flask", or "ESLint"

    Returns:
        The search results, formatted as a string
    """
    # NOTE(review): FastMCP appears to expose the docstring above as the tool
    # description, so it is worded for the calling model — confirm before rewording.
    # Every failure is reported as an "Error: ..." string instead of raising, so the
    # MCP client always receives a textual answer.
    try:
        search_service = os.environ["AZURE_SEARCH_SERVICE"]
        index_name = search_indices.get(search_topic)
        if not index_name:
            return "Error: Could not find appropriate search index for the given topic."

        # A new client is created per call; ``async with`` closes its HTTP transport
        # when the call finishes (previously the client was never closed).
        async with SearchClient(
            endpoint=f"https://{search_service}.search.windows.net",
            index_name=index_name,
            credential=azure_credential,
        ) as search_client:
            results = await search_client.search(
                search_text=search_query,
                top=10,
                query_type=QueryType.SEMANTIC,
                semantic_configuration_name="default",
                semantic_query=search_query,
            )
            # The result pager fetches lazily, so it must be consumed while the
            # client is still open.
            sources = "\n\n".join(
                [f"[{doc['sourcepage']}]: {doc['content']}\n" async for doc in results]
            )
        return sources
    except Exception as e:
        return f"Error: {e}"
@mcp.tool()
async def upload_to_my_documentation(filepath: str, search_topic: DocumentationTopic) -> str:
    """
    Upload a file to the main storage account and ingest it into an Azure AI Search index.

    Args:
        filepath: The absolute path on the local filesystem to the file to upload
        search_topic: The most related topic for the file, which should be either "Model Context Protocol", "Flask", or "ESLint"

    Returns:
        A message indicating success or failure
    """
    # NOTE(review): FastMCP appears to expose the docstring above as the tool
    # description, so it is worded for the calling model — confirm before rewording.
    # Every failure is reported as an error string (with traceback) instead of
    # raising, so the MCP client always receives a textual answer.
    try:
        with open(filepath, "rb") as file:
            file_data = file.read()
        if not file_data:
            return "Error: File is empty."
        filename = os.path.basename(filepath)

        # Get environment variables
        storage_account = os.environ.get("AZURE_STORAGE_ACCOUNT")
        storage_container = os.environ.get("AZURE_STORAGE_CONTAINER")
        use_gpt4v = os.environ.get("USE_GPT4V", "").lower() == "true"
        if not storage_account or not storage_container:
            return "Error: Storage account is not configured."

        # Resolve the target index from the caller's topic *before* uploading, so an
        # unknown topic cannot leave an orphaned blob behind. (The topic used to be
        # overwritten with MODELCONTEXTPROTOCOL here, which sent every file to the
        # same index regardless of the argument.)
        search_index = search_indices.get(search_topic)
        if not search_index:
            return "Error: Could not find appropriate search index for the given topic."

        # Wrap the bytes in a named in-memory stream; the same stream is reused for
        # the blob upload and for ingestion.
        file_io = io.BytesIO(file_data)
        file_io.name = filename

        # Upload the file; ``async with`` closes the storage client's transport once
        # the upload is done (previously the client was never closed).
        async with BlobServiceClient(
            f"https://{storage_account}.blob.core.windows.net",
            credential=azure_credential,
        ) as blob_service_client:
            container_client = blob_service_client.get_container_client(storage_container)
            blob_client = container_client.get_blob_client(filename)
            await blob_client.upload_blob(file_io, overwrite=True)
            # Keep only the URL; it is all the ingester needs from the blob client.
            blob_url = blob_client.url

        # Reset file pointer for indexing
        file_io.seek(0)

        # Set up ingester components
        file_processors = setup_file_processors(
            azure_credential=azure_credential,
            document_intelligence_service=os.getenv("AZURE_DOCUMENTINTELLIGENCE_SERVICE"),
            local_pdf_parser=os.getenv("USE_LOCAL_PDF_PARSER", "").lower() == "true",
            local_html_parser=os.getenv("USE_LOCAL_HTML_PARSER", "").lower() == "true",
            search_images=use_gpt4v,
        )
        search_info = await setup_search_info(
            search_service=os.environ["AZURE_SEARCH_SERVICE"],
            index_name=search_index,
            azure_credential=azure_credential,
        )
        text_embeddings_service = setup_embeddings_service(
            azure_credential=azure_credential,
            openai_host=os.getenv("OPENAI_HOST"),
            openai_model_name=os.environ["AZURE_OPENAI_EMB_MODEL_NAME"],
            openai_service=os.getenv("AZURE_OPENAI_SERVICE", ""),
            openai_custom_url=os.getenv("AZURE_OPENAI_CUSTOM_URL", ""),
            openai_deployment=os.getenv("AZURE_OPENAI_EMB_DEPLOYMENT", ""),
            openai_dimensions=int(os.environ["AZURE_OPENAI_EMB_DIMENSIONS"]),
            openai_api_version=os.getenv("AZURE_OPENAI_API_VERSION", ""),
            openai_key=clean_key_if_exists(os.getenv("OPENAI_API_KEY", "")),
            openai_org=os.getenv("OPENAI_ORGANIZATION", ""),
            disable_vectors=os.getenv("USE_VECTORS", "").lower() == "false",
        )

        # Create the ingester and process the file
        ingester = UploadUserFileStrategy(
            search_info=search_info,
            embeddings=text_embeddings_service,
            file_processors=file_processors,
        )
        # Add the file to the search index as a global document (no ACLs)
        await ingester.add_file(File(content=file_io, acls={}, url=blob_url))
        return f"File '{filename}' uploaded and processed successfully. It is now searchable in the index."
    except Exception as e:
        return f"Error uploading file: {str(e)}\nTraceback: {traceback.format_exc()}"
# Start the MCP server only when this file is executed directly, not when imported.
if __name__ == "__main__":
    mcp.run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment