Last active
May 30, 2024 12:49
-
-
Save ams0/507a02d489d32fc6b892d5c89bdf3dee to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Open source AI/ML workloads on Cloud Native Infrastructure: match made in heaven - Devtalks.ro - 2024
# https://docs.google.com/presentation/d/16P7rNydNXLn6AA3rDL8ZB7yevkZ3lWLwTPizlV5OB3A/edit?usp=sharing
#
# Step 1: define the shared settings, create an Azure Container Registry and
# import the KAITO model images into it.
# NOTE(review): the resource group ${RG} is not created anywhere in this
# walkthrough; it is presumably expected to exist already - confirm.

# Settings shared by the later steps (exported so child processes see them).
export RG=aksclusters
export AZURE_LOCATION=westeurope
export CLUSTER_NAME=kaito
export K8S_VERSION=1.29.4
export ACR_NAME=aicommdaynl

# Create the registry that will hold the model images.
az acr create --resource-group "${RG}" --name "${ACR_NAME}" --sku Basic

# Copy over the mistral image to our ACR
MODELNAME=mistral-7b-instruct
TAG="0.0.2"
az acr import -g "${RG}" --name "${ACR_NAME}" \
  --source "mcr.microsoft.com/aks/kaito/kaito-${MODELNAME}:${TAG}" \
  --image "${MODELNAME}:${TAG}"

# Copy over the phi-2 image as well. MODELNAME/TAG stay set to the phi-2
# values after this step.
MODELNAME=phi-2
TAG="0.0.3"
az acr import -g "${RG}" --name "${ACR_NAME}" \
  --source "mcr.microsoft.com/aks/kaito/kaito-${MODELNAME}:${TAG}" \
  --image "${MODELNAME}:${TAG}"

# List the repositories now present in the registry.
az acr repository list -g "${RG}" --name "${ACR_NAME}"
# Output captured during the demo (kept as a comment so it is not executed):
#   Result
#   -------------------
#   mistral-7b-instruct
#   phi-2
# Step 2: create the AKS cluster and fetch its credentials.
# Reads RG, AZURE_LOCATION, CLUSTER_NAME, K8S_VERSION and ACR_NAME from the
# environment. The flags enable the OIDC issuer and workload identity, the AI
# toolchain operator add-on, KEDA and VPA, and attach the ACR to the cluster.
az aks create --location "${AZURE_LOCATION}" \
  --resource-group "${RG}" \
  --tier Standard \
  --name "${CLUSTER_NAME}" \
  --node-count 2 \
  --node-vm-size Standard_B4ms \
  --enable-oidc-issuer \
  --enable-workload-identity \
  --enable-ai-toolchain-operator \
  --node-provisioning-mode auto \
  --enable-keda \
  --enable-vpa \
  --network-dataplane cilium \
  --network-plugin azure \
  --network-plugin-mode overlay \
  --kubernetes-version "${K8S_VERSION}" \
  --attach-acr "${ACR_NAME}"

# Write the cluster credentials to the local kubeconfig, replacing any
# existing entry for this cluster.
az aks get-credentials --resource-group "${RG}" --name "${CLUSTER_NAME}" --overwrite-existing
# Step 3: give the KAITO managed identity the rights it needs and federate it
# with the kaito-gpu-provisioner service account.
# Reads RG and CLUSTER_NAME from the environment. Each value is assigned
# first and exported afterwards so a failing `az` call is not hidden behind
# the exit status of `export`.

# Get the Cluster Resource Group
RG_ID=$(az group show -n "${RG}" -o tsv --query id)
export RG_ID

# Get the managed cluster Resource Group
MC_RESOURCE_GROUP=$(az aks show --resource-group "${RG}" --name "${CLUSTER_NAME}" --query nodeResourceGroup -o tsv)
export MC_RESOURCE_GROUP

# Set a variable for the KAITO identity name
export KAITO_IDENTITY_NAME="ai-toolchain-operator-${CLUSTER_NAME}"

# Get the principal ID for the KAITO managed identity
PRINCIPAL_ID=$(az identity show --name "${KAITO_IDENTITY_NAME}" --resource-group "${MC_RESOURCE_GROUP}" --query 'principalId' -o tsv)
export PRINCIPAL_ID

# Grant contributor on the cluster resource group
az role assignment create --role "Contributor" --assignee "${PRINCIPAL_ID}" --scope "${RG_ID}"

# Get the OIDC Issuer URL
AKS_OIDC_ISSUER=$(az aks show --resource-group "${RG}" --name "${CLUSTER_NAME}" --query "oidcIssuerProfile.issuerUrl" -o tsv)
export AKS_OIDC_ISSUER

# Create the federation between the KAITO service account and the KAITO Azure Managed Identity
az identity federated-credential create \
  --name "kaito-federated-identity" \
  --identity-name "${KAITO_IDENTITY_NAME}" \
  -g "${MC_RESOURCE_GROUP}" \
  --issuer "${AKS_OIDC_ISSUER}" \
  --subject "system:serviceaccount:kube-system:kaito-gpu-provisioner" \
  --audience api://AzureADTokenExchange

# If you check the kaito-gpu-provisioner pod, you'll see it's in CrashLoopBackOff
# due to the identity not yet having been configured with proper rights.
kubectl get pods -l app=ai-toolchain-operator -n kube-system

# Restart the GPU provisioner to reload authorization
kubectl rollout restart deployment/kaito-gpu-provisioner -n kube-system

# Check the pod again to confirm it's now running
kubectl get pods -l app=ai-toolchain-operator -n kube-system
# Step 4: deploy a KAITO Workspace that serves the phi-2 model from our ACR.
# The heredoc delimiter is unquoted on purpose so the shell substitutes the
# variables below into the manifest before kubectl receives it.
MACHINE_SIZE=Standard_NC64as_T4_v3
MODELNAME=phi-2
# Pinned here so this step does not depend on whatever TAG was set last;
# it must match the phi-2 tag imported into the registry.
TAG="0.0.3"
ACR_NAME=aicommdaynl

kubectl apply -f - <<EOF
apiVersion: kaito.sh/v1alpha1
kind: Workspace
metadata:
  annotations:
    kaito.sh/enablelb: "True"
  name: workspace-${MODELNAME}
resource:
  instanceType: "${MACHINE_SIZE}"
  labelSelector:
    matchLabels:
      apps: ${MODELNAME}
inference:
  preset:
    name: "${MODELNAME}"
    presetOptions:
      image: ${ACR_NAME}.azurecr.io/${MODELNAME}:${TAG}
EOF

# Interactive: keeps refreshing the listing until interrupted (Ctrl-C).
watch kubectl get workspace,nodes,svc,pods,machines
# Step 5: install Open WebUI in front of the phi-2 workspace and query the model.
helm repo add open-webui https://helm.openwebui.com/
helm repo update

# Install (or upgrade) the release: the bundled Ollama is disabled, the UI is
# pointed at the workspace service, exposed through a LoadBalancer with the
# Azure DNS label "openwebui", and WEBUI_AUTH is set to "none".
# The dots inside the annotation key are backslash-escaped so Helm treats them
# as part of the key instead of as path separators.
helm upgrade --install \
  --set ollama.enabled=false \
  --set ollamaUrls='http://workspace-phi-2:80/chat' \
  --set service.type=LoadBalancer \
  --set 'service.annotations.service\.beta\.kubernetes\.io/azure-dns-label-name=openwebui' \
  --set "extraEnvVars[0].name=WEBUI_AUTH" \
  --set "extraEnvVars[0].value=none" \
  open-webui open-webui/open-webui

# Open the UI in a browser. `open` as used here is the macOS launcher; on
# Linux substitute xdg-open.
open http://openwebui.westeurope.cloudapp.azure.com/

# Optional: query the workspace from inside the cluster with a throwaway shell.
#kubectl run tmp-shell --rm -i --tty --overrides='{ "apiVersion": "v1", "spec": { "nodeSelector": { "kubernetes.io/os": "linux" } } }' --image nicolaka/netshoot -- /bin/bash
#curl -X POST "http://workspace-phi-2:80/chat" -H "accept: application/json" -H "Content-Type: application/json" -d '{"return_full_text": true, "generate_kwargs": {"max_length":1200},"prompt":"Who was the greatest Rome emperor?"}'

# Query the model through its public endpoint.
# NOTE(review): nothing visible in this script assigns the `devtalks` DNS label
# (the Helm release above uses `openwebui`) - confirm the hostname before running.
curl -X POST "http://devtalks.westeurope.cloudapp.azure.com/chat" \
  -H "accept: application/json" \
  -H "Content-Type: application/json" \
  -d '{"return_full_text": true, "generate_kwargs": {"max_length":1200},"prompt":"Who was the greatest Rome emperor?"}'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment