Last active
March 11, 2025 20:05
-
-
Save dmattia/b7b72a699def0494d9b181f63fada949 to your computer and use it in GitHub Desktop.
Sombra Helm Deployment with LLM
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# AWS provider: AWS resources in this configuration are created in eu-west-1.
provider "aws" {
  region = "eu-west-1"
}
# Network for the Sombra cluster: a single VPC spanning two availability
# zones, with one private and one public subnet declared per zone.
module "vpc" {
  source  = "terraform-aws-modules/vpc/aws"
  version = "~> 5.1.2"

  name = "sombra-vpc"
  cidr = "10.0.0.0/16"
  azs  = ["eu-west-1a", "eu-west-1b"]

  # The EKS cluster below is attached to the private subnets only.
  private_subnets = ["10.0.101.0/24", "10.0.102.0/24"]
  public_subnets  = ["10.0.201.0/24", "10.0.202.0/24"]

  enable_nat_gateway = true

  # DNS resolution and DNS hostnames are both switched on for the VPC.
  enable_dns_support   = true
  enable_dns_hostnames = true
}
# EKS cluster that hosts Sombra, placed in the private subnets of module.vpc.
module "eks_cluster" {
  source  = "terraform-aws-modules/eks/aws"
  version = "~> 20.31.6"

  cluster_name    = "sombra-eks-cluster"
  cluster_version = "1.32"

  vpc_id     = module.vpc.vpc_id
  subnet_ids = module.vpc.private_subnets

  # The API endpoint is publicly reachable, and the identity that creates the
  # cluster is granted admin permissions on it.
  cluster_endpoint_public_access           = true
  enable_cluster_creator_admin_permissions = true

  # Managed compute with the "general-purpose" node pool enabled.
  # NOTE(review): in this module version this setting corresponds to EKS Auto
  # Mode — confirm against the module documentation.
  cluster_compute_config = {
    enabled    = true
    node_pools = ["general-purpose"]
  }
}
################################################################################### | |
# Everything below here depends on the above kubernetes cluster already existing, # | |
# so you may want to comment out the below resources during the first apply # | |
################################################################################### | |
# Kubernetes provider, pointed at the cluster created by module.eks_cluster.
provider "kubernetes" {
  host                   = module.eks_cluster.cluster_endpoint
  cluster_ca_certificate = base64decode(module.eks_cluster.cluster_certificate_authority_data)

  # Credentials are obtained by invoking `aws eks get-token` for this cluster,
  # so the AWS CLI must be available wherever Terraform runs.
  exec {
    command     = "aws"
    args        = ["eks", "get-token", "--cluster-name", module.eks_cluster.cluster_name]
    api_version = "client.authentication.k8s.io/v1beta1"
  }
}
# Helm provider. Its Kubernetes connection settings match those of the
# "kubernetes" provider: same endpoint, CA certificate and token command.
provider "helm" {
  kubernetes {
    host                   = module.eks_cluster.cluster_endpoint
    cluster_ca_certificate = base64decode(module.eks_cluster.cluster_certificate_authority_data)

    # Credentials are obtained by invoking `aws eks get-token` for this cluster.
    exec {
      command     = "aws"
      args        = ["eks", "get-token", "--cluster-name", module.eks_cluster.cluster_name]
      api_version = "client.authentication.k8s.io/v1beta1"
    }
  }
}
# Karpenter NodePool named "default" whose nodes are restricted to
# NVIDIA A10G GPU instances.
resource "kubernetes_manifest" "allow_gpu_node_pool" {
  manifest = {
    apiVersion = "karpenter.sh/v1"
    kind       = "NodePool"
    metadata   = { name = "default" }

    spec = {
      # Resource limits for this pool.
      # NOTE(review): cpu = "16000" reads as 16000 cores unless a millicore
      # suffix ("16000m") was intended — confirm.
      limits = {
        memory = "64Gi"
        cpu    = "16000"
      }

      template = {
        spec = {
          requirements = [
            {
              key      = "eks.amazonaws.com/instance-gpu-manufacturer"
              operator = "In"
              values   = ["nvidia"]
            },
            # The A10G GPU has been tested to work well with our LLM Classifier
            {
              key      = "eks.amazonaws.com/instance-gpu-name"
              operator = "In"
              values   = ["a10g"]
            },
          ]

          nodeClassRef = {
            group = "eks.amazonaws.com"
            kind  = "NodeClass"
            name  = "default"
          }
        }
      }
    }
  }
}
# Installs the Transcend Sombra Helm chart as release "sombra-servers" in the
# "sombra" namespace, with the bundled LLM classifier enabled.
#
# Replace every "<..._FROM_STEP_N>" placeholder before applying.
resource "helm_release" "sombra" {
  name             = "sombra-servers"
  namespace        = "sombra"
  create_namespace = true

  repository = "https://transcend-io.github.io/helm-charts/"
  chart      = "sombra"
  version    = "0.5.0"

  timeout = 900 # 15 minutes for any helm operation

  values = [yamlencode({
    imageCredentials = {
      registry = "docker.transcend.io"
      username = "Transcend"
      password = "<DOCKER_API_KEY_FROM_STEP_1>"
    }

    replicaCount = 1

    envs = [
      {
        name  = "ORGANIZATION_URI"
        value = "<ORGANIZATION_URI_FROM_STEP_2>"
      },
      {
        name  = "SOMBRA_ID"
        value = "<SOMBRA_ID_FROM_STEP_2>"
      },
      {
        name  = "SOMBRA_REVERSE_TUNNEL_API_KEY"
        value = "<SOMBRA_REVERSE_TUNNEL_API_KEY_FROM_STEP_2>"
      },
      {
        name  = "TRANSCEND_URL"
        value = "<TRANSCEND_URL_FROM_STEP_2>"
      },
      {
        name = "LLM_CLASSIFIER_URL"
        # In-cluster service DNS has the form
        # <service>.<namespace>.svc.cluster.local, so the namespace segment
        # must match the `namespace` of this release ("sombra"). Keep the two
        # in sync if the namespace is ever changed.
        value = "http://sombra-servers-llm-classifier.sombra.svc.cluster.local:6081/"
      },
    ]

    envs_as_secret = [
      {
        name = "JWT_ECDSA_KEY"
        # This is just a sample key, in production you'd want to generate your own key via a command
        # such as `openssl ecparam -genkey -name secp384r1 -noout | (base64 --wrap=0 2>/dev/null || base64 -b 0)`.
        value = "LS0tLS1CRUdJTiBFQyBQUklWQVRFIEtFWS0tLS0tCk1JR2tBZ0VCQkRCVTdtcVg1MEdqa2dtMzZKME9zeEhzWGR3MG9NOEQ0dzZla3Y1RVJtV0lRSGM2c0tud043QkkKQk1uSlpYNHVPKytnQndZRks0RUVBQ0toWkFOaUFBUlQ1MFk3My8wdGhpazdvd2tOaFhmRWRtbi9UQ1pKM1ZzRQptT1JvUVJtUGlxZkhrQlpQRVZpSHIxMDRteHlYejNIc3JabEdHbDVnWmgwYnFXTHU2Z2R0Y1BtaXZhRWZtRU9KCkVnZE1iRXh4QjJtVEk2QWdKZk9EdFJZa2xUd0pqVWc9Ci0tLS0tRU5EIEVDIFBSSVZBVEUgS0VZLS0tLS0K"
      },
    ]

    # Enables the chart's LLM classifier component.
    # NOTE(review): per the discussion on this gist, the same pod reportedly
    # also serves the GliNER (unstructured discovery) model — confirm.
    llm-classifier = { enabled = true }

    # Keep the Sombra pods off GPU nodes: only schedule onto nodes that do not
    # carry the GPU-manufacturer label.
    affinity = {
      nodeAffinity = {
        requiredDuringSchedulingIgnoredDuringExecution = {
          nodeSelectorTerms = [{
            matchExpressions = [{
              key      = "eks.amazonaws.com/instance-gpu-manufacturer"
              operator = "DoesNotExist"
            }]
          }]
        }
      }
    }
  })]
}
@dmattia we will need to add one for the new GliNER algorithm here as well. Per Alessandro, this is already included in the same pod. Could we add a clarifying comment in this file that both the LLM (Structured Discovery) and GliNER (Unstructured Discovery) are available in the same pod?
"And optionally replace the JWT_ECDSA_KEY secret environment variable's value with your own encryption key, which you can get by running openssl ecparam -genkey -name secp384r1 -noout | (base64 --wrap=0 2>/dev/null || base64 -b 0)"
Can we write an example of how to read the key from AWS KMS, which earlier sections mention as supported?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@dmattia Is this still a beta version? I would suggest updating it to simply v1, so the customer is aware they are hitting a production endpoint.