Skip to content

Instantly share code, notes, and snippets.

@ruivieira
Last active October 18, 2024 00:35
Show Gist options
  • Save ruivieira/173317674f63f955e3d0baed30033fb3 to your computer and use it in GitHub Desktop.
KServe Inference Logging Testing
---
# Open Data Hub DataScienceCluster: enables KServe (Serverless), ModelMesh,
# the dashboard, and a dev-flag build of the TrustyAI operator; everything
# else is Removed to keep the test cluster minimal.
apiVersion: datasciencecluster.opendatahub.io/v1
kind: DataScienceCluster
metadata:
  name: default-dsc
  finalizers:
    - datasciencecluster.opendatahub.io/finalizer
  labels:
    app.kubernetes.io/created-by: opendatahub-operator
    app.kubernetes.io/instance: default
    app.kubernetes.io/managed-by: Helm
    app.kubernetes.io/name: datasciencecluster
    app.kubernetes.io/part-of: opendatahub-operator
spec:
  components:
    codeflare:
      managementState: Removed
    kserve:
      defaultDeploymentMode: Serverless
      managementState: Managed
      serving:
        ingressGateway:
          certificate:
            type: OpenshiftDefaultIngress
        managementState: Managed
        name: knative-serving
    modelregistry:
      managementState: Removed
      registriesNamespace: odh-model-registries
    trustyai:
      # devFlags pulls the operator manifests from a test branch tarball
      # instead of the released images.
      devFlags:
        manifests:
          - contextDir: config
            sourcePath: ''
            uri: 'https://github.com/ruivieira/trustyai-service-operator/tarball/kserve-testing'
      managementState: Managed
    ray:
      managementState: Removed
    kueue:
      managementState: Removed
    workbenches:
      managementState: Removed
    dashboard:
      managementState: Managed
    modelmeshserving:
      managementState: Managed
    datasciencepipelines:
      managementState: Removed
    trainingoperator:
      managementState: Removed
---
# Service exposing the minio pod's S3 API inside the `test` namespace.
apiVersion: v1
kind: Service
metadata:
  name: minio
  namespace: test
spec:
  ports:
    - name: minio-client-port
      port: 9000
      protocol: TCP
      targetPort: 9000
  selector:
    app: minio
---
# Data connection consumed by the InferenceService (storage.key below).
apiVersion: v1
kind: Secret
metadata:
  name: aws-connection-minio-data-connection
  namespace: test
  labels:
    opendatahub.io/dashboard: "true"
    opendatahub.io/managed: "true"
  annotations:
    opendatahub.io/connection-type: s3
    openshift.io/display-name: Minio Data Connection
# These are dummy values to populate the ODH UI with, and do not correspond
# to any real AWS credentials (base64 of THEACCESSKEY / THESECRETKEY, etc.).
data:
  AWS_ACCESS_KEY_ID: VEhFQUNDRVNTS0VZ
  AWS_DEFAULT_REGION: dXMtc291dGg=
  AWS_S3_BUCKET: bW9kZWxtZXNoLWV4YW1wbGUtbW9kZWxz
  AWS_S3_ENDPOINT: aHR0cDovL21pbmlvOjkwMDA=
  AWS_SECRET_ACCESS_KEY: VEhFU0VDUkVUS0VZ
type: Opaque
---
# Minio pod serving the example models bundled in the image; credentials
# match the (dummy) values in the data-connection Secret.
apiVersion: v1
kind: Pod
metadata:
  labels:
    app: minio
    maistra.io/expose-route: "true"
  annotations:
    sidecar.istio.io/inject: "true"
  name: minio
  namespace: test
spec:
  containers:
    - args:
        - server
        - /data1
      env:
        - name: MINIO_ACCESS_KEY
          value: THEACCESSKEY
        - name: MINIO_SECRET_KEY
          value: THESECRETKEY
      image: quay.io/trustyai/modelmesh-minio-examples:latest
      name: minio
---
# KServe InferenceService: ONNX loan model served by the ovms-1.x runtime,
# loading the model from the minio data connection.
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: demo-loan-nn-onnx-alpha
  namespace: test
  annotations:
    openshift.io/display-name: demo-loan-nn-onnx-alpha
    # security.opendatahub.io/enable-auth: "true"  # disabled for convenience; consider testing with auth enabled
    serving.knative.openshift.io/enablePassthrough: "true"
    sidecar.istio.io/inject: "true"
    sidecar.istio.io/rewriteAppHTTPProbers: "true"
  labels:
    opendatahub.io/dashboard: "true"
spec:
  predictor:
    maxReplicas: 1
    minReplicas: 1
    model:
      modelFormat:
        name: onnx
        version: "1"
      resources:
        limits:
          cpu: "2"
          memory: 8Gi
        requests:
          cpu: "1"
          memory: 4Gi
      runtime: ovms-1.x
      storage:
        # key refers to the aws-connection-minio-data-connection Secret
        key: aws-connection-minio-data-connection
        path: ovms/loan_model_alpha
---
# OpenVINO Model Server ServingRuntime referenced by the InferenceService
# above (spec.predictor.model.runtime: ovms-1.x).
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  name: ovms-1.x
  namespace: test
  annotations:
    opendatahub.io/accelerator-name: ""
    opendatahub.io/apiProtocol: REST
    opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]'
    opendatahub.io/template-display-name: OpenVINO Model Server
    opendatahub.io/template-name: kserve-ovms
    openshift.io/display-name: ovms-1.x
    prometheus.io/path: /metrics
    prometheus.io/port: "8888"
  labels:
    opendatahub.io/dashboard: "true"
spec:
  containers:
    - name: kserve-container
      image: quay.io/opendatahub/openvino_model_server:stable-nightly-2024-05-26
      args:
        # {{.Name}} is templated by KServe with the InferenceService name.
        - --model_name={{.Name}}
        - --port=8001
        - --rest_port=8888
        - --model_path=/mnt/models
        - --file_system_poll_wait_seconds=0
        - --grpc_bind_address=0.0.0.0
        - --rest_bind_address=0.0.0.0
        - --target_device=AUTO
        - --metrics_enable
      ports:
        - containerPort: 8888
          protocol: TCP
      volumeMounts:
        - mountPath: /dev/shm
          name: shm
  multiModel: false
  protocolVersions:
    - v2
    - grpc-v2
  supportedModelFormats:
    - autoSelect: true
      name: openvino_ir
      version: opset13
    - name: onnx
      version: "1"
    - autoSelect: true
      name: tensorflow
      version: "1"
    - autoSelect: true
      name: tensorflow
      version: "2"
    - autoSelect: true
      name: paddle
      version: "2"
    - autoSelect: true
      name: pytorch
      version: "2"
  volumes:
    - emptyDir:
        medium: Memory
        sizeLimit: 2Gi
      name: shm
---
# TrustyAIService: stores logged inferences as CSV on a 1Gi PVC and
# recomputes metrics every 5 seconds.
apiVersion: trustyai.opendatahub.io/v1alpha1
kind: TrustyAIService
metadata:
  name: trustyai-service
  namespace: test
spec:
  storage:
    format: PVC
    folder: /inputs
    size: 1Gi
  data:
    filename: data.csv
    format: CSV
  metrics:
    schedule: 5s
# Optional, just to certify port 80 is not used (only 443 should be present on this service)
kubectl -n test patch service trustyai-service --type='json' -p='[{"op":"remove","path":"/spec/ports/0"}]'
# Verify VirtualService
kubectl describe virtualservice trustyai-service-redirect -n test
# Verify DestinationRule
# NOTE(review): original command had a trailing hyphen ("trustyai-service-"),
# which looks truncated — confirm the DestinationRule's actual name on the cluster.
kubectl describe destinationrule trustyai-service -n test
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment