Greg Bray gbrayut

👨‍💻

Living life one byte at a time

Currently: GCP Customer Engineer. Formerly: SRE @reddit, CDN/Edge Platform @walmartlabs, SRE at Stack Overflow a.k.a. @StackExchange

gbrayut / ServiceDirectoryRegistrationPolicy.yaml

Created September 29, 2025 22:27

ServiceDirectoryRegistrationPolicy.yaml

	apiVersion: networking.gke.io/v1alpha1
	kind: ServiceDirectoryRegistrationPolicy
	metadata:
	# Only the name "default" is allowed.
	name: default
	# The ServiceDirectoryRegistrationPolicy is a namespaced resource
	namespace: app-1
	spec:
	resources:
	# Kind specifies the types of Kubernetes resources that can be synced into Service Directory.

gbrayut / 01-gemini.sh

Created August 22, 2025 22:51

Gemini CLI instructions for Ubuntu Noble 24.04.3

	# Install Gemini CLI https://github.com/google-gemini/gemini-cli
	apt-cache policy nodejs
	# if distro is using node < 20 then manage versions using nvm instead https://github.com/nvm-sh/nvm/tree/master
	curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.3/install.sh | bash
	# open new shell or copy/paste export/load commands
	nvm install 22 # current LTS from https://nodejs.org/en/about/previous-releases
	npm install -g @google/gemini-cli

	# API keys not working in gemini-cli >= 0.1.15 https://aistudio.google.com/u/0/apikey
	#export GEMINI_API_KEY="AIzaSyCRSFCdFFn8G_REDACTED-..." # gregbray-vpc

gbrayut / gke-dcgm-exporter.yaml

Created August 12, 2025 23:08

gke 1.33 managed DCGM exporter for Nvidia A100 a2-highgpu-1g

	# From https://cloud.google.com/kubernetes-engine/docs/how-to/dcgm-metrics#configure-dcgm-collection
	# Managed DCGM metrics, including its underlying manifests and container images, is intended to work only on GKE clusters.
	# Don't use managed DCGM metrics standalone or run it outside of GKE.
	apiVersion: apps/v1
	kind: DaemonSet
	metadata:
	annotations:
	components.gke.io/layer: addon
	deprecated.daemonset.template.generation: "1"
	creationTimestamp: "2025-08-12T22:55:26Z"

gbrayut / describe.yaml

Created March 24, 2025 22:22

GKE Policy Controller Fleet Default Bundle

	# gcloud container fleet policycontroller describe
	# https://console.cloud.google.com/kubernetes/features/policy/configuration
	createTime: '2025-02-18T21:05:33.021592569Z'
	fleetDefaultMemberConfig:
	policycontroller:
	policyControllerHubConfig:
	auditIntervalSeconds: '60'
	constraintViolationLimit: '40'
	deploymentConfigs:
	admission:

gbrayut / 00-dws-nodepool.sh

Last active March 10, 2025 20:01

DWS hack for deployment using H100

	# Moved these examples to https://github.com/gbrayut/cloud-examples/tree/main/gke-dws
	# so better to use that instead of the files below



	# https://cloud.google.com/kubernetes-engine/docs/how-to/provisioningrequest#create-node-pool
	# https://cloud.google.com/compute/docs/gpus#h100-gpus
	gcloud container node-pools create dws-h100 \
	--cluster=gke-iowa \
	--location=us-central1 \

gbrayut / 01-grpc.yaml

Created July 30, 2024 18:09

GKE grpc gateway testing

	# Modified from https://gist.github.com/gauravkghildiyal/a3ed6aaf7010835b5e3d253c2472aef2
	# Changes: Use TCP health check, one service instead of two, and set grpc as default instead of only explicit routes.
	apiVersion: apps/v1
	kind: Deployment
	metadata:
	labels:
	app: grpc-app
	name: grpc
	spec:
	replicas: 1

gbrayut / 00-setup.sh

Last active April 30, 2024 02:02

GKE 1.27 nvidia-smi -p2p testing

	# Add GPU node pool with automatic driver installation. Manual drivers required before 1.27 https://cloud.google.com/kubernetes-engine/docs/how-to/gpus#installing_drivers
	# If you used the installation DaemonSet to manually install GPU drivers on or before January 25, 2023, you might need to re-apply the DaemonSet to get a version that ignores nodes that use automatic driver installation.

	# COS based L4 via g2-standard-24 VMs https://cloud.google.com/compute/docs/accelerator-optimized-machines#g2-vms
	gcloud beta container --project "gregbray-vpc" node-pools create "nvidia-l4-cos" --cluster "gke-iowa" --region "us-central1" \
	--machine-type "g2-standard-24" --accelerator type=nvidia-l4,count=2,gpu-driver-version=default \
	--image-type "COS_CONTAINERD" --disk-type "pd-balanced" --disk-size "100" \
	--num-nodes "1" --enable-autoscaling --min-nodes=1 --max-nodes=1 \
	--max-pods-per-node "110" --node-locations "us-central1-a"

gbrayut / test.sh

Created April 23, 2024 16:52

LinkedIn TLS Error *.azureedge.net

	$ echo "GET /" | openssl s_client -showcerts -servername www.linkedin.com -connect www.linkedin.com:443 | openssl x509 -noout -text
	depth=2 C = US, O = DigiCert Inc, OU = www.digicert.com, CN = DigiCert Global Root G2
	verify return:1
	depth=1 C = US, O = Microsoft Corporation, CN = Microsoft Azure RSA TLS Issuing CA 04
	verify return:1
	depth=0 C = US, ST = WA, L = Redmond, O = Microsoft Corporation, CN = *.azureedge.net
	verify return:1
	DONE
	Certificate:
	Data:

gbrayut / 01-systemd-unit.sh

Created March 30, 2024 04:49

Configure static ipv6 ULA address

	# Create systemd unit
	cat << EOF > /etc/systemd/system/theg2-ipv6-ula.service
	[Unit]
	Description=Add ipv6 static ULA
	After=network-online.target
	Requires=network-online.target

	[Service]
	Type=oneshot
	ExecStart=/sbin/ip address add fd0b:dead:b0b1::123 dev wlan0

gbrayut / gke-cgroupmode-test.yaml

Created March 29, 2024 22:44

KCC GKE cgroupMode Testing

	apiVersion: compute.cnrm.cloud.google.com/v1beta1
	kind: ComputeNetwork
	metadata:
	annotations:
	cnrm.cloud.google.com/management-conflict-prevention-policy: "none"
	cnrm.cloud.google.com/deletion-policy: "abandon"
	name: default
	spec:
	description: Default network for the project
	---