Last active
July 25, 2024 07:32
-
-
Save miticojo/5c68460cc8f9c8176b98b5d1ff3ad06b to your computer and use it in GitHub Desktop.
Analyze GKE realtime utilization
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Abort as soon as a command fails outside of a conditional context.
set -e
# Convert a Kubernetes CPU quantity to millicores.
#
# Arguments: $1 - CPU quantity such as "250m", "2", "0.5"; may be empty
# Outputs:   the millicore count on stdout. Empty input yields 0. Input in a
#            format this function does not recognise is echoed back unchanged.
convert_to_m() {
  local value=${1:-}
  local int_re='^[0-9]+$'
  local dec_re='^([0-9]*)\.([0-9]*)$'
  local frac

  if [[ -z $value ]]; then
    echo 0
  elif [[ $value == *m ]]; then
    # Already expressed in millicores: just drop the suffix.
    echo "${value%m}"
  elif [[ $value =~ $int_re ]]; then
    # Whole cores. 10# forces base 10 so a leading zero is not read as octal.
    echo $((10#$value * 1000))
  elif [[ $value =~ $dec_re && $value != . ]]; then
    # Fractional cores: pad/truncate the fraction to exactly three digits so
    # it can be added as millicores without floating point arithmetic.
    frac=${BASH_REMATCH[2]}000
    echo $((10#${BASH_REMATCH[1]:-0} * 1000 + 10#${frac:0:3}))
  else
    echo "$value"
  fi
}
# Convert a Kubernetes memory quantity to whole kibibytes (Ki), rounding down.
#
# Recognised forms: plain bytes ("134217728"), binary suffixes (Ki, Mi, Gi,
# Ti, Pi, Ei), decimal suffixes (k, M, G, T, P, E) and the milli suffix "m".
# A bare "K" is treated as Ki, as this function has always done.
#
# Arguments: $1 - memory quantity; may be empty
# Outputs:   the Ki count on stdout. Empty input yields 0. Input in a format
#            this function does not recognise is echoed back unchanged.
convert_memory_to_ki() {
  local value=${1:-}
  local number_re='^([0-9]+\.?[0-9]*|\.[0-9]+)$'
  local number suffix whole frac milli

  if [[ -z $value ]]; then
    echo 0
    return
  fi

  # Split "<number><suffix>": the number is the leading run of digits/dots.
  number=${value%%[!0-9.]*}
  suffix=${value#"$number"}
  if [[ ! $number =~ $number_re ]]; then
    echo "$value"
    return
  fi

  # Represent the number in thousandths so fractional quantities such as
  # "1.5Gi" can be handled with integer arithmetic only.
  whole=${number%%.*}
  frac=
  if [[ $number == *.* ]]; then
    frac=${number#*.}
  fi
  frac=${frac}000
  milli=$((10#${whole:-0} * 1000 + 10#${frac:0:3}))

  case $suffix in
    # Binary suffixes: scale by the unit's size in Ki, then undo the x1000.
    Ki | K) echo $((milli / 1000)) ;;
    Mi) echo $((milli * 1024 / 1000)) ;;
    Gi) echo $((milli * 1024 * 1024 / 1000)) ;;
    Ti) echo $((milli * 1024 * 1024 * 1024 / 1000)) ;;
    Pi) echo $((milli * 1024 * 1024 * 1024 * 1024 / 1000)) ;;
    Ei) echo $((milli * 1024 * 1024 * 1024 * 1024 * 1024 / 1000)) ;;
    # Plain bytes.
    '') echo $((milli / 1000 / 1024)) ;;
    # Decimal suffixes: bytes = number * 1000^n. One factor of 1000 cancels
    # against the thousandths scaling; the final /1024 converts bytes to Ki.
    k) echo $((milli / 1024)) ;;
    M) echo $((milli * 1000 / 1024)) ;;
    G) echo $((milli * 1000000 / 1024)) ;;
    T) echo $((milli * 1000000000 / 1024)) ;;
    P) echo $((milli * 1000000000000 / 1024)) ;;
    E) echo $((milli * 1000000000000000 / 1024)) ;;
    # Thousandths of a byte.
    m) echo $((milli / 1000 / 1000 / 1024)) ;;
    *) echo "$value" ;;
  esac
}
# Verify that the external tools this script relies on are installed; stop at
# the first one that is missing and report it on stderr.
for cmd in kubectl jq bc; do
  if ! command -v "$cmd" > /dev/null 2>&1; then
    echo "Error: $cmd is not installed. Please install it and try again." >&2
    exit 1
  fi
done
# Report whether the current cluster looks like GKE Autopilot or GKE Standard.
#
# The decision is a heuristic: the cluster is called Autopilot when any node
# name contains the substring "autopilot".
# NOTE(review): this only matches when the cluster/pool name itself contains
# "autopilot" - confirm this holds for the clusters being analysed.
#
# Outputs: "GKE Autopilot", "GKE Standard", or "Unknown" when the node list
#          cannot be retrieved
# Returns: 0 on success, 1 when kubectl fails
get_cluster_type() {
  local node_names

  # Assign separately from 'local' so a kubectl failure is not masked and
  # silently reported as "GKE Standard".
  if ! node_names=$(kubectl get nodes -o jsonpath='{.items[*].metadata.name}'); then
    echo "Unknown"
    return 1
  fi

  if [[ $node_names == *autopilot* ]]; then
    echo "GKE Autopilot"
  else
    echo "GKE Standard"
  fi
}
# Print allocatable vs. capacity CPU and memory for every node, followed by
# the cluster-wide allocatable totals.
#
# Relies on: kubectl, jq, convert_to_m, convert_memory_to_ki
# Outputs:   one "Node: ..." line per node plus a totals line on stdout
get_node_allocations() {
  # Emits "<name> <alloc cpu> <alloc mem> <capacity cpu as Nm> <capacity mem>"
  # for each node.
  local node_filter='.items[] | .metadata.name as $name | .status.allocatable as $alloc | .status.capacity as $cap | "\($name) \($alloc.cpu) \($alloc.memory) \(($cap.cpu | tonumber * 1000 | tostring) + "m") \($cap.memory)"'
  local node cpu_alloc_raw mem_alloc_raw cpu_cap_raw mem_cap_raw
  local cpu_alloc mem_alloc cpu_cap mem_cap
  local total_cpu_alloc=0
  local total_mem_alloc=0

  echo "Node Allocations:"

  # Let 'read' split the five fields instead of forking awk once per field.
  while read -r node cpu_alloc_raw mem_alloc_raw cpu_cap_raw mem_cap_raw; do
    cpu_alloc=$(convert_to_m "$cpu_alloc_raw")
    mem_alloc=$(convert_memory_to_ki "$mem_alloc_raw")
    cpu_cap=$(convert_to_m "$cpu_cap_raw")
    mem_cap=$(convert_memory_to_ki "$mem_cap_raw")

    total_cpu_alloc=$((total_cpu_alloc + cpu_alloc))
    total_mem_alloc=$((total_mem_alloc + mem_alloc))

    printf "Node: %-50s CPU Allocatable: %8sm/%8sm Memory Allocatable: %10sKi/%10sKi\n" "$node" "$cpu_alloc" "$cpu_cap" "$mem_alloc" "$mem_cap"
  done < <(kubectl get nodes -o json | jq -r "$node_filter")

  echo "Total CPU Allocatable: ${total_cpu_alloc}m, Total Memory Allocatable: ${total_mem_alloc}Ki"
}
# Print current CPU and memory usage per node as reported by `kubectl top`.
#
# The values shown are the 3rd and 5th columns of `kubectl top nodes`
# (the percentage columns in kubectl's default column layout).
#
# Outputs: one "Node: ..." line per node, or an explanatory message when no
#          metrics are returned. The message stays on stdout so it appears
#          inside the report section it belongs to.
get_node_utilization() {
  local node_top_output
  local node cpu_used mem_used

  echo "Node Utilization:"

  # A failing 'kubectl top' (e.g. no metrics-server) is expected and handled
  # below, so it must not abort the script under 'set -e'.
  node_top_output=$(kubectl top nodes --no-headers 2> /dev/null) || true
  if [[ -z $node_top_output ]]; then
    echo "Error: Metrics API not available. Please ensure metrics-server is installed and running."
    return 0
  fi

  # Let 'read' pick columns 1, 3 and 5 instead of forking awk per field.
  while read -r node _ cpu_used _ mem_used _; do
    printf "Node: %-50s CPU Used: %8s Memory Used: %10s\n" "$node" "$cpu_used" "$mem_used"
  done <<< "$node_top_output"
}
# Print sizing advice for one resource type when the summed request or limit
# is far away from the measured usage.
#
# Thresholds (integer form of the ratios request/used and limit/used):
#   request > 1.5x usage -> suggest 1.2x usage
#   request < 0.5x usage -> suggest 0.8x usage
#   limit   > 2x usage   -> suggest 1.5x usage
#   limit == 0           -> suggest 1.5x usage
#
# Arguments: $1 - label ("CPU" or "Memory")
#            $2 - unit suffix appended to suggested values ("m" or "Ki")
#            $3 - total request, $4 - total limit, $5 - total usage (integers)
_print_resource_suggestions() {
  local label=$1
  local unit=$2
  local request=$3
  local limit=$4
  local used=$5
  local request_pct limit_pct

  # Without usage data there is nothing to compare against (and the ratios
  # below would divide by zero).
  if ((used == 0)); then
    return 0
  fi

  # Ratios as truncated whole percentages of usage.
  request_pct=$((request * 100 / used))
  limit_pct=$((limit * 100 / used))

  if ((request_pct > 150)); then
    echo " ${label} request is significantly higher than usage. Consider reducing to $((used * 12 / 10))${unit}"
  elif ((request_pct < 50)); then
    echo " ${label} request is significantly lower than usage. Consider increasing to $((used * 8 / 10))${unit}"
  fi

  if ((limit_pct > 200)); then
    echo " ${label} limit is significantly higher than usage. Consider reducing to $((used * 15 / 10))${unit}"
  elif ((limit == 0)); then
    echo " ${label} limit is not set. Consider setting a limit of $((used * 15 / 10))${unit}"
  fi
}

# For every namespace, print the summed container requests, limits and live
# usage for CPU and memory, followed by sizing suggestions.
#
# Relies on: kubectl, jq, convert_to_m, convert_memory_to_ki
# Outputs:   a report block per namespace on stdout
get_namespace_utilization() {
  # Emits "<cpu request> <mem request> <cpu limit> <mem limit>" per container,
  # substituting "0" for anything that is not set.
  local container_filter='.items[] | .spec.containers[] | (.resources.requests.cpu // "0") + " " + (.resources.requests.memory // "0") + " " + (.resources.limits.cpu // "0") + " " + (.resources.limits.memory // "0")'
  local ns pod_top_output
  local cpu_req mem_req cpu_lim mem_lim cpu mem
  local cpu_request mem_request cpu_limit mem_limit cpu_used mem_used

  echo "Namespace Resource Utilization:"

  # The namespace list is read from fd 3 so commands in the loop body cannot
  # accidentally consume it through stdin.
  while read -r ns <&3; do
    echo "Namespace: $ns"
    cpu_request=0
    mem_request=0
    cpu_limit=0
    mem_limit=0
    cpu_used=0
    mem_used=0

    # Sum resource requests and limits over all containers in the namespace.
    while read -r cpu_req mem_req cpu_lim mem_lim; do
      cpu_request=$((cpu_request + $(convert_to_m "$cpu_req")))
      mem_request=$((mem_request + $(convert_memory_to_ki "$mem_req")))
      cpu_limit=$((cpu_limit + $(convert_to_m "$cpu_lim")))
      mem_limit=$((mem_limit + $(convert_memory_to_ki "$mem_lim")))
    done < <(kubectl get pods -n "$ns" -o json | jq -r "$container_filter")

    # Sum live usage; a failing or empty 'kubectl top' simply leaves usage
    # at 0 and must not abort the script under 'set -e'.
    pod_top_output=$(kubectl top pods -n "$ns" --no-headers 2> /dev/null) || true
    if [[ -n $pod_top_output ]]; then
      while read -r _ cpu mem _; do
        cpu_used=$((cpu_used + $(convert_to_m "$cpu")))
        mem_used=$((mem_used + $(convert_memory_to_ki "$mem")))
      done <<< "$pod_top_output"
    fi

    printf " CPU Request: %8sm, CPU Limit: %8sm, CPU Used: %8sm\n" "$cpu_request" "$cpu_limit" "$cpu_used"
    printf " Memory Request: %10sKi, Memory Limit: %10sKi, Memory Used: %10sKi\n" "$mem_request" "$mem_limit" "$mem_used"

    # Analysis and suggestions.
    _print_resource_suggestions "CPU" "m" "$cpu_request" "$cpu_limit" "$cpu_used"
    _print_resource_suggestions "Memory" "Ki" "$mem_request" "$mem_limit" "$mem_used"
  done 3< <(kubectl get namespaces -o json | jq -r '.items[].metadata.name')
}
# Compute part/whole as a percentage in hundredths of a percent (9817 means
# 98.17%). Multiplying before dividing keeps two real decimal places.
#
# Arguments: $1 - part, $2 - whole (integers)
# Outputs:   the integer value, or nothing when the whole is 0
_efficiency_hundredths() {
  local part=$1
  local whole=$2

  if ((whole > 0)); then
    echo $((part * 10000 / whole))
  fi
}

# Render a value produced by _efficiency_hundredths as "NN.NN", or "n/a" when
# the value is empty (i.e. the ratio could not be computed).
_format_hundredths() {
  local hundredths=${1:-}

  if [[ -z $hundredths ]]; then
    echo "n/a"
  else
    printf '%d.%02d\n' $((hundredths / 100)) $((hundredths % 100))
  fi
}

# Report the allocatable/capacity ratio for CPU and memory per node and for
# the whole cluster, flagging any ratio below 70%.
#
# Relies on: kubectl, jq, convert_to_m, convert_memory_to_ki
# Outputs:   the analysis on stdout; a ratio whose capacity is 0 is shown as
#            "n/a" and is never flagged
analyze_resource_efficiency() {
  # Emits "<name> <alloc cpu> <alloc mem> <capacity cpu as Nm> <capacity mem>"
  # for each node.
  local node_filter='.items[] | .metadata.name as $name | .status.allocatable as $alloc | .status.capacity as $cap | "\($name) \($alloc.cpu) \($alloc.memory) \(($cap.cpu | tonumber * 1000 | tostring) + "m") \($cap.memory)"'
  local node cpu_alloc_raw mem_alloc_raw cpu_cap_raw mem_cap_raw
  local cpu_alloc mem_alloc cpu_cap mem_cap
  local cpu_efficiency mem_efficiency
  local cluster_cpu_efficiency cluster_mem_efficiency
  local total_cpu_alloc=0
  local total_cpu_cap=0
  local total_mem_alloc=0
  local total_mem_cap=0

  echo "Resource Efficiency Analysis:"

  while read -r node cpu_alloc_raw mem_alloc_raw cpu_cap_raw mem_cap_raw; do
    cpu_alloc=$(convert_to_m "$cpu_alloc_raw")
    mem_alloc=$(convert_memory_to_ki "$mem_alloc_raw")
    cpu_cap=$(convert_to_m "$cpu_cap_raw")
    mem_cap=$(convert_memory_to_ki "$mem_cap_raw")

    total_cpu_alloc=$((total_cpu_alloc + cpu_alloc))
    total_cpu_cap=$((total_cpu_cap + cpu_cap))
    total_mem_alloc=$((total_mem_alloc + mem_alloc))
    total_mem_cap=$((total_mem_cap + mem_cap))

    cpu_efficiency=$(_efficiency_hundredths "$cpu_alloc" "$cpu_cap")
    mem_efficiency=$(_efficiency_hundredths "$mem_alloc" "$mem_cap")

    echo "Node: $node"
    echo " CPU Allocatable/Capacity: ${cpu_alloc}m/${cpu_cap}m ($(_format_hundredths "$cpu_efficiency")%)"
    echo " Memory Allocatable/Capacity: ${mem_alloc}Ki/${mem_cap}Ki ($(_format_hundredths "$mem_efficiency")%)"

    # 7000 hundredths == 70.00%.
    if [[ -n $cpu_efficiency ]] && ((cpu_efficiency < 7000)); then
      echo " Consider adjusting CPU requests/limits or node size for better CPU utilization"
    fi
    if [[ -n $mem_efficiency ]] && ((mem_efficiency < 7000)); then
      echo " Consider adjusting memory requests/limits or node size for better memory utilization"
    fi
    echo
  done < <(kubectl get nodes -o json | jq -r "$node_filter")

  # Calculate cluster-wide efficiency.
  cluster_cpu_efficiency=$(_efficiency_hundredths "$total_cpu_alloc" "$total_cpu_cap")
  cluster_mem_efficiency=$(_efficiency_hundredths "$total_mem_alloc" "$total_mem_cap")

  echo "Cluster-wide Resource Efficiency:"
  echo " Total CPU Allocatable/Capacity: ${total_cpu_alloc}m/${total_cpu_cap}m ($(_format_hundredths "$cluster_cpu_efficiency")%)"
  echo " Total Memory Allocatable/Capacity: ${total_mem_alloc}Ki/${total_mem_cap}Ki ($(_format_hundredths "$cluster_mem_efficiency")%)"

  if [[ -n $cluster_cpu_efficiency ]] && ((cluster_cpu_efficiency < 7000)); then
    echo " Consider adjusting cluster-wide CPU allocation for better utilization"
  fi
  if [[ -n $cluster_mem_efficiency ]] && ((cluster_mem_efficiency < 7000)); then
    echo " Consider adjusting cluster-wide memory allocation for better utilization"
  fi
}
# Print the API server's version string (serverVersion.gitVersion as reported
# by `kubectl version`).
#
# Outputs: the version string, or "unknown" when it cannot be determined
get_gke_version() {
  local server_version

  # A failure here should degrade to "unknown" rather than abort the report.
  server_version=$(kubectl version -o json | jq -r '.serverVersion.gitVersion') || true

  # jq -r prints the literal text "null" when the field is absent.
  if [[ -z $server_version || $server_version == null ]]; then
    server_version="unknown"
  fi
  echo "$server_version"
}
# Print a blank line, the title in bold, and an underline of '=' characters
# that is as long as the title.
#
# Arguments: $1 - title text (printed literally; backslashes are not expanded)
print_header() {
  local title=${1:-}

  printf '\n\033[1m%s\033[0m\n' "$title"
  printf '%s\n' "${title//?/=}"
}
# Print a blank line, the title in bold, and an underline of '-' characters
# that is as long as the title.
#
# Arguments: $1 - title text (printed literally; backslashes are not expanded)
print_subheader() {
  local title=${1:-}

  printf '\n\033[1m%s\033[0m\n' "$title"
  printf '%s\n' "${title//?/-}"
}
# Print one "key : value" line with the key left-aligned in a 25-column field.
#
# Arguments: $1 - key, $2 - value (either may be omitted)
print_key_value() {
  local key=${1:-}
  local value=${2:-}

  printf '%-25s : %s\n' "$key" "$value"
}
# Entry point: print the complete cluster report, one section after another,
# followed by a glossary of the terms used in it.
main() {
  print_header "GKE Cluster Analysis"
  print_key_value "Cluster Type" "$(get_cluster_type)"
  print_key_value "GKE Version" "$(get_gke_version)"

  print_subheader "Node Allocations"
  get_node_allocations

  print_subheader "Node Utilization"
  get_node_utilization

  print_subheader "Namespace Resource Utilization"
  get_namespace_utilization

  print_subheader "Resource Efficiency Analysis"
  analyze_resource_efficiency

  print_subheader "Explanation of Terms"
  # One argument per output line; the empty argument produces the blank line
  # that separates the glossary from the closing notes.
  printf '%s\n' \
    "- Capacity: The total amount of resources on the node." \
    "- Allocatable: The amount of resources that can be requested by and allocated to pods." \
    "- Request: The minimum amount of resources a pod is guaranteed to have." \
    "- Limit: The maximum amount of resources a pod is allowed to use." \
    "- Used: The actual amount of resources currently being consumed by pods." \
    "" \
    "Note: In Autopilot clusters, GKE automatically manages resource allocation and scaling." \
    "Resource efficiency percentages represent Allocatable/Capacity ratios." \
    "CPU values are in millicores, where 1000m = 1 CPU core."
}
# Run the report, forwarding any command-line arguments to main.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment