You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Here is a modified version, wrapped in bash functions for easier discovery and invocation. Hoping this can help others.
# Produces a file in the current directory (keys_raw.json) containing one JSON
# object per etcd key, with the following fields:
# - fullkey: full etcd key
# - api: k8s api
# - group: k8s group
# - namespace: k8s namespace
# - resource: resource name
# - size: size in bytes
# - versions: versions
#
# Example:
# {
#   "fullkey": "/registry/apiextensions.k8s.io/customresourcedefinitions/grafanas.grafana.integreatly.org",
#   "api": "customresourcedefinitions",
#   "group": "apiextensions.k8s.io",
#   "namespace": null,
#   "resource": "grafanas.grafana.integreatly.org",
#   "size": 336909,
#   "versions": 3
# }
#
# In the file itself each object is stored in compact form, one per line:
# {"fullkey":"/registry/apiextensions.k8s.io/customresourcedefinitions/grafanas.grafana.integreatly.org","api":"customresourcedefinitions","group":"apiextensions.k8s.io","namespace":null,"resource":"grafanas.grafana.integreatly.org","size":336909,"versions":3}
# Credit from https://gist.github.com/dkeightley/8f2211d6e93a0d5bc294242248ca8fbf?permalink_comment_id=4836323#gistcomment-4836323

#######################################
# Dump one compact JSON object per etcd key into a result file.
#
# Pages through the keyspace with `etcdctl get --from-key` and, for every key,
# records: fullkey, api, group, namespace, resource, size (bytes of the stored
# value) and versions. Every recorded key is also printed to stdout, followed
# by a "Remaining keys: N" progress line after each page that is not the last.
#
# NOTE(review): api/group/resource/namespace are taken purely by position in
# the "/"-split key (indexes 3, 2, 4 and 5). For keys that carry a namespace
# segment before the object name this may label the two the other way round;
# confirm against your key layout before relying on those two fields.
#
# Arguments:
#   $1 - output file (default: keys_raw.json); truncated at the start of a run
#   $2 - page size handed to `etcdctl --limit` (default: 500, minimum 2)
# Outputs:
#   Key names and progress lines on stdout; diagnostics on stderr.
# Returns:
#   0 on success, non-zero on bad arguments or when mktemp/etcdctl fails.
#######################################
function extract_k8s_etcd_keys_size_and_versions() {
  local result_file=${1:-keys_raw.json}
  local limit=${2:-500}
  local page_file next_key last_key remaining
  # The very first page must keep its first entry; later pages start at a key
  # that was already recorded, so they skip it.
  local skip=0

  # A page of size 1 would only ever return the key it started from.
  if ! [[ "$limit" =~ ^[0-9]+$ ]] || (( 10#$limit < 2 )); then
    printf 'limit must be an integer >= 2, got: %s\n' "$limit" >&2
    return 2
  fi

  page_file=$(mktemp) || return
  if ! next_key=$(etcdctl get --limit 1 --keys-only --prefix /); then
    rm -f -- "$page_file"
    return 1
  fi

  # Start from a clean file so that re-running does not duplicate entries.
  : >"$result_file"

  if [[ -z "$next_key" ]]; then
    # Nothing stored under "/": leave the empty result file behind.
    rm -f -- "$page_file"
    return 0
  fi

  while true; do
    # Store the page on disk first: every later step reads a complete file
    # instead of racing against an asynchronous process substitution.
    if ! etcdctl get --limit "$limit" --write-out=json --from-key "$next_key" \
      >"$page_file"; then
      rm -f -- "$page_file"
      return 1
    fi

    jq -c --argjson skip "$skip" '
      # Byte length of a base64 string, computed without decoding it, so that
      # binary values are not distorted by UTF-8 replacement characters.
      def decoded_bytes:
        ((. // "") | rtrimstr("=") | rtrimstr("=") | length) * 3 / 4 | floor;

      (.kvs // [])
      | .[$skip:][]
      | (.key | @base64d) as $key
      | ($key | split("/")) as $keya
      | {
          "fullkey": $key,
          "api": $keya[3],
          "group": $keya[2],
          "namespace": $keya[5],
          "resource": $keya[4],
          "size": (.value | decoded_bytes),
          "versions": (.version)
        }' "$page_file" \
      | tee -a "$result_file" \
      | jq -r '.fullkey'

    last_key=$(jq -r '(.kvs // [])
      | if length > 0 then (.[-1].key | @base64d) else "" end' "$page_file")
    remaining=$(jq -r '.count // 0' "$page_file")
    [[ "$remaining" =~ ^[0-9]+$ ]] || remaining=0

    # count == 1 means only the starting key itself was left; also stop when
    # the page was empty or did not advance, to rule out an endless loop.
    if (( remaining <= 1 )) || [[ -z "$last_key" || "$last_key" == "$next_key" ]]; then
      break
    fi

    next_key=$last_key
    skip=1
    echo "Remaining keys: $remaining"
  done

  rm -f -- "$page_file"
  return 0
}
#######################################
# Annotated variant of the etcd key dump: identical behaviour, with the jq
# filter explained step by step.
#
# Pages through the keyspace with `etcdctl get --from-key` and appends one
# compact JSON object per key (fullkey, api, group, namespace, resource, size,
# versions) to the result file. Every recorded key is also printed to stdout,
# followed by a "Remaining keys: N" line after each page that is not the last.
#
# Arguments:
#   $1 - output file (default: keys_raw.json); truncated at the start of a run
#   $2 - page size handed to `etcdctl --limit` (default: 500, minimum 2)
# Outputs:
#   Key names and progress lines on stdout; diagnostics on stderr.
# Returns:
#   0 on success, non-zero on bad arguments or when mktemp/etcdctl fails.
#######################################
function extract_k8s_etcd_keys_size_and_versions_commented() {
  local result_file=${1:-keys_raw.json}
  local limit=${2:-500}
  local page_file next_key last_key remaining
  # Number of leading entries to drop from a page: 0 for the first page,
  # 1 afterwards (see the loop tail).
  local skip=0

  # A page of size 1 would only ever return the key it started from.
  if ! [[ "$limit" =~ ^[0-9]+$ ]] || (( 10#$limit < 2 )); then
    printf 'limit must be an integer >= 2, got: %s\n' "$limit" >&2
    return 2
  fi

  page_file=$(mktemp) || return

  # Find the first key under "/" to start paging from.
  if ! next_key=$(etcdctl get --limit 1 --keys-only --prefix /); then
    rm -f -- "$page_file"
    return 1
  fi

  # Start from a clean file so that re-running does not duplicate entries.
  : >"$result_file"

  if [[ -z "$next_key" ]]; then
    rm -f -- "$page_file"
    return 0
  fi

  while true; do
    # Fetch one page, starting at (and including) $next_key, and keep it on
    # disk so each jq pass below reads a complete document.
    if ! etcdctl get --limit "$limit" --write-out=json --from-key "$next_key" \
      >"$page_file"; then
      rm -f -- "$page_file"
      return 1
    fi

    # Main jq processing pipeline:
    jq -c --argjson skip "$skip" '
      # Byte length of a base64 string: strip up to two "=" padding
      # characters, then every 4 remaining characters encode 3 bytes.
      # Works on the encoded text, so binary values are measured exactly.
      def decoded_bytes:
        ((. // "") | rtrimstr("=") | rtrimstr("=") | length) * 3 / 4 | floor;

      # A page without any entries has no "kvs" field; treat it as empty.
      (.kvs // [])
      # Drop the first $skip entries (the key the page started from, which
      # the previous page already recorded), then iterate over the rest.
      | .[$skip:][]
      # Decode the base64-encoded key and keep it as $key.
      | (.key | @base64d) as $key
      # Split the key on "/". Because the key starts with "/", index 0 is
      # the empty string and index 1 is the first path segment.
      | ($key | split("/")) as $keya
      | {
          "fullkey": $key,                  # the full decoded key path
          "api": $keya[3],                  # 3rd path segment
          "group": $keya[2],                # 2nd path segment
          "namespace": $keya[5],            # 5th path segment, null if absent
          "resource": $keya[4],             # 4th path segment, null if absent
          "size": (.value | decoded_bytes), # size of the stored value in bytes
          "versions": (.version)            # version counter reported by etcd
        }' "$page_file" \
      | tee -a "$result_file" \
      | jq -r '.fullkey'

    # Last key of this page: the next page starts here.
    last_key=$(jq -r '(.kvs // [])
      | if length > 0 then (.[-1].key | @base64d) else "" end' "$page_file")
    # "count" is read as the number of keys left from the starting key on.
    remaining=$(jq -r '.count // 0' "$page_file")
    [[ "$remaining" =~ ^[0-9]+$ ]] || remaining=0

    # count == 1 means only the starting key itself was left; also stop when
    # the page was empty or did not advance, to rule out an endless loop.
    if (( remaining <= 1 )) || [[ -z "$last_key" || "$last_key" == "$next_key" ]]; then
      break
    fi

    next_key=$last_key
    skip=1
    echo "Remaining keys: $remaining"
  done

  rm -f -- "$page_file"
  return 0
}
#######################################
# Print the 10 groups whose keys add up to the largest total size.
# Arguments:
#   $1 - file of JSON objects carrying "group" and "size" fields
#        (default: keys_raw.json in the current directory)
# Outputs:
#   JSON array of {group, total} objects on stdout, largest total first.
#######################################
function display_largest_10_groups_by_size() {
  local input_file=${1:-keys_raw.json}
  jq -s '
    group_by(.group)
    | map({
        group: (.[0].group),
        total: ([.[] | .size] | reduce .[] as $num (0; . + $num))
      })
    | sort_by(.total)
    | reverse
    | .[0:10]' "$input_file"
}
#######################################
# Print the 10 namespaces whose keys add up to the largest total size.
# Arguments:
#   $1 - file of JSON objects carrying "namespace" and "size" fields
#        (default: keys_raw.json in the current directory)
# Outputs:
#   JSON array of {namespace, total} objects on stdout, largest total first.
#######################################
function display_largest_10_namespaces_by_size() {
  local input_file=${1:-keys_raw.json}
  jq -s '
    group_by(.namespace)
    | map({
        namespace: (.[0].namespace),
        total: ([.[] | .size] | reduce .[] as $num (0; . + $num))
      })
    | sort_by(.total)
    | reverse
    | .[0:10]' "$input_file"
}
#######################################
# Annotated variant: print the 10 namespaces whose keys add up to the largest
# total size, with every step of the jq filter explained.
# Arguments:
#   $1 - file of JSON objects carrying "namespace" and "size" fields
#        (default: keys_raw.json in the current directory)
# Outputs:
#   JSON array of {namespace, total} objects on stdout, largest total first.
#######################################
function display_largest_10_namespaces_by_size_commented() {
  local input_file=${1:-keys_raw.json}
  # -s slurps all objects of the file into one array before filtering.
  # The jq comments below must each stay on their own line: a "#" comments
  # out everything up to the end of that line.
  jq -s '
    # Group all objects by their namespace field
    group_by(.namespace)
    # For each group, create a new object with:
    # - namespace: the namespace name (taken from the first element)
    # - total: sum of all size values in that namespace
    | map({
        namespace: (.[0].namespace),
        total: (
          # Extract all size values from the current group
          [.[] | .size]
          # Use reduce to sum all size values:
          # - iterate through each size value (as $num)
          # - start with accumulator = 0
          # - on each iteration add $num to the accumulator (. + $num)
          # - the result is the total of all sizes in this namespace
          | reduce .[] as $num (0; . + $num)
        )
      })
    # Sort the resulting array by the total field in ascending order
    | sort_by(.total)
    # Reverse to get descending order (largest first)
    | reverse
    # Take only the first 10 elements (top 10 largest namespaces)
    | .[0:10]' "$input_file"
}
#######################################
# Print the 10 namespaces with the largest sum of (size x versions) over
# their keys, with the totals rendered using thousands separators.
# Arguments:
#   $1 - file of JSON objects carrying "namespace", "size" and "versions"
#        fields (default: keys_raw.json in the current directory)
# Outputs:
#   One JSON array of {namespace, total_bytes} objects on stdout, largest
#   first; total_bytes is a string such as "1,234,567".
#######################################
function display_largest_10_namespaces_by_size_time_versions_commented() {
  local input_file=${1:-keys_raw.json}
  # The jq comments below must each stay on their own line: a "#" comments
  # out everything up to the end of that line.
  jq -s '
    # Format a number with thousands separators (commas).
    # Intended for non-negative integers.
    def format_number:
      tostring
      # Turn the string into an array of code points and reverse it, so that
      # groups of three can be counted from the least significant digit
      | explode
      | reverse
      # Put a comma (code point 44) in front of every 3rd digit
      | to_entries
      | map(
          if (.key > 0 and (.key % 3) == 0) then
            [44, .value]
          else
            [.value]
          end
        )
      | flatten
      # Reverse back and convert to a string
      | reverse
      | implode;

    # Group all entries by their namespace field
    group_by(.namespace)
    # Turn each group into a per-namespace summary
    | map({
        namespace: (.[0].namespace),
        # Total size: sum of (size x versions) over all keys of the group
        total_bytes: (map(.size * .versions) | add)
      })
    # Sort by total size, largest first
    | sort_by(.total_bytes)
    | reverse
    # Keep only the top 10; slicing keeps the result as ONE array, whereas
    # streaming the elements and wrapping each one would emit many arrays
    | .[0:10]
    # Render the totals with thousands separators
    | map({
        namespace: .namespace,
        total_bytes: (.total_bytes | format_number)
      })' "$input_file"
}
#######################################
# Print the 10 namespaces that hold the most keys.
# Arguments:
#   $1 - file of JSON objects carrying a "namespace" field
#        (default: keys_raw.json in the current directory)
# Outputs:
#   JSON array of {namespace, count} objects on stdout, highest count first.
#######################################
function display_largest_10_namespaces_by_key_count() {
  local input_file=${1:-keys_raw.json}
  jq -s '
    group_by(.namespace)
    | map({ namespace: (.[0].namespace), count: (. | length) })
    | sort_by(.count)
    | reverse
    | .[0:10]' "$input_file"
}
#######################################
# Print the 10 groups that hold the most keys.
# Arguments:
#   $1 - file of JSON objects carrying a "group" field
#        (default: keys_raw.json in the current directory)
# Outputs:
#   JSON array of {group, count} objects on stdout, highest count first.
#######################################
function display_largest_10_groups_by_key_count() {
  local input_file=${1:-keys_raw.json}
  jq -s '
    group_by(.group)
    | map({ group: (.[0].group), count: (. | length) })
    | sort_by(.count)
    | reverse
    | .[0:10]' "$input_file"
}
#######################################
# Print the 10 keys with the highest "versions" value.
# Arguments:
#   $1 - file of JSON objects carrying a "versions" field
#        (default: keys_raw.json in the current directory)
# Outputs:
#   JSON array of the complete objects on stdout, highest versions first.
#######################################
function display_highest_10_versions() {
  local input_file=${1:-keys_raw.json}
  jq -s '
    sort_by(.versions)
    | reverse
    | .[0:10]' "$input_file"
}
Thanks, I'll take a look into this approach 👍
Correct, I mistakenly conflated this gist with another one-liner to gather the object counts:
Note that the `cattle.io` portion is just there to split out the different CRDs we work with.