etcd object counts and sizes

Exec into the etcd container

RKE1

# Open an interactive sh session inside the container named "etcd".
docker exec -it etcd sh

RKE2

# Point crictl at the RKE2-provided configuration, look up the ID of the
# running etcd container, and open a bash session inside it with the etcdctl
# endpoint, certificates and API version already set in its environment.
export CRI_CONFIG_FILE=/var/lib/rancher/rke2/agent/etc/crictl.yaml
etcdcontainer=$(/var/lib/rancher/rke2/bin/crictl ps --label io.kubernetes.container.name=etcd --quiet)
if [ -z "$etcdcontainer" ]; then
  # Without this guard crictl would be handed "sh" as the container ID and
  # fail with a misleading error.
  echo "No running etcd container found on this node" >&2
else
  /var/lib/rancher/rke2/bin/crictl exec -it "$etcdcontainer" sh -c "ETCDCTL_ENDPOINTS='https://127.0.0.1:2379' ETCDCTL_CACERT='/var/lib/rancher/rke2/server/tls/etcd/server-ca.crt' ETCDCTL_CERT='/var/lib/rancher/rke2/server/tls/etcd/server-client.crt' ETCDCTL_KEY='/var/lib/rancher/rke2/server/tls/etcd/server-client.key' ETCDCTL_API=3 bash"
fi

Count all objects

# For every key in etcd, append one line to /tmp/etcdkeys.txt:
#   <total> <size> <versions> <count> <key>
# where total = size * versions. Kept POSIX-sh compatible because the
# container is entered with `sh`.
etcdctl get --prefix --keys-only / | while IFS= read -r key; do
  # The key listing contains blank separator lines; skip them.
  [ -n "$key" ] || continue

  # Number of bytes etcdctl prints for the value (wc -c counts every byte of
  # that output, including any trailing newline etcdctl adds).
  size=$(etcdctl get "$key" --print-value-only </dev/null | wc -c)
  size=$((${size:-0}))

  # Fetch the metadata once and reuse it for both Count and Version instead
  # of querying etcd twice for the same output.
  fields=$(etcdctl get "$key" --write-out=fields </dev/null)
  count=$(printf '%s\n' "$fields" | grep '"Count"' | cut -f2 -d':')
  # Arithmetic expansion strips the padding left by cut and turns an empty
  # result (key vanished, etcdctl failed) into 0 instead of a test error.
  count=$((${count:-0}))

  if [ "$count" -ne 0 ]; then
    versions=$(printf '%s\n' "$fields" | grep '"Version"' | cut -f2 -d':')
    versions=$((${versions:-0}))
  else
    versions=0
  fi

  total=$((size * versions))
  printf '%s %s %s %s %s\n' "$total" "$size" "$versions" "$count" "$key" >> /tmp/etcdkeys.txt
done

Optional: count only a specific object type, e.g. secrets

# For every key under /registry/secrets, append one line to
# /tmp/etcdkeys-secrets.txt:
#   <total> <size> <versions> <count> <key>
# where total = size * versions. Kept POSIX-sh compatible because the
# container is entered with `sh`.
etcdctl get --prefix --keys-only /registry/secrets | while IFS= read -r key; do
  # The key listing contains blank separator lines; skip them.
  [ -n "$key" ] || continue

  # Number of bytes etcdctl prints for the value (wc -c counts every byte of
  # that output, including any trailing newline etcdctl adds).
  size=$(etcdctl get "$key" --print-value-only </dev/null | wc -c)
  size=$((${size:-0}))

  # Fetch the metadata once and reuse it for both Count and Version instead
  # of querying etcd twice for the same output.
  fields=$(etcdctl get "$key" --write-out=fields </dev/null)
  count=$(printf '%s\n' "$fields" | grep '"Count"' | cut -f2 -d':')
  # Arithmetic expansion strips the padding left by cut and turns an empty
  # result (key vanished, etcdctl failed) into 0 instead of a test error.
  count=$((${count:-0}))

  if [ "$count" -ne 0 ]; then
    versions=$(printf '%s\n' "$fields" | grep '"Version"' | cut -f2 -d':')
    versions=$((${versions:-0}))
  else
    versions=0
  fi

  total=$((size * versions))
  printf '%s %s %s %s %s\n' "$total" "$size" "$versions" "$count" "$key" >> /tmp/etcdkeys-secrets.txt
done

Sort the output

# Numeric sort on the first column (total = size * versions), smallest first.
sort -n /tmp/etcdkeys.txt

Copy the file if needed

RKE1

# Copy the results file out of the "etcd" container into the current directory.
docker cp etcd:/tmp/etcdkeys.txt .

RKE2

# Replace <etcd pod name> with the name of the etcd pod in kube-system before running.
kubectl cp -n kube-system <etcd pod name>:/tmp/etcdkeys.txt ./etcdkeys.txt

Thanks @dkeightley for sharing this.

When testing https://gist.github.com/dkeightley/8f2211d6e93a0d5bc294242248ca8fbf?permalink_comment_id=4836323#gistcomment-4836323 against etcd 3.6, I spotted a mismatch in the api/group/namespace fields.

Here is a modified version, wrapped in bash functions for easier discovery and invocation. I hope this helps others.

# Walks every key in etcd, page by page, and appends one compact JSON record
# per key to keys_raw.json in the current directory. Each decoded key is also
# printed to stdout as progress output.
#
# Record fields (positions refer to the key split on "/"):
# - fullkey:   full etcd key
# - api:       k8s api       (index 3)
# - group:     k8s group     (index 2)
# - namespace: k8s namespace (index 5, null when the key has no such segment)
# - resource:  resource name (index 4)
# - size:      size of the stored value in bytes
# - versions:  etcd version counter of the key
#
# Example:
#{
#  "fullkey": "/registry/apiextensions.k8s.io/customresourcedefinitions/grafanas.grafana.integreatly.org",
#  "api": "customresourcedefinitions",
#  "group": "apiextensions.k8s.io",
#  "namespace": null,
#  "resource": "grafanas.grafana.integreatly.org",
#  "size": 336909,
#  "versions": 3
#}
#
# As written to the file (one line per key):
#{"fullkey":"/registry/apiextensions.k8s.io/customresourcedefinitions/grafanas.grafana.integreatly.org","api":"customresourcedefinitions","group":"apiextensions.k8s.io","namespace":null,"resource":"grafanas.grafana.integreatly.org","size":336909,"versions":3}
#
# NOTE(review): the field labels are purely positional. Keys with another
# layout (e.g. /registry/secrets/<...>) will not line up with them - confirm
# against your key layout before relying on group/api/namespace.
#
# Records are appended: remove keys_raw.json before re-running, otherwise
# every key is counted once per run.
#
# Returns 0 on success, non-zero when etcdctl, mktemp or jq fail.
#
# Credit from https://gist.github.com/dkeightley/8f2211d6e93a0d5bc294242248ca8fbf?permalink_comment_id=4836323#gistcomment-4836323
function extract_k8s_etcd_keys_size_and_versions() {
  local -r limit=500
  local -r result_file=keys_raw.json
  local page_file next_key last_key remaining
  local skip=0 status=0

  next_key=$(etcdctl get --limit 1 --keys-only --prefix /) || return
  if [[ -z "$next_key" ]]; then
    echo "No keys found under /" >&2
    return 0
  fi
  page_file=$(mktemp) || return

  while true; do
    # Store the page on disk and read it twice. Feeding a second jq through
    # `tee >(...)` is racy because bash does not wait for the process
    # substitution before the next command reads its output.
    if ! etcdctl get --limit "$limit" --write-out=json --from-key "$next_key" >"$page_file"; then
      status=1
      break
    fi

    # $skip is 0 for the first page and 1 afterwards: every later page starts
    # with the last key of the previous page, which was already recorded.
    jq -c --argjson skip "$skip" '
      # Exact decoded byte count of a base64 string (null counts as empty).
      # Decoding with @base64d and taking the length would count code points
      # of a lossy string, which is wrong for binary or non-ASCII values.
      def b64_bytes:
        (. // "") as $b64
        | ($b64 | length) as $n
        | if $n == 0 then 0
          else ($n * 3 / 4 | floor)
               - (if ($b64 | endswith("==")) then 2
                  elif ($b64 | endswith("=")) then 1
                  else 0 end)
          end;

      (.kvs // [])[$skip:][]
      | (.key | @base64d) as $key
      | ($key | split("/")) as $keya
      | {
          "fullkey": $key,
          "api": $keya[3],
          "group": $keya[2],
          "namespace": $keya[5],
          "resource": $keya[4],
          "size": (.value | b64_bytes),
          "versions": (.version)
        }
    ' "$page_file" |
      tee -a "$result_file" |
      jq -r '.fullkey'

    # count = number of keys from next_key to the end of the keyspace.
    remaining=$(jq -r '.count // 0' "$page_file")
    last_key=$(jq -r '(.kvs // [])[-1].key // "" | @base64d' "$page_file")
    if [[ ! "$remaining" =~ ^[0-9]+$ ]]; then
      status=1
      break
    fi

    # Stop on the last key, or when the page made no progress (empty page or
    # same last key), so the loop cannot spin forever.
    if (( remaining <= 1 )) || [[ -z "$last_key" || "$last_key" == "$next_key" ]]; then
      break
    fi
    next_key=$last_key
    skip=1

    echo "Remaining keys: $remaining"
  done

  rm -f -- "$page_file"
  return "$status"
}

# Heavily commented twin of extract_k8s_etcd_keys_size_and_versions: walks
# every key in etcd page by page and appends one compact JSON record per key
# to keys_raw.json in the current directory, printing each decoded key to
# stdout as progress output.
# Records are appended, so remove keys_raw.json before re-running.
# Returns 0 on success, non-zero when etcdctl, mktemp or jq fail.
function extract_k8s_etcd_keys_size_and_versions_commented() {
  local -r limit=500                 # keys requested per etcdctl call
  local -r result_file=keys_raw.json # one JSON record per line is appended here
  local page_file next_key last_key remaining
  local skip=0 status=0

  # The first key under "/" is the starting point of the walk.
  next_key=$(etcdctl get --limit 1 --keys-only --prefix /) || return
  if [[ -z "$next_key" ]]; then
    echo "No keys found under /" >&2
    return 0
  fi
  page_file=$(mktemp) || return

  while true; do
    # Store the page on disk and read it twice. Feeding a second jq through
    # `tee >(...)` is racy because bash does not wait for the process
    # substitution before the next command reads its output.
    if ! etcdctl get --limit "$limit" --write-out=json --from-key "$next_key" >"$page_file"; then
      status=1
      break
    fi

    # Main jq processing pipeline:
    jq -c --argjson skip "$skip" '
      # Exact decoded byte count of a base64 string (null counts as empty):
      # every 4 base64 characters carry 3 bytes, minus one byte per "=" pad.
      # Decoding with @base64d and taking the length would count code points
      # of a lossy string, which is wrong for binary or non-ASCII values.
      def b64_bytes:
        (. // "") as $b64
        | ($b64 | length) as $n
        | if $n == 0 then 0
          else ($n * 3 / 4 | floor)
               - (if ($b64 | endswith("==")) then 2
                  elif ($b64 | endswith("=")) then 1
                  else 0 end)
          end;

      # Iterate over the key-value pairs of this page. $skip is 0 on the
      # first page and 1 afterwards, because every later page starts with
      # the last key of the previous page, which was already recorded.
      (.kvs // [])[$skip:][]
      | (.key | @base64d) as $key       # Decode the base64-encoded key
      | ($key | split("/")) as $keya    # Split on "/"; index 0 is the empty string before the leading "/"
      | {
          "fullkey": $key,              # Full decoded key path
          "api": $keya[3],              # Index 3, e.g. "customresourcedefinitions"
          "group": $keya[2],            # Index 2, e.g. "apiextensions.k8s.io"
          "namespace": $keya[5],        # Index 5, null when the key has no such segment
          "resource": $keya[4],         # Index 4, e.g. "grafanas.grafana.integreatly.org"
          "size": (.value | b64_bytes), # Byte length of the stored value
          "versions": (.version)        # Version counter from the etcd key-value metadata
        }
      # NOTE(review): the labels above are purely positional; keys with a
      # different layout (e.g. /registry/secrets/<...>) will not line up
      # with them - confirm against your key layout.
    ' "$page_file" |
      tee -a "$result_file" |
      jq -r '.fullkey'

    # count = number of keys from next_key to the end of the keyspace.
    remaining=$(jq -r '.count // 0' "$page_file")
    # Last key of this page: the starting point of the next request.
    last_key=$(jq -r '(.kvs // [])[-1].key // "" | @base64d' "$page_file")
    if [[ ! "$remaining" =~ ^[0-9]+$ ]]; then
      status=1
      break
    fi

    # Stop on the last key, or when the page made no progress (empty page or
    # same last key), so the loop cannot spin forever.
    if (( remaining <= 1 )) || [[ -z "$last_key" || "$last_key" == "$next_key" ]]; then
      break
    fi
    next_key=$last_key
    skip=1

    echo "Remaining keys: $remaining"
  done

  rm -f -- "$page_file"
  return "$status"
}


function display_largest_10_groups_by_size() {
  # Sum the "size" field per "group" over all records in keys_raw.json and
  # print the ten groups with the largest totals, biggest first, as a JSON
  # array of {group, total} objects.
  local -r top_groups='
    def total_of: reduce .[] as $bytes (0; . + $bytes);
    group_by(.group)
    | map({group: .[0].group, total: (map(.size) | total_of)})
    | sort_by(.total)
    | reverse
    | .[:10]
  '
  jq -s "$top_groups" keys_raw.json
}

function display_largest_10_namespaces_by_size() {
  # Print the ten namespaces whose records in keys_raw.json add up to the
  # largest "size", biggest first, as a JSON array of {namespace, total}.
  jq --slurp '
    group_by(.namespace)
    | map(
        {
          namespace: (.[0] | .namespace),
          total: (reduce .[].size as $bytes (0; . + $bytes))
        }
      )
    | sort_by(.total)
    | reverse
    | .[:10]
  ' keys_raw.json
}

function display_largest_10_namespaces_by_size_commented() {
  # Annotated variant: prints the ten namespaces with the largest summed
  # "size" in keys_raw.json as a JSON array of {namespace, total} objects.
  local -r annotated_program='
    # Add up an array of numbers, starting from 0.
    def sum_sizes: reduce .[] as $bytes (0; . + $bytes);

    # 1. Bucket the slurped records by their namespace field.
    group_by(.namespace)

    # 2. Collapse each bucket into a summary object:
    #    - namespace: taken from the first record of the bucket
    #    - total:     sum of the size field of every record in the bucket
    | map({
        namespace: .[0].namespace,
        total: (map(.size) | sum_sizes)
      })

    # 3. Order by total, ascending ...
    | sort_by(.total)

    # 4. ... then flip so the largest total comes first.
    | reverse

    # 5. Keep the first ten entries only.
    | .[:10]
  '
  jq -s "$annotated_program" keys_raw.json
}


function display_largest_10_namespaces_by_size_time_versions_commented() {
  # Prints, as ONE JSON array, the ten namespaces in keys_raw.json with the
  # largest sum of (size * versions), biggest first. Each entry is
  # {namespace, total_bytes}, where total_bytes is a string with thousands
  # separators, e.g. "1,234,567".
  jq -s '
    # Format a non-negative integer with thousands separators (commas).
    def format_number:
      tostring
      # Work on the reversed list of character codes so that groups of three
      # are counted from the least significant digit.
      | explode | reverse
      | to_entries
      | map(
          if (.key > 0 and (.key % 3) == 0)
          then [44, .value]  # 44 is the character code of ","
          else [.value]
          end
        )
      | flatten
      # Restore the original order and turn the codes back into a string.
      | reverse | implode;

    # Group all entries by their namespace field.
    group_by(.namespace)
    # Summarise each group.
    | map({
        namespace: (.[0].namespace),
        # Sum of (size * versions) over every resource in the namespace.
        total_bytes: (map(.size * .versions) | add)
      })
    # Largest totals first.
    | sort_by(.total_bytes) | reverse
    # Keep the top ten as a single array. Slicing keeps the array intact,
    # whereas streaming the entries with limit() and wrapping each one in
    # [.] would emit ten separate one-element arrays.
    | .[0:10]
    # Format the totals for display.
    | map({
        namespace: .namespace,
        total_bytes: (.total_bytes | format_number)
      })
  ' keys_raw.json
}

#
function display_largest_10_namespaces_by_key_count() {
  # Print the ten namespaces owning the most records in keys_raw.json, most
  # populated first, as a JSON array of {namespace, count} objects.
  local -r busiest_namespaces='
    group_by(.namespace)
    | map({namespace: .[0].namespace, count: length})
    | sort_by(.count)
    | reverse
    | .[:10]
  '
  jq -s "$busiest_namespaces" keys_raw.json
}

function display_largest_10_groups_by_key_count() {
  # Print the ten groups owning the most records in keys_raw.json, most
  # populated first, as a JSON array of {group, count} objects.
  jq --slurp '
    group_by(.group)
    | map({group: (.[0] | .group), count: length})
    | sort_by(.count)
    | reverse
    | .[:10]
  ' keys_raw.json
}

function display_highest_10_versions() {
  # Print the ten records of keys_raw.json with the highest "versions"
  # value, highest first, as a JSON array of the full records.
  local -r most_rewritten='
    sort_by(.versions)
    | reverse
    | .[:10]
  '
  jq --slurp "$most_rewritten" keys_raw.json
}

dkeightley/object-count-size.md

Select an option

No results found

Select an option

No results found

Exec into the etcd container

RKE1

RKE2

Count all objects

Optional: only specific object, eg: secrets

Sort the output

Copy the file if needed

RKE1

RKE2

dkeightley commented Jan 17, 2024

Uh oh!

gberche-orange commented Mar 17, 2026

Uh oh!