Last active
January 21, 2021 21:10
-
-
Save portante/6445817500377826f6620e6527af16c2 to your computer and use it in GitHub Desktop.
A script to generate a report of Elasticsearch index usage (from _cat/indices?v&bytes=b) by prefix for a set of known date suffixes.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python2
# A script to generate a report of Elasticsearch index usage
# (from _cat/indices?v&bytes=b) by prefix for a set of known
# date suffixes.
#
# E.g.
# $ curl -X GET http://localhost:9200/_cat/indices?v\&bytes=b -o indices.lis
# $ ./sum-es-indices.py indices.lis
#
# A second argument of b, k, m, or g can be given to specify the
# units in which the "size" of an index is reported.
import sys | |
import re | |
import collections | |
import operator | |
import locale | |
# Prefer the en_US locale for number formatting.  That locale may not be
# installed under this exact name (or at all), in which case setlocale()
# raises locale.Error; fall back to the UTF-8 variant and then to the
# user's default ('') instead of aborting before any work is done.  If
# every candidate fails, the interpreter's current locale is kept.
for _locale_name in ('en_US', 'en_US.UTF-8', ''):
    try:
        locale.setlocale(locale.LC_ALL, _locale_name)
    except locale.Error:
        continue
    break

# One dict per index row read from the listing file.
open_indices = []
closed_indices = []
# Maps a prefix (or a full index name) to the number of indices under it.
prefixes = collections.defaultdict(int)

# Multiplier for each accepted unit letter.
_factors = {'b': 1, 'k': 1024, 'm': 1024 * 1024, 'g': 1024 * 1024 * 1024}

# The optional second command-line argument selects the unit; default 'b'.
units = sys.argv[2] if len(sys.argv) > 2 else 'b'
if units not in _factors:
    # Keep the historical behaviour (factor of 1) but say so, rather than
    # silently ignoring a mistyped unit.
    sys.stderr.write(
        "unknown units %r, expected one of b, k, m, g; using a factor of 1\n"
        % (units,))
factor = _factors.get(units, 1)
# Parse the listing file named by the first command-line argument into
# open_indices / closed_indices.
with open(sys.argv[1], "r") as fp:
    # The first line is the column header.
    header = fp.readline()
    header_parts = header.split()
    # When the header's fourth column is "uuid", every numeric column of an
    # open-index row sits one position further right.
    has_uuid = len(header_parts) > 3 and header_parts[3] == 'uuid'
    first_num = 4 if has_uuid else 3
    for line in fp:
        # split() already discards the trailing newline; slicing off the
        # last character first would eat a digit of the final row when the
        # file does not end with a newline.
        parts = line.split()
        if not parts:
            # Blank line: nothing to record.
            continue
        if parts[0] == "close" and len(parts) >= 2:
            # A closed index has no health value, so "close" is the first
            # field and the index name the second.  Anything after the name
            # (presumably the uuid in listings that have that column --
            # TODO confirm against the Elasticsearch version in use) is
            # ignored.
            closed_indices.append({'index': parts[1]})
            continue
        open_indices.append({
            'index': parts[2],
            'status': parts[1],
            'health': parts[0],
            'pri': int(parts[first_num]),
            'rep': int(parts[first_num + 1]),
            'docs.count': int(parts[first_num + 2]),
            'docs.deleted': int(parts[first_num + 3]),
            'store.size': int(parts[first_num + 4]) * factor,
            'pri.store.size': int(parts[first_num + 5]) * factor,
        })
# Date-suffix patterns, tried in this order.  In each one, group 1 is the
# index prefix that precedes the trailing date.
dotdate_r, dashdate_r, dashdateym_r, numdate_r, numdateym_r = patterns = [
    re.compile(expr)
    for expr in (
        r"(.+)([0-9]{4,})\.([0-9]{2,})\.([0-9]{2,})$",  # <prefix>YYYY.MM.DD
        r"(.+)([0-9]{4,})-([0-9]{2,})-([0-9]{2,})$",    # <prefix>YYYY-MM-DD
        r"(.+)([0-9]{4,})-([0-9]{2,})$",                # <prefix>YYYY-MM
        r"(.+)([0-9]{4,})([0-9]{2,})([0-9]{2,})$",      # <prefix>YYYYMMDD
        r"(.+)([0-9]{4,})([0-9]{2,})$",                 # <prefix>YYYYMM
    )
]
def domatch(pat, index_name):
    """Return the prefix of *index_name* according to *pat*, or None.

    *pat* is a compiled regular expression whose first group captures the
    prefix.  On a match the module-level ``prefixes`` counter for that
    prefix is incremented as a side effect; on a miss nothing is changed.
    """
    found = pat.match(index_name)
    if found is not None:
        stem = found.group(1)
        prefixes[stem] += 1
        return stem
    return None
# Tag every index (open ones first, then closed ones) with the prefix
# produced by the first pattern that matches its name.  An index that no
# pattern matches is counted in ``prefixes`` under its full name and gets
# no 'prefix' key.
for listing in (open_indices, closed_indices):
    for entry in listing:
        index_name = entry['index']
        matched = None
        for regex in patterns:
            matched = domatch(regex, index_name)
            if matched:
                break
        if matched:
            entry['prefix'] = matched
        else:
            prefixes[index_name] += 1
# Aggregate per-prefix statistics in a single pass over the indices instead
# of rescanning every index once per prefix.
stats = {}
for pre in prefixes:
    stats[pre] = {
        'closed': 0, 'opened': 0,
        'green': 0, 'yellow': 0, 'red': 0,
        'docs': 0, 'deleted': 0, 'size': 0, 'max_pri': 0,
    }

for idx in closed_indices:
    # An index without a 'prefix' key is grouped under its own name.
    bucket = stats.get(idx.get('prefix', idx['index']))
    if bucket is not None:
        bucket['closed'] += 1

for idx in open_indices:
    bucket = stats.get(idx.get('prefix', idx['index']))
    if bucket is None:
        # Not one of the known prefixes: contributes to no row.
        continue
    health = idx['health']
    if health not in ('green', 'yellow', 'red'):
        # Explicit check rather than an assert, which vanishes under -O.
        raise ValueError(
            "unexpected health %r for index %r" % (health, idx['index']))
    bucket['opened'] += 1
    bucket[health] += 1
    bucket['docs'] += idx['docs.count']
    bucket['deleted'] += idx['docs.deleted']
    bucket['size'] += idx['store.size']
    bucket['max_pri'] = max(idx['pri'], bucket['max_pri'])
def n(val):
    """Format *val* as an integer with locale-aware digit grouping."""
    # locale.format() was deprecated and later removed from the standard
    # library; locale.format_string() accepts the same arguments here.
    return locale.format_string("%d", val, grouping=True)


def f(val):
    """Format *val* with one decimal place and locale-aware digit grouping."""
    return locale.format_string("%0.1f", val, grouping=True)
# Print the report: a header row, then one row per prefix, ordered by the
# number of indices under the prefix (largest first).
format_str = "%9s %6s %6s %5s %6s %5s %20s %10s %7s %20s %10s %s"
# print(...) with a single parenthesised argument behaves the same on
# Python 2 and Python 3, unlike the bare print statement.
print(format_str % ("indices", 'closed', 'opened', 'green', 'yellow', 'red', 'docs', 'deleted', 'max_pri', 'size', 'avg/sz', 'prefix'))
sorted_prefixes = sorted(prefixes.items(), key=operator.itemgetter(1), reverse=True)
for pre, v in sorted_prefixes:
    stat = stats[pre]
    # Total size scaled back down by the unit factor.  '//' keeps the
    # integer division that '/' performs on Python 2 ints.
    size_in_units = stat['size'] // factor
    # Average stored size per document; 0 when the prefix has no documents.
    avg_size = (stat['size'] / float(stat['docs'])) if stat['docs'] else 0
    print(format_str % (
        n(v),
        n(stat['closed']),
        n(stat['opened']),
        n(stat['green']),
        n(stat['yellow']),
        n(stat['red']),
        n(stat['docs']),
        n(stat['deleted']),
        stat['max_pri'],
        f(size_in_units),
        f(avg_size),
        pre,
    ))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment