Created
July 24, 2017 16:37
-
-
Save flou/830756bcdb5064e5e5610d46adaa623b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
List unhealthy services in your ECS clusters | |
The script will list the clusters and fetch details about services that are | |
running in it. | |
Then it determines if the service is healthy or not: | |
* the service status must be ACTIVE | |
* the service must have a non-empty events list | |
* the service desiredCount equals its runningCount | |
* the last event message of the service ends with 'has reached a steady state.' | |
Usage: | |
# List unhealthy services in all ECS clusters | |
$ ecs_sick.py | |
# List unhealthy services in ECS clusters that contains 'dev' in their name | |
$ ecs_sick.py --filter dev | |
""" | |
from __future__ import print_function | |
from argparse import ArgumentParser | |
import boto3 | |
ECS_CLIENT = boto3.client('ecs') | |
AWS_PAGE_SIZE = 10 | |
class Colors: | |
RESET = '\033[0;0m' | |
RED = '\033[1;31m' | |
GREEN = '\033[0;32m' | |
class EcsService(object): | |
""" | |
Utility class that represents an ECS Service. | |
""" | |
def __init__(self, **entries): | |
self.__dict__.update(entries) | |
def is_up(self): | |
""" | |
Check that `service` in ECS is running and stable. | |
The `service` is stable when the last event message ends with 'has reached a | |
steady state.' and its runningCount equals its desiredCount. | |
""" | |
return ( | |
self.desiredCount == self.runningCount and | |
self.events and | |
self.events[0]['message'].endswith('has reached a steady state.') | |
) | |
def is_down(self): | |
""" | |
Check that `service` in ECS is down or unstable. | |
""" | |
return not self.is_up() | |
def status_to_text(self): | |
status_line = '{begin}{text}{end}' | |
if self.is_up(): | |
status = status_line.format(begin=Colors.GREEN, text='[OK]', end=Colors.RESET) | |
else: | |
status = status_line.format(begin=Colors.RED, text='[KO]', end=Colors.RESET) | |
return status | |
def details(self): | |
return '{status} {service:50s} desired {desired} / running {running}'.format( | |
status=self.status_to_text(), service=self.serviceName, | |
desired=self.desiredCount, running=self.runningCount) | |
def chunk(iterable, size=AWS_PAGE_SIZE): | |
""" | |
Split an `iterable` into chunks of `size` items | |
:param iterable: the iterable that is split | |
:param size: the maximum number of items in each chunk | |
""" | |
return (iterable[pos:pos + size] for pos in xrange(0, len(iterable), size)) | |
def __paginate_call(client, method, output_key, params=None): | |
""" | |
Paginate a Boto3 API call on a given method to return a flat list. | |
""" | |
def is_response_success(response): | |
return response['ResponseMetadata']['HTTPStatusCode'] == 200 | |
params = dict() if params is None else params | |
params['PaginationConfig'] = dict(PageSize=AWS_PAGE_SIZE) | |
paginator = client.get_paginator(method) | |
responses = list(paginator.paginate(**params)) | |
if not all([is_response_success(r) for r in responses]): | |
raise Exception('Error during execution of method {method}'.format(method=method)) | |
responses = [r[output_key] for r in responses] | |
return reduce(lambda x, y: x + y, responses) | |
def list_clusters(_filter=None): | |
""" | |
Return available clusters in the AWS account. | |
:param _filter: filters on any part of the name of the cluster | |
""" | |
ecs_clusters = __paginate_call(ECS_CLIENT, 'list_clusters', 'clusterArns') | |
if _filter: | |
ecs_clusters = [cluster for cluster in ecs_clusters if _filter in cluster] | |
return sorted(ecs_clusters) | |
def services_in_cluster(cluster): | |
""" | |
Return running services in the ECS cluster named `cluster`. | |
:param cluster: the cluster name or the cluster ARN | |
""" | |
params = {'cluster': cluster} | |
deployed_services = __paginate_call(ECS_CLIENT, 'list_services', 'serviceArns', params) | |
return [ | |
EcsService(**service) | |
for services in chunk(deployed_services) | |
for service in ECS_CLIENT.describe_services(cluster=cluster, services=services)['services'] | |
] | |
def parse_args(): | |
""" | |
Parse command line arguments. | |
""" | |
parser = ArgumentParser(description='List unhealthy services in your ECS clusters.') | |
parser.add_argument('-f', '--filter', required=False) | |
return parser.parse_args() | |
def main(): | |
""" | |
Main function. | |
""" | |
args = parse_args() | |
clusters = list_clusters(args.filter) | |
for cluster in clusters: | |
deployed_services = services_in_cluster(cluster) | |
down_services = [ | |
s for s in deployed_services | |
if s.status.upper() == 'ACTIVE' and s.is_down() | |
] | |
cluster_name = cluster.split('/')[-1].upper() | |
print('---- {cluster} : {size} services'.format( | |
cluster=cluster_name, size=len(deployed_services))) | |
for service in down_services: | |
print(service.details()) | |
print('') | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment