-
-
Save Shaverdoff/4243c9529f28b263e192f37eb33016c8 to your computer and use it in GitHub Desktop.
vk cloud prometheus exporter example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import http.client | |
| import logging | |
| import os | |
| import json_logging | |
| import requests | |
| from flask import Flask, Response | |
| from flask_caching import Cache | |
| from healthcheck import HealthCheck, EnvironmentDump | |
| from prometheus_client import CollectorRegistry, generate_latest | |
| from prometheus_client.metrics_core import ( | |
| GaugeMetricFamily, | |
| ) | |
# Environment-variable values that are interpreted as "enabled".
_TRUTHY_VALUES = {"1", "true", "yes", "on"}

config = {
    # Flask debug mode turns on the Werkzeug debugger and reloader. The
    # exporter binds to 0.0.0.0, so debug mode must be an explicit opt-in
    # (EXPORTER_DEBUG=true) rather than the default.
    "DEBUG": os.environ.get("EXPORTER_DEBUG", "false").strip().lower() in _TRUTHY_VALUES,
    "CACHE_TYPE": "SimpleCache",
    "CACHE_DEFAULT_TIMEOUT": 300,
}

app = Flask(__name__)
app.config.from_mapping(config)
cache = Cache(app)

# Level names are looked up in upper case, so accept "debug" as well as "DEBUG".
log_level = os.environ.get("EXPORTER_LOG_LEVEL", "INFO").upper()
# Parse the flag into a real boolean: a raw string such as "false" is truthy
# and would make it impossible to switch JSON formatting off.
enable_json_formatter = (
    os.environ.get("ENABLE_JSON_FORMATTER", "true").strip().lower() in _TRUTHY_VALUES
)
# Comma-separated region codes; surrounding whitespace and empty items are
# dropped so that "msk, kz" or a trailing comma do not produce bad URLs.
vk_regions = [
    region.strip()
    for region in os.environ.get("VK_REGIONS", "msk,kz").split(",")
    if region.strip()
]

json_logging.init_flask(enable_json=enable_json_formatter)
json_logging.init_request_instrument(app)

logger = logging.getLogger("vk-services-exporter")
logging.basicConfig()
json_logging.config_root_logger()
logger.setLevel(log_level)

# Emit lower-case level names in log records.
logging.addLevelName(logging.ERROR, "error")
logging.addLevelName(logging.CRITICAL, "critical")
logging.addLevelName(logging.WARNING, "warning")
logging.addLevelName(logging.INFO, "info")
logging.addLevelName(logging.DEBUG, "debug")
logger.addHandler(logging.NullHandler())

# Health and environment-dump endpoints provided by the `healthcheck` package.
health = HealthCheck()
environment_dump = EnvironmentDump(include_process=False)
app.add_url_rule("/healthcheck", "healthcheck", view_func=lambda: health.run())
app.add_url_rule(
    "/environment",
    "environment",
    view_func=lambda: environment_dump.run(),
)
def get_regions(regions):
    """Return the ``attributes.name`` value of every region record.

    Args:
        regions: Iterable of dicts, each holding an ``"attributes"`` dict
            with a ``"name"`` entry. ``None`` is treated as an empty list.

    Returns:
        A list with one name per input record, in input order.
    """
    # `or []` lets callers pass a missing/None relation without crashing.
    return [region.get("attributes").get("name") for region in regions or []]
class VkCloudCollectorAll(CollectorRegistry):
    """Collector that turns the VK Cloud status API into Prometheus gauges.

    On every ``collect()`` call it queries the current-status endpoint once
    per entry in ``vk_regions`` and the incidents endpoint once, and yields
    five per-region gauges plus one ``vk_past_incidents`` gauge.

    NOTE(review): the class subclasses ``CollectorRegistry`` although it is
    used as a collector registered into another registry; the base class is
    kept unchanged so existing callers are not affected.
    """

    _STATUS_API = "https://status.msk.cloud.vk.com/instrapi/api"
    _REQUEST_TIMEOUT = 15
    _INCIDENT_HEADERS = {"Accept-Language": "en-US,en;q=0.5"}

    # (metric name, help text, field of the current-status "data" object)
    _CURRENT_STATUS_GAUGES = (
        (
            "vk_current_number_of_affected_components",
            "VK Current number of affected components",
            "numberOfAffectedComponents",
        ),
        (
            "vk_current_number_of_components_under_incident",
            "VK Current number of components under incident",
            "numberOfComponentsUnderIncident",
        ),
        (
            "vk_current_number_of_components_under_maintenance",
            "VK Current number of components under maintenance",
            "numberOfComponentsUnderMaintenance",
        ),
        (
            "vk_current_number_of_maintenances",
            "VK Current number of maintenances",
            "numberOfMaintenances",
        ),
        (
            "vk_current_number_of_incidents",
            "VK Current number of incidents",
            "numberOfIncidents",
        ),
    )

    # Incident labels that precede the optional "message" label.
    _INCIDENT_LABELS = (
        "uuid",
        "name",
        "initial_date",
        "created_at",
        "updated_at",
        "published_at",
        "region",
    )

    def __init__(self):
        super().__init__()
        logger.info("Start collector")

    def collect(self):
        """Fetch the status API and yield the exporter's metric families.

        Yields:
            The five current-status gauges (labelled by ``region``) in their
            declaration order, followed by the ``vk_past_incidents`` gauge.

        Raises:
            requests.RequestException: if a request fails or the API answers
                with an HTTP error status.
        """
        gauges = [
            (GaugeMetricFamily(name, documentation, labels=["region"]), field)
            for name, documentation, field in self._CURRENT_STATUS_GAUGES
        ]

        for region in vk_regions:
            payload = self._get_json(
                f"{self._STATUS_API}/components/current-status/{region}"
            )
            data = payload.get("data")
            if not data:
                logger.warning("No current-status data returned for region %s", region)
                continue
            for gauge, field in gauges:
                value = data.get(field)
                # A missing counter cannot be rendered as a sample; skip it
                # instead of emitting None and breaking the exposition.
                if value is not None:
                    gauge.add_metric([region], value=value)

        # Build the incidents metric before yielding anything so that all
        # upstream requests are finished once the first family is yielded.
        incidents_metric = self._build_incidents_metric(include_message=False)

        for gauge, _field in gauges:
            yield gauge
        yield incidents_metric

    @staticmethod
    def past_incidents():
        """Yield the ``vk_past_incidents`` gauge including a ``message`` label.

        Standalone variant of the incidents metric that ``collect()`` does
        not call. It uses the same status/value rules as ``collect()`` and
        adds the incident ``message`` as an extra label.

        Yields:
            A single ``GaugeMetricFamily``.
        """
        yield VkCloudCollectorAll._build_incidents_metric(include_message=True)

    @staticmethod
    def severity_handler(incident):
        """Map an incident dict to a numeric severity.

        Returns:
            0 when ``status`` is ``"resolved"``; otherwise 1 or 2 when
            ``levelId`` is 1 or 2; 3 for anything else.
        """
        if incident.get("status") == "resolved":
            return 0
        level_id = incident.get("levelId")
        if level_id in (1, 2):
            return level_id
        return 3

    @classmethod
    def _get_json(cls, url, headers=None):
        """GET *url* and return the decoded JSON body, raising on HTTP errors."""
        response = requests.get(url, timeout=cls._REQUEST_TIMEOUT, headers=headers)
        # Fail with a clear HTTPError instead of a JSON decode error when the
        # API answers with an error page.
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _label(value):
        """Convert a raw API value to a label string (``None`` becomes ``""``)."""
        return "" if value is None else str(value)

    @staticmethod
    def _incident_state(updates):
        """Return ``(status label, gauge value)`` for a list of incident updates.

        The last update in the list decides: ``("resolved", 0)`` when its
        status is ``"resolved"``, otherwise ``("in progress", 1)``. An
        incident without any update has no recorded resolution and is
        therefore reported as in progress.
        """
        if updates and updates[-1].get("status") == "resolved":
            return "resolved", 0
        return "in progress", 1

    @classmethod
    def _build_incidents_metric(cls, include_message=False):
        """Fetch the incidents endpoint and build the ``vk_past_incidents`` gauge."""
        label_names = list(cls._INCIDENT_LABELS)
        if include_message:
            label_names.append("message")
        label_names += ["postmortem", "status"]

        metric = GaugeMetricFamily(
            "vk_past_incidents",
            "VK previous incidents metric",
            labels=label_names,
        )

        payload = cls._get_json(
            f"{cls._STATUS_API}/incidents?populate=*",
            headers=cls._INCIDENT_HEADERS,
        )
        for incident in payload.get("data") or []:
            attributes = incident.get("attributes") or {}
            regions = (attributes.get("regions") or {}).get("data") or []
            postmortem = (attributes.get("postmortem") or {}).get("data")
            # State is derived per incident so that nothing leaks over from
            # the previously processed incident.
            status, value = cls._incident_state(attributes.get("incidentUpdate"))

            labels = [
                attributes.get("uuid"),
                attributes.get("name"),
                attributes.get("initial_date"),
                attributes.get("createdAt"),
                attributes.get("updatedAt"),
                attributes.get("publishedAt"),
                ",".join(str(name) for name in get_regions(regions)),
            ]
            if include_message:
                labels.append(attributes.get("message"))
            labels += [postmortem or "No Data", status]

            # Label values must be strings for the text exposition format.
            metric.add_metric([cls._label(item) for item in labels], value=value)
        return metric
@app.route("/")
@cache.cached(timeout=50, key_prefix="home")
def home():
    """Return the static HTML landing page linking to the exporter's endpoints."""
    return """
    <html><head><title>VK Cloud Exporter</title></head>
    <body>
    <h1>VK Cloud Status Exporter for Prometheus </h1>
    <p><a href="/metrics">Metrics</a></p>
    <p><a href="/healthcheck">Status</a></p>
    <p><a href="/environment">Environment</a></p>
    </body>
    </html>
    """
@app.route("/metrics", methods=["GET"])
@cache.cached(timeout=50, key_prefix="all_services")
def get_all_metrics():
    """Render all VK Cloud metrics in the Prometheus text exposition format.

    A fresh registry holding one ``VkCloudCollectorAll`` is built per call;
    the rendered response is cached for 50 seconds by the decorator above.

    Returns:
        A Flask ``Response`` carrying the exposition payload.
    """
    # Function-scope import: prometheus_client is already a dependency of
    # this file, and this keeps the change local to the view.
    from prometheus_client import CONTENT_TYPE_LATEST

    registry = CollectorRegistry()
    registry.register(VkCloudCollectorAll())
    # Advertise the exposition format version instead of a bare text/plain.
    return Response(generate_latest(registry), content_type=CONTENT_TYPE_LATEST)
if __name__ == "__main__":
    # Port comes from EXPORTER_PORT; converted to int only when the server starts.
    port_text = os.environ.get("EXPORTER_PORT", "5000")

    # At DEBUG level, also let http.client print its connection debug output.
    if log_level == "DEBUG":
        http.client.HTTPConnection.debuglevel = 1

    logger.info("Starting VK Cloud Status prometheus exporter")
    cache.app.run(host="0.0.0.0", port=int(port_text))  # nosec B104
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment