Skip to content

Instantly share code, notes, and snippets.

@pyToshka
Created March 7, 2025 10:06
Show Gist options
  • Select an option

  • Save pyToshka/f9ace07f5fba7e07d7f144c058ced7ef to your computer and use it in GitHub Desktop.

Select an option

Save pyToshka/f9ace07f5fba7e07d7f144c058ced7ef to your computer and use it in GitHub Desktop.
VK Cloud Prometheus exporter example
import http.client
import logging
import os
import json_logging
import requests
from flask import Flask, Response
from flask_caching import Cache
from healthcheck import HealthCheck, EnvironmentDump
from prometheus_client import CollectorRegistry, generate_latest
from prometheus_client.metrics_core import (
GaugeMetricFamily,
)
# Flask / Flask-Caching settings applied to the application below.
config = {
    # NOTE(review): DEBUG is hard-coded on while the app is served on
    # 0.0.0.0 in the __main__ block; confirm this is intended outside
    # local development.
    "DEBUG": True,
    "CACHE_TYPE": "SimpleCache",
    "CACHE_DEFAULT_TIMEOUT": 300,
}
app = Flask(__name__)
app.config.from_mapping(config)
cache = Cache(app)

# Upper-cased so that e.g. EXPORTER_LOG_LEVEL=debug is accepted by
# logger.setLevel() and by the "DEBUG" comparison in the __main__ block.
log_level = os.environ.get("EXPORTER_LOG_LEVEL", "INFO").upper()

# Parsed into a real boolean: the raw environment string is truthy even when
# it is "false", which made it impossible to switch JSON logging off.
enable_json_formatter = os.environ.get(
    "ENABLE_JSON_FORMATTER", "true"
).strip().lower() in {"1", "true", "yes", "on"}

# Comma-separated region identifiers; surrounding whitespace and empty items
# are dropped so "msk, kz" does not produce a " kz" region in request URLs.
vk_regions = [
    item.strip()
    for item in os.environ.get("VK_REGIONS", "msk,kz").split(",")
    if item.strip()
]

json_logging.init_flask(enable_json=enable_json_formatter)
json_logging.init_request_instrument(app)
logger = logging.getLogger("vk-services-exporter")
logging.basicConfig()
json_logging.config_root_logger()
logger.setLevel(log_level)

# Register lower-case names for the standard levels.
logging.addLevelName(logging.ERROR, "error")
logging.addLevelName(logging.CRITICAL, "critical")
logging.addLevelName(logging.WARNING, "warning")
logging.addLevelName(logging.INFO, "info")
logging.addLevelName(logging.DEBUG, "debug")
logger.addHandler(logging.NullHandler())

# Operational endpoints provided by the `healthcheck` package.
health = HealthCheck()
environment_dump = EnvironmentDump(include_process=False)
app.add_url_rule("/healthcheck", "healthcheck", view_func=lambda: health.run())
app.add_url_rule(
    "/environment",
    "environment",
    view_func=lambda: environment_dump.run(),
)
def get_regions(regions):
    """Return the ``attributes.name`` value of every region entry.

    Args:
        regions: Iterable of dicts, each holding an ``"attributes"`` dict
            with a ``"name"`` key. ``None`` or an empty iterable is accepted
            and treated as "no regions".

    Returns:
        A list with one name per entry, in input order.
    """
    # A missing relation can arrive as None; treat it as "no regions"
    # instead of failing on iteration.
    if not regions:
        return []
    return [region.get("attributes").get("name") for region in regions]
# Base URL of the VK Cloud status API queried by the collector.
_STATUS_API_URL = "https://status.msk.cloud.vk.com/instrapi/api"
# Timeout, in seconds, applied to every status API request.
_STATUS_API_TIMEOUT = 15
# Request headers sent when listing incidents.
_INCIDENT_HEADERS = {"Accept-Language": "en-US,en;q=0.5"}


class VkCloudCollectorAll(CollectorRegistry):
    """Collector that turns VK Cloud status API responses into gauges.

    ``collect`` queries the current component status for every region in the
    module-level ``vk_regions`` list and the incident list, and yields one
    ``GaugeMetricFamily`` per exported metric.
    """

    def __init__(self):
        super().__init__()
        logger.info("Start collector")

    @staticmethod
    def _get_json(url, headers=None):
        """Fetch *url* and return the decoded JSON body.

        Raises:
            requests.HTTPError: if the API answers with an error status, so
                an error page is never parsed as if it were status data.
        """
        response = requests.get(url, timeout=_STATUS_API_TIMEOUT, headers=headers)
        response.raise_for_status()
        return response.json()

    @staticmethod
    def _label(value):
        """Return *value* as a label string; ``None`` becomes ``""``."""
        return "" if value is None else str(value)

    def collect(self):
        """Yield the current-status gauges followed by ``vk_past_incidents``.

        Yields:
            Five per-region gauges (affected components, components under
            incident, components under maintenance, maintenances, incidents)
            and then one gauge with a sample per incident, whose value is 0
            for a resolved incident and 1 otherwise.
        """
        # Keyed by the field name in the API's "data" object. Insertion order
        # is also the order in which the families are yielded.
        current_status_gauges = {
            "numberOfAffectedComponents": GaugeMetricFamily(
                "vk_current_number_of_affected_components",
                "VK Current number of affected components",
                labels=["region"],
            ),
            "numberOfComponentsUnderIncident": GaugeMetricFamily(
                "vk_current_number_of_components_under_incident",
                "VK Current number of components under incident",
                labels=["region"],
            ),
            "numberOfComponentsUnderMaintenance": GaugeMetricFamily(
                "vk_current_number_of_components_under_maintenance",
                "VK Current number of components under maintenance",
                labels=["region"],
            ),
            "numberOfMaintenances": GaugeMetricFamily(
                "vk_current_number_of_maintenances",
                "VK Current number of maintenances",
                labels=["region"],
            ),
            "numberOfIncidents": GaugeMetricFamily(
                "vk_current_number_of_incidents",
                "VK Current number of incidents",
                labels=["region"],
            ),
        }

        for region in vk_regions:
            current_status = self._get_json(
                f"{_STATUS_API_URL}/components/current-status/{region}"
            )
            data = current_status.get("data") or {}
            for field, gauge in current_status_gauges.items():
                value = data.get(field)
                if value is None:
                    # A sample without a numeric value cannot be exported;
                    # skip it rather than break the whole scrape.
                    logger.warning(
                        "Status API returned no %s for region %s", field, region
                    )
                    continue
                gauge.add_metric([region], value=value)

        past_incidents_metric = GaugeMetricFamily(
            "vk_past_incidents",
            "VK previous incidents metric",
            labels=[
                "uuid",
                "name",
                "initial_date",
                "created_at",
                "updated_at",
                "published_at",
                "region",
                "postmortem",
                "status",
            ],
        )
        incidents = self._get_json(
            f"{_STATUS_API_URL}/incidents?populate=*", headers=_INCIDENT_HEADERS
        )
        for incident in incidents.get("data") or []:
            attributes = incident.get("attributes") or {}

            region_entries = (attributes.get("regions") or {}).get("data")
            region = ",".join(
                str(name) for name in get_regions(region_entries or [])
            )

            postmortem = (attributes.get("postmortem") or {}).get("data")
            if not postmortem:
                postmortem = "No Data"

            # The state is decided per incident by its last update. It is
            # reset for every incident so that one without any updates does
            # not inherit the state of the previous incident.
            status, incident_value = "in progress", 1
            incident_updates = attributes.get("incidentUpdate") or []
            if incident_updates and incident_updates[-1].get("status") == "resolved":
                status, incident_value = "resolved", 0

            label_values = [
                attributes.get("uuid"),
                attributes.get("name"),
                attributes.get("initial_date"),
                attributes.get("createdAt"),
                attributes.get("updatedAt"),
                attributes.get("publishedAt"),
                region,
                postmortem,
                status,
            ]
            # Label values must be strings for the text exposition format.
            past_incidents_metric.add_metric(
                [self._label(value) for value in label_values],
                value=incident_value,
            )

        yield from current_status_gauges.values()
        yield past_incidents_metric

    @staticmethod
    def past_incidents():
        """Yield a gauge family with one zero-valued sample per incident.

        Not called by ``collect``, which builds its own ``vk_past_incidents``
        family.

        NOTE(review): the family name below is the same as the
        affected-components gauge emitted by ``collect`` and looks like a
        copy/paste leftover. The ``region``, ``postmortem`` and ``status``
        label values are passed as the raw objects from the API response
        rather than strings. Confirm the intent before wiring this method
        into ``collect``.
        """
        incidents = VkCloudCollectorAll._get_json(
            f"{_STATUS_API_URL}/incidents?populate=*", headers=_INCIDENT_HEADERS
        )
        past_incidents_metric = GaugeMetricFamily(
            "vk_current_number_of_affected_components",
            "VK previous incidents metric",
            labels=[
                "uuid",
                "name",
                "initial_date",
                "created_at",
                "updated_at",
                "published_at",
                "region",
                "message",
                "postmortem",
                "status",
            ],
        )
        for incident in incidents["data"]:
            attributes = incident.get("attributes")
            past_incidents_metric.add_metric(
                [
                    attributes.get("uuid"),
                    attributes.get("name"),
                    attributes.get("initial_date"),
                    attributes.get("createdAt"),
                    attributes.get("updatedAt"),
                    attributes.get("publishedAt"),
                    get_regions(attributes.get("regions").get("data")),
                    attributes.get("message"),
                    attributes.get("postmortem"),
                    attributes.get("incidentUpdate"),
                ],
                value=0,
            )
        yield past_incidents_metric

    @staticmethod
    def severity_handler(incident):
        """Map an incident dict to a numeric severity.

        Returns:
            0 when ``status`` is ``"resolved"``; otherwise 1 or 2 when
            ``levelId`` is 1 or 2 respectively; 3 in every other case.
        """
        if incident.get("status") == "resolved":
            return 0
        level_id = incident.get("levelId")
        if level_id == 1:
            return 1
        if level_id == 2:
            return 2
        return 3
# Static landing page linking to the exporter's endpoints.
_HOME_PAGE = """
<html><head><title>VK Cloud Exporter</title></head>
<body>
<h1>VK Cloud Status Exporter for Prometheus </h1>
<p><a href="/metrics">Metrics</a></p>
<p><a href="/healthcheck">Status</a></p>
<p><a href="/environment">Environment</a></p>
</body>
</html>
"""


@app.route("/")
@cache.cached(timeout=50, key_prefix="home")
def home():
    """Serve the HTML index page with links to the other endpoints."""
    return _HOME_PAGE
@app.route("/metrics", methods=["GET"])
@cache.cached(timeout=50, key_prefix="all_services")
def get_all_metrics():
    """Render all VK Cloud metrics in the Prometheus text exposition format.

    A fresh registry and collector are created on each call; the view result
    is cached for 50 seconds by the ``cache.cached`` decorator.

    Returns:
        A Flask ``Response`` whose body is the output of ``generate_latest``
        and whose Content-Type is the one published by ``prometheus_client``
        for that output.
    """
    # Imported here so the view does not depend on the file-level import list.
    from prometheus_client import CONTENT_TYPE_LATEST

    registry = CollectorRegistry()
    registry.register(VkCloudCollectorAll())
    # `content_type` (not `mimetype`) is used because the constant carries
    # parameters (format version, charset) in addition to the media type.
    return Response(generate_latest(registry), content_type=CONTENT_TYPE_LATEST)
if __name__ == "__main__":
    # Script entry point: read the listen port, optionally enable
    # http.client wire-level debugging, then start the Flask server.
    listen_port = os.environ.get("EXPORTER_PORT", "5000")

    debug_requested = log_level == "DEBUG"
    if debug_requested:
        http.client.HTTPConnection.debuglevel = 1

    logger.info("Starting VK Cloud Status prometheus exporter")
    # The app is reached through the cache extension object, as before.
    cache.app.run("0.0.0.0", int(listen_port))  # nosec B104
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment