-
-
Save krzko/9523e9aec7eab81a65698e176bbb35e2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import math | |
import textwrap | |
from datetime import timedelta | |
# Number of seconds represented by each single-letter duration suffix
# (the "d" in "30d", for example).
DURATIONS = {
    "s": 1,
    "m": 60,
    "h": 60 * 60,
    "d": 24 * 60 * 60,
    "w": 7 * 24 * 60 * 60,
}

# Error rates, expressed as fractions (0.0001 == 0.01%), that get one table
# row each in the report.
ERROR_RATES = [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5, 0.9, 1]

# Honeycomb will alert based on the error rate measured over the last 1/4 of
# the configured exhaustion interval
LOOKBACK_FRACTION = 1 / 4
def duration(s, units=None):
    """Convert a duration string such as ``"30d"`` into a number of seconds.

    The string is a whole number optionally followed by a single-letter unit
    (case-insensitive). A string made only of digits, of any length, is taken
    to be a number of seconds.

    Args:
        s: The text to parse, e.g. ``"45"``, ``"15m"``, ``"1D"``.
        units: Optional mapping of unit letter to seconds-per-unit. When
            omitted, the module-level ``DURATIONS`` table is used.

    Returns:
        The duration in seconds, as an ``int``.

    Raises:
        ValueError: If the text is empty, the unit is unknown, the count is
            not a whole number, or the count is negative. ``argparse`` turns
            a ``ValueError`` from a ``type=`` callable into a usage error.
    """
    table = DURATIONS if units is None else units

    text = s.strip()
    if not text:
        # Guard explicitly: indexing an empty string would raise IndexError,
        # which argparse does not convert into a friendly usage message.
        raise ValueError("duration must not be empty")

    if text[-1].isdecimal():
        # No unit suffix at all (e.g. "90"): the whole string is the count.
        count, unit = text, "s"
    else:
        count, unit = text[:-1], text[-1].lower()

    if unit not in table:
        raise ValueError(f"'{unit}' is not a recognised unit of duration")

    try:
        amount = int(count)
    except ValueError:
        raise ValueError(f"'{s}' does not start with a whole number") from None

    if amount < 0:
        raise ValueError(f"'{s}' is negative; durations must not be negative")

    return amount * table[unit]
# Command-line interface. Percentages are given as plain numbers (99.9 means
# 99.9 percent); intervals use the syntax understood by duration().
parser = argparse.ArgumentParser(
    description=(
        "Tabulate, for a range of error rates, when an SLO burn alert fires "
        "and how much error budget remains at that point."
    ),
)
parser.add_argument(
    "--slo",
    type=float,
    default=99.9,
    help="SLO target, in percent (default: %(default)s)",
)
parser.add_argument(
    "--slo-interval",
    type=duration,
    default="30d",
    help="period the SLO is measured over, e.g. 30d or 4w (default: %(default)s)",
)
# float, not int: the report prints the remaining budget with two decimals and
# invites the user to feed it back in here, so fractional values must parse.
parser.add_argument(
    "--starting-budget",
    type=float,
    default=100,
    help="error budget remaining at the start, in percent (default: %(default)s)",
)
parser.add_argument(
    "--exhaustion-interval",
    type=duration,
    default="1d",
    help="burn alert exhaustion interval, e.g. 1d or 4h (default: %(default)s)",
)
def time_to_alert(error_rate, slo, slo_interval, starting_budget, exhaustion_interval, *, lookback_fraction=None):
    """Estimate how long a sustained error rate takes to trigger the burn alert.

    The calculation first derives the error rate that would consume the
    starting budget within ``exhaustion_interval``, then scales the lookback
    window (``lookback_fraction * exhaustion_interval``) by the ratio of that
    threshold rate to ``error_rate``.

    Args:
        error_rate: Fraction of events that fail (0.01 == 1%).
        slo: SLO target as a fraction (0.999 == 99.9%).
        slo_interval: Length of the SLO period, in the same time unit as
            ``exhaustion_interval`` (the script passes seconds).
        starting_budget: Fraction of the error budget still available.
        exhaustion_interval: The alert's exhaustion interval.
        lookback_fraction: Share of the exhaustion interval the alert looks
            back over. Defaults to the module-level ``LOOKBACK_FRACTION``.

    Returns:
        The time until the alert fires, truncated to an ``int``, or
        ``math.inf`` when the computed time exceeds the exhaustion interval
        or when there are no errors at all.
    """
    if error_rate <= 0:
        # Without errors nothing is burned, so no alert ever fires; this also
        # avoids dividing by zero below.
        return math.inf

    if lookback_fraction is None:
        lookback_fraction = LOOKBACK_FRACTION

    # Error rate that would use up the starting budget over the whole SLO period.
    max_error_rate = starting_budget * (1 - slo)
    # The same budget squeezed into the (shorter) exhaustion interval.
    max_error_rate_exhaustion = max_error_rate * (slo_interval / exhaustion_interval)

    seconds = (lookback_fraction * max_error_rate_exhaustion * exhaustion_interval) / error_rate
    if seconds > exhaustion_interval:
        return math.inf
    return int(seconds)
def remaining_budget(error_rate, slo, slo_interval, starting_budget, calculation_interval):
    """Return the error budget left after errors persist for an interval.

    ``error_rate``, ``slo`` and ``starting_budget`` are fractions; the two
    intervals must share a time unit. The result goes negative once more
    than the starting budget has been consumed.
    """
    # How many times faster than the SLO allows the errors are arriving.
    burn_multiple = error_rate / (1 - slo)
    # Portion of the SLO period covered by the interval being evaluated.
    elapsed_share = calculation_interval / slo_interval
    return starting_budget - burn_multiple * elapsed_share
def duration_to_string(d):
    """Render ``d`` seconds using ``timedelta``'s text form, e.g. ``1 day, 0:00:00``."""
    span = timedelta(seconds=d)
    return f"{span}"
def when(d):
    """Describe an alert delay: ``"never"`` for infinity, else ``"after <duration>"``."""
    return "never" if d == math.inf else f"after {duration_to_string(d)}"
def main():
    """Parse the command line and print the burn-alert report.

    The report shows the configured SLO, starting budget and exhaustion
    interval, followed by one row per entry in ``ERROR_RATES`` giving the time
    until the alert fires and the budget remaining at that moment (or at the
    end of the exhaustion interval when no alert fires).

    Invalid argument combinations are reported through ``parser.error``, which
    prints a usage message and exits, instead of surfacing later as a
    ``ZeroDivisionError``.
    """
    args = parser.parse_args()

    # An SLO of 100% leaves no error budget and makes (1 - slo) zero, and a
    # zero-length interval is used as a divisor further down.
    if not 0 <= args.slo < 100:
        parser.error("--slo must be a percentage of at least 0 and below 100")
    if args.slo_interval <= 0:
        parser.error("--slo-interval must be greater than zero")
    if args.exhaustion_interval <= 0:
        parser.error("--exhaustion-interval must be greater than zero")

    # Percentages on the command line become fractions for the calculations.
    slo = args.slo / 100
    slo_interval = args.slo_interval
    starting_budget = args.starting_budget / 100
    exhaustion_interval = args.exhaustion_interval

    print(f"SLO: {slo:.3%} over {duration_to_string(slo_interval)}")
    print(f"Budget remaining at start: {starting_budget:.2%}")
    print(f"Burn alert exhaustion interval: {duration_to_string(exhaustion_interval)}")
    print()
    print(" error rate alert fires* budget remaining**")
    print(" ---------- ------------ ------------------")
    for e in ERROR_RATES:
        t = time_to_alert(e, slo, slo_interval, starting_budget, exhaustion_interval)
        # When the alert never fires (t is infinite), report the budget at the
        # end of the exhaustion interval instead.
        b = remaining_budget(e, slo, slo_interval, starting_budget, min(t, exhaustion_interval))
        print(f"{e:12.2%} {when(t).ljust(27)} {b:8.2%}")
    print(textwrap.dedent("""
        *
        'never' is only true if the errors stop completely within the exhaustion
        interval. If they persist then an alert will eventually fire so long as the
        error rate is above the complement of the SLO (i.e. 0.05% for an SLO of
        99.95%) and the alert exhaustion interval is less than or equal to the SLO
        interval.
        **
        'budget remaining' indicates the remaining error budget when the alert
        fires (or, if no alert fires, at the end of the exhaustion interval). You
        can simulate successive periods by plugging this number back into the
        --starting-budget argument."""))
# Run the report only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
Author
krzko
commented
Oct 24, 2022
•
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment