chore(slo): simplify error budget calculations (#158941)

This commit is contained in:
Kevin Delemme 2023-06-04 20:43:39 -04:00 committed by GitHub
parent 8e7e2632bb
commit 4af1bd5443
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 317 additions and 570 deletions

View file

@ -11,6 +11,7 @@ export const SLO_INDEX_TEMPLATE_NAME = '.slo-observability.sli';
// Version number embedded in the destination index name below.
export const SLO_RESOURCES_VERSION = 1;
// Ingest pipeline name: the index template name suffixed with `.monthly`.
export const SLO_INGEST_PIPELINE_NAME = `${SLO_INDEX_TEMPLATE_NAME}.monthly`;
// Destination index name: the index template name suffixed with `-v<SLO_RESOURCES_VERSION>`.
export const SLO_DESTINATION_INDEX_NAME = `${SLO_INDEX_TEMPLATE_NAME}-v${SLO_RESOURCES_VERSION}`;
// Destination index name followed by a `*` wildcard; used as the `index` of search requests.
export const SLO_DESTINATION_INDEX_PATTERN = `${SLO_DESTINATION_INDEX_NAME}*`;
/**
 * Formats the transform id for a given SLO id and revision.
 *
 * @param sloId The SLO identifier.
 * @param sloRevision The SLO revision number.
 * @returns The string `slo-<sloId>-<sloRevision>`.
 */
export const getSLOTransformId = (sloId: string, sloRevision: number) => {
  const transformId = `slo-${sloId}-${sloRevision}`;
  return transformId;
};

View file

@ -1,197 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { oneMinute } from '../../services/slo/fixtures/duration';
import { createSLO } from '../../services/slo/fixtures/slo';
import { sevenDaysRolling, weeklyCalendarAligned } from '../../services/slo/fixtures/time_window';
import { computeErrorBudget } from './compute_error_budget';
import { toDateRange } from './date_range';
// Unit tests for computeErrorBudget: rolling and calendar-aligned time windows, each with the
// occurrences and timeslices budgeting methods, followed by edge cases on the good/total counts.
describe('computeErrorBudget', () => {
describe('for rolling time window', () => {
describe('for occurrences budgeting method', () => {
it('computes the error budget', () => {
const slo = createSLO({
budgetingMethod: 'occurrences',
timeWindow: sevenDaysRolling(),
objective: { target: 0.95 },
});
const dateRange = toDateRange(slo.timeWindow);
const errorBudget = computeErrorBudget(slo, {
good: 97,
total: 100,
dateRange,
});
expect(errorBudget).toEqual({
initial: 0.05,
consumed: 0.6,
remaining: 0.4,
isEstimated: false,
});
});
});
describe('for timeslices budgeting method', () => {
it('computes the error budget', () => {
const slo = createSLO({
budgetingMethod: 'timeslices',
timeWindow: sevenDaysRolling(),
objective: { target: 0.95, timesliceTarget: 0.95, timesliceWindow: oneMinute() },
});
const dateRange = toDateRange(slo.timeWindow);
// 7 days sliced in 1m buckets = 10,080 slices
const errorBudget = computeErrorBudget(slo, {
good: 9987,
total: 10080,
dateRange,
});
expect(errorBudget).toEqual({
initial: 0.05,
consumed: 0.184524,
remaining: 0.815476,
isEstimated: false,
});
});
});
});
describe('for calendar aligned time window', () => {
describe('for occurrences budgeting method', () => {
// Freeze "now": the estimation of total events depends on the time elapsed since the period start.
// NOTE(review): no afterEach restoring real timers is visible in this suite — confirm whether that is intended.
beforeEach(() => {
jest.useFakeTimers({ now: new Date('2023-05-09') });
});
it('computes the error budget with an estimation of total events', () => {
const slo = createSLO({
budgetingMethod: 'occurrences',
timeWindow: weeklyCalendarAligned(),
objective: { target: 0.95 },
});
const dateRange = toDateRange(slo.timeWindow);
const errorBudget = computeErrorBudget(slo, {
good: 97,
total: 100,
dateRange,
});
expect(errorBudget).toEqual({
initial: 0.05,
consumed: 0.171429,
remaining: 0.828571,
isEstimated: true,
});
});
});
describe('for timeslices budgeting method', () => {
it('computes the error budget', () => {
const slo = createSLO({
budgetingMethod: 'timeslices',
timeWindow: weeklyCalendarAligned(),
objective: { target: 0.95, timesliceTarget: 0.95, timesliceWindow: oneMinute() },
});
const dateRange = toDateRange(slo.timeWindow);
// 2 days sliced in 1m buckets = 2,880 slices (slices we have data for) = total
// 7 days sliced in 1m buckets = 10,080 slices nominally (all slices for the window) = window_total
// NOTE(review): the expectation below matches window_total = 10,079, one slice fewer than nominal —
// presumably the fixture's date range ends just short of 7 full days; confirm against toDateRange.
const errorBudget = computeErrorBudget(slo, {
good: 2823,
total: 2880,
dateRange,
});
// error rate = (total - good) / window_total = (2880 - 2823) / 10079 = 0.00565532
// consumed = error rate / error budget = 0.00565532 / 0.05 = 0.113106
expect(errorBudget).toEqual({
initial: 0.05,
consumed: 0.113106,
remaining: 0.886894,
isEstimated: false,
});
});
});
});
it("returns default values when total events is '0'", () => {
const slo = createSLO();
const dateRange = toDateRange(slo.timeWindow);
const errorBudget = computeErrorBudget(slo, { good: 100, total: 0, dateRange });
expect(errorBudget).toEqual({
initial: 0.001, // 0.1%
consumed: 0, // 0% consumed
remaining: 1, // 100% remaining
isEstimated: false,
});
});
it("returns default values when 'good >= total' events", () => {
const slo = createSLO();
const dateRange = toDateRange(slo.timeWindow);
const errorBudget = computeErrorBudget(slo, { good: 9999, total: 9, dateRange });
expect(errorBudget).toEqual({
initial: 0.001,
consumed: 0,
remaining: 1,
isEstimated: false,
});
});
it('computes the error budget with all good events', () => {
const slo = createSLO();
const dateRange = toDateRange(slo.timeWindow);
const errorBudget = computeErrorBudget(slo, { good: 100, total: 100, dateRange });
expect(errorBudget).toEqual({
initial: 0.001,
consumed: 0,
remaining: 1,
isEstimated: false,
});
});
it('computes the error budget when exactly consumed', () => {
const slo = createSLO();
const dateRange = toDateRange(slo.timeWindow);
const errorBudget = computeErrorBudget(slo, { good: 999, total: 1000, dateRange });
expect(errorBudget).toEqual({
initial: 0.001,
consumed: 1,
remaining: 0,
isEstimated: false,
});
});
it('computes the error budget with rounded values', () => {
const slo = createSLO();
const dateRange = toDateRange(slo.timeWindow);
const errorBudget = computeErrorBudget(slo, { good: 770, total: 777, dateRange });
expect(errorBudget).toEqual({
initial: 0.001,
consumed: 9.009009, // i.e. 900.90% consumed
remaining: -8.009009, // i.e. -800.90% remaining
isEstimated: false,
});
});
it('computes the error budget with no good events', () => {
const slo = createSLO();
const dateRange = toDateRange(slo.timeWindow);
const errorBudget = computeErrorBudget(slo, { good: 0, total: 100, dateRange });
expect(errorBudget).toEqual({
initial: 0.001,
consumed: 1000, // i.e. 100,000% consumed
remaining: -999,
isEstimated: false,
});
});
});

View file

@ -1,101 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import moment from 'moment';
import {
calendarAlignedTimeWindowSchema,
Duration,
occurrencesBudgetingMethodSchema,
rollingTimeWindowSchema,
timeslicesBudgetingMethodSchema,
} from '@kbn/slo-schema';
import { DateRange, ErrorBudget, IndicatorData, SLO, toMomentUnitOfTime } from '../models';
import { toHighPrecision } from '../../utils/number';
/**
 * Computes the error budget of an SLO from its indicator data.
 *
 * More details about the calculation: https://github.com/elastic/kibana/issues/143980
 *
 * When there are no events (`total === 0`) or no bad events (`good >= total`),
 * nothing has been consumed and the budget is returned untouched. Otherwise the
 * computation is dispatched on the time window type and, for calendar aligned
 * windows, on the budgeting method.
 *
 * @param slo The SLO definition (objective, time window, budgeting method).
 * @param sliData The good/total counts and the date range they were measured on.
 * @returns The initial, consumed and remaining error budget.
 * @throws Error when no time window / budgeting method combination matches.
 */
export function computeErrorBudget(slo: SLO, sliData: IndicatorData): ErrorBudget {
  const hasNoBadEvents = sliData.total === 0 || sliData.good >= sliData.total;
  if (hasNoBadEvents) {
    return toErrorBudget(1 - slo.objective.target, 0);
  }

  const { timeWindow, budgetingMethod } = slo;

  if (rollingTimeWindowSchema.is(timeWindow)) {
    return computeForRolling(slo, sliData);
  }

  if (!calendarAlignedTimeWindowSchema.is(timeWindow)) {
    throw new Error('Invalid slo time window');
  }

  // Calendar aligned: timeslices is checked before occurrences.
  if (timeslicesBudgetingMethodSchema.is(budgetingMethod)) {
    return computeForCalendarAlignedWithTimeslices(slo, sliData);
  }
  if (occurrencesBudgetingMethodSchema.is(budgetingMethod)) {
    return computeForCalendarAlignedWithOccurrences(slo, sliData);
  }

  throw new Error('Invalid slo time window');
}
/**
 * Error budget for a rolling time window: the ratio of bad events to all
 * events, expressed as a fraction of the initial error budget.
 */
function computeForRolling(slo: SLO, sliData: IndicatorData) {
  const budget = 1 - slo.objective.target;
  const badEvents = sliData.total - sliData.good;
  const consumed = badEvents / (sliData.total * budget);

  return toErrorBudget(budget, consumed);
}
/**
 * Error budget for a calendar aligned window with the occurrences budgeting method.
 *
 * The total number of events expected by the end of the period is extrapolated
 * linearly from the events seen so far and the minutes elapsed since the start
 * of the period. The bad events are then measured against that projected total,
 * and the resulting budget is flagged as estimated.
 */
function computeForCalendarAlignedWithOccurrences(slo: SLO, sliData: IndicatorData) {
  const { good, total, dateRange } = sliData;

  const currentTime = moment();
  const periodMinutes = moment(dateRange.to).diff(dateRange.from, 'minutes');

  // Once the period is over, the whole period counts as elapsed.
  let elapsedMinutes = periodMinutes;
  if (!currentTime.isAfter(dateRange.to)) {
    elapsedMinutes = moment(currentTime).diff(dateRange.from, 'minutes');
  }

  const projectedTotal = Math.round((total / elapsedMinutes) * periodMinutes);
  const budget = 1 - slo.objective.target;
  const consumed = (total - good) / (projectedTotal * budget);

  return toErrorBudget(budget, consumed, true);
}
/**
 * Error budget for a calendar aligned window with the timeslices budgeting method.
 *
 * Bad slices are measured against the number of slices in the whole date range
 * rather than against the slices observed so far.
 */
function computeForCalendarAlignedWithTimeslices(slo: SLO, sliData: IndicatorData) {
  const budget = 1 - slo.objective.target;
  const windowSlices = computeTotalSlicesFromDateRange(
    sliData.dateRange,
    slo.objective.timesliceWindow!
  );
  const badSlices = sliData.total - sliData.good;

  return toErrorBudget(budget, badSlices / (windowSlices * budget));
}
/**
 * Counts how many timeslice windows fit in a date range.
 *
 * @param dateRange The range to slice, from `dateRange.from` to `dateRange.to`.
 * @param timesliceWindow The slice duration (`value` expressed in `unit`).
 * @returns The range duration in the slice unit divided by the slice value, rounded up.
 */
export function computeTotalSlicesFromDateRange(
  dateRange: DateRange,
  timesliceWindow: Duration
): number {
  // moment's `diff` truncates to whole units by default, so a partial trailing
  // unit of the range is not counted.
  const dateRangeDurationInUnit = moment(dateRange.to).diff(
    dateRange.from,
    toMomentUnitOfTime(timesliceWindow.unit)
  );

  // `timesliceWindow` is a required parameter: no non-null assertion is needed.
  return Math.ceil(dateRangeDurationInUnit / timesliceWindow.value);
}
/**
 * Builds an ErrorBudget from its initial and consumed parts.
 *
 * @param initial The initial error budget.
 * @param consumed The consumed share of the budget; may exceed 1.
 * @param isEstimated Whether the consumption is an estimation. Defaults to `false`.
 * @returns The budget with `remaining` set to `1 - consumed` and every numeric
 * field passed through `toHighPrecision`.
 */
export function toErrorBudget(
  initial: number,
  consumed: number,
  isEstimated: boolean = false
): ErrorBudget {
  // Goes negative once more than the whole budget has been consumed.
  const remaining = 1 - consumed;

  const errorBudget: ErrorBudget = {
    initial: toHighPrecision(initial),
    consumed: toHighPrecision(consumed),
    remaining: toHighPrecision(remaining),
    isEstimated,
  };
  return errorBudget;
}

View file

@ -9,18 +9,18 @@ import { computeSLI } from './compute_sli';
describe('computeSLI', () => {
it('returns -1 when no total events', () => {
expect(computeSLI({ good: 100, total: 0 })).toEqual(-1);
expect(computeSLI(100, 0)).toEqual(-1);
});
it('returns the sli value', () => {
expect(computeSLI({ good: 100, total: 1000 })).toEqual(0.1);
expect(computeSLI(100, 1000)).toEqual(0.1);
});
it('returns 1 when good is greater than total events', () => {
expect(computeSLI({ good: 9999, total: 9 })).toEqual(1);
expect(computeSLI(9999, 9)).toEqual(1);
});
it('returns rounds the value to 6 digits', () => {
expect(computeSLI({ good: 33, total: 90 })).toEqual(0.366667);
expect(computeSLI(33, 90)).toEqual(0.366667);
});
});

View file

@ -5,13 +5,11 @@
* 2.0.
*/
import { IndicatorData } from '../models';
import { toHighPrecision } from '../../utils/number';
const NO_DATA = -1;
export function computeSLI(sliData: Pick<IndicatorData, 'good' | 'total'>): number {
const { good, total } = sliData;
export function computeSLI(good: number, total: number): number {
if (total === 0) {
return NO_DATA;
}

View file

@ -12,7 +12,7 @@ export function computeSummaryStatus(slo: SLO, sliValue: number, errorBudget: Er
return 'NO_DATA';
}
if (slo.objective.target <= sliValue) {
if (sliValue >= slo.objective.target) {
return 'HEALTHY';
} else {
return errorBudget.remaining > 0 ? 'DEGRADING' : 'VIOLATED';

View file

@ -0,0 +1,22 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { toHighPrecision } from '../../utils/number';
import { ErrorBudget } from '../models';
/**
 * Assembles an ErrorBudget value object.
 *
 * `remaining` is derived as `1 - consumed`, and all three numeric fields go
 * through `toHighPrecision` before being returned.
 *
 * @param initial The initial error budget.
 * @param consumed The consumed share of the budget.
 * @param isEstimated Flags the budget as an estimation; `false` when omitted.
 * @returns The assembled error budget.
 */
export function toErrorBudget(
  initial: number,
  consumed: number,
  isEstimated: boolean = false
): ErrorBudget {
  const remaining = 1 - consumed;

  const budget: ErrorBudget = {
    initial: toHighPrecision(initial),
    consumed: toHighPrecision(consumed),
    remaining: toHighPrecision(remaining),
    isEstimated,
  };
  return budget;
}

View file

@ -6,7 +6,7 @@
*/
export * from './compute_burn_rate';
export * from './compute_error_budget';
export * from './error_budget';
export * from './compute_sli';
export * from './compute_summary_status';
export * from './date_range';

View file

@ -9,6 +9,7 @@ import { MsearchMultisearchBody } from '@elastic/elasticsearch/lib/api/typesWith
import { ElasticsearchClient } from '@kbn/core/server';
import {
calendarAlignedTimeWindowSchema,
Duration,
occurrencesBudgetingMethodSchema,
rollingTimeWindowSchema,
timeslicesBudgetingMethodSchema,
@ -16,13 +17,11 @@ import {
} from '@kbn/slo-schema';
import { assertNever } from '@kbn/std';
import moment from 'moment';
import { SLO_DESTINATION_INDEX_NAME } from '../../assets/constants';
import { SLO_DESTINATION_INDEX_PATTERN } from '../../assets/constants';
import { DateRange, HistoricalSummary, SLO, SLOId } from '../../domain/models';
import {
computeSLI,
computeSummaryStatus,
computeTotalSlicesFromDateRange,
toDateRange,
toErrorBudget,
} from '../../domain/services';
@ -59,7 +58,7 @@ export class DefaultHistoricalSummaryClient implements HistoricalSummaryClient {
}, {});
const searches = sloList.flatMap((slo) => [
{ index: `${SLO_DESTINATION_INDEX_NAME}*` },
{ index: SLO_DESTINATION_INDEX_PATTERN },
generateSearchQuery(slo, dateRangeBySlo[slo.id]),
]);
@ -98,11 +97,9 @@ export class DefaultHistoricalSummaryClient implements HistoricalSummaryClient {
}
if (occurrencesBudgetingMethodSchema.is(slo.budgetingMethod)) {
const dateRange = dateRangeBySlo[slo.id];
historicalSummaryBySlo[slo.id] = handleResultForCalendarAlignedAndOccurrences(
slo,
buckets,
dateRange
buckets
);
continue;
}
@ -119,29 +116,15 @@ export class DefaultHistoricalSummaryClient implements HistoricalSummaryClient {
function handleResultForCalendarAlignedAndOccurrences(
slo: SLO,
buckets: DailyAggBucket[],
dateRange: DateRange
buckets: DailyAggBucket[]
): HistoricalSummary[] {
const initialErrorBudget = 1 - slo.objective.target;
return buckets.map((bucket: DailyAggBucket): HistoricalSummary => {
const good = bucket.cumulative_good?.value ?? 0;
const total = bucket.cumulative_total?.value ?? 0;
const sliValue = computeSLI({ good, total });
const durationCalendarPeriod = moment(dateRange.to).diff(dateRange.from, 'minutes');
const bucketDate = moment(bucket.key_as_string);
const durationSinceBeginning = bucketDate.isSameOrAfter(dateRange.to)
? durationCalendarPeriod
: moment(bucketDate).diff(dateRange.from, 'minutes');
const totalEventsEstimatedAtPeriodEnd = Math.round(
(total / durationSinceBeginning) * durationCalendarPeriod
);
const consumedErrorBudget =
(total - good) / (totalEventsEstimatedAtPeriodEnd * initialErrorBudget);
const sliValue = computeSLI(good, total);
const consumedErrorBudget = sliValue < 0 ? 0 : (1 - sliValue) / initialErrorBudget;
const errorBudget = toErrorBudget(initialErrorBudget, consumedErrorBudget, true);
return {
@ -163,7 +146,7 @@ function handleResultForCalendarAlignedAndTimeslices(
return buckets.map((bucket: DailyAggBucket): HistoricalSummary => {
const good = bucket.cumulative_good?.value ?? 0;
const total = bucket.cumulative_total?.value ?? 0;
const sliValue = computeSLI({ good, total });
const sliValue = computeSLI(good, total);
const totalSlices = computeTotalSlicesFromDateRange(dateRange, slo.objective.timesliceWindow!);
const consumedErrorBudget = (total - good) / (totalSlices * initialErrorBudget);
const errorBudget = toErrorBudget(initialErrorBudget, consumedErrorBudget);
@ -190,8 +173,8 @@ function handleResultForRolling(slo: SLO, buckets: DailyAggBucket[]): Historical
.map((bucket: DailyAggBucket): HistoricalSummary => {
const good = bucket.cumulative_good?.value ?? 0;
const total = bucket.cumulative_total?.value ?? 0;
const sliValue = computeSLI({ good, total });
const consumedErrorBudget = total === 0 ? 0 : (total - good) / (total * initialErrorBudget);
const sliValue = computeSLI(good, total);
const consumedErrorBudget = sliValue < 0 ? 0 : (1 - sliValue) / initialErrorBudget;
const errorBudget = toErrorBudget(initialErrorBudget, consumedErrorBudget);
return {
@ -305,6 +288,14 @@ function getDateRange(slo: SLO) {
assertNever(slo.timeWindow);
}
/**
 * Counts how many timeslice windows fit in a date range.
 *
 * @param dateRange The range to slice, from `dateRange.from` to `dateRange.to`.
 * @param timesliceWindow The slice duration (`value` expressed in `unit`).
 * @returns The range duration in the slice unit divided by the slice value, rounded up.
 */
function computeTotalSlicesFromDateRange(dateRange: DateRange, timesliceWindow: Duration) {
  // moment's `diff` truncates to whole units by default, so a partial trailing
  // unit of the range is not counted.
  const dateRangeDurationInUnit = moment(dateRange.to).diff(
    dateRange.from,
    toMomentUnitOfTime(timesliceWindow.unit)
  );
  // `timesliceWindow` is a required parameter: no non-null assertion is needed.
  return Math.ceil(dateRangeDurationInUnit / timesliceWindow.value);
}
export function getFixedIntervalAndBucketsPerDay(durationInDays: number): {
fixedInterval: string;
bucketsPerDay: number;

View file

@ -13,13 +13,12 @@ import {
MsearchMultisearchBody,
} from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { ElasticsearchClient } from '@kbn/core/server';
import { assertNever } from '@kbn/std';
import { occurrencesBudgetingMethodSchema, timeslicesBudgetingMethodSchema } from '@kbn/slo-schema';
import { SLO_DESTINATION_INDEX_NAME } from '../../assets/constants';
import { assertNever } from '@kbn/std';
import { SLO_DESTINATION_INDEX_PATTERN } from '../../assets/constants';
import { DateRange, Duration, IndicatorData, SLO } from '../../domain/models';
import { toDateRange } from '../../domain/services/date_range';
import { InternalQueryError } from '../../errors';
import { DateRange, Duration, IndicatorData, SLO } from '../../domain/models';
export interface SLIClient {
fetchSLIDataFrom(
@ -56,7 +55,7 @@ export class DefaultSLIClient implements SLIClient {
if (occurrencesBudgetingMethodSchema.is(slo.budgetingMethod)) {
const result = await this.esClient.search<unknown, EsAggregations>({
...commonQuery(slo, longestDateRange),
index: `${SLO_DESTINATION_INDEX_NAME}*`,
index: SLO_DESTINATION_INDEX_PATTERN,
aggs: toLookbackWindowsAggregationsQuery(sortedLookbackWindows),
});
@ -66,7 +65,7 @@ export class DefaultSLIClient implements SLIClient {
if (timeslicesBudgetingMethodSchema.is(slo.budgetingMethod)) {
const result = await this.esClient.search<unknown, EsAggregations>({
...commonQuery(slo, longestDateRange),
index: `${SLO_DESTINATION_INDEX_NAME}*`,
index: SLO_DESTINATION_INDEX_PATTERN,
aggs: toLookbackWindowsSlicedAggregationsQuery(slo, sortedLookbackWindows),
});
@ -159,8 +158,8 @@ function handleWindowedResult(
}
const indicatorDataPerLookbackWindow: Record<WindowName, IndicatorData> = {};
lookbackWindows.forEach((lookbackWindow) => {
const windowAggBuckets = aggregations[lookbackWindow.name]?.buckets;
for (const lookbackWindow of lookbackWindows) {
const windowAggBuckets = aggregations[lookbackWindow.name]?.buckets ?? [];
if (!Array.isArray(windowAggBuckets) || windowAggBuckets.length === 0) {
throw new InternalQueryError('Invalid aggregation bucket response');
}
@ -176,7 +175,7 @@ function handleWindowedResult(
total,
dateRange: { from: new Date(bucket.from_as_string!), to: new Date(bucket.to_as_string!) },
};
});
}
return indicatorDataPerLookbackWindow;
}

View file

@ -7,11 +7,18 @@
import { MsearchMultisearchBody } from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { ElasticsearchClient } from '@kbn/core/server';
import { occurrencesBudgetingMethodSchema, timeslicesBudgetingMethodSchema } from '@kbn/slo-schema';
import { SLO_DESTINATION_INDEX_NAME } from '../../assets/constants';
import { toDateRange } from '../../domain/services/date_range';
import {
calendarAlignedTimeWindowSchema,
Duration,
occurrencesBudgetingMethodSchema,
timeslicesBudgetingMethodSchema,
toMomentUnitOfTime,
} from '@kbn/slo-schema';
import moment from 'moment';
import { SLO_DESTINATION_INDEX_PATTERN } from '../../assets/constants';
import { DateRange, SLO, SLOId, Summary } from '../../domain/models';
import { computeErrorBudget, computeSLI, computeSummaryStatus } from '../../domain/services';
import { computeSLI, computeSummaryStatus, toErrorBudget } from '../../domain/services';
import { toDateRange } from '../../domain/services/date_range';
export interface SummaryClient {
fetchSummary(sloList: SLO[]): Promise<Record<SLOId, Summary>>;
@ -26,7 +33,7 @@ export class DefaultSummaryClient implements SummaryClient {
return acc;
}, {});
const searches = sloList.flatMap((slo) => [
{ index: `${SLO_DESTINATION_INDEX_NAME}*` },
{ index: SLO_DESTINATION_INDEX_PATTERN },
generateSearchQuery(slo, dateRangeBySlo[slo.id]),
]);
@ -45,12 +52,31 @@ export class DefaultSummaryClient implements SummaryClient {
const good = aggregations?.good?.value ?? 0;
const total = aggregations?.total?.value ?? 0;
const sliValue = computeSLI({ good, total });
const errorBudget = computeErrorBudget(slo, {
dateRange: dateRangeBySlo[slo.id],
good,
total,
});
const sliValue = computeSLI(good, total);
const initialErrorBudget = 1 - slo.objective.target;
let errorBudget;
if (
calendarAlignedTimeWindowSchema.is(slo.timeWindow) &&
timeslicesBudgetingMethodSchema.is(slo.budgetingMethod)
) {
const totalSlices = computeTotalSlicesFromDateRange(
dateRangeBySlo[slo.id],
slo.objective.timesliceWindow!
);
const consumedErrorBudget =
sliValue < 0 ? 0 : (total - good) / (totalSlices * initialErrorBudget);
errorBudget = toErrorBudget(initialErrorBudget, consumedErrorBudget);
} else {
const consumedErrorBudget = sliValue < 0 ? 0 : (1 - sliValue) / initialErrorBudget;
errorBudget = toErrorBudget(
initialErrorBudget,
consumedErrorBudget,
calendarAlignedTimeWindowSchema.is(slo.timeWindow)
);
}
summaryBySlo[slo.id] = {
sliValue,
errorBudget,
@ -62,6 +88,14 @@ export class DefaultSummaryClient implements SummaryClient {
}
}
/**
 * Counts how many timeslice windows fit in a date range.
 *
 * @param dateRange The range to slice, from `dateRange.from` to `dateRange.to`.
 * @param timesliceWindow The slice duration (`value` expressed in `unit`).
 * @returns The range duration in the slice unit divided by the slice value, rounded up.
 */
function computeTotalSlicesFromDateRange(dateRange: DateRange, timesliceWindow: Duration) {
  // moment's `diff` truncates to whole units by default, so a partial trailing
  // unit of the range is not counted.
  const dateRangeDurationInUnit = moment(dateRange.to).diff(
    dateRange.from,
    toMomentUnitOfTime(timesliceWindow.unit)
  );
  // `timesliceWindow` is a required parameter: no non-null assertion is needed.
  return Math.ceil(dateRangeDurationInUnit / timesliceWindow.value);
}
function generateSearchQuery(slo: SLO, dateRange: DateRange): MsearchMultisearchBody {
return {
size: 0,