feat(slo): Introduce burn rate calculation service (#144823)

This commit is contained in:
Kevin Delemme 2022-11-10 12:14:57 -05:00 committed by GitHub
parent f82bb68072
commit 38251947e3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 552 additions and 72 deletions

View file

@ -0,0 +1,63 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { computeBurnRate } from './compute_burn_rate';
import { toDateRange } from './date_range';
import { createSLO } from '../../services/slo/fixtures/slo';
import { sixHoursRolling } from '../../services/slo/fixtures/time_window';
describe('computeBurnRate', () => {
  // Every case uses the same six-hour rolling date range; only the SLO and the
  // good/total counts change between cases.
  const burnRateFor = (slo: ReturnType<typeof createSLO>, good: number, total: number) =>
    computeBurnRate(slo, { good, total, date_range: toDateRange(sixHoursRolling()) });

  it('computes 0 when total is 0', () => {
    const burnRate = burnRateFor(createSLO(), 10, 0);

    expect(burnRate).toEqual(0);
  });

  it('computes 0 when good is greater than total', () => {
    const burnRate = burnRateFor(createSLO(), 9999, 1);

    expect(burnRate).toEqual(0);
  });

  it('computes the burn rate as 1x the error budget', () => {
    const burnRate = burnRateFor(createSLO({ objective: { target: 0.9 } }), 90, 100);

    expect(burnRate).toEqual(1);
  });

  it('computes the burn rate as 10x the error budget', () => {
    const burnRate = burnRateFor(createSLO({ objective: { target: 0.99 } }), 90, 100);

    expect(burnRate).toEqual(10);
  });

  it('computes the burn rate as 0.5x the error budget', () => {
    const burnRate = burnRateFor(createSLO({ objective: { target: 0.8 } }), 90, 100);

    expect(burnRate).toEqual(0.5);
  });
});

View file

@ -0,0 +1,24 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { IndicatorData, SLO } from '../../types/models';
import { toHighPrecision } from '../../utils/number';
/**
 * Computes the burn rate from the indicator data retrieved over a specific
 * lookback period: how fast the error budget is being consumed during that
 * period.
 *
 * The burn rate is the observed error rate (1 - good / total) divided by the
 * error budget (1 - objective target), passed through `toHighPrecision`.
 *
 * @param slo the SLO whose `objective.target` defines the error budget
 * @param sliData the good and total counts observed over the period
 * @returns 0 when `total` is 0 or when `good` is greater than or equal to
 *          `total`; otherwise the error rate to error budget ratio
 */
export function computeBurnRate(slo: SLO, sliData: IndicatorData): number {
  const goodEvents = sliData.good;
  const totalEvents = sliData.total;

  // No data, or no observed errors: nothing has been burned.
  const nothingBurned = totalEvents === 0 || goodEvents >= totalEvents;
  if (nothingBurned) {
    return 0;
  }

  const allowedErrorRatio = 1 - slo.objective.target;
  const observedErrorRatio = 1 - goodEvents / totalEvents;
  const burnRate = observedErrorRatio / allowedErrorRatio;

  return toHighPrecision(burnRate);
}

View file

@ -7,5 +7,6 @@
export * from './compute_error_budget';
export * from './compute_sli';
export * from './compute_burn_rate';
export * from './date_range';
export * from './validate_slo';

View file

@ -16,8 +16,8 @@ describe('validateSLO', () => {
expect(() => validateSLO(slo)).toThrowError('Invalid objective.target');
});
it("throws when 'objective.target' is gt 1", () => {
const slo = createSLO({ objective: { target: 1.0001 } });
it("throws when 'objective.target' is gte 1", () => {
const slo = createSLO({ objective: { target: 1 } });
expect(() => validateSLO(slo)).toThrowError('Invalid objective.target');
});

View file

@ -44,7 +44,7 @@ export function validateSLO(slo: SLO) {
}
/**
 * Tells whether an objective target is valid: strictly greater than 0 and
 * strictly lower than 1.
 *
 * A target of exactly 1 is rejected because it leaves an error budget
 * (1 - target) of zero.
 *
 * @param value the objective target to check
 * @returns true when 0 < value < 1
 */
function isValidTargetNumber(value: number): boolean {
  return value > 0 && value < 1;
}
function isValidTimeWindowDuration(duration: Duration): boolean {

View file

@ -15,6 +15,10 @@ export function oneWeek(): Duration {
return new Duration(1, DurationUnit.w);
}
/** Fixture: a duration of six hours. */
export function sixHours(): Duration {
  const amount = 6;
  return new Duration(amount, DurationUnit.h);
}
/** Fixture: a duration of one minute. */
export function oneMinute(): Duration {
  const amount = 1;
  return new Duration(amount, DurationUnit.m);
}

View file

@ -6,7 +6,14 @@
*/
import { TimeWindow } from '../../../types/models/time_window';
import { oneWeek, sevenDays } from './duration';
import { oneWeek, sevenDays, sixHours } from './duration';
/** Fixture: a rolling time window whose duration is six hours. */
export function sixHoursRolling(): TimeWindow {
  const rollingWindow: TimeWindow = { duration: sixHours(), is_rolling: true };
  return rollingWindow;
}
export function sevenDaysRolling(): TimeWindow {
return {

View file

@ -37,6 +37,7 @@ const createSLORepositoryMock = (): jest.Mocked<SLORepository> => {
// Builds an SLIClient test double whose fetch methods are bare jest mocks.
const createSLIClientMock = (): jest.Mocked<SLIClient> => ({
  fetchCurrentSLIData: jest.fn(),
  fetchSLIDataFrom: jest.fn(),
});

View file

@ -15,6 +15,20 @@ import { Duration, DurationUnit } from '../../types/models';
import { createSLO } from './fixtures/slo';
import { DefaultSLIClient } from './sli_client';
// Envelope fields reused by the mocked Elasticsearch search responses in this
// suite: each test spreads it and adds its own `aggregations`.
const commonEsResponse = {
took: 100,
timed_out: false,
_shards: {
total: 0,
successful: 0,
skipped: 0,
failed: 0,
},
// No documents are returned; the tests only feed aggregations to the client.
hits: {
hits: [],
},
};
describe('SLIClient', () => {
let esClientMock: ElasticsearchClientMock;
@ -32,17 +46,7 @@ describe('SLIClient', () => {
},
});
esClientMock.search.mockResolvedValueOnce({
took: 100,
timed_out: false,
_shards: {
total: 0,
successful: 0,
skipped: 0,
failed: 0,
},
hits: {
hits: [],
},
...commonEsResponse,
aggregations: {},
});
const sliClient = new DefaultSLIClient(esClientMock);
@ -61,17 +65,7 @@ describe('SLIClient', () => {
},
});
esClientMock.search.mockResolvedValueOnce({
took: 100,
timed_out: false,
_shards: {
total: 0,
successful: 0,
skipped: 0,
failed: 0,
},
hits: {
hits: [],
},
...commonEsResponse,
aggregations: {
good: { value: 90 },
total: { value: 100 },
@ -93,6 +87,7 @@ describe('SLIClient', () => {
expect(esClientMock.search).toHaveBeenCalledWith(
expect.objectContaining({
index: `${SLO_DESTINATION_INDEX_NAME}*`,
size: 0,
query: {
bool: {
filter: [
@ -126,17 +121,7 @@ describe('SLIClient', () => {
},
});
esClientMock.search.mockResolvedValueOnce({
took: 100,
timed_out: false,
_shards: {
total: 0,
successful: 0,
skipped: 0,
failed: 0,
},
hits: {
hits: [],
},
...commonEsResponse,
aggregations: {
good: { value: 90 },
total: { value: 100 },
@ -192,17 +177,7 @@ describe('SLIClient', () => {
});
esClientMock.search.mockResolvedValueOnce({
took: 100,
timed_out: false,
_shards: {
total: 0,
successful: 0,
skipped: 0,
failed: 0,
},
hits: {
hits: [],
},
...commonEsResponse,
aggregations: {},
});
const sliClient = new DefaultSLIClient(esClientMock);
@ -229,17 +204,7 @@ describe('SLIClient', () => {
},
});
esClientMock.search.mockResolvedValueOnce({
took: 100,
timed_out: false,
_shards: {
total: 0,
successful: 0,
skipped: 0,
failed: 0,
},
hits: {
hits: [],
},
...commonEsResponse,
aggregations: {
slices: { buckets: [] },
good: { value: 90 },
@ -338,17 +303,7 @@ describe('SLIClient', () => {
},
});
esClientMock.search.mockResolvedValueOnce({
took: 100,
timed_out: false,
_shards: {
total: 0,
successful: 0,
skipped: 0,
failed: 0,
},
hits: {
hits: [],
},
...commonEsResponse,
aggregations: {
good: { value: 90 },
total: { value: 100 },
@ -429,6 +384,265 @@ describe('SLIClient', () => {
});
});
});
describe('fetchSLIDataFrom', () => {
  const LONG_WINDOW = 'long_window';
  const SHORT_WINDOW = 'short_window';

  // The same pair of lookback windows (1 hour and 5 minutes) drives both scenarios.
  const createLookbackWindows = () => [
    { name: LONG_WINDOW, duration: new Duration(1, DurationUnit.h) },
    { name: SHORT_WINDOW, duration: new Duration(5, DurationUnit.m) },
  ];

  // Expected `date_range` clause for a window looking back over `lookback` (e.g. '1h').
  const expectedDateRange = (lookback: string) => ({
    field: '@timestamp',
    ranges: [{ from: `now-${lookback}/m`, to: 'now/m' }],
  });

  const expectedSumOf = (field: string) => ({ sum: { field } });

  // Expected sub-aggregations of each window for the occurrences budgeting method.
  const expectedOccurrencesAggs = () => ({
    good: expectedSumOf('slo.numerator'),
    total: expectedSumOf('slo.denominator'),
  });

  // Expected sub-aggregations of each window for the timeslices budgeting method,
  // given a 10 minute timeslice window and a 0.9 timeslice target.
  const expectedTimeslicesAggs = () => ({
    slices: {
      date_histogram: {
        field: '@timestamp',
        fixed_interval: '10m',
      },
      aggs: {
        good: expectedSumOf('slo.numerator'),
        total: expectedSumOf('slo.denominator'),
        good_slice: {
          bucket_script: {
            buckets_path: { good: 'good', total: 'total' },
            script: 'params.good / params.total >= 0.9 ? 1 : 0',
          },
        },
        count_slice: {
          bucket_script: {
            buckets_path: {},
            script: '1',
          },
        },
      },
    },
    good: { sum_bucket: { buckets_path: 'slices>good_slice.value' } },
    total: { sum_bucket: { buckets_path: 'slices>count_slice.value' } },
  });

  describe('for SLO defined with occurrences budgeting method', () => {
    it('calls ES with the lookback windows aggregations', async () => {
      const occurrencesSlo = createSLO({ budgeting_method: 'occurrences' });
      const windows = createLookbackWindows();
      esClientMock.search.mockResolvedValueOnce({
        ...commonEsResponse,
        aggregations: {
          [LONG_WINDOW]: {
            buckets: [
              {
                key: '2022-11-08T13:53:00.000Z-2022-11-08T14:53:00.000Z',
                from: 1667915580000,
                from_as_string: '2022-11-08T13:53:00.000Z',
                to: 1667919180000,
                to_as_string: '2022-11-08T14:53:00.000Z',
                doc_count: 60,
                total: { value: 32169 },
                good: { value: 15748 },
              },
            ],
          },
          [SHORT_WINDOW]: {
            buckets: [
              {
                key: '2022-11-08T14:48:00.000Z-2022-11-08T14:53:00.000Z',
                from: 1667918880000,
                from_as_string: '2022-11-08T14:48:00.000Z',
                to: 1667919180000,
                to_as_string: '2022-11-08T14:53:00.000Z',
                doc_count: 5,
                total: { value: 2211 },
                good: { value: 772 },
              },
            ],
          },
        },
      });

      const client = new DefaultSLIClient(esClientMock);
      const sliDataByWindow = await client.fetchSLIDataFrom(occurrencesSlo, windows);

      expect(esClientMock.search.mock.lastCall[0]).toMatchObject({
        aggs: {
          [LONG_WINDOW]: {
            date_range: expectedDateRange('1h'),
            aggs: expectedOccurrencesAggs(),
          },
          [SHORT_WINDOW]: {
            date_range: expectedDateRange('5m'),
            aggs: expectedOccurrencesAggs(),
          },
        },
      });
      expect(sliDataByWindow[LONG_WINDOW]).toMatchObject({ good: 15748, total: 32169 });
      expect(sliDataByWindow[SHORT_WINDOW]).toMatchObject({ good: 772, total: 2211 });
    });
  });

  describe('for SLO defined with timeslices budgeting method', () => {
    it('calls ES with the lookback windows aggregations', async () => {
      const timeslicesSlo = createSLO({
        budgeting_method: 'timeslices',
        objective: {
          target: 0.95,
          timeslice_target: 0.9,
          timeslice_window: new Duration(10, DurationUnit.m),
        },
      });
      const windows = createLookbackWindows();
      esClientMock.search.mockResolvedValueOnce({
        ...commonEsResponse,
        aggregations: {
          [LONG_WINDOW]: {
            buckets: [
              {
                key: '2022-11-08T13:53:00.000Z-2022-11-08T14:53:00.000Z',
                from: 1667915580000,
                from_as_string: '2022-11-08T13:53:00.000Z',
                to: 1667919180000,
                to_as_string: '2022-11-08T14:53:00.000Z',
                doc_count: 60,
                total: { value: 32169 },
                good: { value: 15748 },
              },
            ],
          },
          [SHORT_WINDOW]: {
            buckets: [
              {
                key: '2022-11-08T14:48:00.000Z-2022-11-08T14:53:00.000Z',
                from: 1667918880000,
                from_as_string: '2022-11-08T14:48:00.000Z',
                to: 1667919180000,
                to_as_string: '2022-11-08T14:53:00.000Z',
                doc_count: 5,
                total: { value: 2211 },
                good: { value: 772 },
              },
            ],
          },
        },
      });

      const client = new DefaultSLIClient(esClientMock);
      const sliDataByWindow = await client.fetchSLIDataFrom(timeslicesSlo, windows);

      expect(esClientMock.search.mock.lastCall[0]).toMatchObject({
        aggs: {
          [LONG_WINDOW]: {
            date_range: expectedDateRange('1h'),
            aggs: expectedTimeslicesAggs(),
          },
          [SHORT_WINDOW]: {
            date_range: expectedDateRange('5m'),
            aggs: expectedTimeslicesAggs(),
          },
        },
      });
      expect(sliDataByWindow[LONG_WINDOW]).toMatchObject({ good: 15748, total: 32169 });
      expect(sliDataByWindow[SHORT_WINDOW]).toMatchObject({ good: 772, total: 2211 });
    });
  });
});
});
expect.extend({

View file

@ -5,13 +5,18 @@
* 2.0.
*/
import { AggregationsSumAggregate } from '@elastic/elasticsearch/lib/api/types';
import {
AggregationsAggregationContainer,
AggregationsDateRangeAggregate,
AggregationsSumAggregate,
} from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { ElasticsearchClient } from '@kbn/core/server';
import { assertNever } from '@kbn/std';
import { SLO_DESTINATION_INDEX_NAME } from '../../assets/constants';
import { toDateRange } from '../../domain/services/date_range';
import { InternalQueryError } from '../../errors';
import { DateRange, Duration, IndicatorData, SLO } from '../../types/models';
import {
occurencesBudgetingMethodSchema,
timeslicesBudgetingMethodSchema,
@ -19,9 +24,21 @@ import {
/**
 * Contract for fetching the indicator (SLI) data of an SLO.
 */
export interface SLIClient {
// Resolves a single IndicatorData for the given SLO.
fetchCurrentSLIData(slo: SLO): Promise<IndicatorData>;
// Resolves one IndicatorData per requested lookback window, keyed by window name.
fetchSLIDataFrom(
slo: SLO,
lookbackWindows: LookbackWindow[]
): Promise<Record<WindowName, IndicatorData>>;
}
// Name identifying a lookback window: used as the aggregation name in the
// search request and as the key of the per-window results.
type WindowName = string;
// A named period to look back over; queries express it as `now-<duration>` up to `now`.
interface LookbackWindow {
name: WindowName;
duration: Duration;
}
// NOTE(review): presumably the names of the good/total sub-aggregations; its
// usage is not visible in this part of the file.
type AggKey = 'good' | 'total';
// Shape of the typed search response aggregations: one date_range aggregate per window name.
type EsAggregations = Record<WindowName, AggregationsDateRangeAggregate>;
export class DefaultSLIClient implements SLIClient {
// Keeps the Elasticsearch client that the fetch methods use to run their searches.
constructor(private esClient: ElasticsearchClient) {}
@ -87,6 +104,40 @@ export class DefaultSLIClient implements SLIClient {
assertNever(slo.budgeting_method);
}
/**
 * Fetches the indicator data of an SLO over several lookback windows with a
 * single search.
 *
 * The search is scoped (through `commonQuery`) to the date range of the
 * longest window, and carries one named aggregation per window whose shape
 * depends on the SLO budgeting method (occurrences or timeslices).
 *
 * @param slo the SLO to fetch the data for
 * @param lookbackWindows the named windows to look back over; not mutated
 * @returns the indicator data of each window, keyed by window name; an empty
 *          record when no window is requested
 * @throws InternalQueryError when the response lacks the expected aggregations
 */
async fetchSLIDataFrom(
  slo: SLO,
  lookbackWindows: LookbackWindow[]
): Promise<Record<WindowName, IndicatorData>> {
  // Nothing to aggregate: skip the search instead of failing on the missing
  // longest window below.
  if (lookbackWindows.length === 0) {
    return {};
  }

  // Longest window first. The comparator returns 0 for equal durations so it
  // stays consistent, as Array.prototype.sort requires.
  const sortedLookbackWindows = [...lookbackWindows].sort((a, b) => {
    if (a.duration.isShorterThan(b.duration)) return 1;
    if (b.duration.isShorterThan(a.duration)) return -1;
    return 0;
  });
  const longestLookbackWindow = sortedLookbackWindows[0];
  const longestDateRange = toDateRange({
    duration: longestLookbackWindow.duration,
    is_rolling: true,
  });

  if (occurencesBudgetingMethodSchema.is(slo.budgeting_method)) {
    const result = await this.esClient.search<unknown, EsAggregations>({
      ...commonQuery(slo, longestDateRange),
      aggs: toLookbackWindowsAggregationsQuery(sortedLookbackWindows),
    });

    return handleWindowedResult(result.aggregations, lookbackWindows);
  }

  if (timeslicesBudgetingMethodSchema.is(slo.budgeting_method)) {
    const result = await this.esClient.search<unknown, EsAggregations>({
      ...commonQuery(slo, longestDateRange),
      aggs: toLookbackWindowsSlicedAggregationsQuery(slo, sortedLookbackWindows),
    });

    return handleWindowedResult(result.aggregations, lookbackWindows);
  }

  assertNever(slo.budgeting_method);
}
}
function commonQuery(slo: SLO, dateRange: DateRange) {
@ -126,8 +177,123 @@ function handleResult(
};
}
/**
 * Builds the aggregations of the occurrences budgeting method: one
 * `date_range` aggregation per lookback window, named after the window and
 * covering `now-<duration>` to `now` (both rounded to the minute), with the
 * sums of `slo.numerator` (good) and `slo.denominator` (total) nested in it.
 *
 * @param sortedLookbackWindow the lookback windows to build an aggregation for
 * @returns the aggregations keyed by window name
 */
function toLookbackWindowsAggregationsQuery(sortedLookbackWindow: LookbackWindow[]) {
  const aggsByWindowName: Record<string, AggregationsAggregationContainer> = Object.fromEntries(
    sortedLookbackWindow.map(
      (lookbackWindow): [string, AggregationsAggregationContainer] => [
        lookbackWindow.name,
        {
          date_range: {
            field: '@timestamp',
            ranges: [{ from: `now-${lookbackWindow.duration.format()}/m`, to: 'now/m' }],
          },
          aggs: {
            good: { sum: { field: 'slo.numerator' } },
            total: { sum: { field: 'slo.denominator' } },
          },
        },
      ]
    )
  );

  return aggsByWindowName;
}
/**
 * Builds the aggregations of the timeslices budgeting method: one `date_range`
 * aggregation per lookback window, named after the window and covering
 * `now-<duration>` to `now` (both rounded to the minute).
 *
 * Inside each window the documents are split into slices by a date histogram
 * whose interval comes from the SLO timeslice window. A slice counts as good
 * when its good / total ratio reaches the SLO timeslice target. The window's
 * `good` is the number of good slices and its `total` the number of slices.
 *
 * @param slo the SLO providing `objective.timeslice_window` and `objective.timeslice_target`
 * @param lookbackWindows the lookback windows to build an aggregation for
 * @returns the aggregations keyed by window name
 */
function toLookbackWindowsSlicedAggregationsQuery(slo: SLO, lookbackWindows: LookbackWindow[]) {
  const slicedAggsFor = ({ duration }: LookbackWindow): AggregationsAggregationContainer => ({
    date_range: {
      field: '@timestamp',
      ranges: [{ from: `now-${duration.format()}/m`, to: 'now/m' }],
    },
    aggs: {
      slices: {
        date_histogram: {
          field: '@timestamp',
          fixed_interval: toInterval(slo.objective.timeslice_window),
        },
        aggs: {
          good: { sum: { field: 'slo.numerator' } },
          total: { sum: { field: 'slo.denominator' } },
          // 1 when the slice meets the timeslice target, 0 otherwise.
          good_slice: {
            bucket_script: {
              buckets_path: { good: 'good', total: 'total' },
              script: `params.good / params.total >= ${slo.objective.timeslice_target} ? 1 : 0`,
            },
          },
          // Constant 1 per slice, so that summing it counts the slices.
          count_slice: {
            bucket_script: {
              buckets_path: {},
              script: '1',
            },
          },
        },
      },
      good: { sum_bucket: { buckets_path: 'slices>good_slice.value' } },
      total: { sum_bucket: { buckets_path: 'slices>count_slice.value' } },
    },
  });

  const aggsByWindowName: Record<string, AggregationsAggregationContainer> = Object.fromEntries(
    lookbackWindows.map((lookbackWindow): [string, AggregationsAggregationContainer] => [
      lookbackWindow.name,
      slicedAggsFor(lookbackWindow),
    ])
  );

  return aggsByWindowName;
}
/**
 * Extracts the indicator data of each lookback window from the aggregations of
 * a windowed search response.
 *
 * For every window, the first bucket of the aggregation named after the window
 * provides the `good` and `total` values and the date range (from the bucket's
 * `from_as_string` / `to_as_string`).
 *
 * @param aggregations the aggregations of the search response, keyed by window name
 * @param lookbackWindows the windows that were requested
 * @returns the indicator data keyed by window name
 * @throws InternalQueryError when the aggregations are missing, when a window
 *         has no bucket, when a `good` or `total` value is missing or null, or
 *         when a bucket lacks its range boundaries
 */
function handleWindowedResult(
  aggregations: Record<WindowName, AggregationsDateRangeAggregate> | undefined,
  lookbackWindows: LookbackWindow[]
): Record<WindowName, IndicatorData> {
  if (aggregations === undefined) {
    throw new InternalQueryError('Invalid aggregation response');
  }

  const indicatorDataPerLookbackWindow: Record<WindowName, IndicatorData> = {};
  for (const { name } of lookbackWindows) {
    const windowAggBuckets = aggregations[name]?.buckets;
    if (!Array.isArray(windowAggBuckets) || windowAggBuckets.length === 0) {
      throw new InternalQueryError('Invalid aggregation bucket response');
    }

    const bucket = windowAggBuckets[0];
    // A sub-aggregation may be absent altogether, not only carry a null value:
    // `== null` rejects both instead of letting a TypeError or `undefined` through.
    const good = (bucket.good as AggregationsSumAggregate | undefined)?.value;
    const total = (bucket.total as AggregationsSumAggregate | undefined)?.value;
    if (good == null || total == null) {
      throw new InternalQueryError('Invalid aggregation sum bucket response');
    }

    // Without both boundaries the date range would silently hold Invalid Date values.
    const { from_as_string: rangeStart, to_as_string: rangeEnd } = bucket;
    if (rangeStart == null || rangeEnd == null) {
      throw new InternalQueryError('Invalid aggregation bucket response');
    }

    indicatorDataPerLookbackWindow[name] = {
      good,
      total,
      date_range: { from: new Date(rangeStart), to: new Date(rangeEnd) },
    };
  }

  return indicatorDataPerLookbackWindow;
}
/**
 * Converts an optional duration into the interval string used as the
 * `fixed_interval` of the slices date histogram.
 *
 * @param duration the duration to convert, if any
 * @returns '1m' when no duration is given, otherwise the duration formatted
 *          with `Duration.format()`
 */
function toInterval(duration: Duration | undefined): string {
  if (duration === undefined) return '1m';

  return duration.format();
}