Adds support for event loop utilization to the core metrics service (#153717)

This commit is contained in:
Luke Elmers 2023-03-29 15:35:55 -06:00 committed by GitHub
parent 0d6ad6821e
commit 9517d067b5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
18 changed files with 308 additions and 70 deletions

View file

@ -78,6 +78,11 @@ const mockedResponse: StatusResponse = {
},
event_loop_delay: 1,
event_loop_delay_histogram: mocked.createHistogram(),
event_loop_utilization: {
active: 1,
idle: 1,
utilization: 1,
},
uptime_in_millis: 1,
},
processes: [
@ -93,6 +98,11 @@ const mockedResponse: StatusResponse = {
},
event_loop_delay: 1,
event_loop_delay_histogram: mocked.createHistogram(),
event_loop_utilization: {
active: 1,
idle: 1,
utilization: 1,
},
uptime_in_millis: 1,
},
],
@ -232,15 +242,15 @@ describe('response processing', () => {
const data = await loadStatus({ http, notifications });
const names = data.metrics.map((m) => m.name);
expect(names).toEqual([
'Heap total',
'Heap used',
'Heap used out of 976.56 KB',
'Requests per second',
'Utilization (active: 1.00 / idle: 1.00)',
'Load',
'Delay',
'Response time avg',
]);
const values = data.metrics.map((m) => m.value);
expect(values).toEqual([1000000, 100, 400, [4.1, 2.1, 0.1], 1, 4000]);
expect(values).toEqual([100, 400, 1, [4.1, 2.1, 0.1], 1, 4000]);
});
test('adds meta details to Load, Delay and Response time', async () => {

View file

@ -6,6 +6,7 @@
* Side Public License, v 1.
*/
import numeral from '@elastic/numeral';
import { i18n } from '@kbn/i18n';
import type { HttpSetup } from '@kbn/core-http-browser';
import type { NotificationsSetup } from '@kbn/core-notifications-browser';
@ -57,16 +58,10 @@ function formatMetrics({ metrics }: StatusResponse): Metric[] {
}
return [
{
name: i18n.translate('core.statusPage.metricsTiles.columns.heapTotalHeader', {
defaultMessage: 'Heap total',
}),
value: metrics.process.memory.heap.size_limit,
type: 'byte',
},
{
name: i18n.translate('core.statusPage.metricsTiles.columns.heapUsedHeader', {
defaultMessage: 'Heap used',
defaultMessage: 'Heap used out of {heapTotal}',
values: { heapTotal: numeral(metrics.process.memory.heap.size_limit).format('0.00 b') },
}),
value: metrics.process.memory.heap.used_in_bytes,
type: 'byte',
@ -78,6 +73,17 @@ function formatMetrics({ metrics }: StatusResponse): Metric[] {
value: (metrics.requests.total * 1000) / metrics.collection_interval_in_millis,
type: 'float',
},
{
name: i18n.translate('core.statusPage.metricsTiles.columns.utilizationHeader', {
defaultMessage: 'Utilization (active: {active} / idle: {idle})',
values: {
active: numeral(metrics.process.event_loop_utilization.active).format('0.00'),
idle: numeral(metrics.process.event_loop_utilization.idle).format('0.00'),
},
}),
value: metrics.process.event_loop_utilization.utilization,
type: 'float',
},
{
name: i18n.translate('core.statusPage.metricsTiles.columns.loadHeader', {
defaultMessage: 'Load',

View file

@ -0,0 +1,19 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
export const eventLoopUtilizationMock = jest.fn().mockImplementation(() => ({
active: 1,
idle: 1,
utilization: 1,
}));
jest.doMock('perf_hooks', () => ({
performance: {
eventLoopUtilization: eventLoopUtilizationMock,
},
}));

View file

@ -0,0 +1,79 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
import { eventLoopUtilizationMock } from './event_loop_utilization_monitor.test.mocks';
import { EventLoopUtilizationMonitor } from './event_loop_utilization_monitor';
describe('EventLoopUtilizationMonitor', () => {
afterEach(() => jest.clearAllMocks());
describe('#constructor', () => {
test('#constructor collects utilization', () => {
new EventLoopUtilizationMonitor();
expect(eventLoopUtilizationMock).toHaveBeenCalledTimes(1);
});
});
describe('#reset', () => {
test('collects utilization', () => {
const monitor = new EventLoopUtilizationMonitor();
monitor.reset();
expect(eventLoopUtilizationMock).toHaveBeenCalledTimes(2);
});
});
describe('#collect', () => {
test('collects utilization', () => {
const monitor = new EventLoopUtilizationMonitor();
monitor.collect();
expect(eventLoopUtilizationMock).toHaveBeenCalledTimes(2);
});
test('returns values from call to performance.eventLoopUtilization', () => {
const monitor = new EventLoopUtilizationMonitor();
expect(monitor.collect()).toMatchInlineSnapshot(`
Object {
"active": 1,
"idle": 1,
"utilization": 1,
}
`);
});
test('passes last ELU value from constructor to calculate diff', () => {
const mockInitialData = {
active: 0,
idle: 0,
utilization: 0,
};
eventLoopUtilizationMock.mockImplementationOnce(() => mockInitialData);
const monitor = new EventLoopUtilizationMonitor();
monitor.collect();
expect(eventLoopUtilizationMock).toHaveBeenCalledTimes(2);
expect(eventLoopUtilizationMock.mock.calls[1][0]).toEqual(mockInitialData);
});
test('passes last ELU value from reset to calculate diff', () => {
const monitor = new EventLoopUtilizationMonitor();
const mockInitialData = {
active: 0,
idle: 0,
utilization: 0,
};
eventLoopUtilizationMock.mockImplementationOnce(() => mockInitialData);
monitor.reset();
monitor.collect();
expect(eventLoopUtilizationMock).toHaveBeenCalledTimes(3);
expect(eventLoopUtilizationMock.mock.calls[2][0]).toEqual(mockInitialData);
});
});
});

View file

@ -0,0 +1,43 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
import type { EventLoopUtilization } from 'perf_hooks';
import { performance } from 'perf_hooks';
export class EventLoopUtilizationMonitor {
private elu: EventLoopUtilization;
/**
* Creating a new instance of EventLoopUtilizationMonitor will capture the
* current ELU to use as a point of comparison against the first call to
* `collect`.
*/
constructor() {
this.elu = performance.eventLoopUtilization();
}
/**
* Get ELU between now and last time the ELU was reset.
*/
public collect(): EventLoopUtilization {
const { active, idle, utilization } = performance.eventLoopUtilization(this.elu);
return {
active,
idle,
utilization,
};
}
/**
* Resets the ELU to now. Will be used to calculate the diff on the next call to `collect`.
*/
public reset() {
this.elu = performance.eventLoopUtilization();
}
}

View file

@ -40,6 +40,11 @@ function createMockOpsProcessMetrics(): OpsProcessMetrics {
},
event_loop_delay: 1,
event_loop_delay_histogram: histogram,
event_loop_utilization: {
active: 1,
idle: 1,
utilization: 1,
},
pid: 1,
uptime_in_millis: 1,
};

View file

@ -0,0 +1,19 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
import { collectorMock } from './mocks_internal';
export const mockEventLoopDelayMonitor = collectorMock.create();
jest.doMock('./event_loop_delays_monitor', () => ({
EventLoopDelaysMonitor: jest.fn().mockImplementation(() => mockEventLoopDelayMonitor),
}));
export const mockEventLoopUtilizationMonitor = collectorMock.create();
jest.doMock('./event_loop_utilization_monitor', () => ({
EventLoopUtilizationMonitor: jest.fn().mockImplementation(() => mockEventLoopUtilizationMonitor),
}));

View file

@ -7,9 +7,9 @@
*/
import v8, { HeapInfo } from 'v8';
import { mockEventLoopDelayMonitor, mockEventLoopUtilizationMonitor } from './process.test.mocks';
import { ProcessMetricsCollector } from './process';
/* eslint-disable dot-notation */
describe('ProcessMetricsCollector', () => {
let collector: ProcessMetricsCollector;
@ -18,62 +18,83 @@ describe('ProcessMetricsCollector', () => {
});
afterEach(() => {
jest.restoreAllMocks();
jest.clearAllMocks();
});
it('collects pid from the process', () => {
const metrics = collector.collect();
describe('#collect', () => {
it('collects pid from the process', () => {
const metrics = collector.collect();
expect(metrics).toHaveLength(1);
expect(metrics[0].pid).toEqual(process.pid);
expect(metrics).toHaveLength(1);
expect(metrics[0].pid).toEqual(process.pid);
});
it('collects event loop delay', () => {
mockEventLoopDelayMonitor.collect.mockReturnValueOnce({ mean: 13 });
const metrics = collector.collect();
expect(metrics).toHaveLength(1);
expect(metrics[0].event_loop_delay).toBe(13);
expect(mockEventLoopDelayMonitor.collect).toBeCalledTimes(1);
});
it('collects event loop utilization', () => {
const mockData = { active: 1, idle: 1, utilization: 1 };
mockEventLoopUtilizationMonitor.collect.mockReturnValueOnce(mockData);
const metrics = collector.collect();
expect(metrics).toHaveLength(1);
expect(metrics[0].event_loop_utilization).toEqual(mockData);
expect(mockEventLoopUtilizationMonitor.collect).toBeCalledTimes(1);
});
it('collects uptime info from the process', () => {
const uptime = 58986;
jest.spyOn(process, 'uptime').mockImplementation(() => uptime);
const metrics = collector.collect();
expect(metrics).toHaveLength(1);
expect(metrics[0].uptime_in_millis).toEqual(uptime * 1000);
});
it('collects memory info from the process', () => {
const heapTotal = 58986;
const heapUsed = 4688;
const heapSizeLimit = 5788;
const rss = 5865;
jest.spyOn(process, 'memoryUsage').mockImplementation(() => ({
rss,
heapTotal,
heapUsed,
external: 0,
arrayBuffers: 0,
}));
jest.spyOn(v8, 'getHeapStatistics').mockImplementation(
() =>
({
heap_size_limit: heapSizeLimit,
} as HeapInfo)
);
const metrics = collector.collect();
expect(metrics).toHaveLength(1);
expect(metrics[0].memory.heap.total_in_bytes).toEqual(heapTotal);
expect(metrics[0].memory.heap.used_in_bytes).toEqual(heapUsed);
expect(metrics[0].memory.heap.size_limit).toEqual(heapSizeLimit);
expect(metrics[0].memory.resident_set_size_in_bytes).toEqual(rss);
});
});
it('collects event loop delay', () => {
const mockEventLoopDelayMonitor = { collect: jest.fn().mockReturnValue({ mean: 13 }) };
// @ts-expect-error-next-line readonly private method.
collector['eventLoopDelayMonitor'] = mockEventLoopDelayMonitor;
const metrics = collector.collect();
expect(metrics).toHaveLength(1);
expect(metrics[0].event_loop_delay).toBe(13);
expect(mockEventLoopDelayMonitor.collect).toBeCalledTimes(1);
});
describe('#reset', () => {
it('resets event loop delay', () => {
collector.reset();
expect(mockEventLoopDelayMonitor.reset).toBeCalledTimes(1);
});
it('collects uptime info from the process', () => {
const uptime = 58986;
jest.spyOn(process, 'uptime').mockImplementation(() => uptime);
const metrics = collector.collect();
expect(metrics).toHaveLength(1);
expect(metrics[0].uptime_in_millis).toEqual(uptime * 1000);
});
it('collects memory info from the process', () => {
const heapTotal = 58986;
const heapUsed = 4688;
const heapSizeLimit = 5788;
const rss = 5865;
jest.spyOn(process, 'memoryUsage').mockImplementation(() => ({
rss,
heapTotal,
heapUsed,
external: 0,
arrayBuffers: 0,
}));
jest.spyOn(v8, 'getHeapStatistics').mockImplementation(
() =>
({
heap_size_limit: heapSizeLimit,
} as HeapInfo)
);
const metrics = collector.collect();
expect(metrics).toHaveLength(1);
expect(metrics[0].memory.heap.total_in_bytes).toEqual(heapTotal);
expect(metrics[0].memory.heap.used_in_bytes).toEqual(heapUsed);
expect(metrics[0].memory.heap.size_limit).toEqual(heapSizeLimit);
expect(metrics[0].memory.resident_set_size_in_bytes).toEqual(rss);
it('resets event loop utilization', () => {
collector.reset();
expect(mockEventLoopUtilizationMonitor.reset).toBeCalledTimes(1);
});
});
});

View file

@ -9,6 +9,7 @@
import v8 from 'v8';
import type { OpsProcessMetrics, MetricsCollector } from '@kbn/core-metrics-server';
import { EventLoopDelaysMonitor } from './event_loop_delays_monitor';
import { EventLoopUtilizationMonitor } from './event_loop_utilization_monitor';
export class ProcessMetricsCollector implements MetricsCollector<OpsProcessMetrics[]> {
static getMainThreadMetrics(processes: OpsProcessMetrics[]): undefined | OpsProcessMetrics {
@ -21,9 +22,11 @@ export class ProcessMetricsCollector implements MetricsCollector<OpsProcessMetri
}
private readonly eventLoopDelayMonitor = new EventLoopDelaysMonitor();
private readonly eventLoopUtilizationMonitor = new EventLoopUtilizationMonitor();
private getCurrentPidMetrics(): OpsProcessMetrics {
const eventLoopDelayHistogram = this.eventLoopDelayMonitor.collect();
const eventLoopUtilization = this.eventLoopUtilizationMonitor.collect();
const heapStats = v8.getHeapStatistics();
const memoryUsage = process.memoryUsage();
@ -39,6 +42,7 @@ export class ProcessMetricsCollector implements MetricsCollector<OpsProcessMetri
pid: process.pid,
event_loop_delay: eventLoopDelayHistogram.mean,
event_loop_delay_histogram: eventLoopDelayHistogram,
event_loop_utilization: eventLoopUtilization,
uptime_in_millis: process.uptime() * 1000,
};
}
@ -49,5 +53,6 @@ export class ProcessMetricsCollector implements MetricsCollector<OpsProcessMetri
public reset() {
this.eventLoopDelayMonitor.reset();
this.eventLoopUtilizationMonitor.reset();
}
}

View file

@ -18,6 +18,11 @@ export function createMockOpsProcessMetrics(): OpsProcessMetrics {
},
event_loop_delay: 1,
event_loop_delay_histogram: histogram,
event_loop_utilization: {
active: 1,
idle: 1,
utilization: 1,
},
pid: 1,
uptime_in_millis: 1,
};

View file

@ -51,6 +51,11 @@ const testMetrics = {
uptime_in_millis: 1500,
event_loop_delay: 50,
event_loop_delay_histogram: { percentiles: { '50': 50, '75': 75, '95': 95, '99': 99 } },
event_loop_utilization: {
active: 629.1224170000005,
idle: 359.23554199999995,
utilization: 0.6365329598160299,
},
},
os: {
load: {
@ -65,7 +70,7 @@ describe('getEcsOpsMetricsLog', () => {
it('provides correctly formatted message', () => {
const result = getEcsOpsMetricsLog(createMockOpsMetrics(testMetrics));
expect(result.message).toMatchInlineSnapshot(
`"memory: 100.0B uptime: 0:00:01 load: [10.00,20.00,30.00] mean delay: 50.000 delay histogram: { 50: 50.000; 95: 95.000; 99: 99.000 }"`
`"memory: 100.0B uptime: 0:00:01 load: [10.00,20.00,30.00] mean delay: 50.000 delay histogram: { 50: 50.000; 95: 95.000; 99: 99.000 } utilization: 0.63653"`
);
});
@ -116,6 +121,11 @@ describe('getEcsOpsMetricsLog', () => {
"95": 95,
"99": 99,
},
"eventLoopUtilization": Object {
"active": 629.1224170000005,
"idle": 359.23554199999995,
"utilization": 0.6365329598160299,
},
"memory": Object {
"heap": Object {
"usedInBytes": 100,

View file

@ -55,6 +55,11 @@ export function getEcsOpsMetricsLog(metrics: OpsMetrics) {
).format('0.000')} }`
: '';
const eventLoopUtilizationVal = process?.event_loop_utilization;
const eventLoopUtilizationMsg = eventLoopUtilizationVal
? ` utilization: ${numeral(process?.event_loop_utilization.utilization).format('0.00000')}`
: '';
const loadEntries = {
'1m': os?.load ? os?.load['1m'] : undefined,
'5m': os?.load ? os?.load['5m'] : undefined,
@ -85,6 +90,7 @@ export function getEcsOpsMetricsLog(metrics: OpsMetrics) {
},
eventLoopDelay: eventLoopDelayVal,
eventLoopDelayHistogram: eventLoopDelayHistVals,
eventLoopUtilization: eventLoopUtilizationVal,
},
host: {
os: {
@ -101,6 +107,7 @@ export function getEcsOpsMetricsLog(metrics: OpsMetrics) {
loadValsMsg,
eventLoopDelayValMsg,
eventLoopDelayHistMsg,
eventLoopUtilizationMsg,
].join(''),
meta,
};

View file

@ -214,6 +214,7 @@ describe('MetricsService', () => {
"process": Object {
"eventLoopDelay": undefined,
"eventLoopDelayHistogram": undefined,
"eventLoopUtilization": undefined,
"memory": Object {
"heap": Object {
"usedInBytes": undefined,

View file

@ -6,6 +6,8 @@
* Side Public License, v 1.
*/
import type { EventLoopUtilization } from 'perf_hooks';
/**
* an IntervalHistogram object that samples and reports the event loop delay over time.
* The delays will be reported in milliseconds.
@ -85,6 +87,8 @@ export interface OpsProcessMetrics {
event_loop_delay: number;
/** node event loop delay histogram since last collection */
event_loop_delay_histogram: IntervalHistogram;
/** node event loop utilization since last collection */
event_loop_utilization: EventLoopUtilization;
/** uptime of the kibana process */
uptime_in_millis: number;
}

View file

@ -26,6 +26,11 @@ Object {
"process": Object {
"event_loop_delay": 1,
"event_loop_delay_histogram": Any<Object>,
"event_loop_utilization": Object {
"active": 1,
"idle": 1,
"utilization": 1,
},
"memory": Object {
"heap": Object {
"size_limit": 1,
@ -40,6 +45,11 @@ Object {
Object {
"event_loop_delay": 1,
"event_loop_delay_histogram": Any<Object>,
"event_loop_utilization": Object {
"active": 1,
"idle": 1,
"utilization": 1,
},
"memory": Object {
"heap": Object {
"size_limit": 1,

View file

@ -930,8 +930,6 @@
"core.status.yellowTitle": "Jaune",
"core.statusPage.coreStatus.sectionTitle": "Statut principal",
"core.statusPage.loadStatus.serverIsDownErrorMessage": "Échec de requête du statut du serveur. Votre serveur est peut-être indisponible ?",
"core.statusPage.metricsTiles.columns.heapTotalHeader": "Tas total",
"core.statusPage.metricsTiles.columns.heapUsedHeader": "Tas utilisé",
"core.statusPage.metricsTiles.columns.load.metaHeader": "Intervalle de charge",
"core.statusPage.metricsTiles.columns.loadHeader": "Charge",
"core.statusPage.metricsTiles.columns.processDelayDetailsHeader": "Centiles",

View file

@ -930,8 +930,6 @@
"core.status.yellowTitle": "黄",
"core.statusPage.coreStatus.sectionTitle": "コアステータス",
"core.statusPage.loadStatus.serverIsDownErrorMessage": "サーバーステータスのリクエストに失敗しました。サーバーがダウンしている可能性があります。",
"core.statusPage.metricsTiles.columns.heapTotalHeader": "ヒープ合計",
"core.statusPage.metricsTiles.columns.heapUsedHeader": "使用ヒープ",
"core.statusPage.metricsTiles.columns.load.metaHeader": "読み込み間隔",
"core.statusPage.metricsTiles.columns.loadHeader": "読み込み",
"core.statusPage.metricsTiles.columns.processDelayDetailsHeader": "パーセンタイル",

View file

@ -930,8 +930,6 @@
"core.status.yellowTitle": "黄色",
"core.statusPage.coreStatus.sectionTitle": "核心状态",
"core.statusPage.loadStatus.serverIsDownErrorMessage": "无法请求服务器状态。也许您的服务器已关闭?",
"core.statusPage.metricsTiles.columns.heapTotalHeader": "堆总数",
"core.statusPage.metricsTiles.columns.heapUsedHeader": "已使用堆数",
"core.statusPage.metricsTiles.columns.load.metaHeader": "加载时间间隔",
"core.statusPage.metricsTiles.columns.loadHeader": "加载",
"core.statusPage.metricsTiles.columns.processDelayDetailsHeader": "百分位数",