[8.11] Core metrics collection - collect more memory related metrics (#172146) (#172271)

# Backport

This will backport the following commits from `main` to `8.11`:
- [Core metrics collection - collect more memory related metrics
(#172146)](https://github.com/elastic/kibana/pull/172146)

<!--- Backport version: 8.9.7 -->

### Questions?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Pierre
Gayvallet","email":"pierre.gayvallet@elastic.co"},"sourceCommit":{"committedDate":"2023-11-30T10:47:46Z","message":"Core
metrics collection - collect more memory related metrics (#172146)\n\n##
Summary\r\n\r\nPart of
https://github.com/elastic/kibana/issues/171060\r\n\r\nAdd additional
memory metrics for collection:\r\n\r\n**process**:\r\n-
`process.memory.array_buffers_in_bytes`
the\r\n`process.memoryUsage().arrayBuffers` value\r\n-
`process.memory.external_in_bytes`
the\r\n`process.memoryUsage().external` value\r\n\r\n**cgroup**: (v2
only)\r\n- `os.cgroupMemory.current_in_bytes` value
from\r\n`/sys/fs/cgroup/{group}/memory.current`\r\n-
`os.cgroupMemory.swap_current_in_bytes` value
from\r\n`/sys/fs/cgroup/{group}/memory.swap.current`","sha":"b323fc90a81dd404686c1858a3c525ae1cc922ae","branchLabelMapping":{"^v8.12.0$":"main","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["Team:Core","release_note:skip","Feature:Stack
Monitoring","backport:prev-minor","v8.12.0"],"number":172146,"url":"https://github.com/elastic/kibana/pull/172146","mergeCommit":{"message":"Core
metrics collection - collect more memory related metrics (#172146)\n\n##
Summary\r\n\r\nPart of
https://github.com/elastic/kibana/issues/171060\r\n\r\nAdd additional
memory metrics for collection:\r\n\r\n**process**:\r\n-
`process.memory.array_buffers_in_bytes`
the\r\n`process.memoryUsage().arrayBuffers` value\r\n-
`process.memory.external_in_bytes`
the\r\n`process.memoryUsage().external` value\r\n\r\n**cgroup**: (v2
only)\r\n- `os.cgroupMemory.current_in_bytes` value
from\r\n`/sys/fs/cgroup/{group}/memory.current`\r\n-
`os.cgroupMemory.swap_current_in_bytes` value
from\r\n`/sys/fs/cgroup/{group}/memory.swap.current`","sha":"b323fc90a81dd404686c1858a3c525ae1cc922ae"}},"sourceBranch":"main","suggestedTargetBranches":[],"targetPullRequestStates":[{"branch":"main","label":"v8.12.0","labelRegex":"^v8.12.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/172146","number":172146,"mergeCommit":{"message":"Core
metrics collection - collect more memory related metrics (#172146)\n\n##
Summary\r\n\r\nPart of
https://github.com/elastic/kibana/issues/171060\r\n\r\nAdd additional
memory metrics for collection:\r\n\r\n**process**:\r\n-
`process.memory.array_buffers_in_bytes`
the\r\n`process.memoryUsage().arrayBuffers` value\r\n-
`process.memory.external_in_bytes`
the\r\n`process.memoryUsage().external` value\r\n\r\n**cgroup**: (v2
only)\r\n- `os.cgroupMemory.current_in_bytes` value
from\r\n`/sys/fs/cgroup/{group}/memory.current`\r\n-
`os.cgroupMemory.swap_current_in_bytes` value
from\r\n`/sys/fs/cgroup/{group}/memory.swap.current`","sha":"b323fc90a81dd404686c1858a3c525ae1cc922ae"}}]}]
BACKPORT-->

Co-authored-by: Pierre Gayvallet <pierre.gayvallet@elastic.co>
This commit is contained in:
Kibana Machine 2023-11-30 07:20:06 -05:00 committed by GitHub
parent 0e1d6924d8
commit 4131708383
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
19 changed files with 176 additions and 47 deletions

View file

@ -76,6 +76,8 @@ const mockedResponse: StatusResponse = {
total_in_bytes: 0,
},
resident_set_size_in_bytes: 1,
array_buffers_in_bytes: 1,
external_in_bytes: 1,
},
event_loop_delay: 1,
event_loop_delay_histogram: mocked.createHistogram(),
@ -96,6 +98,8 @@ const mockedResponse: StatusResponse = {
total_in_bytes: 0,
},
resident_set_size_in_bytes: 1,
array_buffers_in_bytes: 1,
external_in_bytes: 1,
},
event_loop_delay: 1,
event_loop_delay_histogram: mocked.createHistogram(),

View file

@ -93,10 +93,7 @@ describe('OsCgroupMetricsCollector', () => {
await collector.collect();
expect(gatherV2CgroupMetrics).toHaveBeenCalledTimes(1);
expect(gatherV2CgroupMetrics).toHaveBeenCalledWith({
cpuAcctPath: '/groupname',
cpuPath: '/groupname',
});
expect(gatherV2CgroupMetrics).toHaveBeenCalledWith('/groupname');
expect(gatherV1CgroupMetrics).toHaveBeenCalledTimes(0);
});

View file

@ -42,9 +42,13 @@ export class OsCgroupMetricsCollector implements MetricsCollector<OsCgroupMetric
return {};
}
const args = { cpuAcctPath: this.cpuAcctPath!, cpuPath: this.cpuPath! };
// "await" to handle any errors here.
return await (this.isCgroup2 ? gatherV2CgroupMetrics(args) : gatherV1CgroupMetrics(args));
return await (this.isCgroup2
? gatherV2CgroupMetrics(this.cpuAcctPath!)
: gatherV1CgroupMetrics({
cpuAcctPath: this.cpuAcctPath!,
cpuPath: this.cpuPath!,
}));
} catch (err) {
this.noCgroupPresent = true;
@ -67,10 +71,15 @@ export class OsCgroupMetricsCollector implements MetricsCollector<OsCgroupMetric
private async initializePaths(): Promise<void> {
if (this.hasPaths()) return;
const { data: cgroups, v2 } = await gatherInfo();
this.isCgroup2 = v2;
this.cpuPath = this.options.cpuPath || cgroups[GROUP_CPU];
this.cpuAcctPath = this.options.cpuAcctPath || cgroups[GROUP_CPUACCT];
const result = await gatherInfo();
this.isCgroup2 = result.v2;
if (result.v2) {
this.cpuPath = result.path;
this.cpuAcctPath = result.path;
} else {
this.cpuPath = this.options.cpuPath || result.data[GROUP_CPU];
this.cpuAcctPath = this.options.cpuAcctPath || result.data[GROUP_CPUACCT];
}
// prevents undefined cgroup paths
this.noCgroupPresent = Boolean(!this.cpuPath || !this.cpuAcctPath);

View file

@ -17,11 +17,14 @@ describe('gatherInfo', () => {
'/proc/self/cgroup': `0:controller:/path
1:controller2,controller3:/otherpath`,
});
const { data } = await gatherInfo();
expect(data).toEqual({
controller: '/path',
controller2: '/otherpath',
controller3: '/otherpath',
const result = await gatherInfo();
expect(result).toEqual({
v2: false,
data: {
controller: '/path',
controller2: '/otherpath',
controller3: '/otherpath',
},
});
});
@ -30,7 +33,7 @@ describe('gatherInfo', () => {
'/proc/self/cgroup': `0:controller:/path
1:controller2,controller3:/otherpath`,
});
await expect(gatherInfo()).resolves.toMatchObject({ v2: false });
expect(await gatherInfo()).toMatchObject({ v2: false });
mockFs({
'/proc/self/cgroup': `
@ -38,7 +41,7 @@ describe('gatherInfo', () => {
`,
});
await expect(gatherInfo()).resolves.toMatchObject({ v2: true });
expect(await gatherInfo()).toMatchObject({ v2: true });
});
test('missing cgroup file', async () => {

View file

@ -7,8 +7,6 @@
*/
import fs from 'fs/promises';
import { GROUP_CPU, GROUP_CPUACCT } from './constants';
const CONTROL_GROUP_RE = new RegExp('\\d+:([^:]+):(/.*)');
const CONTROLLER_SEPARATOR_RE = ',';
const PROC_SELF_CGROUP_FILE = '/proc/self/cgroup';
@ -27,10 +25,15 @@ async function readProcSelf(): Promise<string[]> {
return data.split(/\n/).filter((line) => line.trim().length > 0);
}
interface Result {
data: Record<string, string>;
v2: boolean;
}
/**
 * Outcome of inspecting the process' cgroup membership file, discriminated on `v2`.
 * Callers must check `v2` before reading `path` or `data`, since each variant
 * carries only one of the two.
 */
type Result =
| {
// cgroup v2 variant: a single group path.
v2: true;
path: string;
}
| {
// cgroup v1 variant.
// NOTE(review): `data` presumably maps each controller name to its group path — confirm against the v1 parsing code.
v2: false;
data: Record<string, string>;
};
export async function gatherInfo(): Promise<Result> {
const lines = await readProcSelf();
@ -39,11 +42,8 @@ export async function gatherInfo(): Promise<Result> {
// eslint-disable-next-line prettier/prettier
const [/* '0' */, /* '' */, path] = lines[0].trim().split(':');
return {
data: {
[GROUP_CPU]: path,
[GROUP_CPUACCT]: path,
},
v2: true,
path,
};
}

View file

@ -8,4 +8,4 @@
import type { OpsOsMetrics } from '@kbn/core-metrics-server';
export type OsCgroupMetrics = Pick<OpsOsMetrics, 'cpu' | 'cpuacct'>;
export type OsCgroupMetrics = Pick<OpsOsMetrics, 'cpu' | 'cpuacct' | 'cgroup_memory'>;

View file

@ -22,10 +22,17 @@ system_usec 125968
nr_periods 123
nr_throttled 1
throttled_usec 123123`,
'/sys/fs/cgroup/memory.current': '9000',
'/sys/fs/cgroup/memory.swap.current': '42',
});
expect(await gatherV2CgroupMetrics({ cpuAcctPath: '/', cpuPath: '/' })).toMatchInlineSnapshot(`
const metrics = await gatherV2CgroupMetrics('/');
expect(metrics).toMatchInlineSnapshot(`
Object {
"cgroup_memory": Object {
"current_in_bytes": 9000,
"swap_current_in_bytes": 42,
},
"cpu": Object {
"cfs_period_micros": 100000,
"cfs_quota_micros": -1,
@ -54,11 +61,17 @@ system_usec 125968
nr_periods 123
nr_throttled 1
throttled_usec 123123`,
'/sys/fs/cgroup/mypath/memory.current': '9876',
'/sys/fs/cgroup/mypath/memory.swap.current': '132645',
});
expect(await gatherV2CgroupMetrics({ cpuAcctPath: '/mypath', cpuPath: '/mypath' }))
.toMatchInlineSnapshot(`
const metrics = await gatherV2CgroupMetrics('/mypath');
expect(metrics).toMatchInlineSnapshot(`
Object {
"cgroup_memory": Object {
"current_in_bytes": 9876,
"swap_current_in_bytes": 132645,
},
"cpu": Object {
"cfs_period_micros": 100000,
"cfs_quota_micros": 111,

View file

@ -13,37 +13,60 @@ import type { OsCgroupMetrics } from './types';
const PROC_CGROUP2_DIR = '/sys/fs/cgroup';
const CPU_STATS_FILE = 'cpu.stat';
const CPU_MAX_FILE = 'cpu.max';
const MEMORY_CURRENT_FILE = 'memory.current';
const MEMORY_SWAP_CURRENT_FILE = 'memory.swap.current';
interface Arg {
cpuPath: string;
cpuAcctPath: string;
}
/**
 * Builds the path of a cgroup v2 control file.
 *
 * @param group - cgroup path, joined below `PROC_CGROUP2_DIR`
 * @param fileName - name of the control file inside that group
 * @returns the joined path `PROC_CGROUP2_DIR/<group>/<fileName>`
 */
const getCGroupFilePath = (group: string, fileName: string): string =>
  joinPath(PROC_CGROUP2_DIR, group, fileName);
export async function gatherV2CgroupMetrics(arg: Arg): Promise<OsCgroupMetrics> {
const [{ usage_nanos: usageNanos, ...stat }, cpuMax] = await Promise.all([
readCPUStat(arg.cpuPath),
readCPUMax(arg.cpuPath),
]);
export async function gatherV2CgroupMetrics(group: string): Promise<OsCgroupMetrics> {
const [{ usage_nanos: usageNanos, ...stat }, cpuMax, memoryCurrent, swapCurrent] =
await Promise.all([
readCPUStat(group),
readCPUMax(group),
readMemoryCurrent(group),
readSwapCurrent(group),
]);
return {
cpu: {
...cpuMax,
control_group: arg.cpuPath,
control_group: group,
stat,
},
cpuacct: {
control_group: arg.cpuPath,
control_group: group,
usage_nanos: usageNanos,
},
cgroup_memory: {
current_in_bytes: memoryCurrent,
swap_current_in_bytes: swapCurrent,
},
};
}
/** Shape of the value resolved by `readCPUMax`. */
interface CPUMax {
// NOTE(review): presumably the period read from the group's `cpu.max` file, in microseconds — confirm in readCPUMax.
cfs_period_micros: number;
// NOTE(review): presumably the quota read from the group's `cpu.max` file, in microseconds — confirm in readCPUMax.
cfs_quota_micros: number;
}
/**
 * Reads the `memory.current` control file of the given cgroup v2 group.
 *
 * @param group - cgroup path, resolved through `getCGroupFilePath`
 * @returns the file content parsed as a base-10 integer (`NaN` if the content
 *          does not start with a number)
 * @throws rejects with the underlying `fs.readFile` error when the file cannot be read
 */
async function readMemoryCurrent(group: string): Promise<number> {
  const filePath = getCGroupFilePath(group, MEMORY_CURRENT_FILE);
  const contents = await fs.readFile(filePath);
  const rawValue = contents.toString().trim();
  return parseInt(rawValue, 10);
}
/**
 * Reads the `memory.swap.current` control file of the given cgroup v2 group.
 *
 * @param group - cgroup path, resolved through `getCGroupFilePath`
 * @returns the file content parsed as a base-10 integer (`NaN` if the content
 *          does not start with a number)
 * @throws rejects with the underlying `fs.readFile` error when the file cannot be read
 */
async function readSwapCurrent(group: string): Promise<number> {
  const filePath = getCGroupFilePath(group, MEMORY_SWAP_CURRENT_FILE);
  const contents = await fs.readFile(filePath);
  const rawSwapCurrent = contents.toString().trim();
  return parseInt(rawSwapCurrent, 10);
}
async function readCPUMax(group: string): Promise<CPUMax> {
const [quota, period] = (await fs.readFile(joinPath(PROC_CGROUP2_DIR, group, CPU_MAX_FILE)))
const [quota, period] = (await fs.readFile(getCGroupFilePath(group, CPU_MAX_FILE)))
.toString()
.trim()
.split(/\s+/);
@ -62,7 +85,7 @@ async function readCPUStat(group: string): Promise<CPUStat> {
time_throttled_nanos: -1,
usage_nanos: -1,
};
return (await fs.readFile(joinPath(PROC_CGROUP2_DIR, group, CPU_STATS_FILE)))
return (await fs.readFile(getCGroupFilePath(group, CPU_STATS_FILE)))
.toString()
.split(/\n/)
.reduce((acc, line) => {

View file

@ -37,6 +37,8 @@ function createMockOpsProcessMetrics(): OpsProcessMetrics {
memory: {
heap: { total_in_bytes: 1, used_in_bytes: 1, size_limit: 1 },
resident_set_size_in_bytes: 1,
array_buffers_in_bytes: 1,
external_in_bytes: 1,
},
event_loop_delay: 1,
event_loop_delay_histogram: histogram,

View file

@ -61,12 +61,15 @@ describe('ProcessMetricsCollector', () => {
const heapUsed = 4688;
const heapSizeLimit = 5788;
const rss = 5865;
const external = 9001;
const arrayBuffers = 42;
jest.spyOn(process, 'memoryUsage').mockImplementation(() => ({
rss,
heapTotal,
heapUsed,
external: 0,
arrayBuffers: 0,
external,
arrayBuffers,
}));
jest.spyOn(v8, 'getHeapStatistics').mockImplementation(
@ -83,6 +86,8 @@ describe('ProcessMetricsCollector', () => {
expect(metrics[0].memory.heap.used_in_bytes).toEqual(heapUsed);
expect(metrics[0].memory.heap.size_limit).toEqual(heapSizeLimit);
expect(metrics[0].memory.resident_set_size_in_bytes).toEqual(rss);
expect(metrics[0].memory.external_in_bytes).toEqual(external);
expect(metrics[0].memory.array_buffers_in_bytes).toEqual(arrayBuffers);
});
});

View file

@ -38,6 +38,8 @@ export class ProcessMetricsCollector implements MetricsCollector<OpsProcessMetri
size_limit: heapStats.heap_size_limit,
},
resident_set_size_in_bytes: memoryUsage.rss,
array_buffers_in_bytes: memoryUsage.arrayBuffers,
external_in_bytes: memoryUsage.external,
},
pid: process.pid,
event_loop_delay: eventLoopDelayHistogram.mean,

View file

@ -15,6 +15,8 @@ export function createMockOpsProcessMetrics(): OpsProcessMetrics {
memory: {
heap: { total_in_bytes: 1, used_in_bytes: 1, size_limit: 1 },
resident_set_size_in_bytes: 1,
external_in_bytes: 1,
array_buffers_in_bytes: 1,
},
event_loop_delay: 1,
event_loop_delay_histogram: histogram,

View file

@ -45,9 +45,15 @@ function createMockOpsMetrics(testMetrics: Partial<OpsMetrics>): OpsMetrics {
...testMetrics,
};
}
const testMetrics = {
process: {
memory: { heap: { used_in_bytes: 100 } },
memory: {
heap: { used_in_bytes: 100, total_in_bytes: 200, size_limit: 300 },
resident_set_size_in_bytes: 400,
external_in_bytes: 500,
array_buffers_in_bytes: 600,
},
uptime_in_millis: 1500,
event_loop_delay: 50,
event_loop_delay_histogram: { percentiles: { '50': 50, '75': 75, '95': 95, '99': 99 } },
@ -127,9 +133,14 @@ describe('getEcsOpsMetricsLog', () => {
"utilization": 0.6365329598160299,
},
"memory": Object {
"arrayBuffersInBytes": 600,
"externalInBytes": 500,
"heap": Object {
"sizeLimit": 300,
"totalInBytes": 200,
"usedInBytes": 100,
},
"residentSetSizeInBytes": 400,
},
"uptime": 1,
},

View file

@ -86,7 +86,12 @@ export function getEcsOpsMetricsLog(metrics: OpsMetrics) {
memory: {
heap: {
usedInBytes: processMemoryUsedInBytes,
totalInBytes: process?.memory?.heap.total_in_bytes,
sizeLimit: process?.memory?.heap.size_limit,
},
residentSetSizeInBytes: process?.memory?.resident_set_size_in_bytes,
externalInBytes: process?.memory?.external_in_bytes,
arrayBuffersInBytes: process?.memory?.array_buffers_in_bytes,
},
eventLoopDelay: eventLoopDelayVal,
eventLoopDelayHistogram: eventLoopDelayHistVals,

View file

@ -216,9 +216,14 @@ describe('MetricsService', () => {
"eventLoopDelayHistogram": undefined,
"eventLoopUtilization": undefined,
"memory": Object {
"arrayBuffersInBytes": undefined,
"externalInBytes": undefined,
"heap": Object {
"sizeLimit": undefined,
"totalInBytes": undefined,
"usedInBytes": undefined,
},
"residentSetSizeInBytes": undefined,
},
"uptime": undefined,
},

View file

@ -82,6 +82,10 @@ export interface OpsProcessMetrics {
};
/** node rss */
resident_set_size_in_bytes: number;
/** memory usage of C++ objects bound to JavaScript objects managed by V8 */
external_in_bytes: number;
/** memory allocated for array buffers. This is also included in the external value*/
array_buffers_in_bytes: number;
};
/** mean event loop delay since last collection*/
event_loop_delay: number;
@ -153,6 +157,14 @@ export interface OpsOsMetrics {
time_throttled_nanos: number;
};
};
/** memory cgroup metrics, undefined when not running in cgroup v2 */
cgroup_memory?: {
/** The total amount of memory currently being used by the cgroup and its descendants. */
current_in_bytes: number;
/** The total amount of swap currently being used by the cgroup and its descendants. */
swap_current_in_bytes: number;
};
}
/**

View file

@ -32,6 +32,8 @@ Object {
"utilization": 1,
},
"memory": Object {
"array_buffers_in_bytes": 1,
"external_in_bytes": 1,
"heap": Object {
"size_limit": 1,
"total_in_bytes": 1,
@ -51,6 +53,8 @@ Object {
"utilization": 1,
},
"memory": Object {
"array_buffers_in_bytes": 1,
"external_in_bytes": 1,
"heap": Object {
"size_limit": 1,
"total_in_bytes": 1,

View file

@ -89,6 +89,10 @@ export interface OpsProcessMetrics {
};
/** node rss */
resident_set_size_in_bytes: number;
/** memory usage of C++ objects bound to JavaScript objects managed by V8 */
external_in_bytes: number;
/** memory allocated for array buffers. This is also included in the external value*/
array_buffers_in_bytes: number;
};
/** mean event loop delay since last collection*/
event_loop_delay: number;
@ -159,6 +163,14 @@ export interface OpsOsMetrics {
time_throttled_nanos: number;
};
};
/** memory cgroup metrics, undefined when not running in cgroup v2 */
cgroup_memory?: {
/** The total amount of memory currently being used by the cgroup and its descendants. */
current_in_bytes: number;
/** The total amount of swap currently being used by the cgroup and its descendants. */
swap_current_in_bytes: number;
};
}
/**

View file

@ -237,6 +237,16 @@ describe('CollectorSet', () => {
},
uptime_in_millis: 137844000,
},
process: {
heap: {
total_in_bytes: 1,
used_in_bytes: 2,
size_limit: 3,
},
resident_set_size_in_bytes: 4,
array_buffers_in_bytes: 5,
external_in_bytes: 6,
},
daysOfTheWeek: ['monday', 'tuesday', 'wednesday'],
};
@ -247,6 +257,16 @@ describe('CollectorSet', () => {
memory: { free_bytes: 458280960, total_bytes: 17179869184, used_bytes: 16721588224 },
uptime_ms: 137844000,
},
process: {
heap: {
total_bytes: 1,
used_bytes: 2,
size_limit: 3,
},
resident_set_size_bytes: 4,
array_buffers_bytes: 5,
external_bytes: 6,
},
days_of_the_week: ['monday', 'tuesday', 'wednesday'],
});
});