mirror of
https://github.com/elastic/kibana.git
synced 2025-04-23 17:28:26 -04:00
Support cgroup v2 in core metric collection (#147082)
## Summary * Add support for reading cgroup metrics from the new v2 unified hierarchy (https://www.kernel.org/doc/Documentation/cgroup-v2.txt) * Refactor files for improved readability * Closes https://github.com/elastic/kibana/issues/140874 ### Checklist - [x] Manually tested with Docker images built on a newer kernel - [x] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios
This commit is contained in:
parent
58c11ed59c
commit
229ec79675
13 changed files with 669 additions and 319 deletions
|
@ -1,127 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
|
||||
import mockFs from 'mock-fs';
|
||||
import { loggerMock } from '@kbn/logging-mocks';
|
||||
import { OsCgroupMetricsCollector } from './cgroup';
|
||||
|
||||
describe('OsCgroupMetricsCollector', () => {
  // /proc/self/cgroup listing that puts memory, cpu and cpuacct in "/groupname".
  const PROC_SELF_CGROUP = `
123:memory:/groupname
123:cpu:/groupname
123:cpuacct:/groupname
`;

  // cpu.stat contents shared by the tests that read real metric values.
  const CPU_STAT = `
nr_periods 444
nr_throttled 555
throttled_time 666
`;

  afterEach(() => {
    mockFs.restore();
  });

  it('returns empty object when no cgroup file present', async () => {
    // /proc/self exists but is an empty directory.
    mockFs({ '/proc/self': {} });

    const log = loggerMock.create();
    const metrics = await new OsCgroupMetricsCollector({ logger: log }).collect();

    expect(metrics).toEqual({});
    expect(log.error).not.toHaveBeenCalled();
  });

  it('collects default cgroup data', async () => {
    mockFs({
      '/proc/self/cgroup': PROC_SELF_CGROUP,
      '/sys/fs/cgroup/cpuacct/groupname/cpuacct.usage': '111',
      '/sys/fs/cgroup/cpu/groupname/cpu.cfs_period_us': '222',
      '/sys/fs/cgroup/cpu/groupname/cpu.cfs_quota_us': '333',
      '/sys/fs/cgroup/cpu/groupname/cpu.stat': CPU_STAT,
    });

    const metrics = await new OsCgroupMetricsCollector({ logger: loggerMock.create() }).collect();

    expect(metrics).toMatchInlineSnapshot(`
      Object {
        "cpu": Object {
          "cfs_period_micros": 222,
          "cfs_quota_micros": 333,
          "control_group": "/groupname",
          "stat": Object {
            "number_of_elapsed_periods": 444,
            "number_of_times_throttled": 555,
            "time_throttled_nanos": 666,
          },
        },
        "cpuacct": Object {
          "control_group": "/groupname",
          "usage_nanos": 111,
        },
      }
    `);
  });

  it('collects override cgroup data', async () => {
    // Metric files live under the override directories, not under "/groupname".
    mockFs({
      '/proc/self/cgroup': PROC_SELF_CGROUP,
      '/sys/fs/cgroup/cpuacct/xxcustomcpuacctxx/cpuacct.usage': '111',
      '/sys/fs/cgroup/cpu/xxcustomcpuxx/cpu.cfs_period_us': '222',
      '/sys/fs/cgroup/cpu/xxcustomcpuxx/cpu.cfs_quota_us': '333',
      '/sys/fs/cgroup/cpu/xxcustomcpuxx/cpu.stat': CPU_STAT,
    });

    const overridden = new OsCgroupMetricsCollector({
      logger: loggerMock.create(),
      cpuAcctPath: 'xxcustomcpuacctxx',
      cpuPath: 'xxcustomcpuxx',
    });
    const metrics = await overridden.collect();

    expect(metrics).toMatchInlineSnapshot(`
      Object {
        "cpu": Object {
          "cfs_period_micros": 222,
          "cfs_quota_micros": 333,
          "control_group": "xxcustomcpuxx",
          "stat": Object {
            "number_of_elapsed_periods": 444,
            "number_of_times_throttled": 555,
            "time_throttled_nanos": 666,
          },
        },
        "cpuacct": Object {
          "control_group": "xxcustomcpuacctxx",
          "usage_nanos": 111,
        },
      }
    `);
  });

  it('returns empty object and logs error on an EACCES error', async () => {
    // The cgroup mount exists but has mode 0000, so reads below it are denied.
    mockFs({
      '/proc/self/cgroup': PROC_SELF_CGROUP,
      '/sys/fs/cgroup': mockFs.directory({ mode: parseInt('0000', 8) }),
    });

    const log = loggerMock.create();
    const metrics = await new OsCgroupMetricsCollector({ logger: log }).collect();

    expect(metrics).toEqual({});
    expect(log.error).toHaveBeenCalledWith(
      "cgroup metrics could not be read due to error: [Error: EACCES, permission denied '/sys/fs/cgroup/cpuacct/groupname/cpuacct.usage']"
    );
  });
});
|
|
@ -1,192 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
|
||||
import fs from 'fs';
|
||||
import { join as joinPath } from 'path';
|
||||
import type { Logger } from '@kbn/logging';
|
||||
import type { MetricsCollector, OpsOsMetrics } from '@kbn/core-metrics-server';
|
||||
|
||||
/** The `cpu` and `cpuacct` sections of the OS ops metrics. */
type OsCgroupMetrics = Pick<OpsOsMetrics, 'cpu' | 'cpuacct'>;

/** Constructor options for the cgroup metrics collector. */
interface OsCgroupMetricsCollectorOptions {
  /** Receives an error message when cgroup files cannot be read. */
  logger: Logger;
  /** When set, used as the cpu control group instead of the one listed in /proc/self/cgroup. */
  cpuPath?: string;
  /** When set, used as the cpuacct control group instead of the one listed in /proc/self/cgroup. */
  cpuAcctPath?: string;
}
|
||||
|
||||
/**
 * Collects cpu and cpuacct cgroup metrics for the current process.
 *
 * Control group paths are resolved on the first `collect()` call and then reused. After
 * any failure the collector stops reading files and returns an empty object.
 */
export class OsCgroupMetricsCollector implements MetricsCollector<OsCgroupMetrics> {
  /** Set once cgroup data proved unavailable, so later calls skip all file reads. */
  private noCgroupPresent = false;
  private cpuPath?: string;
  private cpuAcctPath?: string;

  constructor(private readonly options: OsCgroupMetricsCollectorOptions) {}

  /**
   * Reads the cgroup metric files.
   *
   * @returns the cpu/cpuacct metrics, or `{}` when no control group could be resolved or a
   * read failed. Failures other than ENOENT are reported through the logger.
   */
  public async collect(): Promise<OsCgroupMetrics> {
    if (this.noCgroupPresent) {
      return {};
    }

    try {
      await this.initializePaths();

      const { cpuAcctPath, cpuPath } = this;
      if (!cpuAcctPath || !cpuPath) {
        return {};
      }

      const [usageNanos, periodMicros, quotaMicros, stat] = await Promise.all([
        readCPUAcctUsage(cpuAcctPath),
        readCPUFsPeriod(cpuPath),
        readCPUFsQuota(cpuPath),
        readCPUStat(cpuPath),
      ]);

      return {
        cpuacct: {
          control_group: cpuAcctPath,
          usage_nanos: usageNanos,
        },

        cpu: {
          control_group: cpuPath,
          cfs_period_micros: periodMicros,
          cfs_quota_micros: quotaMicros,
          stat,
        },
      };
    } catch (err) {
      // Any failure disables further collection attempts.
      this.noCgroupPresent = true;

      // A missing file is not logged.
      if (err.code !== 'ENOENT') {
        this.options.logger.error(
          `cgroup metrics could not be read due to error: [${err.toString()}]`
        );
      }

      return {};
    }
  }

  public reset() {}

  /**
   * Resolves the cpu and cpuacct control group paths, once. Assumes the paths do not
   * change while the process is running.
   */
  private async initializePaths() {
    if (this.cpuPath && this.cpuAcctPath) {
      return;
    }

    const { cpuPath: cpuOverride, cpuAcctPath: cpuAcctOverride } = this.options;

    // /proc/self/cgroup is read only when at least one override is missing.
    const detected = cpuOverride && cpuAcctOverride ? undefined : await readControlGroups();

    this.cpuPath = cpuOverride || detected?.[GROUP_CPU];
    this.cpuAcctPath = cpuAcctOverride || detected?.[GROUP_CPUACCT];

    // Without both paths there is nothing to collect, now or later.
    if (!this.cpuPath || !this.cpuAcctPath) {
      this.noCgroupPresent = true;
    }
  }
}
|
||||
|
||||
// Files and directories that are read.
const PROC_SELF_CGROUP_FILE = '/proc/self/cgroup';
const PROC_CGROUP_CPU_DIR = '/sys/fs/cgroup/cpu';
const PROC_CGROUP_CPUACCT_DIR = '/sys/fs/cgroup/cpuacct';

// Parsing of "<id>:<controller>[,<controller>...]:<path>" entries.
const CONTROL_GROUP_RE = new RegExp('\\d+:([^:]+):(/.*)');
const CONTROLLER_SEPARATOR_RE = ',';

// cpu controller.
const GROUP_CPU = 'cpu';
const CPU_FS_PERIOD_US_FILE = 'cpu.cfs_period_us';
const CPU_FS_QUOTA_US_FILE = 'cpu.cfs_quota_us';
const CPU_STATS_FILE = 'cpu.stat';

// cpuacct controller.
const GROUP_CPUACCT = 'cpuacct';
const CPUACCT_USAGE_FILE = 'cpuacct.usage';
|
||||
|
||||
/**
 * Reads /proc/self/cgroup and maps each controller name to its control group path.
 * Lines that do not match the entry pattern are ignored; an entry naming several
 * comma-separated controllers yields one key per controller.
 */
async function readControlGroups() {
  const contents = (await fs.promises.readFile(PROC_SELF_CGROUP_FILE)).toString();
  const groups: Record<string, string> = {};

  for (const line of contents.split(/\n/)) {
    const matches = line.match(CONTROL_GROUP_RE);
    if (matches === null) {
      continue;
    }

    const [, controllerList, groupPath] = matches;
    for (const controller of controllerList.split(CONTROLLER_SEPARATOR_RE)) {
      groups[controller] = groupPath;
    }
  }

  return groups;
}
|
||||
|
||||
/** Reads the file at `path` and parses its contents as a base-10 integer. */
async function fileContentsToInteger(path: string) {
  const raw = (await fs.promises.readFile(path)).toString();
  return Number.parseInt(raw, 10);
}
|
||||
|
||||
/** Reads `cpuacct.usage` of the given control group as an integer. */
function readCPUAcctUsage(controlGroup: string) {
  const usageFile = joinPath(PROC_CGROUP_CPUACCT_DIR, controlGroup, CPUACCT_USAGE_FILE);
  return fileContentsToInteger(usageFile);
}
|
||||
|
||||
/** Reads `cpu.cfs_period_us` of the given control group as an integer. */
function readCPUFsPeriod(controlGroup: string) {
  const periodFile = joinPath(PROC_CGROUP_CPU_DIR, controlGroup, CPU_FS_PERIOD_US_FILE);
  return fileContentsToInteger(periodFile);
}
|
||||
|
||||
/** Reads `cpu.cfs_quota_us` of the given control group as an integer. */
function readCPUFsQuota(controlGroup: string) {
  const quotaFile = joinPath(PROC_CGROUP_CPU_DIR, controlGroup, CPU_FS_QUOTA_US_FILE);
  return fileContentsToInteger(quotaFile);
}
|
||||
|
||||
/**
 * Reads `cpu.stat` of the given control group.
 *
 * Every field starts at -1 and is overwritten only when its key appears in the file.
 * A missing file (ENOENT) yields the all -1 result; any other read error is rethrown.
 */
async function readCPUStat(controlGroup: string) {
  const stat = {
    number_of_elapsed_periods: -1,
    number_of_times_throttled: -1,
    time_throttled_nanos: -1,
  };

  let contents: string;
  try {
    contents = (
      await fs.promises.readFile(joinPath(PROC_CGROUP_CPU_DIR, controlGroup, CPU_STATS_FILE))
    ).toString();
  } catch (err) {
    if (err.code === 'ENOENT') {
      return stat;
    }

    throw err;
  }

  // cpu.stat key -> result field it populates.
  const fieldByKey = new Map<string, keyof typeof stat>([
    ['nr_periods', 'number_of_elapsed_periods'],
    ['nr_throttled', 'number_of_times_throttled'],
    ['throttled_time', 'time_throttled_nanos'],
  ]);

  for (const line of contents.split(/\n/)) {
    const [key, value] = line.split(/\s+/);
    const field = fieldByKey.get(key);
    if (field !== undefined) {
      stat[field] = parseInt(value, 10);
    }
  }

  return stat;
}
|
|
@ -0,0 +1,122 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
|
||||
// Wrap the real v1 gatherer in a jest mock function so calls can be asserted on while
// the actual implementation still runs.
jest.mock('./v1', () => {
  const realV1 = jest.requireActual('./v1');
  return {
    ...realV1,
    gatherV1CgroupMetrics: jest.fn(realV1.gatherV1CgroupMetrics),
  };
});

// Same wrapping for the v2 gatherer.
jest.mock('./v2', () => {
  const realV2 = jest.requireActual('./v2');
  return {
    ...realV2,
    gatherV2CgroupMetrics: jest.fn(realV2.gatherV2CgroupMetrics),
  };
});
|
||||
|
||||
import mockFs from 'mock-fs';
|
||||
import { loggerMock } from '@kbn/logging-mocks';
|
||||
import { OsCgroupMetricsCollector } from '.';
|
||||
import { Logger } from '@kbn/logging';
|
||||
import { gatherV1CgroupMetrics } from './v1';
|
||||
import { gatherV2CgroupMetrics } from './v2';
|
||||
|
||||
describe('OsCgroupMetricsCollector', () => {
  /** Builds a collector together with the mock logger it reports to. */
  const setup = (overrides: { cpuPath?: string; cpuAcctPath?: string } = {}) => {
    const logger: Logger = loggerMock.create();
    const collector = new OsCgroupMetricsCollector({ logger, ...overrides });
    return { logger, collector };
  };

  afterEach(() => {
    mockFs.restore();
    jest.clearAllMocks();
  });

  it('returns empty object when no cgroup file present', async () => {
    const { logger, collector } = setup();
    // /proc/self exists but is an empty directory.
    mockFs({ '/proc/self': {} });

    const metrics = await collector.collect();

    expect(metrics).toEqual({});
    expect(logger.error).not.toHaveBeenCalled();
  });

  it('returns empty object and logs error on an EACCES error', async () => {
    const { logger, collector } = setup();
    // The cgroup mount exists but has mode 0000, so reads below it are denied.
    mockFs({
      '/proc/self/cgroup': `
123:memory:/groupname
123:cpu:/groupname
123:cpuacct:/groupname
`,
      '/sys/fs/cgroup': mockFs.directory({ mode: parseInt('0000', 8) }),
    });

    const metrics = await collector.collect();

    expect(metrics).toEqual({});
    expect(logger.error).toHaveBeenCalledWith(
      "cgroup metrics could not be read due to error: [Error: EACCES, permission denied '/sys/fs/cgroup/cpuacct/groupname/cpuacct.usage']"
    );
  });

  it('delegates correctly to the v1 implementation', async () => {
    const { collector } = setup();
    mockFs({
      '/proc/self/cgroup': `123:memory:/groupname
123:cpu:/groupname
123:cpuacct:/groupname`,
    });

    await collector.collect();

    expect(gatherV2CgroupMetrics).toHaveBeenCalledTimes(0);
    expect(gatherV1CgroupMetrics).toHaveBeenCalledTimes(1);
    expect(gatherV1CgroupMetrics).toHaveBeenCalledWith({
      cpuAcctPath: '/groupname',
      cpuPath: '/groupname',
    });
  });

  it('delegates correctly to the v2 implementation', async () => {
    const { collector } = setup();
    mockFs({
      '/proc/self/cgroup': `0::/groupname`,
    });

    await collector.collect();

    expect(gatherV1CgroupMetrics).toHaveBeenCalledTimes(0);
    expect(gatherV2CgroupMetrics).toHaveBeenCalledTimes(1);
    expect(gatherV2CgroupMetrics).toHaveBeenCalledWith({
      cpuAcctPath: '/groupname',
      cpuPath: '/groupname',
    });
  });

  it('passes through overrides', async () => {
    mockFs({
      '/proc/self/cgroup': `0:test:/groupname`,
    });
    const { collector } = setup({ cpuAcctPath: '/override1', cpuPath: '/override2' });

    await collector.collect();

    expect(gatherV1CgroupMetrics).toHaveBeenCalledWith({
      cpuAcctPath: '/override1',
      cpuPath: '/override2',
    });
  });
});
|
|
@ -0,0 +1,78 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
|
||||
import type { Logger } from '@kbn/logging';
|
||||
import type { MetricsCollector } from '@kbn/core-metrics-server';
|
||||
|
||||
import { gatherV1CgroupMetrics } from './v1';
|
||||
import { gatherV2CgroupMetrics } from './v2';
|
||||
import { gatherInfo } from './gather_info';
|
||||
import { GROUP_CPU, GROUP_CPUACCT } from './constants';
|
||||
import { OsCgroupMetrics } from './types';
|
||||
|
||||
/** Constructor options for the cgroup metrics collector. */
interface OsCgroupMetricsCollectorOptions {
  /** Receives an error message when cgroup files cannot be read. */
  logger: Logger;
  /** When set, used as the cpu control group instead of the detected one. */
  cpuPath?: string;
  /** When set, used as the cpuacct control group instead of the detected one. */
  cpuAcctPath?: string;
}
|
||||
|
||||
/**
 * Collects cpu and cpuacct cgroup metrics for the current process, delegating to the
 * v1 or v2 gatherer depending on what `gatherInfo()` reports.
 *
 * Control group paths are resolved on the first `collect()` call and then reused. After
 * any failure the collector stops gathering and returns an empty object.
 */
export class OsCgroupMetricsCollector implements MetricsCollector<OsCgroupMetrics> {
  /** Set once cgroup data proved unavailable, so later calls skip all file reads. */
  private noCgroupPresent = false;
  /** True when `gatherInfo()` reported a cgroup v2 layout. */
  private isCgroup2 = false;
  private cpuPath?: string;
  private cpuAcctPath?: string;

  constructor(private readonly options: OsCgroupMetricsCollectorOptions) {}

  /**
   * Gathers the cgroup metrics.
   *
   * @returns the cpu/cpuacct metrics, or `{}` when no control group could be resolved or
   * gathering failed. Failures other than ENOENT are reported through the logger.
   */
  public async collect(): Promise<OsCgroupMetrics> {
    if (this.noCgroupPresent) {
      return {};
    }

    try {
      await this.initializePaths();
      if (!this.hasPaths()) {
        return {};
      }

      const gather = this.isCgroup2 ? gatherV2CgroupMetrics : gatherV1CgroupMetrics;
      // Awaited inside the try block so a rejection reaches the catch below.
      return await gather({ cpuAcctPath: this.cpuAcctPath!, cpuPath: this.cpuPath! });
    } catch (err) {
      // Any failure disables further collection attempts.
      this.noCgroupPresent = true;

      // A missing file is not logged.
      if (err.code !== 'ENOENT') {
        this.options.logger.error(
          `cgroup metrics could not be read due to error: [${err.toString()}]`
        );
      }

      return {};
    }
  }

  public reset() {}

  /** Whether both control group paths have been resolved. */
  private hasPaths(): boolean {
    return Boolean(this.cpuPath && this.cpuAcctPath);
  }

  /**
   * Resolves the cgroup version and both control group paths, once. Paths given in the
   * options take precedence over the detected ones.
   */
  private async initializePaths(): Promise<void> {
    if (this.hasPaths()) {
      return;
    }

    const info = await gatherInfo();
    const { cpuPath: cpuOverride, cpuAcctPath: cpuAcctOverride } = this.options;

    this.isCgroup2 = info.v2;
    this.cpuPath = cpuOverride || info.data[GROUP_CPU];
    this.cpuAcctPath = cpuAcctOverride || info.data[GROUP_CPUACCT];

    // Without both paths there is nothing to collect, now or later.
    this.noCgroupPresent = !this.cpuPath || !this.cpuAcctPath;
  }
}
|
|
@ -0,0 +1,10 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
|
||||
/** Key under which the cpu control group path is stored. */
export const GROUP_CPU = 'cpu';
/** Key under which the cpuacct control group path is stored. */
export const GROUP_CPUACCT = 'cpuacct';
|
|
@ -0,0 +1,55 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
|
||||
import mockFs from 'mock-fs';
|
||||
import { gatherInfo } from './gather_info';
|
||||
|
||||
describe('gatherInfo', () => {
  // Two v1-style entries; the second one names two controllers.
  const V1_LISTING = `0:controller:/path
1:controller2,controller3:/otherpath`;

  afterEach(() => {
    mockFs.restore();
  });

  test('parse cgroup file entries', async () => {
    mockFs({ '/proc/self/cgroup': V1_LISTING });

    const info = await gatherInfo();

    expect(info.data).toEqual({
      controller: '/path',
      controller2: '/otherpath',
      controller3: '/otherpath',
    });
  });

  test('detect cgroup version', async () => {
    mockFs({ '/proc/self/cgroup': V1_LISTING });
    const v1Info = await gatherInfo();
    expect(v1Info).toMatchObject({ v2: false });

    // A single "0::" entry surrounded by blank lines.
    mockFs({
      '/proc/self/cgroup': `

0::/path

`,
    });
    const v2Info = await gatherInfo();
    expect(v2Info).toMatchObject({ v2: true });
  });

  test('missing cgroup file', async () => {
    mockFs({});

    await expect(gatherInfo()).rejects.toMatchObject({ code: 'ENOENT' });
  });

  test('invalid cgroup file', async () => {
    mockFs({
      '/proc/self/cgroup': `invalid`,
    });

    const info = await gatherInfo();

    expect(info).toEqual({ data: {}, v2: false });
  });
});
|
|
@ -0,0 +1,64 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
import fs from 'fs/promises';
|
||||
|
||||
import { GROUP_CPU, GROUP_CPUACCT } from './constants';
|
||||
|
||||
/** File listing the control groups of the current process. */
const PROC_SELF_CGROUP_FILE = '/proc/self/cgroup';
/** Matches "<id>:<controller>[,<controller>...]:<path>" and captures the controller list and path. */
const CONTROL_GROUP_RE = new RegExp('\\d+:([^:]+):(/.*)');
/** Separator between controller names inside one entry. */
const CONTROLLER_SEPARATOR_RE = ',';
|
||||
|
||||
/**
 * Decides whether the non-blank lines of /proc/self/cgroup describe a cgroup v2 setup.
 *
 * That is the case only when there is exactly one line and, after trimming, it starts
 * with "0::".
 *
 * @note cgroup v2 is always in the format "0::<PATH>". See https://www.kernel.org/doc/Documentation/cgroup-v2.txt.
 */
function isCgroups2(procSelfLines: string[]): boolean {
  if (procSelfLines.length !== 1) {
    return false;
  }

  const [onlyLine] = procSelfLines;
  return onlyLine.trim().startsWith('0::');
}
|
||||
|
||||
/** Reads /proc/self/cgroup and returns its lines, leaving out blank or whitespace-only ones. */
async function readProcSelf(): Promise<string[]> {
  const contents = await fs.readFile(PROC_SELF_CGROUP_FILE);
  const nonBlankLines: string[] = [];

  for (const line of contents.toString().split(/\n/)) {
    if (line.trim().length > 0) {
      nonBlankLines.push(line);
    }
  }

  return nonBlankLines;
}
|
||||
|
||||
interface Result {
  /** Control group path keyed by controller name. */
  data: Record<string, string>;
  /** Whether the process was detected as running under cgroup v2. */
  v2: boolean;
}

/**
 * Reads /proc/self/cgroup and reports the control group paths of the current process.
 *
 * For cgroup v2 the single path is reported under both the cpu and cpuacct keys. For
 * v1, every controller named in a matching entry is mapped to that entry's path, and
 * lines that do not match are ignored.
 *
 * @returns the controller-to-path map and whether cgroup v2 was detected
 * @throws whatever reading /proc/self/cgroup throws, e.g. ENOENT when the file is missing
 */
export async function gatherInfo(): Promise<Result> {
  const lines = await readProcSelf();

  if (isCgroups2(lines)) {
    // isCgroups2 guarantees the trimmed line starts with "0::". Take everything after
    // that prefix rather than splitting on ":", so a path containing ":" stays intact.
    const v2Prefix = '0::';
    const path = lines[0].trim().slice(v2Prefix.length);
    return {
      data: {
        [GROUP_CPU]: path,
        [GROUP_CPUACCT]: path,
      },
      v2: true,
    };
  }

  const data: Record<string, string> = {};
  for (const line of lines) {
    const matches = line.match(CONTROL_GROUP_RE);
    if (matches === null) {
      continue;
    }

    // One entry may name several comma-separated controllers sharing one path.
    const [, controllerList, groupPath] = matches;
    for (const controller of controllerList.split(CONTROLLER_SEPARATOR_RE)) {
      data[controller] = groupPath;
    }
  }

  return { data, v2: false };
}
|
|
@ -0,0 +1,9 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
|
||||
export { OsCgroupMetricsCollector } from './cgroup';
|
|
@ -0,0 +1,11 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
|
||||
import type { OpsOsMetrics } from '@kbn/core-metrics-server';
|
||||
|
||||
/** The `cpu` and `cpuacct` sections of the OS ops metrics. */
export type OsCgroupMetrics = Pick<OpsOsMetrics, 'cpu' | 'cpuacct'>;
|
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
import mockFs from 'mock-fs';
|
||||
import { gatherV1CgroupMetrics } from './v1';
|
||||
|
||||
describe('gatherV1CgroupMetrics', () => {
  afterEach(() => {
    mockFs.restore();
  });

  it('collects cgroup data', async () => {
    mockFs({
      '/proc/self/cgroup': `
123:memory:/groupname
123:cpu:/groupname
123:cpuacct:/groupname
`,
      '/sys/fs/cgroup/cpuacct/groupname/cpuacct.usage': '111',
      '/sys/fs/cgroup/cpu/groupname/cpu.cfs_period_us': '222',
      '/sys/fs/cgroup/cpu/groupname/cpu.cfs_quota_us': '333',
      '/sys/fs/cgroup/cpu/groupname/cpu.stat': `
nr_periods 444
nr_throttled 555
throttled_time 666
`,
    });

    const metrics = await gatherV1CgroupMetrics({
      cpuAcctPath: '/groupname',
      cpuPath: '/groupname',
    });

    expect(metrics).toMatchInlineSnapshot(`
      Object {
        "cpu": Object {
          "cfs_period_micros": 222,
          "cfs_quota_micros": 333,
          "control_group": "/groupname",
          "stat": Object {
            "number_of_elapsed_periods": 444,
            "number_of_times_throttled": 555,
            "time_throttled_nanos": 666,
          },
        },
        "cpuacct": Object {
          "control_group": "/groupname",
          "usage_nanos": 111,
        },
      }
    `);
  });
});
|
|
@ -0,0 +1,103 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
import fs from 'fs/promises';
|
||||
import { join as joinPath } from 'path';
|
||||
import type { OsCgroupMetrics } from './types';
|
||||
|
||||
// cpu controller: directory and the files read from it.
const PROC_CGROUP_CPU_DIR = '/sys/fs/cgroup/cpu';
const CPU_STATS_FILE = 'cpu.stat';
const CPU_FS_PERIOD_US_FILE = 'cpu.cfs_period_us';
const CPU_FS_QUOTA_US_FILE = 'cpu.cfs_quota_us';

// cpuacct controller: directory and the file read from it.
const PROC_CGROUP_CPUACCT_DIR = '/sys/fs/cgroup/cpuacct';
const CPUACCT_USAGE_FILE = 'cpuacct.usage';
|
||||
|
||||
/** Control group paths to read the metrics from. */
interface Arg {
  /** Control group path below the cpu controller directory. */
  cpuPath: string;
  /** Control group path below the cpuacct controller directory. */
  cpuAcctPath: string;
}

/**
 * Gathers cpu and cpuacct metrics from the cgroup v1 controller directories.
 *
 * The four files are read concurrently; the first read that fails rejects the whole call.
 */
export async function gatherV1CgroupMetrics(arg: Arg): Promise<OsCgroupMetrics> {
  const { cpuAcctPath, cpuPath } = arg;

  const [usageNanos, periodMicros, quotaMicros, stat] = await Promise.all([
    readCPUAcctUsage(cpuAcctPath),
    readCPUFsPeriod(cpuPath),
    readCPUFsQuota(cpuPath),
    readCPUStat(cpuPath),
  ]);

  return {
    cpuacct: {
      control_group: cpuAcctPath,
      usage_nanos: usageNanos,
    },

    cpu: {
      control_group: cpuPath,
      cfs_period_micros: periodMicros,
      cfs_quota_micros: quotaMicros,
      stat,
    },
  };
}
|
||||
|
||||
/** Reads the file at `path` and parses its contents as a base-10 integer. */
async function fileContentsToInteger(path: string) {
  const raw = (await fs.readFile(path)).toString();
  return Number.parseInt(raw, 10);
}
|
||||
|
||||
/** Reads `cpuacct.usage` of the given control group as an integer. */
function readCPUAcctUsage(controlGroup: string) {
  const usageFile = joinPath(PROC_CGROUP_CPUACCT_DIR, controlGroup, CPUACCT_USAGE_FILE);
  return fileContentsToInteger(usageFile);
}
|
||||
|
||||
/** Reads `cpu.cfs_period_us` of the given control group as an integer. */
function readCPUFsPeriod(controlGroup: string) {
  const periodFile = joinPath(PROC_CGROUP_CPU_DIR, controlGroup, CPU_FS_PERIOD_US_FILE);
  return fileContentsToInteger(periodFile);
}
|
||||
|
||||
/** Reads `cpu.cfs_quota_us` of the given control group as an integer. */
function readCPUFsQuota(controlGroup: string) {
  const quotaFile = joinPath(PROC_CGROUP_CPU_DIR, controlGroup, CPU_FS_QUOTA_US_FILE);
  return fileContentsToInteger(quotaFile);
}
|
||||
|
||||
/**
 * Reads `cpu.stat` of the given control group.
 *
 * Every field starts at -1 and is overwritten only when its key appears in the file.
 * A missing file (ENOENT) yields the all -1 result; any other read error is rethrown.
 */
async function readCPUStat(controlGroup: string) {
  const stat = {
    number_of_elapsed_periods: -1,
    number_of_times_throttled: -1,
    time_throttled_nanos: -1,
  };

  let contents: string;
  try {
    const statFile = joinPath(PROC_CGROUP_CPU_DIR, controlGroup, CPU_STATS_FILE);
    contents = (await fs.readFile(statFile)).toString();
  } catch (err) {
    if (err.code === 'ENOENT') {
      return stat;
    }

    throw err;
  }

  // Each line is "<key> <value>"; keys other than the three below are ignored.
  for (const line of contents.split(/\n/)) {
    const [key, value] = line.split(/\s+/);

    if (key === 'nr_periods') {
      stat.number_of_elapsed_periods = parseInt(value, 10);
    } else if (key === 'nr_throttled') {
      stat.number_of_times_throttled = parseInt(value, 10);
    } else if (key === 'throttled_time') {
      stat.time_throttled_nanos = parseInt(value, 10);
    }
  }

  return stat;
}
|
|
@ -0,0 +1,79 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
|
||||
import mockFs from 'mock-fs';
|
||||
import { gatherV2CgroupMetrics } from './v2';
|
||||
|
||||
describe('gatherV2CgroupMetrics', () => {
  // cpu.stat contents shared by both tests.
  const CPU_STAT = `usage_usec 185247
user_usec 59279
system_usec 125968
nr_periods 123
nr_throttled 1
throttled_usec 123123`;

  afterEach(() => {
    mockFs.restore();
  });

  it('collects default cgroup data for "root"', async () => {
    mockFs({
      '/proc/self/cgroup': `0::/`,
      '/sys/fs/cgroup/cpu.max': 'max 100000', // "max" is a special no-value value
      '/sys/fs/cgroup/cpu.stat': CPU_STAT,
    });

    const metrics = await gatherV2CgroupMetrics({ cpuAcctPath: '/', cpuPath: '/' });

    expect(metrics).toMatchInlineSnapshot(`
      Object {
        "cpu": Object {
          "cfs_period_micros": 100000,
          "cfs_quota_micros": -1,
          "control_group": "/",
          "stat": Object {
            "number_of_elapsed_periods": 123,
            "number_of_times_throttled": 1,
            "time_throttled_nanos": 123123,
          },
        },
        "cpuacct": Object {
          "control_group": "/",
          "usage_nanos": 185247,
        },
      }
    `);
  });

  it('collects default cgroup data', async () => {
    mockFs({
      '/proc/self/cgroup': `0::/mypath`,
      '/sys/fs/cgroup/mypath/cpu.max': '111 100000',
      '/sys/fs/cgroup/mypath/cpu.stat': CPU_STAT,
    });

    const metrics = await gatherV2CgroupMetrics({ cpuAcctPath: '/mypath', cpuPath: '/mypath' });

    expect(metrics).toMatchInlineSnapshot(`
      Object {
        "cpu": Object {
          "cfs_period_micros": 100000,
          "cfs_quota_micros": 111,
          "control_group": "/mypath",
          "stat": Object {
            "number_of_elapsed_periods": 123,
            "number_of_times_throttled": 1,
            "time_throttled_nanos": 123123,
          },
        },
        "cpuacct": Object {
          "control_group": "/mypath",
          "usage_nanos": 185247,
        },
      }
    `);
  });
});
|
|
@ -0,0 +1,87 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
|
||||
import fs from 'fs/promises';
|
||||
import { join as joinPath } from 'path';
|
||||
import type { OsCgroupMetrics } from './types';
|
||||
|
||||
/** Directory below which the control group path is resolved. */
const PROC_CGROUP2_DIR = '/sys/fs/cgroup';
/** File holding the quota and period values. */
const CPU_MAX_FILE = 'cpu.max';
/** File holding the usage and throttling counters. */
const CPU_STATS_FILE = 'cpu.stat';
|
||||
|
||||
/** Control group paths handed over by the collector. */
interface Arg {
  /** Control group path below /sys/fs/cgroup that all files are read from. */
  cpuPath: string;
  /** Not read by this module: the usage figure also comes from `cpu.stat` at `cpuPath`. */
  cpuAcctPath: string;
}

/**
 * Gathers cpu and cpuacct metrics from the cgroup v2 files `cpu.stat` and `cpu.max`.
 *
 * Both files are read concurrently from `arg.cpuPath`, which is also the control group
 * reported for both the cpu and cpuacct sections.
 */
export async function gatherV2CgroupMetrics(arg: Arg): Promise<OsCgroupMetrics> {
  const { cpuPath } = arg;

  const [cpuStat, cpuMax] = await Promise.all([readCPUStat(cpuPath), readCPUMax(cpuPath)]);

  // The usage figure is reported under cpuacct; the remaining fields form cpu.stat.
  const { usage_nanos: usageNanos, ...stat } = cpuStat;

  return {
    cpu: {
      ...cpuMax,
      control_group: cpuPath,
      stat,
    },
    cpuacct: {
      control_group: cpuPath,
      usage_nanos: usageNanos,
    },
  };
}
|
||||
/** Quota and period as parsed from `cpu.max`. */
interface CPUMax {
  cfs_period_micros: number;
  cfs_quota_micros: number;
}

/**
 * Reads `cpu.max` of the given control group. The file holds two whitespace-separated
 * values: the quota, then the period. A quota of "max" is reported as -1.
 */
async function readCPUMax(group: string): Promise<CPUMax> {
  const contents = await fs.readFile(joinPath(PROC_CGROUP2_DIR, group, CPU_MAX_FILE));
  const [quota, period] = contents.toString().trim().split(/\s+/);

  let quotaMicros = -1;
  if (quota !== 'max') {
    quotaMicros = parseInt(quota, 10);
  }

  return {
    cfs_quota_micros: quotaMicros,
    cfs_period_micros: parseInt(period, 10),
  };
}
|
||||
|
||||
/** The cpu stat fields plus the usage figure, which in v2 is read from the same file. */
type CPUStat = Required<OsCgroupMetrics>['cpu']['stat'] & { usage_nanos: number };

/**
 * Reads `cpu.stat` of the given control group.
 *
 * Every field starts at -1 and is overwritten only when its key appears in the file.
 * Read errors, including a missing file, are not handled here and reject the call.
 *
 * NOTE(review): `throttled_usec` and `usage_usec` are stored in the `*_nanos` fields
 * exactly as read, without scaling. Confirm whether a microsecond-to-nanosecond
 * conversion is intended.
 */
async function readCPUStat(group: string): Promise<CPUStat> {
  const stat: CPUStat = {
    number_of_elapsed_periods: -1,
    number_of_times_throttled: -1,
    time_throttled_nanos: -1,
    usage_nanos: -1,
  };

  const contents = await fs.readFile(joinPath(PROC_CGROUP2_DIR, group, CPU_STATS_FILE));

  // Each line is "<key> <value>"; keys other than the four below are ignored.
  for (const line of contents.toString().split(/\n/)) {
    const [key, value] = line.split(/\s+/);

    if (key === 'nr_periods') {
      stat.number_of_elapsed_periods = parseInt(value, 10);
    } else if (key === 'nr_throttled') {
      stat.number_of_times_throttled = parseInt(value, 10);
    } else if (key === 'throttled_usec') {
      stat.time_throttled_nanos = parseInt(value, 10);
    } else if (key === 'usage_usec') {
      // In V2 cpuacct also lives in cpu.stat
      stat.usage_nanos = parseInt(value, 10);
    }
  }

  return stat;
}
|
Loading…
Add table
Add a link
Reference in a new issue