mirror of
https://github.com/elastic/kibana.git
synced 2025-04-23 17:28:26 -04:00
Move metrics to setup and add cgroup metrics (#76730)
This commit is contained in:
parent
d67a421e68
commit
9bc603e7d9
31 changed files with 634 additions and 50 deletions
|
@ -123,7 +123,7 @@ The plugin integrates with the core system via lifecycle events: `setup`<!-- -->
|
|||
| [LoggerFactory](./kibana-plugin-core-server.loggerfactory.md) | The single purpose of <code>LoggerFactory</code> interface is to define a way to retrieve a context-based logger instance. |
|
||||
| [LoggingServiceSetup](./kibana-plugin-core-server.loggingservicesetup.md) | Provides APIs to plugins for customizing the plugin's logger. |
|
||||
| [LogMeta](./kibana-plugin-core-server.logmeta.md) | Contextual metadata |
|
||||
| [MetricsServiceSetup](./kibana-plugin-core-server.metricsservicesetup.md) | |
|
||||
| [MetricsServiceSetup](./kibana-plugin-core-server.metricsservicesetup.md) | APIs to retrieve metrics gathered and exposed by the core platform. |
|
||||
| [NodesVersionCompatibility](./kibana-plugin-core-server.nodesversioncompatibility.md) | |
|
||||
| [OnPostAuthToolkit](./kibana-plugin-core-server.onpostauthtoolkit.md) | A tool set defining an outcome of OnPostAuth interceptor for incoming request. |
|
||||
| [OnPreAuthToolkit](./kibana-plugin-core-server.onpreauthtoolkit.md) | A tool set defining an outcome of OnPreAuth interceptor for incoming request. |
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
<!-- Do not edit this file. It is automatically generated by API Documenter. -->
|
||||
|
||||
[Home](./index.md) > [kibana-plugin-core-server](./kibana-plugin-core-server.md) > [MetricsServiceSetup](./kibana-plugin-core-server.metricsservicesetup.md) > [collectionInterval](./kibana-plugin-core-server.metricsservicesetup.collectioninterval.md)
|
||||
|
||||
## MetricsServiceSetup.collectionInterval property
|
||||
|
||||
Interval, in milliseconds, at which metrics are collected.
|
||||
|
||||
<b>Signature:</b>
|
||||
|
||||
```typescript
|
||||
readonly collectionInterval: number;
|
||||
```
|
|
@ -0,0 +1,24 @@
|
|||
<!-- Do not edit this file. It is automatically generated by API Documenter. -->
|
||||
|
||||
[Home](./index.md) > [kibana-plugin-core-server](./kibana-plugin-core-server.md) > [MetricsServiceSetup](./kibana-plugin-core-server.metricsservicesetup.md) > [getOpsMetrics$](./kibana-plugin-core-server.metricsservicesetup.getopsmetrics_.md)
|
||||
|
||||
## MetricsServiceSetup.getOpsMetrics$ property
|
||||
|
||||
Retrieve an observable emitting the [OpsMetrics](./kibana-plugin-core-server.opsmetrics.md) gathered. The observable will emit an initial value during core's `start` phase, and a new value every fixed interval of time, based on the `ops.interval` configuration property.
|
||||
|
||||
<b>Signature:</b>
|
||||
|
||||
```typescript
|
||||
getOpsMetrics$: () => Observable<OpsMetrics>;
|
||||
```
|
||||
|
||||
## Example
|
||||
|
||||
|
||||
```ts
|
||||
core.metrics.getOpsMetrics$().subscribe(metrics => {
|
||||
// do something with the metrics
|
||||
})
|
||||
|
||||
```
|
||||
|
|
@ -4,8 +4,18 @@
|
|||
|
||||
## MetricsServiceSetup interface
|
||||
|
||||
APIs to retrieve metrics gathered and exposed by the core platform.
|
||||
|
||||
<b>Signature:</b>
|
||||
|
||||
```typescript
|
||||
export interface MetricsServiceSetup
|
||||
```
|
||||
|
||||
## Properties
|
||||
|
||||
| Property | Type | Description |
|
||||
| --- | --- | --- |
|
||||
| [collectionInterval](./kibana-plugin-core-server.metricsservicesetup.collectioninterval.md) | <code>number</code> | Interval, in milliseconds, at which metrics are collected. |
|
||||
| [getOpsMetrics$](./kibana-plugin-core-server.metricsservicesetup.getopsmetrics_.md) | <code>() => Observable<OpsMetrics></code> | Retrieve an observable emitting the [OpsMetrics](./kibana-plugin-core-server.opsmetrics.md) gathered. The observable will emit an initial value during core's <code>start</code> phase, and a new value every fixed interval of time, based on the <code>ops.interval</code> configuration property. |
|
||||
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
<!-- Do not edit this file. It is automatically generated by API Documenter. -->
|
||||
|
||||
[Home](./index.md) > [kibana-plugin-core-server](./kibana-plugin-core-server.md) > [OpsMetrics](./kibana-plugin-core-server.opsmetrics.md) > [collected\_at](./kibana-plugin-core-server.opsmetrics.collected_at.md)
|
||||
|
||||
## OpsMetrics.collected\_at property
|
||||
|
||||
Time metrics were recorded at.
|
||||
|
||||
<b>Signature:</b>
|
||||
|
||||
```typescript
|
||||
collected_at: Date;
|
||||
```
|
|
@ -16,6 +16,7 @@ export interface OpsMetrics
|
|||
|
||||
| Property | Type | Description |
|
||||
| --- | --- | --- |
|
||||
| [collected\_at](./kibana-plugin-core-server.opsmetrics.collected_at.md) | <code>Date</code> | Time metrics were recorded at. |
|
||||
| [concurrent\_connections](./kibana-plugin-core-server.opsmetrics.concurrent_connections.md) | <code>OpsServerMetrics['concurrent_connections']</code> | number of current concurrent connections to the server |
|
||||
| [os](./kibana-plugin-core-server.opsmetrics.os.md) | <code>OpsOsMetrics</code> | OS related metrics |
|
||||
| [process](./kibana-plugin-core-server.opsmetrics.process.md) | <code>OpsProcessMetrics</code> | Process related metrics |
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
<!-- Do not edit this file. It is automatically generated by API Documenter. -->
|
||||
|
||||
[Home](./index.md) > [kibana-plugin-core-server](./kibana-plugin-core-server.md) > [OpsOsMetrics](./kibana-plugin-core-server.opsosmetrics.md) > [cpu](./kibana-plugin-core-server.opsosmetrics.cpu.md)
|
||||
|
||||
## OpsOsMetrics.cpu property
|
||||
|
||||
cpu cgroup metrics, undefined when not running in a cgroup
|
||||
|
||||
<b>Signature:</b>
|
||||
|
||||
```typescript
|
||||
cpu?: {
|
||||
control_group: string;
|
||||
cfs_period_micros: number;
|
||||
cfs_quota_micros: number;
|
||||
stat: {
|
||||
number_of_elapsed_periods: number;
|
||||
number_of_times_throttled: number;
|
||||
time_throttled_nanos: number;
|
||||
};
|
||||
};
|
||||
```
|
|
@ -0,0 +1,16 @@
|
|||
<!-- Do not edit this file. It is automatically generated by API Documenter. -->
|
||||
|
||||
[Home](./index.md) > [kibana-plugin-core-server](./kibana-plugin-core-server.md) > [OpsOsMetrics](./kibana-plugin-core-server.opsosmetrics.md) > [cpuacct](./kibana-plugin-core-server.opsosmetrics.cpuacct.md)
|
||||
|
||||
## OpsOsMetrics.cpuacct property
|
||||
|
||||
cpu accounting metrics, undefined when not running in a cgroup
|
||||
|
||||
<b>Signature:</b>
|
||||
|
||||
```typescript
|
||||
cpuacct?: {
|
||||
control_group: string;
|
||||
usage_nanos: number;
|
||||
};
|
||||
```
|
|
@ -16,6 +16,8 @@ export interface OpsOsMetrics
|
|||
|
||||
| Property | Type | Description |
|
||||
| --- | --- | --- |
|
||||
| [cpu](./kibana-plugin-core-server.opsosmetrics.cpu.md) | <code>{</code><br/><code> control_group: string;</code><br/><code> cfs_period_micros: number;</code><br/><code> cfs_quota_micros: number;</code><br/><code> stat: {</code><br/><code> number_of_elapsed_periods: number;</code><br/><code> number_of_times_throttled: number;</code><br/><code> time_throttled_nanos: number;</code><br/><code> };</code><br/><code> }</code> | cpu cgroup metrics, undefined when not running in a cgroup |
|
||||
| [cpuacct](./kibana-plugin-core-server.opsosmetrics.cpuacct.md) | <code>{</code><br/><code> control_group: string;</code><br/><code> usage_nanos: number;</code><br/><code> }</code> | cpu accounting metrics, undefined when not running in a cgroup |
|
||||
| [distro](./kibana-plugin-core-server.opsosmetrics.distro.md) | <code>string</code> | The os distrib. Only present for linux platforms |
|
||||
| [distroRelease](./kibana-plugin-core-server.opsosmetrics.distrorelease.md) | <code>string</code> | The os distrib release, prefixed by the os distrib. Only present for linux platforms |
|
||||
| [load](./kibana-plugin-core-server.opsosmetrics.load.md) | <code>{</code><br/><code> '1m': number;</code><br/><code> '5m': number;</code><br/><code> '15m': number;</code><br/><code> }</code> | cpu load metrics |
|
||||
|
|
|
@ -20,12 +20,12 @@ which may cause a delay before pages start being served.
|
|||
Set to `false` to disable Console. *Default: `true`*
|
||||
|
||||
| `cpu.cgroup.path.override:`
|
||||
| Override for cgroup cpu path when mounted in a
|
||||
manner that is inconsistent with `/proc/self/cgroup`.
|
||||
| *deprecated* This setting has been renamed to `ops.cGroupOverrides.cpuPath`
|
||||
and the old name will no longer be supported as of 8.0.
|
||||
|
||||
| `cpuacct.cgroup.path.override:`
|
||||
| Override for cgroup cpuacct path when mounted
|
||||
in a manner that is inconsistent with `/proc/self/cgroup`.
|
||||
| *deprecated* This setting has been renamed to `ops.cGroupOverrides.cpuAcctPath`
|
||||
and the old name will no longer be supported as of 8.0.
|
||||
|
||||
| `csp.rules:`
|
||||
| A https://w3c.github.io/webappsec-csp/[content-security-policy] template
|
||||
|
@ -438,6 +438,14 @@ not saved in {es}. *Default: `data`*
|
|||
| Set the interval in milliseconds to sample
|
||||
system and process performance metrics. The minimum value is 100. *Default: `5000`*
|
||||
|
||||
| `ops.cGroupOverrides.cpuPath:`
|
||||
| Override for cgroup cpu path when mounted in a
|
||||
manner that is inconsistent with `/proc/self/cgroup`.
|
||||
|
||||
| `ops.cGroupOverrides.cpuAcctPath:`
|
||||
| Override for cgroup cpuacct path when mounted
|
||||
in a manner that is inconsistent with `/proc/self/cgroup`.
|
||||
|
||||
| `server.basePath:`
|
||||
| Enables you to specify a path to mount {kib} at if you are
|
||||
running behind a proxy. Use the `server.rewriteBasePath` setting to tell {kib}
|
||||
|
|
|
@ -57,6 +57,7 @@ const mockedResponse: StatusResponse = {
|
|||
],
|
||||
},
|
||||
metrics: {
|
||||
collected_at: new Date('2020-01-01 01:00:00'),
|
||||
collection_interval_in_millis: 1000,
|
||||
os: {
|
||||
platform: 'darwin' as const,
|
||||
|
|
|
@ -113,7 +113,7 @@ const mapManifestServiceUrlDeprecation: ConfigDeprecation = (settings, fromPath,
|
|||
return settings;
|
||||
};
|
||||
|
||||
export const coreDeprecationProvider: ConfigDeprecationProvider = ({ unusedFromRoot }) => [
|
||||
export const coreDeprecationProvider: ConfigDeprecationProvider = ({ rename, unusedFromRoot }) => [
|
||||
unusedFromRoot('savedObjects.indexCheckTimeout'),
|
||||
unusedFromRoot('server.xsrf.token'),
|
||||
unusedFromRoot('maps.manifestServiceUrl'),
|
||||
|
@ -136,6 +136,8 @@ export const coreDeprecationProvider: ConfigDeprecationProvider = ({ unusedFromR
|
|||
unusedFromRoot('optimize.workers'),
|
||||
unusedFromRoot('optimize.profile'),
|
||||
unusedFromRoot('optimize.validateSyntaxOfNodeModules'),
|
||||
rename('cpu.cgroup.path.override', 'ops.cGroupOverrides.cpuPath'),
|
||||
rename('cpuacct.cgroup.path.override', 'ops.cGroupOverrides.cpuAcctPath'),
|
||||
configPathDeprecation,
|
||||
dataPathDeprecation,
|
||||
rewriteBasePathDeprecation,
|
||||
|
|
|
@ -264,6 +264,7 @@ export class LegacyService implements CoreService {
|
|||
getTypeRegistry: startDeps.core.savedObjects.getTypeRegistry,
|
||||
},
|
||||
metrics: {
|
||||
collectionInterval: startDeps.core.metrics.collectionInterval,
|
||||
getOpsMetrics$: startDeps.core.metrics.getOpsMetrics$,
|
||||
},
|
||||
uiSettings: { asScopedToClient: startDeps.core.uiSettings.asScopedToClient },
|
||||
|
|
115
src/core/server/metrics/collectors/cgroup.test.ts
Normal file
115
src/core/server/metrics/collectors/cgroup.test.ts
Normal file
|
@ -0,0 +1,115 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch B.V. under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch B.V. licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
import mockFs from 'mock-fs';
|
||||
import { OsCgroupMetricsCollector } from './cgroup';
|
||||
|
||||
describe('OsCgroupMetricsCollector', () => {
|
||||
afterEach(() => mockFs.restore());
|
||||
|
||||
it('returns empty object when no cgroup file present', async () => {
|
||||
mockFs({
|
||||
'/proc/self': {
|
||||
/** empty directory */
|
||||
},
|
||||
});
|
||||
|
||||
const collector = new OsCgroupMetricsCollector({});
|
||||
expect(await collector.collect()).toEqual({});
|
||||
});
|
||||
|
||||
it('collects default cgroup data', async () => {
|
||||
mockFs({
|
||||
'/proc/self/cgroup': `
|
||||
123:memory:/groupname
|
||||
123:cpu:/groupname
|
||||
123:cpuacct:/groupname
|
||||
`,
|
||||
'/sys/fs/cgroup/cpuacct/groupname/cpuacct.usage': '111',
|
||||
'/sys/fs/cgroup/cpu/groupname/cpu.cfs_period_us': '222',
|
||||
'/sys/fs/cgroup/cpu/groupname/cpu.cfs_quota_us': '333',
|
||||
'/sys/fs/cgroup/cpu/groupname/cpu.stat': `
|
||||
nr_periods 444
|
||||
nr_throttled 555
|
||||
throttled_time 666
|
||||
`,
|
||||
});
|
||||
|
||||
const collector = new OsCgroupMetricsCollector({});
|
||||
expect(await collector.collect()).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"cpu": Object {
|
||||
"cfs_period_micros": 222,
|
||||
"cfs_quota_micros": 333,
|
||||
"control_group": "/groupname",
|
||||
"stat": Object {
|
||||
"number_of_elapsed_periods": 444,
|
||||
"number_of_times_throttled": 555,
|
||||
"time_throttled_nanos": 666,
|
||||
},
|
||||
},
|
||||
"cpuacct": Object {
|
||||
"control_group": "/groupname",
|
||||
"usage_nanos": 111,
|
||||
},
|
||||
}
|
||||
`);
|
||||
});
|
||||
|
||||
it('collects override cgroup data', async () => {
|
||||
mockFs({
|
||||
'/proc/self/cgroup': `
|
||||
123:memory:/groupname
|
||||
123:cpu:/groupname
|
||||
123:cpuacct:/groupname
|
||||
`,
|
||||
'/sys/fs/cgroup/cpuacct/xxcustomcpuacctxx/cpuacct.usage': '111',
|
||||
'/sys/fs/cgroup/cpu/xxcustomcpuxx/cpu.cfs_period_us': '222',
|
||||
'/sys/fs/cgroup/cpu/xxcustomcpuxx/cpu.cfs_quota_us': '333',
|
||||
'/sys/fs/cgroup/cpu/xxcustomcpuxx/cpu.stat': `
|
||||
nr_periods 444
|
||||
nr_throttled 555
|
||||
throttled_time 666
|
||||
`,
|
||||
});
|
||||
|
||||
const collector = new OsCgroupMetricsCollector({
|
||||
cpuAcctPath: 'xxcustomcpuacctxx',
|
||||
cpuPath: 'xxcustomcpuxx',
|
||||
});
|
||||
expect(await collector.collect()).toMatchInlineSnapshot(`
|
||||
Object {
|
||||
"cpu": Object {
|
||||
"cfs_period_micros": 222,
|
||||
"cfs_quota_micros": 333,
|
||||
"control_group": "xxcustomcpuxx",
|
||||
"stat": Object {
|
||||
"number_of_elapsed_periods": 444,
|
||||
"number_of_times_throttled": 555,
|
||||
"time_throttled_nanos": 666,
|
||||
},
|
||||
},
|
||||
"cpuacct": Object {
|
||||
"control_group": "xxcustomcpuacctxx",
|
||||
"usage_nanos": 111,
|
||||
},
|
||||
}
|
||||
`);
|
||||
});
|
||||
});
|
194
src/core/server/metrics/collectors/cgroup.ts
Normal file
194
src/core/server/metrics/collectors/cgroup.ts
Normal file
|
@ -0,0 +1,194 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch B.V. under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch B.V. licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
import fs from 'fs';
|
||||
import { join as joinPath } from 'path';
|
||||
import { MetricsCollector, OpsOsMetrics } from './types';
|
||||
|
||||
type OsCgroupMetrics = Pick<OpsOsMetrics, 'cpu' | 'cpuacct'>;
|
||||
|
||||
interface OsCgroupMetricsCollectorOptions {
|
||||
cpuPath?: string;
|
||||
cpuAcctPath?: string;
|
||||
}
|
||||
|
||||
/**
 * Collects the `cpu` and `cpuacct` cgroup metrics of the current process.
 *
 * The control group paths are resolved lazily on the first `collect()` call:
 * from the `cpuPath` / `cpuAcctPath` options when given, otherwise from the
 * control groups file read by `readControlGroups()`. Once cgroups are found to
 * be absent (no path could be resolved, or a read failed with `ENOENT`), every
 * later `collect()` resolves to `{}` without touching the file system again.
 */
export class OsCgroupMetricsCollector implements MetricsCollector<OsCgroupMetrics> {
  /** Used to prevent unnecessary file reads on systems not using cgroups. */
  private noCgroupPresent = false;
  private cpuPath?: string;
  private cpuAcctPath?: string;

  constructor(private readonly options: OsCgroupMetricsCollectorOptions) {}

  /**
   * Reads the current cgroup metrics.
   *
   * @returns the `cpu` and `cpuacct` metrics, or an empty object when the
   * process is not running in a cgroup.
   * @throws any file system error other than `ENOENT`.
   */
  public async collect(): Promise<OsCgroupMetrics> {
    // Honour the flag *before* any I/O; otherwise hosts without cgroups would
    // re-read (and fail on) the control groups file on every collection.
    if (this.noCgroupPresent) {
      return {};
    }

    try {
      await this.initializePaths();
      if (this.noCgroupPresent || !this.cpuAcctPath || !this.cpuPath) {
        return {};
      }

      const [cpuAcctUsage, cpuFsPeriod, cpuFsQuota, cpuStat] = await Promise.all([
        readCPUAcctUsage(this.cpuAcctPath),
        readCPUFsPeriod(this.cpuPath),
        readCPUFsQuota(this.cpuPath),
        readCPUStat(this.cpuPath),
      ]);

      return {
        cpuacct: {
          control_group: this.cpuAcctPath,
          usage_nanos: cpuAcctUsage,
        },

        cpu: {
          control_group: this.cpuPath,
          cfs_period_micros: cpuFsPeriod,
          cfs_quota_micros: cpuFsQuota,
          stat: cpuStat,
        },
      };
    } catch (err) {
      if (err.code === 'ENOENT') {
        // A missing cgroup file means cgroups are not in use; remember that.
        this.noCgroupPresent = true;
        return {};
      } else {
        throw err;
      }
    }
  }

  /** Nothing to reset: this collector keeps no per-interval state. */
  public reset() {}

  /**
   * Resolves `cpuPath` and `cpuAcctPath` and memoizes them.
   * Sets `noCgroupPresent` when either path cannot be resolved.
   */
  private async initializePaths() {
    // Perform this setup lazily on the first collect call and then memoize the results.
    // Makes the assumption this data doesn't change while the process is running.
    if (this.cpuPath && this.cpuAcctPath) {
      return;
    }

    const { cpuPath, cpuAcctPath } = this.options;

    if (cpuPath && cpuAcctPath) {
      // Both overrides are provided: no need to read the control groups file.
      this.cpuPath = cpuPath;
      this.cpuAcctPath = cpuAcctPath;
    } else {
      // At least one override is missing: fill the gap from the control groups file.
      const cgroups = await readControlGroups();
      this.cpuPath = cpuPath || cgroups[GROUP_CPU];
      this.cpuAcctPath = cpuAcctPath || cgroups[GROUP_CPUACCT];
    }

    // prevents undefined cgroup paths
    if (!this.cpuPath || !this.cpuAcctPath) {
      this.noCgroupPresent = true;
    }
  }
}
|
||||
|
||||
const CONTROL_GROUP_RE = new RegExp('\\d+:([^:]+):(/.*)');
|
||||
const CONTROLLER_SEPARATOR_RE = ',';
|
||||
|
||||
const PROC_SELF_CGROUP_FILE = '/proc/self/cgroup';
|
||||
const PROC_CGROUP_CPU_DIR = '/sys/fs/cgroup/cpu';
|
||||
const PROC_CGROUP_CPUACCT_DIR = '/sys/fs/cgroup/cpuacct';
|
||||
|
||||
const GROUP_CPUACCT = 'cpuacct';
|
||||
const CPUACCT_USAGE_FILE = 'cpuacct.usage';
|
||||
|
||||
const GROUP_CPU = 'cpu';
|
||||
const CPU_FS_PERIOD_US_FILE = 'cpu.cfs_period_us';
|
||||
const CPU_FS_QUOTA_US_FILE = 'cpu.cfs_quota_us';
|
||||
const CPU_STATS_FILE = 'cpu.stat';
|
||||
|
||||
/**
 * Reads the file at `PROC_SELF_CGROUP_FILE` and builds a map from controller
 * name to control group path.
 *
 * Every line matching `CONTROL_GROUP_RE` contributes one entry per controller
 * listed in its second field (controllers are split on
 * `CONTROLLER_SEPARATOR_RE`); lines that do not match are ignored. When a
 * controller appears on several lines, the last one wins.
 */
async function readControlGroups() {
  const contents = (await fs.promises.readFile(PROC_SELF_CGROUP_FILE)).toString();
  const groupByController: Record<string, string> = {};

  for (const line of contents.split(/\n/)) {
    const matches = line.match(CONTROL_GROUP_RE);
    if (matches === null) {
      continue;
    }

    const groupPath = matches[2];
    for (const controller of matches[1].split(CONTROLLER_SEPARATOR_RE)) {
      groupByController[controller] = groupPath;
    }
  }

  return groupByController;
}
|
||||
|
||||
/**
 * Reads the file at `path` and parses its contents as a base-10 integer.
 *
 * Resolves to `NaN` when the contents do not start with a number and rejects
 * with the underlying file system error when the file cannot be read.
 */
async function fileContentsToInteger(path: string) {
  const contents = (await fs.promises.readFile(path)).toString();
  return Number.parseInt(contents, 10);
}
|
||||
|
||||
/** Reads the `cpuacct.usage` file of the given cpuacct control group as an integer. */
function readCPUAcctUsage(controlGroup: string) {
  const usageFile = joinPath(PROC_CGROUP_CPUACCT_DIR, controlGroup, CPUACCT_USAGE_FILE);
  return fileContentsToInteger(usageFile);
}
|
||||
|
||||
/** Reads the `cpu.cfs_period_us` file of the given cpu control group as an integer. */
function readCPUFsPeriod(controlGroup: string) {
  const periodFile = joinPath(PROC_CGROUP_CPU_DIR, controlGroup, CPU_FS_PERIOD_US_FILE);
  return fileContentsToInteger(periodFile);
}
|
||||
|
||||
/** Reads the `cpu.cfs_quota_us` file of the given cpu control group as an integer. */
function readCPUFsQuota(controlGroup: string) {
  const quotaFile = joinPath(PROC_CGROUP_CPU_DIR, controlGroup, CPU_FS_QUOTA_US_FILE);
  return fileContentsToInteger(quotaFile);
}
|
||||
|
||||
/**
 * Reads the `cpu.stat` file of the given cpu control group.
 *
 * Every counter starts at `-1` and is only overwritten when the file contains
 * a line for it with a parseable integer value, so `-1` always means "not
 * reported". Lines are trimmed before parsing so that leading indentation or
 * a trailing carriage return does not hide a counter.
 *
 * @param controlGroup control group path, relative to `PROC_CGROUP_CPU_DIR`
 * @returns the parsed counters; all `-1` when the file does not exist
 * @throws any file system error other than `ENOENT`
 */
async function readCPUStat(controlGroup: string) {
  const stat = {
    number_of_elapsed_periods: -1,
    number_of_times_throttled: -1,
    time_throttled_nanos: -1,
  };

  let contents: string;
  try {
    const data = await fs.promises.readFile(
      joinPath(PROC_CGROUP_CPU_DIR, controlGroup, CPU_STATS_FILE)
    );
    contents = data.toString();
  } catch (err) {
    if (err.code === 'ENOENT') {
      return stat;
    }

    throw err;
  }

  for (const line of contents.split(/\n/)) {
    const [key, rawValue] = line.trim().split(/\s+/);
    const value = parseInt(rawValue, 10);

    // Keep the -1 sentinel rather than storing NaN for a missing/garbled value.
    if (Number.isNaN(value)) {
      continue;
    }

    switch (key) {
      case 'nr_periods':
        stat.number_of_elapsed_periods = value;
        break;

      case 'nr_throttled':
        stat.number_of_times_throttled = value;
        break;

      case 'throttled_time':
        stat.time_throttled_nanos = value;
        break;
    }
  }

  return stat;
}
|
33
src/core/server/metrics/collectors/collector.mock.ts
Normal file
33
src/core/server/metrics/collectors/collector.mock.ts
Normal file
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch B.V. under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch B.V. licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
import { MetricsCollector } from './types';
|
||||
|
||||
/**
 * Builds a jest-mocked `MetricsCollector` whose `collect` resolves to
 * `collectReturnValue` (an empty object by default) and whose `reset` is a
 * plain mock function.
 */
const createCollector = (collectReturnValue: any = {}): jest.Mocked<MetricsCollector<any>> => ({
  collect: jest.fn().mockResolvedValue(collectReturnValue),
  reset: jest.fn(),
});
|
||||
|
||||
export const metricsCollectorMock = {
|
||||
create: createCollector,
|
||||
};
|
|
@ -18,6 +18,6 @@
|
|||
*/
|
||||
|
||||
export { OpsProcessMetrics, OpsOsMetrics, OpsServerMetrics, MetricsCollector } from './types';
|
||||
export { OsMetricsCollector } from './os';
|
||||
export { OsMetricsCollector, OpsMetricsCollectorOptions } from './os';
|
||||
export { ProcessMetricsCollector } from './process';
|
||||
export { ServerMetricsCollector } from './server';
|
||||
|
|
25
src/core/server/metrics/collectors/os.test.mocks.ts
Normal file
25
src/core/server/metrics/collectors/os.test.mocks.ts
Normal file
|
@ -0,0 +1,25 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch B.V. under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch B.V. licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
import { metricsCollectorMock } from './collector.mock';
|
||||
|
||||
export const cgroupCollectorMock = metricsCollectorMock.create();
|
||||
jest.doMock('./cgroup', () => ({
|
||||
OsCgroupMetricsCollector: jest.fn(() => cgroupCollectorMock),
|
||||
}));
|
|
@ -20,6 +20,7 @@
|
|||
jest.mock('getos', () => (cb: Function) => cb(null, { dist: 'distrib', release: 'release' }));
|
||||
|
||||
import os from 'os';
|
||||
import { cgroupCollectorMock } from './os.test.mocks';
|
||||
import { OsMetricsCollector } from './os';
|
||||
|
||||
describe('OsMetricsCollector', () => {
|
||||
|
@ -27,6 +28,8 @@ describe('OsMetricsCollector', () => {
|
|||
|
||||
beforeEach(() => {
|
||||
collector = new OsMetricsCollector();
|
||||
cgroupCollectorMock.collect.mockReset();
|
||||
cgroupCollectorMock.reset.mockReset();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
|
@ -96,4 +99,9 @@ describe('OsMetricsCollector', () => {
|
|||
'15m': fifteenMinLoad,
|
||||
});
|
||||
});
|
||||
|
||||
it('calls the cgroup sub-collector', async () => {
|
||||
await collector.collect();
|
||||
expect(cgroupCollectorMock.collect).toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
|
|
@ -21,10 +21,22 @@ import os from 'os';
|
|||
import getosAsync, { LinuxOs } from 'getos';
|
||||
import { promisify } from 'util';
|
||||
import { OpsOsMetrics, MetricsCollector } from './types';
|
||||
import { OsCgroupMetricsCollector } from './cgroup';
|
||||
|
||||
const getos = promisify(getosAsync);
|
||||
|
||||
export interface OpsMetricsCollectorOptions {
|
||||
cpuPath?: string;
|
||||
cpuAcctPath?: string;
|
||||
}
|
||||
|
||||
export class OsMetricsCollector implements MetricsCollector<OpsOsMetrics> {
|
||||
private readonly cgroupCollector: OsCgroupMetricsCollector;
|
||||
|
||||
constructor(options: OpsMetricsCollectorOptions = {}) {
|
||||
this.cgroupCollector = new OsCgroupMetricsCollector(options);
|
||||
}
|
||||
|
||||
public async collect(): Promise<OpsOsMetrics> {
|
||||
const platform = os.platform();
|
||||
const load = os.loadavg();
|
||||
|
@ -43,20 +55,30 @@ export class OsMetricsCollector implements MetricsCollector<OpsOsMetrics> {
|
|||
used_in_bytes: os.totalmem() - os.freemem(),
|
||||
},
|
||||
uptime_in_millis: os.uptime() * 1000,
|
||||
...(await this.getDistroStats(platform)),
|
||||
...(await this.cgroupCollector.collect()),
|
||||
};
|
||||
|
||||
if (platform === 'linux') {
|
||||
try {
|
||||
const distro = (await getos()) as LinuxOs;
|
||||
metrics.distro = distro.dist;
|
||||
metrics.distroRelease = `${distro.dist}-${distro.release}`;
|
||||
} catch (e) {
|
||||
// ignore errors
|
||||
}
|
||||
}
|
||||
|
||||
return metrics;
|
||||
}
|
||||
|
||||
public reset() {}
|
||||
|
||||
/**
 * Resolves the distribution fields of the OS metrics.
 *
 * Only queries `getos` when `platform` is `'linux'`; for any other platform,
 * or when `getos` rejects, an empty object is returned.
 */
private async getDistroStats(
  platform: string
): Promise<Pick<OpsOsMetrics, 'distro' | 'distroRelease'>> {
  if (platform !== 'linux') {
    return {};
  }

  try {
    const { dist, release } = (await getos()) as LinuxOs;
    return {
      distro: dist,
      distroRelease: `${dist}-${release}`,
    };
  } catch (e) {
    // ignore errors
    return {};
  }
}
|
||||
}
|
||||
|
|
|
@ -85,6 +85,33 @@ export interface OpsOsMetrics {
|
|||
};
|
||||
/** the OS uptime */
|
||||
uptime_in_millis: number;
|
||||
|
||||
/** cpu accounting metrics, undefined when not running in a cgroup */
|
||||
cpuacct?: {
|
||||
/** name of this process's cgroup */
|
||||
control_group: string;
|
||||
/** cpu time used by this process's cgroup */
|
||||
usage_nanos: number;
|
||||
};
|
||||
|
||||
/** cpu cgroup metrics, undefined when not running in a cgroup */
|
||||
cpu?: {
|
||||
/** name of this process's cgroup */
|
||||
control_group: string;
|
||||
/** the length of the cfs period */
|
||||
cfs_period_micros: number;
|
||||
/** total available run-time within a cfs period */
|
||||
cfs_quota_micros: number;
|
||||
/** current stats on the cfs periods */
|
||||
stat: {
|
||||
/** number of cfs periods that elapsed */
|
||||
number_of_elapsed_periods: number;
|
||||
/** number of times the cgroup has been throttled */
|
||||
number_of_times_throttled: number;
|
||||
/** total amount of time the cgroup has been throttled for */
|
||||
time_throttled_nanos: number;
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -21,20 +21,18 @@ import { MetricsService } from './metrics_service';
|
|||
import {
|
||||
InternalMetricsServiceSetup,
|
||||
InternalMetricsServiceStart,
|
||||
MetricsServiceSetup,
|
||||
MetricsServiceStart,
|
||||
} from './types';
|
||||
|
||||
const createInternalSetupContractMock = () => {
|
||||
const setupContract: jest.Mocked<InternalMetricsServiceSetup> = {};
|
||||
return setupContract;
|
||||
};
|
||||
|
||||
const createStartContractMock = () => {
|
||||
const startContract: jest.Mocked<MetricsServiceStart> = {
|
||||
const setupContract: jest.Mocked<InternalMetricsServiceSetup> = {
|
||||
collectionInterval: 30000,
|
||||
getOpsMetrics$: jest.fn(),
|
||||
};
|
||||
startContract.getOpsMetrics$.mockReturnValue(
|
||||
setupContract.getOpsMetrics$.mockReturnValue(
|
||||
new BehaviorSubject({
|
||||
collected_at: new Date('2020-01-01 01:00:00'),
|
||||
process: {
|
||||
memory: {
|
||||
heap: { total_in_bytes: 1, used_in_bytes: 1, size_limit: 1 },
|
||||
|
@ -56,11 +54,21 @@ const createStartContractMock = () => {
|
|||
concurrent_connections: 1,
|
||||
})
|
||||
);
|
||||
return setupContract;
|
||||
};
|
||||
|
||||
const createSetupContractMock = () => {
|
||||
const startContract: jest.Mocked<MetricsServiceSetup> = createInternalSetupContractMock();
|
||||
return startContract;
|
||||
};
|
||||
|
||||
const createInternalStartContractMock = () => {
|
||||
const startContract: jest.Mocked<InternalMetricsServiceStart> = createStartContractMock();
|
||||
const startContract: jest.Mocked<InternalMetricsServiceStart> = createInternalSetupContractMock();
|
||||
return startContract;
|
||||
};
|
||||
|
||||
const createStartContractMock = () => {
|
||||
const startContract: jest.Mocked<MetricsServiceStart> = createInternalSetupContractMock();
|
||||
return startContract;
|
||||
};
|
||||
|
||||
|
@ -77,7 +85,7 @@ const createMock = () => {
|
|||
|
||||
export const metricsServiceMock = {
|
||||
create: createMock,
|
||||
createSetupContract: createStartContractMock,
|
||||
createSetupContract: createSetupContractMock,
|
||||
createStartContract: createStartContractMock,
|
||||
createInternalSetupContract: createInternalSetupContractMock,
|
||||
createInternalStartContract: createInternalStartContractMock,
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
* under the License.
|
||||
*/
|
||||
|
||||
import { Subject } from 'rxjs';
|
||||
import { ReplaySubject } from 'rxjs';
|
||||
import { first } from 'rxjs/operators';
|
||||
import { CoreService } from '../../types';
|
||||
import { CoreContext } from '../core_context';
|
||||
|
@ -37,26 +37,21 @@ export class MetricsService
|
|||
private readonly logger: Logger;
|
||||
private metricsCollector?: OpsMetricsCollector;
|
||||
private collectInterval?: NodeJS.Timeout;
|
||||
private metrics$ = new Subject<OpsMetrics>();
|
||||
// Keep only the most recent metrics. A ReplaySubject created without a buffer
// size retains every emitted value and replays all of them to each new
// subscriber, so the buffer would grow for as long as metrics keep being emitted.
private metrics$ = new ReplaySubject<OpsMetrics>(1);
|
||||
private service?: InternalMetricsServiceSetup;
|
||||
|
||||
/**
 * @param coreContext Core context, kept as a private readonly field; its
 * logger factory is used here to obtain the logger for the 'metrics' context.
 */
constructor(private readonly coreContext: CoreContext) {
|
||||
this.logger = coreContext.logger.get('metrics');
|
||||
}
|
||||
|
||||
public async setup({ http }: MetricsServiceSetupDeps): Promise<InternalMetricsServiceSetup> {
|
||||
this.metricsCollector = new OpsMetricsCollector(http.server);
|
||||
return {};
|
||||
}
|
||||
|
||||
public async start(): Promise<InternalMetricsServiceStart> {
|
||||
if (!this.metricsCollector) {
|
||||
throw new Error('#setup() needs to be run first');
|
||||
}
|
||||
const config = await this.coreContext.configService
|
||||
.atPath<OpsConfigType>(opsConfig.path)
|
||||
.pipe(first())
|
||||
.toPromise();
|
||||
|
||||
this.metricsCollector = new OpsMetricsCollector(http.server, config.cGroupOverrides);
|
||||
|
||||
await this.refreshMetrics();
|
||||
|
||||
this.collectInterval = setInterval(() => {
|
||||
|
@ -65,9 +60,20 @@ export class MetricsService
|
|||
|
||||
const metricsObservable = this.metrics$.asObservable();
|
||||
|
||||
return {
|
||||
this.service = {
|
||||
collectionInterval: config.interval.asMilliseconds(),
|
||||
getOpsMetrics$: () => metricsObservable,
|
||||
};
|
||||
|
||||
return this.service;
|
||||
}
|
||||
|
||||
/**
 * Returns the contract stored in `this.service`.
 *
 * @returns the stored service contract.
 * @throws Error when `this.service` has not been assigned yet.
 */
public async start(): Promise<InternalMetricsServiceStart> {
  const contract = this.service;
  if (contract) {
    return contract;
  }

  throw new Error('#setup() needs to be run first');
}
|
||||
|
||||
private async refreshMetrics() {
|
||||
|
|
|
@ -23,6 +23,10 @@ export const opsConfig = {
|
|||
path: 'ops',
|
||||
schema: schema.object({
|
||||
interval: schema.duration({ defaultValue: '5s' }),
|
||||
cGroupOverrides: schema.object({
|
||||
cpuPath: schema.maybe(schema.string()),
|
||||
cpuAcctPath: schema.maybe(schema.string()),
|
||||
}),
|
||||
}),
|
||||
};
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ describe('OpsMetricsCollector', () => {
|
|||
|
||||
beforeEach(() => {
|
||||
const hapiServer = httpServiceMock.createInternalSetupContract().server;
|
||||
collector = new OpsMetricsCollector(hapiServer);
|
||||
collector = new OpsMetricsCollector(hapiServer, {});
|
||||
|
||||
mockOsCollector.collect.mockResolvedValue('osMetrics');
|
||||
});
|
||||
|
@ -51,6 +51,7 @@ describe('OpsMetricsCollector', () => {
|
|||
expect(mockServerCollector.collect).toHaveBeenCalledTimes(1);
|
||||
|
||||
expect(metrics).toEqual({
|
||||
collected_at: expect.any(Date),
|
||||
process: 'processMetrics',
|
||||
os: 'osMetrics',
|
||||
requests: 'serverRequestsMetrics',
|
||||
|
|
|
@ -21,6 +21,7 @@ import { Server as HapiServer } from 'hapi';
|
|||
import {
|
||||
ProcessMetricsCollector,
|
||||
OsMetricsCollector,
|
||||
OpsMetricsCollectorOptions,
|
||||
ServerMetricsCollector,
|
||||
MetricsCollector,
|
||||
} from './collectors';
|
||||
|
@ -31,9 +32,9 @@ export class OpsMetricsCollector implements MetricsCollector<OpsMetrics> {
|
|||
private readonly osCollector: OsMetricsCollector;
|
||||
private readonly serverCollector: ServerMetricsCollector;
|
||||
|
||||
constructor(server: HapiServer) {
|
||||
constructor(server: HapiServer, opsOptions: OpsMetricsCollectorOptions) {
|
||||
this.processCollector = new ProcessMetricsCollector();
|
||||
this.osCollector = new OsMetricsCollector();
|
||||
this.osCollector = new OsMetricsCollector(opsOptions);
|
||||
this.serverCollector = new ServerMetricsCollector(server);
|
||||
}
|
||||
|
||||
|
@ -44,6 +45,7 @@ export class OpsMetricsCollector implements MetricsCollector<OpsMetrics> {
|
|||
this.serverCollector.collect(),
|
||||
]);
|
||||
return {
|
||||
collected_at: new Date(),
|
||||
process,
|
||||
os,
|
||||
...server,
|
||||
|
|
|
@ -20,14 +20,15 @@
|
|||
import { Observable } from 'rxjs';
|
||||
import { OpsProcessMetrics, OpsOsMetrics, OpsServerMetrics } from './collectors';
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-empty-interface
|
||||
export interface MetricsServiceSetup {}
|
||||
/**
|
||||
* APIs to retrieve metrics gathered and exposed by the core platform.
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
export interface MetricsServiceStart {
|
||||
export interface MetricsServiceSetup {
|
||||
/** Interval, in milliseconds, at which metrics are collected */
|
||||
readonly collectionInterval: number;
|
||||
|
||||
/**
|
||||
* Retrieve an observable emitting the {@link OpsMetrics} gathered.
|
||||
* The observable will emit an initial value during core's `start` phase, and a new value every fixed interval of time,
|
||||
|
@ -42,6 +43,12 @@ export interface MetricsServiceStart {
|
|||
*/
|
||||
getOpsMetrics$: () => Observable<OpsMetrics>;
|
||||
}
|
||||
/**
|
||||
* {@inheritdoc MetricsServiceSetup}
|
||||
*
|
||||
* @public
|
||||
*/
|
||||
export type MetricsServiceStart = MetricsServiceSetup;
|
||||
|
||||
export type InternalMetricsServiceSetup = MetricsServiceSetup;
|
||||
export type InternalMetricsServiceStart = MetricsServiceStart;
|
||||
|
@ -53,6 +60,8 @@ export type InternalMetricsServiceStart = MetricsServiceStart;
|
|||
* @public
|
||||
*/
|
||||
export interface OpsMetrics {
|
||||
/** Time metrics were recorded at. */
|
||||
collected_at: Date;
|
||||
/** Process related metrics */
|
||||
process: OpsProcessMetrics;
|
||||
/** OS related metrics */
|
||||
|
|
|
@ -233,6 +233,7 @@ export function createPluginStartContext<TPlugin, TPluginDependencies>(
|
|||
getTypeRegistry: deps.savedObjects.getTypeRegistry,
|
||||
},
|
||||
metrics: {
|
||||
collectionInterval: deps.metrics.collectionInterval,
|
||||
getOpsMetrics$: deps.metrics.getOpsMetrics$,
|
||||
},
|
||||
uiSettings: {
|
||||
|
|
|
@ -1531,10 +1531,10 @@ export interface LogRecord {
|
|||
timestamp: Date;
|
||||
}
|
||||
|
||||
// Warning: (ae-missing-release-tag) "MetricsServiceSetup" is exported by the package, but it is missing a release tag (@alpha, @beta, @public, or @internal)
|
||||
//
|
||||
// @public (undocumented)
|
||||
// @public
|
||||
export interface MetricsServiceSetup {
|
||||
readonly collectionInterval: number;
|
||||
getOpsMetrics$: () => Observable<OpsMetrics>;
|
||||
}
|
||||
|
||||
// @public @deprecated (undocumented)
|
||||
|
@ -1621,6 +1621,7 @@ export interface OnPreRoutingToolkit {
|
|||
|
||||
// @public
|
||||
export interface OpsMetrics {
|
||||
collected_at: Date;
|
||||
concurrent_connections: OpsServerMetrics['concurrent_connections'];
|
||||
os: OpsOsMetrics;
|
||||
process: OpsProcessMetrics;
|
||||
|
@ -1630,6 +1631,20 @@ export interface OpsMetrics {
|
|||
|
||||
// @public
|
||||
export interface OpsOsMetrics {
|
||||
cpu?: {
|
||||
control_group: string;
|
||||
cfs_period_micros: number;
|
||||
cfs_quota_micros: number;
|
||||
stat: {
|
||||
number_of_elapsed_periods: number;
|
||||
number_of_times_throttled: number;
|
||||
time_throttled_nanos: number;
|
||||
};
|
||||
};
|
||||
cpuacct?: {
|
||||
control_group: string;
|
||||
usage_nanos: number;
|
||||
};
|
||||
distro?: string;
|
||||
distroRelease?: string;
|
||||
load: {
|
||||
|
|
|
@ -39,6 +39,7 @@ describe('telemetry_ops_stats', () => {
|
|||
const callCluster = jest.fn();
|
||||
|
||||
const metric: OpsMetrics = {
|
||||
collected_at: new Date('2020-01-01 01:00:00'),
|
||||
process: {
|
||||
memory: {
|
||||
heap: {
|
||||
|
|
|
@ -18,13 +18,13 @@
|
|||
*/
|
||||
|
||||
import { Observable } from 'rxjs';
|
||||
import { cloneDeep } from 'lodash';
|
||||
import { cloneDeep, omit } from 'lodash';
|
||||
import moment from 'moment';
|
||||
import { OpsMetrics } from 'kibana/server';
|
||||
import { UsageCollectionSetup } from 'src/plugins/usage_collection/server';
|
||||
import { KIBANA_STATS_TYPE } from '../../../common/constants';
|
||||
|
||||
interface OpsStatsMetrics extends Omit<OpsMetrics, 'response_times'> {
|
||||
interface OpsStatsMetrics extends Omit<OpsMetrics, 'response_times' | 'collected_at'> {
|
||||
timestamp: string;
|
||||
response_times: {
|
||||
average: number;
|
||||
|
@ -52,9 +52,9 @@ export function getOpsStatsCollector(
|
|||
// @ts-expect-error
|
||||
delete metrics.requests.statusCodes;
|
||||
lastMetrics = {
|
||||
...metrics,
|
||||
...omit(metrics, ['collected_at']),
|
||||
response_times: responseTimes,
|
||||
timestamp: moment.utc().toISOString(),
|
||||
timestamp: moment.utc(metrics.collected_at).toISOString(),
|
||||
};
|
||||
});
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue