Allow task manager health stats to be logged as info messages (#144986)

In this PR, I'm adding a new setting
(`xpack.task_manager.monitored_stats_health_verbose_log.level`) that
allows the task manager monitoring stats to be verbosely logged at info
level instead of debug.

The two supported values are:
- debug (default)
- info

This will help debug SDHs on Cloud, where we don't want to turn on debug-level
logging for the entire cluster but would still like to see the task manager
monitored stats over time.

## Cloud allow-list PR

https://github.com/elastic/cloud/pull/109563

## To verify
1. Set the following two configuration options:
```
xpack.task_manager.monitored_stats_health_verbose_log.enabled: true
xpack.task_manager.monitored_stats_health_verbose_log.level: info
```
2. Start up Kibana
3. Notice that the `Latest Monitored Stats:` messages are logged at info level
4. Remove `xpack.task_manager.monitored_stats_health_verbose_log.level`
configuration
5. Add the following configuration
```
logging:
  loggers:
    - name: plugins.taskManager
      level: debug
```
6. Restart Kibana
7. Notice that the `Latest Monitored Stats:` messages are logged at debug level (as usual)

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
Mike Côté 2022-11-14 09:32:41 -05:00 committed by GitHub
parent c4aca1fc65
commit e5b27b36bd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 52 additions and 1 deletions

View file

@ -26,6 +26,7 @@ describe('config validation', () => {
"monitored_aggregated_stats_refresh_rate": 60000,
"monitored_stats_health_verbose_log": Object {
"enabled": false,
"level": "debug",
"warn_delayed_task_start_in_seconds": 60,
},
"monitored_stats_required_freshness": 4000,
@ -76,6 +77,7 @@ describe('config validation', () => {
"monitored_aggregated_stats_refresh_rate": 60000,
"monitored_stats_health_verbose_log": Object {
"enabled": false,
"level": "debug",
"warn_delayed_task_start_in_seconds": 60,
},
"monitored_stats_required_freshness": 4000,
@ -124,6 +126,7 @@ describe('config validation', () => {
"monitored_aggregated_stats_refresh_rate": 60000,
"monitored_stats_health_verbose_log": Object {
"enabled": false,
"level": "debug",
"warn_delayed_task_start_in_seconds": 60,
},
"monitored_stats_required_freshness": 4000,

View file

@ -111,6 +111,9 @@ export const configSchema = schema.object(
}),
monitored_stats_health_verbose_log: schema.object({
enabled: schema.boolean({ defaultValue: false }),
level: schema.oneOf([schema.literal('debug'), schema.literal('info')], {
defaultValue: 'debug',
}),
/* The amount of seconds we allow a task to delay before printing a warning server log */
warn_delayed_task_start_in_seconds: schema.number({
defaultValue: DEFAULT_MONITORING_STATS_WARN_DELAYED_TASK_START_IN_SECONDS,

View file

@ -52,6 +52,7 @@ describe('EphemeralTaskLifecycle', () => {
monitored_stats_running_average_window: 50,
monitored_stats_health_verbose_log: {
enabled: true,
level: 'debug',
warn_delayed_task_start_in_seconds: 60,
},
monitored_task_execution_thresholds: {

View file

@ -47,6 +47,7 @@ describe('managed configuration', () => {
monitored_aggregated_stats_refresh_rate: 60000,
monitored_stats_health_verbose_log: {
enabled: false,
level: 'debug' as const,
warn_delayed_task_start_in_seconds: 60,
},
monitored_stats_required_freshness: 4000,

View file

@ -31,6 +31,7 @@ describe('logHealthMetrics', () => {
const config = getTaskManagerConfig({
monitored_stats_health_verbose_log: {
enabled: false,
level: 'debug',
warn_delayed_task_start_in_seconds: 60,
},
});
@ -66,6 +67,7 @@ describe('logHealthMetrics', () => {
const config = getTaskManagerConfig({
monitored_stats_health_verbose_log: {
enabled: false,
level: 'debug',
warn_delayed_task_start_in_seconds: 60,
},
});
@ -88,6 +90,7 @@ describe('logHealthMetrics', () => {
const config = getTaskManagerConfig({
monitored_stats_health_verbose_log: {
enabled: false,
level: 'debug',
warn_delayed_task_start_in_seconds: 60,
},
});
@ -107,6 +110,7 @@ describe('logHealthMetrics', () => {
const config = getTaskManagerConfig({
monitored_stats_health_verbose_log: {
enabled: true,
level: 'debug',
warn_delayed_task_start_in_seconds: 60,
},
});
@ -120,11 +124,31 @@ describe('logHealthMetrics', () => {
expect(firstDebug).toMatchObject(health);
});
// With verbose health logging enabled and `level` set to 'info', the first
// message passed to `logger.info` must be the monitored health payload,
// prefixed with 'Latest Monitored Stats: '.
// NOTE(review): the "status is OK" precondition comes from the test title; it
// presumably holds for the default `getMockMonitoredHealth()` output — confirm.
it('should log as info if status is OK and level is info', () => {
  const mockedLogger = loggingSystemMock.create().get();
  const taskManagerConfig = getTaskManagerConfig({
    monitored_stats_health_verbose_log: {
      enabled: true,
      level: 'info',
      warn_delayed_task_start_in_seconds: 60,
    },
  });
  const monitoredHealth = getMockMonitoredHealth();

  logHealthMetrics(monitoredHealth, mockedLogger, taskManagerConfig, true);

  // Take the first argument of the first `info` call and strip the fixed
  // prefix so the remainder can be parsed as JSON.
  const [[firstInfoMessage]] = (mockedLogger as jest.Mocked<Logger>).info.mock.calls;
  const serializedStats = firstInfoMessage.replace('Latest Monitored Stats: ', '');
  const loggedStats = JSON.parse(serializedStats);

  expect(loggedStats).toMatchObject(monitoredHealth);
});
it('should log as debug if status is OK even if not enabled', () => {
const logger = loggingSystemMock.create().get();
const config = getTaskManagerConfig({
monitored_stats_health_verbose_log: {
enabled: false,
level: 'debug',
warn_delayed_task_start_in_seconds: 60,
},
});
@ -143,6 +167,7 @@ describe('logHealthMetrics', () => {
const config = getTaskManagerConfig({
monitored_stats_health_verbose_log: {
enabled: true,
level: 'debug',
warn_delayed_task_start_in_seconds: 60,
},
});
@ -168,6 +193,7 @@ describe('logHealthMetrics', () => {
const config = getTaskManagerConfig({
monitored_stats_health_verbose_log: {
enabled: true,
level: 'debug',
warn_delayed_task_start_in_seconds: 60,
},
});
@ -191,6 +217,7 @@ describe('logHealthMetrics', () => {
const config = getTaskManagerConfig({
monitored_stats_health_verbose_log: {
enabled: true,
level: 'debug',
warn_delayed_task_start_in_seconds: 60,
},
});
@ -234,6 +261,7 @@ describe('logHealthMetrics', () => {
const config = getTaskManagerConfig({
monitored_stats_health_verbose_log: {
enabled: true,
level: 'debug',
warn_delayed_task_start_in_seconds: 60,
},
});
@ -277,6 +305,7 @@ describe('logHealthMetrics', () => {
const config = getTaskManagerConfig({
monitored_stats_health_verbose_log: {
enabled: true,
level: 'debug',
warn_delayed_task_start_in_seconds: 60,
},
});
@ -301,6 +330,7 @@ describe('logHealthMetrics', () => {
const config = getTaskManagerConfig({
monitored_stats_health_verbose_log: {
enabled: true,
level: 'debug',
warn_delayed_task_start_in_seconds: 60,
},
});
@ -337,6 +367,7 @@ describe('logHealthMetrics', () => {
const config = getTaskManagerConfig({
monitored_stats_health_verbose_log: {
enabled: true,
level: 'debug',
warn_delayed_task_start_in_seconds: 60,
},
});

View file

@ -15,6 +15,7 @@ import { MonitoredHealth } from '../routes/health';
import { calculateHealthStatus } from './calculate_health_status';
enum LogLevel {
Info = 'info',
Warn = 'warn',
Error = 'error',
Debug = 'debug',
@ -30,7 +31,8 @@ export function logHealthMetrics(
config: TaskManagerConfig,
shouldRunTasks: boolean
) {
let logLevel: LogLevel = LogLevel.Debug;
let logLevel: LogLevel =
config.monitored_stats_health_verbose_log.level === 'info' ? LogLevel.Info : LogLevel.Debug;
const enabled = config.monitored_stats_health_verbose_log.enabled;
const healthWithoutCapacity: MonitoredHealth = {
...monitoredHealth,
@ -82,6 +84,9 @@ export function logHealthMetrics(
logLevel = LogLevel.Warn;
}
switch (logLevel) {
case LogLevel.Info:
logger.info(message);
break;
case LogLevel.Warn:
logger.warn(message);
break;

View file

@ -23,6 +23,7 @@ describe('Configuration Statistics Aggregator', () => {
monitored_aggregated_stats_refresh_rate: 5000,
monitored_stats_health_verbose_log: {
enabled: false,
level: 'debug' as const,
warn_delayed_task_start_in_seconds: 60,
},
monitored_stats_running_average_window: 50,

View file

@ -27,6 +27,7 @@ describe('createMonitoringStatsStream', () => {
monitored_aggregated_stats_refresh_rate: 5000,
monitored_stats_health_verbose_log: {
enabled: false,
level: 'debug' as const,
warn_delayed_task_start_in_seconds: 60,
},
monitored_stats_running_average_window: 50,

View file

@ -47,6 +47,7 @@ const pluginInitializerContextParams = {
monitored_aggregated_stats_refresh_rate: 5000,
monitored_stats_health_verbose_log: {
enabled: false,
level: 'debug' as const,
warn_delayed_task_start_in_seconds: 60,
},
monitored_stats_required_freshness: 5000,

View file

@ -48,6 +48,7 @@ describe('TaskPollingLifecycle', () => {
monitored_aggregated_stats_refresh_rate: 5000,
monitored_stats_health_verbose_log: {
enabled: false,
level: 'debug' as const,
warn_delayed_task_start_in_seconds: 60,
},
monitored_stats_required_freshness: 5000,

View file

@ -208,6 +208,7 @@ describe('healthRoute', () => {
monitored_stats_required_freshness: 1000,
monitored_stats_health_verbose_log: {
enabled: true,
level: 'debug',
warn_delayed_task_start_in_seconds: 100,
},
monitored_aggregated_stats_refresh_rate: 60000,
@ -267,6 +268,7 @@ describe('healthRoute', () => {
monitored_stats_required_freshness: 1000,
monitored_stats_health_verbose_log: {
enabled: true,
level: 'debug',
warn_delayed_task_start_in_seconds: 120,
},
monitored_aggregated_stats_refresh_rate: 60000,
@ -344,6 +346,7 @@ describe('healthRoute', () => {
monitored_stats_required_freshness: 1000,
monitored_stats_health_verbose_log: {
enabled: true,
level: 'debug',
warn_delayed_task_start_in_seconds: 120,
},
monitored_aggregated_stats_refresh_rate: 60000,