mirror of
https://github.com/elastic/kibana.git
synced 2025-04-23 17:28:26 -04:00
# Backport This will backport the following commits from `main` to `8.6`: - [[Fleet] Add Agent logs panic messages from last hour to telemetry (#149825)](https://github.com/elastic/kibana/pull/149825) <!--- Backport version: 8.9.7 --> ### Questions ? Please refer to the [Backport tool documentation](https://github.com/sqren/backport) <!--BACKPORT [{"author":{"name":"Mark Hopkin","email":"mark.hopkin@elastic.co"},"sourceCommit":{"committedDate":"2023-01-30T17:20:08Z","message":"[Fleet] Add Agent logs panic messages from last hour to telemetry (#149825)\n\n## Summary\r\n\r\nCloses https://github.com/elastic/ingest-dev/issues/1486\r\n\r\nAdd `agent_logs_panics_last_hour` telemetry field which contains the\r\nmessage and timestamp of all log messages containing the word panic that\r\nocurred in the last hour.\r\n\r\nCapped at 100 messages.","sha":"b9a999f7f831ce968fd57823ce63fbdcef85402d","branchLabelMapping":{"^v8.7.0$":"main","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:skip","backport:skip","Team:Fleet","v8.7.0"],"number":149825,"url":"https://github.com/elastic/kibana/pull/149825","mergeCommit":{"message":"[Fleet] Add Agent logs panic messages from last hour to telemetry (#149825)\n\n## Summary\r\n\r\nCloses https://github.com/elastic/ingest-dev/issues/1486\r\n\r\nAdd `agent_logs_panics_last_hour` telemetry field which contains the\r\nmessage and timestamp of all log messages containing the word panic that\r\nocurred in the last hour.\r\n\r\nCapped at 100 messages.","sha":"b9a999f7f831ce968fd57823ce63fbdcef85402d"}},"sourceBranch":"main","suggestedTargetBranches":[],"targetPullRequestStates":[{"branch":"main","label":"v8.7.0","labelRegex":"^v8.7.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/149825","number":149825,"mergeCommit":{"message":"[Fleet] Add Agent logs panic messages from last hour to telemetry (#149825)\n\n## Summary\r\n\r\nCloses 
https://github.com/elastic/ingest-dev/issues/1486\r\n\r\nAdd `agent_logs_panics_last_hour` telemetry field which contains the\r\nmessage and timestamp of all log messages containing the word panic that\r\nocurred in the last hour.\r\n\r\nCapped at 100 messages.","sha":"b9a999f7f831ce968fd57823ce63fbdcef85402d"}}]}] BACKPORT-->
This commit is contained in:
parent
5d8aa894ff
commit
daf9cfe5a8
5 changed files with 142 additions and 2 deletions
65
x-pack/plugins/fleet/server/collectors/agent_logs_panics.ts
Normal file
65
x-pack/plugins/fleet/server/collectors/agent_logs_panics.ts
Normal file
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import type { ElasticsearchClient } from '@kbn/core-elasticsearch-server';
|
||||
|
||||
/**
 * Shape of a document returned from the agent logs indices. Both fields are
 * optional because a search hit is not guaranteed to carry them.
 */
interface MaybeLogsDoc {
  '@timestamp'?: string;
  message?: string;
}

/** A single panic log line as reported in telemetry. */
interface PanicLogEntry {
  message: string;
  timestamp: string;
}

/** Telemetry payload fragment produced by the panic logs collector. */
export interface AgentPanicLogsData {
  agent_logs_panics_last_hour: PanicLogEntry[];
}

/** Index pattern searched for agent log documents. */
const AGENT_LOGS_INDEX_PATTERN = 'logs-elastic_agent-*';

/** Upper bound on the number of log messages requested from the search. */
const MAX_MESSAGE_COUNT = 100;

/** Payload with an empty list of panic logs. */
const DEFAULT_LOGS_DATA: AgentPanicLogsData = {
  agent_logs_panics_last_hour: [],
};
|
||||
|
||||
/**
 * Fetches agent log entries from the last hour whose message matches "panic",
 * for inclusion in Fleet usage telemetry.
 *
 * @param esClient Elasticsearch client used to run the search. When it is not
 *   provided, no search is issued and the shared default payload is returned.
 * @returns The matching entries sorted by `@timestamp` descending, limited to
 *   MAX_MESSAGE_COUNT hits. A hit without a message or timestamp is reported
 *   with an empty string in that field.
 */
export async function getPanicLogsLastHour(
  esClient?: ElasticsearchClient
): Promise<AgentPanicLogsData> {
  if (!esClient) {
    return DEFAULT_LOGS_DATA;
  }

  // The two conditions a document must satisfy, combined below as bool filters.
  const withinLastHour = { range: { '@timestamp': { gte: 'now-1h' } } };
  const mentionsPanic = { match: { message: 'panic' } };

  const response = await esClient.search<MaybeLogsDoc>({
    index: AGENT_LOGS_INDEX_PATTERN,
    size: MAX_MESSAGE_COUNT,
    sort: [{ '@timestamp': 'desc' }],
    // Only the two fields that are reported are fetched.
    _source: ['message', '@timestamp'],
    query: { bool: { filter: [withinLastHour, mentionsPanic] } },
  });

  const entries: AgentPanicLogsData['agent_logs_panics_last_hour'] = [];
  for (const { _source: doc } of response.hits.hits) {
    entries.push({
      message: doc?.message || '',
      timestamp: doc?.['@timestamp'] || '',
    });
  }

  return { agent_logs_panics_last_hour: entries };
}
|
|
@ -19,6 +19,8 @@ import type { PackageUsage } from './package_collectors';
|
|||
import { getFleetServerUsage, getFleetServerConfig } from './fleet_server_collector';
|
||||
import type { FleetServerUsage } from './fleet_server_collector';
|
||||
import { getAgentPoliciesUsage } from './agent_policies';
|
||||
import type { AgentPanicLogsData } from './agent_logs_panics';
|
||||
import { getPanicLogsLastHour } from './agent_logs_panics';
|
||||
|
||||
export interface Usage {
|
||||
agents_enabled: boolean;
|
||||
|
@ -39,6 +41,7 @@ export interface FleetUsage extends Usage {
|
|||
degraded: number;
|
||||
};
|
||||
agents_per_policy: number[];
|
||||
agent_logs_panics_last_hour: AgentPanicLogsData['agent_logs_panics_last_hour'];
|
||||
agent_logs_top_errors?: string[];
|
||||
fleet_server_logs_top_errors?: string[];
|
||||
}
|
||||
|
@ -47,7 +50,7 @@ export const fetchFleetUsage = async (
|
|||
core: CoreSetup,
|
||||
config: FleetConfigType,
|
||||
abortController: AbortController
|
||||
) => {
|
||||
): Promise<FleetUsage | undefined> => {
|
||||
const [soClient, esClient] = await getInternalClients(core);
|
||||
if (!soClient || !esClient) {
|
||||
return;
|
||||
|
@ -60,6 +63,7 @@ export const fetchFleetUsage = async (
|
|||
...(await getAgentData(esClient, abortController)),
|
||||
fleet_server_config: await getFleetServerConfig(soClient),
|
||||
agent_policies: await getAgentPoliciesUsage(esClient, abortController),
|
||||
...(await getPanicLogsLastHour(esClient)),
|
||||
// TODO removed top errors telemetry as it causes this issue: https://github.com/elastic/kibana/issues/148976
|
||||
// ...(await getAgentLogsTopErrors(esClient)),
|
||||
};
|
||||
|
|
|
@ -176,7 +176,7 @@ describe('fleet usage telemetry', () => {
|
|||
|
||||
await esClient.create({
|
||||
index: 'logs-elastic_agent-default',
|
||||
id: 'log1',
|
||||
id: 'panic1',
|
||||
body: {
|
||||
log: {
|
||||
level: 'error',
|
||||
|
@ -187,6 +187,45 @@ describe('fleet usage telemetry', () => {
|
|||
refresh: 'wait_for',
|
||||
});
|
||||
|
||||
await esClient.create({
|
||||
index: 'logs-elastic_agent-default',
|
||||
id: 'panic2',
|
||||
body: {
|
||||
log: {
|
||||
level: 'error',
|
||||
},
|
||||
'@timestamp': new Date(Date.now() - 1000 * 60).toISOString(),
|
||||
message: 'stderr panic some other panic',
|
||||
},
|
||||
refresh: 'wait_for',
|
||||
});
|
||||
|
||||
await esClient.create({
|
||||
index: 'logs-elastic_agent-default',
|
||||
id: 'not-panic',
|
||||
body: {
|
||||
log: {
|
||||
level: 'error',
|
||||
},
|
||||
'@timestamp': new Date().toISOString(),
|
||||
message: 'this should not be included in metrics',
|
||||
},
|
||||
refresh: 'wait_for',
|
||||
});
|
||||
|
||||
await esClient.create({
|
||||
index: 'logs-elastic_agent-default',
|
||||
id: 'panic-outside-time-range',
|
||||
body: {
|
||||
log: {
|
||||
level: 'error',
|
||||
},
|
||||
'@timestamp': new Date(Date.now() - 2000 * 60 * 60).toISOString(),
|
||||
message: 'stderr panic this should not be included in metrics',
|
||||
},
|
||||
refresh: 'wait_for',
|
||||
});
|
||||
|
||||
await esClient.create({
|
||||
index: 'logs-elastic_agent.fleet_server-default',
|
||||
id: 'log2',
|
||||
|
@ -279,6 +318,16 @@ describe('fleet usage telemetry', () => {
|
|||
],
|
||||
},
|
||||
agent_policies: { count: 3, output_types: ['elasticsearch'] },
|
||||
agent_logs_panics_last_hour: [
|
||||
{
|
||||
timestamp: expect.any(String),
|
||||
message: 'stderr panic close of closed channel',
|
||||
},
|
||||
{
|
||||
timestamp: expect.any(String),
|
||||
message: 'stderr panic some other panic',
|
||||
},
|
||||
],
|
||||
// agent_logs_top_errors: ['stderr panic close of closed channel'],
|
||||
// fleet_server_logs_top_errors: ['failed to unenroll offline agents'],
|
||||
})
|
||||
|
|
|
@ -177,4 +177,26 @@ export const fleetUsagesSchema: RootSchema<any> = {
|
|||
},
|
||||
},
|
||||
},
|
||||
agent_logs_panics_last_hour: {
|
||||
type: 'array',
|
||||
_meta: {
|
||||
description: 'Array of log messages containing the word panic from the last hour',
|
||||
},
|
||||
items: {
|
||||
properties: {
|
||||
timestamp: {
|
||||
type: 'date',
|
||||
_meta: {
|
||||
description: 'Timestamp of the log message containing the word panic',
|
||||
},
|
||||
},
|
||||
message: {
|
||||
type: 'text',
|
||||
_meta: {
|
||||
description: 'Log message containing the word panic',
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue