mirror of
https://github.com/elastic/kibana.git
synced 2025-04-23 01:13:23 -04:00
[Stack Monitoring] fix ccr read_exceptions alert (#153888)
## Summary

Closes https://github.com/elastic/kibana/issues/153298

## Testing

- Set up CCR ([see how to](https://github.com/elastic/kibana/blob/main/x-pack/plugins/monitoring/dev_docs/how_to/running_components_from_source.md#multi-cluster-tests-for-ccrccs-or-listing). I used two cloud clusters, and I am happy to provide credentials to reviewers to avoid the setup)
- Set up CCR between two clusters, create a follower index and replicate some data
- Intentionally break the remote cluster connection on the follower cluster (update the cluster endpoint). At this point read_exceptions should appear in ccr documents
- Start local stack
  - local elasticsearch should have [this change](https://github.com/elastic/elasticsearch/pull/94875). See [how to run components from source](https://github.com/elastic/kibana/blob/main/x-pack/plugins/monitoring/dev_docs/how_to/running_components_from_source.md#single-cluster-testing)
- Enable stack monitoring default rules
- Start metricbeat collection of the follower cluster
- Ensure ccr alert triggers with metricbeat 7.x (this won't work against a cloud cluster because the license is not supported in 7.x but support was added later; I'll see if we can [backport this change](https://github.com/elastic/beats/pull/34105))
- Ensure ccr alert triggers with metricbeat 8.x (build should include [this change](https://github.com/elastic/beats/pull/34957))
This commit is contained in:
parent
ed56403817
commit
d1e5dbc5c9
2 changed files with 209 additions and 6 deletions
|
@ -29,12 +29,14 @@ describe('fetchCCReadExceptions', () => {
|
|||
},
|
||||
},
|
||||
};
|
||||
const esClient = elasticsearchClientMock.createScopedClusterClient().asCurrentUser;
|
||||
esClient.search.mockResponse(
|
||||
// @ts-expect-error not full response interface
|
||||
esRes
|
||||
);
|
||||
|
||||
it('should call ES with correct query', async () => {
|
||||
const esClient = elasticsearchClientMock.createScopedClusterClient().asCurrentUser;
|
||||
esClient.search.mockResponse(
|
||||
// @ts-expect-error not full response interface
|
||||
esRes
|
||||
);
|
||||
|
||||
await fetchCCRReadExceptions(esClient, 1643306331418, 1643309869056, 10000);
|
||||
expect(esClient.search).toHaveBeenCalledWith({
|
||||
index:
|
||||
|
@ -125,7 +127,13 @@ describe('fetchCCReadExceptions', () => {
|
|||
},
|
||||
});
|
||||
});
|
||||
|
||||
it('should call ES with correct query when ccs disabled', async () => {
|
||||
const esClient = elasticsearchClientMock.createScopedClusterClient().asCurrentUser;
|
||||
esClient.search.mockResponse(
|
||||
// @ts-expect-error not full response interface
|
||||
esRes
|
||||
);
|
||||
// @ts-ignore
|
||||
Globals.app.config.ui.ccs.enabled = false;
|
||||
let params = null;
|
||||
|
@ -139,4 +147,199 @@ describe('fetchCCReadExceptions', () => {
|
|||
// @ts-ignore
|
||||
expect(params.index).toBe('.monitoring-es-*,metrics-elasticsearch.stack_monitoring.ccr-*');
|
||||
});
|
||||
|
||||
it('should return read exceptions from legacy documents', async () => {
|
||||
const legacyRes = {
|
||||
aggregations: {
|
||||
remote_clusters: {
|
||||
buckets: [
|
||||
{
|
||||
key: 'secondary',
|
||||
doc_count: 21,
|
||||
follower_indices: {
|
||||
doc_count_error_upper_bound: 0,
|
||||
sum_other_doc_count: 0,
|
||||
buckets: [
|
||||
{
|
||||
key: 'foobar_follower',
|
||||
doc_count: 21,
|
||||
hits: {
|
||||
hits: {
|
||||
total: { value: 21, relation: 'eq' },
|
||||
max_score: null,
|
||||
hits: [
|
||||
{
|
||||
_index: '.monitoring-es-7-mb-2023.03.30',
|
||||
_id: '0YmUM4cBxRuN6VWqFo3H',
|
||||
_score: null,
|
||||
_source: {
|
||||
ccr_stats: {
|
||||
shard_id: 0,
|
||||
read_exceptions: [
|
||||
{
|
||||
retries: 1,
|
||||
exception: {
|
||||
reason:
|
||||
'java.lang.IllegalArgumentException: unknown host [secondary.es.us-central1.gcp.cloud.es.ioe]',
|
||||
caused_by: {
|
||||
type: 'illegal_argument_exception',
|
||||
reason:
|
||||
'unknown host [secondary.es.us-central1.gcp.cloud.es.ioe]',
|
||||
caused_by: {
|
||||
type: 'unknown_host_exception',
|
||||
reason: 'secondary.es.us-central1.gcp.cloud.es.ioe',
|
||||
},
|
||||
},
|
||||
type: 'exception',
|
||||
},
|
||||
from_seq_no: 28,
|
||||
},
|
||||
],
|
||||
leader_index: 'foobar',
|
||||
},
|
||||
cluster_uuid: 'jRHXRb4pSnySw_JEBv_dHg',
|
||||
},
|
||||
sort: [1680197555160],
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const esClient = elasticsearchClientMock.createScopedClusterClient().asCurrentUser;
|
||||
esClient.search.mockResponse(
|
||||
// @ts-expect-error not full response interface
|
||||
legacyRes
|
||||
);
|
||||
|
||||
const result = await fetchCCRReadExceptions(esClient, 1643306331418, 1643309869056, 10000);
|
||||
expect(result).toStrictEqual([
|
||||
{
|
||||
clusterUuid: 'jRHXRb4pSnySw_JEBv_dHg',
|
||||
remoteCluster: 'secondary',
|
||||
followerIndex: 'foobar_follower',
|
||||
leaderIndex: 'foobar',
|
||||
shardId: 0,
|
||||
lastReadException: {
|
||||
type: 'exception',
|
||||
reason:
|
||||
'java.lang.IllegalArgumentException: unknown host [secondary.es.us-central1.gcp.cloud.es.ioe]',
|
||||
caused_by: {
|
||||
type: 'illegal_argument_exception',
|
||||
reason: 'unknown host [secondary.es.us-central1.gcp.cloud.es.ioe]',
|
||||
caused_by: {
|
||||
type: 'unknown_host_exception',
|
||||
reason: 'secondary.es.us-central1.gcp.cloud.es.ioe',
|
||||
},
|
||||
},
|
||||
},
|
||||
ccs: null,
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('should return read exceptions from ecs documents', async () => {
|
||||
const ecsRes = {
|
||||
aggregations: {
|
||||
remote_clusters: {
|
||||
buckets: [
|
||||
{
|
||||
key: 'secondary',
|
||||
doc_count: 44,
|
||||
follower_indices: {
|
||||
doc_count_error_upper_bound: 0,
|
||||
sum_other_doc_count: 0,
|
||||
buckets: [
|
||||
{
|
||||
key: 'foobar_follower',
|
||||
doc_count: 44,
|
||||
hits: {
|
||||
hits: {
|
||||
total: { value: 44, relation: 'eq' },
|
||||
max_score: null,
|
||||
hits: [
|
||||
{
|
||||
_index: '.ds-.monitoring-es-8-mb-2023.03.30-000001',
|
||||
_id: '6YmAM4cBxRuN6VWqx4Sg',
|
||||
_score: null,
|
||||
_source: {
|
||||
elasticsearch: {
|
||||
cluster: { id: 'jRHXRb4pSnySw_JEBv_dHg' },
|
||||
ccr: {
|
||||
read_exceptions: [
|
||||
{
|
||||
from_seq_no: 28,
|
||||
retries: 1,
|
||||
exception: {
|
||||
type: 'exception',
|
||||
reason:
|
||||
'java.lang.IllegalArgumentException: unknown host [secondary.es.us-central1.gcp.cloud.es.ioe]',
|
||||
caused_by: {
|
||||
type: 'illegal_argument_exception',
|
||||
reason:
|
||||
'unknown host [secondary.es.us-central1.gcp.cloud.es.ioe]',
|
||||
caused_by: {
|
||||
type: 'unknown_host_exception',
|
||||
reason: 'secondary.es.us-central1.gcp.cloud.es.ioe',
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
leader: { index: 'foobar' },
|
||||
},
|
||||
},
|
||||
},
|
||||
sort: [1680196289074],
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const esClient = elasticsearchClientMock.createScopedClusterClient().asCurrentUser;
|
||||
esClient.search.mockResponse(
|
||||
// @ts-expect-error not full response interface
|
||||
ecsRes
|
||||
);
|
||||
|
||||
const result = await fetchCCRReadExceptions(esClient, 1643306331418, 1643309869056, 10000);
|
||||
expect(result).toStrictEqual([
|
||||
{
|
||||
clusterUuid: 'jRHXRb4pSnySw_JEBv_dHg',
|
||||
remoteCluster: 'secondary',
|
||||
followerIndex: 'foobar_follower',
|
||||
leaderIndex: 'foobar',
|
||||
shardId: undefined,
|
||||
lastReadException: {
|
||||
type: 'exception',
|
||||
reason:
|
||||
'java.lang.IllegalArgumentException: unknown host [secondary.es.us-central1.gcp.cloud.es.ioe]',
|
||||
caused_by: {
|
||||
type: 'illegal_argument_exception',
|
||||
reason: 'unknown host [secondary.es.us-central1.gcp.cloud.es.ioe]',
|
||||
caused_by: {
|
||||
type: 'unknown_host_exception',
|
||||
reason: 'secondary.es.us-central1.gcp.cloud.es.ioe',
|
||||
},
|
||||
},
|
||||
},
|
||||
ccs: null,
|
||||
},
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
|
|
@ -162,7 +162,7 @@ export async function fetchCCRReadExceptions(
|
|||
|
||||
const { read_exceptions: readExceptions, shard_id: shardId } = ccrStats;
|
||||
|
||||
const leaderIndex = ccrStats.leaderIndex || ccrStats.leader.index;
|
||||
const leaderIndex = ccrStats.leader_index || ccrStats.leader.index;
|
||||
|
||||
const { exception: lastReadException } = readExceptions[readExceptions.length - 1];
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue