mirror of
https://github.com/elastic/kibana.git
synced 2025-04-24 17:59:23 -04:00
# Backport This will backport the following commits from `main` to `8.7`: - [[Stack Monitoring] fix ccr read_exceptions alert (#153888)](https://github.com/elastic/kibana/pull/153888) <!--- Backport version: 8.9.7 --> ### Questions ? Please refer to the [Backport tool documentation](https://github.com/sqren/backport) <!--BACKPORT [{"author":{"name":"Kevin Lacabane","email":"kevin.lacabane@elastic.co"},"sourceCommit":{"committedDate":"2023-04-03T16:17:53Z","message":"[Stack Monitoring] fix ccr read_exceptions alert (#153888)\n\n## Summary\r\n\r\nCloses https://github.com/elastic/kibana/issues/153298\r\n\r\n## Testing\r\n- Setup CCR ([see how\r\nto](https://github.com/elastic/kibana/blob/main/x-pack/plugins/monitoring/dev_docs/how_to/running_components_from_source.md#multi-cluster-tests-for-ccrccs-or-listing).\r\nI used two cloud clusters, happy to provide credentials to reviewers to\r\navoid the setup)\r\n- Setup CCR between two clusters, create a follower indice and replicate\r\nsome data\r\n- Intentionally break the remote cluster connection on the follower\r\ncluster (update the cluster endpoint). At this point read_exceptions\r\nshould appear in ccr documents\r\n- Start local stack\r\n- local elasticsearch should have [this\r\nchange](https://github.com/elastic/elasticsearch/pull/94875). 
See [howto\r\nrun component from\r\nsource](https://github.com/elastic/kibana/blob/main/x-pack/plugins/monitoring/dev_docs/how_to/running_components_from_source.md#single-cluster-testing)\r\n- Enable stack monitoring default rules\r\n- Start metricbeat collection of the follower cluster\r\n- Ensure ccr alert triggers with metricbeat 7.x (this won't work against\r\ncloud cluster because the license is not supported in 7.x but support\r\nwas added later, I'll see if we can [backport this\r\nchange](https://github.com/elastic/beats/pull/34105))\r\n- Ensure ccr alert triggers with metricbeat 8.x (build should include\r\n[this change](https://github.com/elastic/beats/pull/34957))","sha":"d1e5dbc5c9421297d44cd11cd60d3e96cff4f481","branchLabelMapping":{"^v8.8.0$":"main","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:fix","Team:Infra Monitoring UI","Feature:Stack Monitoring","backport:prev-minor","v8.8.0"],"number":153888,"url":"https://github.com/elastic/kibana/pull/153888","mergeCommit":{"message":"[Stack Monitoring] fix ccr read_exceptions alert (#153888)\n\n## Summary\r\n\r\nCloses https://github.com/elastic/kibana/issues/153298\r\n\r\n## Testing\r\n- Setup CCR ([see how\r\nto](https://github.com/elastic/kibana/blob/main/x-pack/plugins/monitoring/dev_docs/how_to/running_components_from_source.md#multi-cluster-tests-for-ccrccs-or-listing).\r\nI used two cloud clusters, happy to provide credentials to reviewers to\r\navoid the setup)\r\n- Setup CCR between two clusters, create a follower indice and replicate\r\nsome data\r\n- Intentionally break the remote cluster connection on the follower\r\ncluster (update the cluster endpoint). At this point read_exceptions\r\nshould appear in ccr documents\r\n- Start local stack\r\n- local elasticsearch should have [this\r\nchange](https://github.com/elastic/elasticsearch/pull/94875). 
See [howto\r\nrun component from\r\nsource](https://github.com/elastic/kibana/blob/main/x-pack/plugins/monitoring/dev_docs/how_to/running_components_from_source.md#single-cluster-testing)\r\n- Enable stack monitoring default rules\r\n- Start metricbeat collection of the follower cluster\r\n- Ensure ccr alert triggers with metricbeat 7.x (this won't work against\r\ncloud cluster because the license is not supported in 7.x but support\r\nwas added later, I'll see if we can [backport this\r\nchange](https://github.com/elastic/beats/pull/34105))\r\n- Ensure ccr alert triggers with metricbeat 8.x (build should include\r\n[this change](https://github.com/elastic/beats/pull/34957))","sha":"d1e5dbc5c9421297d44cd11cd60d3e96cff4f481"}},"sourceBranch":"main","suggestedTargetBranches":[],"targetPullRequestStates":[{"branch":"main","label":"v8.8.0","labelRegex":"^v8.8.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/153888","number":153888,"mergeCommit":{"message":"[Stack Monitoring] fix ccr read_exceptions alert (#153888)\n\n## Summary\r\n\r\nCloses https://github.com/elastic/kibana/issues/153298\r\n\r\n## Testing\r\n- Setup CCR ([see how\r\nto](https://github.com/elastic/kibana/blob/main/x-pack/plugins/monitoring/dev_docs/how_to/running_components_from_source.md#multi-cluster-tests-for-ccrccs-or-listing).\r\nI used two cloud clusters, happy to provide credentials to reviewers to\r\navoid the setup)\r\n- Setup CCR between two clusters, create a follower indice and replicate\r\nsome data\r\n- Intentionally break the remote cluster connection on the follower\r\ncluster (update the cluster endpoint). At this point read_exceptions\r\nshould appear in ccr documents\r\n- Start local stack\r\n- local elasticsearch should have [this\r\nchange](https://github.com/elastic/elasticsearch/pull/94875). 
See [howto\r\nrun component from\r\nsource](https://github.com/elastic/kibana/blob/main/x-pack/plugins/monitoring/dev_docs/how_to/running_components_from_source.md#single-cluster-testing)\r\n- Enable stack monitoring default rules\r\n- Start metricbeat collection of the follower cluster\r\n- Ensure ccr alert triggers with metricbeat 7.x (this won't work against\r\ncloud cluster because the license is not supported in 7.x but support\r\nwas added later, I'll see if we can [backport this\r\nchange](https://github.com/elastic/beats/pull/34105))\r\n- Ensure ccr alert triggers with metricbeat 8.x (build should include\r\n[this change](https://github.com/elastic/beats/pull/34957))","sha":"d1e5dbc5c9421297d44cd11cd60d3e96cff4f481"}}]}] BACKPORT--> Co-authored-by: Kevin Lacabane <kevin.lacabane@elastic.co>
This commit is contained in:
parent
183fc6c079
commit
3df540e28b
2 changed files with 209 additions and 6 deletions
|
@@ -29,12 +29,14 @@ describe('fetchCCReadExceptions', () => {
|
|||
},
|
||||
},
|
||||
};
|
||||
const esClient = elasticsearchClientMock.createScopedClusterClient().asCurrentUser;
|
||||
esClient.search.mockResponse(
|
||||
// @ts-expect-error not full response interface
|
||||
esRes
|
||||
);
|
||||
|
||||
it('should call ES with correct query', async () => {
|
||||
const esClient = elasticsearchClientMock.createScopedClusterClient().asCurrentUser;
|
||||
esClient.search.mockResponse(
|
||||
// @ts-expect-error not full response interface
|
||||
esRes
|
||||
);
|
||||
|
||||
await fetchCCRReadExceptions(esClient, 1643306331418, 1643309869056, 10000);
|
||||
expect(esClient.search).toHaveBeenCalledWith({
|
||||
index:
|
||||
|
@@ -125,7 +127,13 @@ describe('fetchCCReadExceptions', () => {
|
|||
},
|
||||
});
|
||||
});
|
||||
|
||||
it('should call ES with correct query when ccs disabled', async () => {
|
||||
const esClient = elasticsearchClientMock.createScopedClusterClient().asCurrentUser;
|
||||
esClient.search.mockResponse(
|
||||
// @ts-expect-error not full response interface
|
||||
esRes
|
||||
);
|
||||
// @ts-ignore
|
||||
Globals.app.config.ui.ccs.enabled = false;
|
||||
let params = null;
|
||||
|
@@ -139,4 +147,199 @@ describe('fetchCCReadExceptions', () => {
|
|||
// @ts-ignore
|
||||
expect(params.index).toBe('.monitoring-es-*,metrics-elasticsearch.stack_monitoring.ccr-*');
|
||||
});
|
||||
|
||||
it('should return read exceptions from legacy documents', async () => {
|
||||
const legacyRes = {
|
||||
aggregations: {
|
||||
remote_clusters: {
|
||||
buckets: [
|
||||
{
|
||||
key: 'secondary',
|
||||
doc_count: 21,
|
||||
follower_indices: {
|
||||
doc_count_error_upper_bound: 0,
|
||||
sum_other_doc_count: 0,
|
||||
buckets: [
|
||||
{
|
||||
key: 'foobar_follower',
|
||||
doc_count: 21,
|
||||
hits: {
|
||||
hits: {
|
||||
total: { value: 21, relation: 'eq' },
|
||||
max_score: null,
|
||||
hits: [
|
||||
{
|
||||
_index: '.monitoring-es-7-mb-2023.03.30',
|
||||
_id: '0YmUM4cBxRuN6VWqFo3H',
|
||||
_score: null,
|
||||
_source: {
|
||||
ccr_stats: {
|
||||
shard_id: 0,
|
||||
read_exceptions: [
|
||||
{
|
||||
retries: 1,
|
||||
exception: {
|
||||
reason:
|
||||
'java.lang.IllegalArgumentException: unknown host [secondary.es.us-central1.gcp.cloud.es.ioe]',
|
||||
caused_by: {
|
||||
type: 'illegal_argument_exception',
|
||||
reason:
|
||||
'unknown host [secondary.es.us-central1.gcp.cloud.es.ioe]',
|
||||
caused_by: {
|
||||
type: 'unknown_host_exception',
|
||||
reason: 'secondary.es.us-central1.gcp.cloud.es.ioe',
|
||||
},
|
||||
},
|
||||
type: 'exception',
|
||||
},
|
||||
from_seq_no: 28,
|
||||
},
|
||||
],
|
||||
leader_index: 'foobar',
|
||||
},
|
||||
cluster_uuid: 'jRHXRb4pSnySw_JEBv_dHg',
|
||||
},
|
||||
sort: [1680197555160],
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const esClient = elasticsearchClientMock.createScopedClusterClient().asCurrentUser;
|
||||
esClient.search.mockResponse(
|
||||
// @ts-expect-error not full response interface
|
||||
legacyRes
|
||||
);
|
||||
|
||||
const result = await fetchCCRReadExceptions(esClient, 1643306331418, 1643309869056, 10000);
|
||||
expect(result).toStrictEqual([
|
||||
{
|
||||
clusterUuid: 'jRHXRb4pSnySw_JEBv_dHg',
|
||||
remoteCluster: 'secondary',
|
||||
followerIndex: 'foobar_follower',
|
||||
leaderIndex: 'foobar',
|
||||
shardId: 0,
|
||||
lastReadException: {
|
||||
type: 'exception',
|
||||
reason:
|
||||
'java.lang.IllegalArgumentException: unknown host [secondary.es.us-central1.gcp.cloud.es.ioe]',
|
||||
caused_by: {
|
||||
type: 'illegal_argument_exception',
|
||||
reason: 'unknown host [secondary.es.us-central1.gcp.cloud.es.ioe]',
|
||||
caused_by: {
|
||||
type: 'unknown_host_exception',
|
||||
reason: 'secondary.es.us-central1.gcp.cloud.es.ioe',
|
||||
},
|
||||
},
|
||||
},
|
||||
ccs: null,
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('should return read exceptions from ecs documents', async () => {
|
||||
const ecsRes = {
|
||||
aggregations: {
|
||||
remote_clusters: {
|
||||
buckets: [
|
||||
{
|
||||
key: 'secondary',
|
||||
doc_count: 44,
|
||||
follower_indices: {
|
||||
doc_count_error_upper_bound: 0,
|
||||
sum_other_doc_count: 0,
|
||||
buckets: [
|
||||
{
|
||||
key: 'foobar_follower',
|
||||
doc_count: 44,
|
||||
hits: {
|
||||
hits: {
|
||||
total: { value: 44, relation: 'eq' },
|
||||
max_score: null,
|
||||
hits: [
|
||||
{
|
||||
_index: '.ds-.monitoring-es-8-mb-2023.03.30-000001',
|
||||
_id: '6YmAM4cBxRuN6VWqx4Sg',
|
||||
_score: null,
|
||||
_source: {
|
||||
elasticsearch: {
|
||||
cluster: { id: 'jRHXRb4pSnySw_JEBv_dHg' },
|
||||
ccr: {
|
||||
read_exceptions: [
|
||||
{
|
||||
from_seq_no: 28,
|
||||
retries: 1,
|
||||
exception: {
|
||||
type: 'exception',
|
||||
reason:
|
||||
'java.lang.IllegalArgumentException: unknown host [secondary.es.us-central1.gcp.cloud.es.ioe]',
|
||||
caused_by: {
|
||||
type: 'illegal_argument_exception',
|
||||
reason:
|
||||
'unknown host [secondary.es.us-central1.gcp.cloud.es.ioe]',
|
||||
caused_by: {
|
||||
type: 'unknown_host_exception',
|
||||
reason: 'secondary.es.us-central1.gcp.cloud.es.ioe',
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
leader: { index: 'foobar' },
|
||||
},
|
||||
},
|
||||
},
|
||||
sort: [1680196289074],
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
const esClient = elasticsearchClientMock.createScopedClusterClient().asCurrentUser;
|
||||
esClient.search.mockResponse(
|
||||
// @ts-expect-error not full response interface
|
||||
ecsRes
|
||||
);
|
||||
|
||||
const result = await fetchCCRReadExceptions(esClient, 1643306331418, 1643309869056, 10000);
|
||||
expect(result).toStrictEqual([
|
||||
{
|
||||
clusterUuid: 'jRHXRb4pSnySw_JEBv_dHg',
|
||||
remoteCluster: 'secondary',
|
||||
followerIndex: 'foobar_follower',
|
||||
leaderIndex: 'foobar',
|
||||
shardId: undefined,
|
||||
lastReadException: {
|
||||
type: 'exception',
|
||||
reason:
|
||||
'java.lang.IllegalArgumentException: unknown host [secondary.es.us-central1.gcp.cloud.es.ioe]',
|
||||
caused_by: {
|
||||
type: 'illegal_argument_exception',
|
||||
reason: 'unknown host [secondary.es.us-central1.gcp.cloud.es.ioe]',
|
||||
caused_by: {
|
||||
type: 'unknown_host_exception',
|
||||
reason: 'secondary.es.us-central1.gcp.cloud.es.ioe',
|
||||
},
|
||||
},
|
||||
},
|
||||
ccs: null,
|
||||
},
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
|
|
@@ -162,7 +162,7 @@ export async function fetchCCRReadExceptions(
|
|||
|
||||
const { read_exceptions: readExceptions, shard_id: shardId } = ccrStats;
|
||||
|
||||
-const leaderIndex = ccrStats.leaderIndex || ccrStats.leader.index;
|
||||
+const leaderIndex = ccrStats.leader_index || ccrStats.leader.index;
|
||||
|
||||
const { exception: lastReadException } = readExceptions[readExceptions.length - 1];
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue