[Monitoring] Using primary average shard size (#96177) (#97049)

* Using shard size avg instead of primary total

* Added ui text

* Changed to primary average instead of total

* Addressed code review feedback

* Added zero check

* Fixed threshold checking

* Changed description

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
igoristic 2021-04-13 19:03:42 -04:00 committed by GitHub
parent 247851b0ca
commit 8e9737510d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 29 additions and 16 deletions

View file

@ -81,8 +81,8 @@ by running checks on a schedule time of 1 minute with a re-notify interval of 6
[[kibana-alerts-large-shard-size]]
== Large shard size
This alert is triggered if a large (primary) shard size is found on any of the
specified index patterns. The trigger condition is met if an index's shard size is
This alert is triggered if a large average shard size (across associated primaries) is found on any of the
specified index patterns. The trigger condition is met if an index's average shard size is
55 GB or higher in the last 5 minutes. The alert is grouped across all indices that match
the default pattern of `*` by running checks on a schedule time of 1 minute with a re-notify
interval of 12 hours.

View file

@ -477,7 +477,7 @@ export const ALERT_DETAILS = {
paramDetails: {
threshold: {
label: i18n.translate('xpack.monitoring.alerts.shardSize.paramDetails.threshold.label', {
defaultMessage: `Notify when a shard exceeds this size`,
defaultMessage: `Notify when average shard size exceeds this value`,
}),
type: AlertParamType.Number,
append: 'GB',
@ -494,7 +494,7 @@ export const ALERT_DETAILS = {
defaultMessage: 'Shard size',
}),
description: i18n.translate('xpack.monitoring.alerts.shardSize.description', {
defaultMessage: 'Alert if an index (primary) shard is oversize.',
defaultMessage: 'Alert if the average shard size is larger than the configured threshold.',
}),
},
};

View file

@ -100,6 +100,9 @@ export interface ElasticsearchNodeStats {
export interface ElasticsearchIndexStats {
index?: string;
shards: {
primaries: number;
};
primaries?: {
docs?: {
count?: number;

View file

@ -48,7 +48,7 @@ export class LargeShardSizeAlert extends BaseAlert {
description: i18n.translate(
'xpack.monitoring.alerts.shardSize.actionVariables.shardIndex',
{
defaultMessage: 'List of indices which are experiencing large shard size.',
defaultMessage: 'List of indices which are experiencing large average shard size.',
}
),
},
@ -99,7 +99,7 @@ export class LargeShardSizeAlert extends BaseAlert {
const { shardIndex, shardSize } = item.meta as IndexShardSizeUIMeta;
return {
text: i18n.translate('xpack.monitoring.alerts.shardSize.ui.firingMessage', {
defaultMessage: `The following index: #start_link{shardIndex}#end_link has a large shard size of: {shardSize}GB at #absolute`,
defaultMessage: `The following index: #start_link{shardIndex}#end_link has a large average shard size of: {shardSize}GB at #absolute`,
values: {
shardIndex,
shardSize,

View file

@ -68,13 +68,6 @@ export async function fetchIndexShardSize(
},
aggs: {
over_threshold: {
filter: {
range: {
'index_stats.primaries.store.size_in_bytes': {
gt: threshold * gbMultiplier,
},
},
},
aggs: {
index: {
terms: {
@ -95,6 +88,7 @@ export async function fetchIndexShardSize(
_source: {
includes: [
'_index',
'index_stats.shards.primaries',
'index_stats.primaries.store.size_in_bytes',
'source_node.name',
'source_node.uuid',
@ -121,7 +115,7 @@ export async function fetchIndexShardSize(
if (!clusterBuckets.length) {
return stats;
}
const thresholdBytes = threshold * gbMultiplier;
for (const clusterBucket of clusterBuckets) {
const indexBuckets = clusterBucket.over_threshold.index.buckets;
const clusterUuid = clusterBucket.key;
@ -141,9 +135,25 @@ export async function fetchIndexShardSize(
_source: { source_node: sourceNode, index_stats: indexStats },
} = topHit;
const { size_in_bytes: shardSizeBytes } = indexStats?.primaries?.store!;
if (!indexStats || !indexStats.primaries) {
continue;
}
const { primaries: totalPrimaryShards } = indexStats.shards;
const { size_in_bytes: primaryShardSizeBytes = 0 } = indexStats.primaries.store || {};
if (!primaryShardSizeBytes || !totalPrimaryShards) {
continue;
}
/**
* We can only calculate the average primary shard size at this point, since we don't have
* data (in .monitoring-es* indices) to give us individual shards. This might change in the future
*/
const { name: nodeName, uuid: nodeId } = sourceNode;
const shardSize = +(shardSizeBytes! / gbMultiplier).toFixed(2);
const avgShardSize = primaryShardSizeBytes / totalPrimaryShards;
if (avgShardSize < thresholdBytes) {
continue;
}
const shardSize = +(avgShardSize / gbMultiplier).toFixed(2);
stats.push({
shardIndex,
shardSize,