diff --git a/docs/reference/health/health.asciidoc b/docs/reference/health/health.asciidoc index 8ed467656dc4..8367c745d7a4 100644 --- a/docs/reference/health/health.asciidoc +++ b/docs/reference/health/health.asciidoc @@ -64,6 +64,9 @@ step by step troubleshooting guide to fix the diagnosed problem. `shards_availability`:: Reports health issues regarding shard assignments. + `disk`:: + Reports health issues caused by lack of disk space. + `ilm`:: Reports health issues related to Indexing Lifecycle Management. @@ -311,6 +314,28 @@ details have contents and a structure that is unique to each indicator. `started_replicas`:: (int) The number of replica shards that are active and available on the sysetm. +[[health-api-response-details-disk]] +===== disk + +`nodes`:: + (Optional, array) A list of nodes that have reported disk usage information. This field + is present if any node has reported disk usage. ++ +.Properties of `nodes` +[%collapsible%open] +==== +`node_id`:: + (string) The node id of the node reporting disk usage. + +`name`:: + (Optional, string) The node name of the node reporting disk usage. + +`status`:: + (string) The status of the disk indicator on the node. + +`cause`:: + (Optional, string) The cause for the status not being GREEN, if known. 
+==== [[health-api-response-details-repository-integrity]] ===== repository_integrity diff --git a/server/src/internalClusterTest/java/org/elasticsearch/discovery/StableMasterDisruptionIT.java b/server/src/internalClusterTest/java/org/elasticsearch/discovery/StableMasterDisruptionIT.java index 797e28d5ac95..f5170096171b 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/discovery/StableMasterDisruptionIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/discovery/StableMasterDisruptionIT.java @@ -137,7 +137,7 @@ public class StableMasterDisruptionIT extends ESIntegTestCase { assertBusy(() -> { GetHealthAction.Response healthResponse = client.execute(GetHealthAction.INSTANCE, new GetHealthAction.Request(true)).get(); String debugInformation = xContentToString(healthResponse); - assertThat(debugInformation, healthResponse.getStatus(), equalTo(expectedStatus)); + assertThat(debugInformation, healthResponse.findIndicator("master_is_stable").status(), equalTo(expectedStatus)); assertThat(debugInformation, healthResponse.findIndicator("master_is_stable").symptom(), expectedMatcher); }); } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/health/node/DiskHealthIndicatorServiceIT.java b/server/src/internalClusterTest/java/org/elasticsearch/health/node/DiskHealthIndicatorServiceIT.java new file mode 100644 index 000000000000..9741f5c791ab --- /dev/null +++ b/server/src/internalClusterTest/java/org/elasticsearch/health/node/DiskHealthIndicatorServiceIT.java @@ -0,0 +1,109 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.health.node; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.routing.allocation.DiskThresholdSettings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.health.HealthIndicatorResult; +import org.elasticsearch.health.HealthService; +import org.elasticsearch.health.HealthStatus; +import org.elasticsearch.health.node.selection.HealthNode; +import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.test.InternalTestCluster; + +import java.util.List; +import java.util.Map; +import java.util.concurrent.atomic.AtomicReference; + +import static org.hamcrest.Matchers.equalTo; + +@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0) +public class DiskHealthIndicatorServiceIT extends ESIntegTestCase { + + public void testGreen() throws Exception { + try (InternalTestCluster internalCluster = internalCluster()) { + internalCluster.startMasterOnlyNode(); + internalCluster.startDataOnlyNode(); + ensureStableCluster(internalCluster.getNodeNames().length); + waitForAllNodesToReportHealth(); + for (String node : internalCluster.getNodeNames()) { + HealthService healthService = internalCluster.getInstance(HealthService.class, node); + List resultList = getHealthServiceResults(healthService, node); + assertNotNull(resultList); + assertThat(resultList.size(), equalTo(1)); + HealthIndicatorResult testIndicatorResult = resultList.get(0); + assertThat(testIndicatorResult.status(), equalTo(HealthStatus.GREEN)); + assertThat(testIndicatorResult.symptom(), equalTo("The cluster has enough available disk space.")); + } + } + } + + public void testRed() throws Exception { + try (InternalTestCluster internalCluster = internalCluster()) { + internalCluster.startMasterOnlyNode(getVeryLowWatermarksSettings()); + 
internalCluster.startDataOnlyNode(getVeryLowWatermarksSettings()); + ensureStableCluster(internalCluster.getNodeNames().length); + waitForAllNodesToReportHealth(); + for (String node : internalCluster.getNodeNames()) { + HealthService healthService = internalCluster.getInstance(HealthService.class, node); + List resultList = getHealthServiceResults(healthService, node); + assertNotNull(resultList); + assertThat(resultList.size(), equalTo(1)); + HealthIndicatorResult testIndicatorResult = resultList.get(0); + assertThat(testIndicatorResult.status(), equalTo(HealthStatus.RED)); + assertThat( + testIndicatorResult.symptom(), + equalTo("2 nodes with roles: [data, master] are out of disk or running low on disk space.") + ); + } + } + } + + private List getHealthServiceResults(HealthService healthService, String node) throws Exception { + AtomicReference> resultListReference = new AtomicReference<>(); + ActionListener> listener = new ActionListener<>() { + @Override + public void onResponse(List healthIndicatorResults) { + resultListReference.set(healthIndicatorResults); + } + + @Override + public void onFailure(Exception e) { + throw new RuntimeException(e); + } + }; + healthService.getHealth(internalCluster().client(node), DiskHealthIndicatorService.NAME, true, listener); + assertBusy(() -> assertNotNull(resultListReference.get())); + return resultListReference.get(); + } + + private Settings getVeryLowWatermarksSettings() { + return Settings.builder() + .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), "0.5%") + .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), "0.5%") + .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_DISK_FLOOD_STAGE_WATERMARK_SETTING.getKey(), "0.5%") + .put(DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_REROUTE_INTERVAL_SETTING.getKey(), "0ms") + .build(); + } + + private void waitForAllNodesToReportHealth() throws Exception { + assertBusy(() -> { + 
ClusterState state = internalCluster().clusterService().state(); + DiscoveryNode healthNode = HealthNode.findHealthNode(state); + assertNotNull(healthNode); + Map healthInfoCache = internalCluster().getInstance(HealthInfoCache.class, healthNode.getName()) + .getHealthInfo() + .diskInfoByNode(); + assertThat(healthInfoCache.size(), equalTo(state.getNodes().getNodes().keySet().size())); + }); + } +} diff --git a/server/src/main/java/org/elasticsearch/health/node/DiskHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/health/node/DiskHealthIndicatorService.java new file mode 100644 index 000000000000..57762dc94dc5 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/health/node/DiskHealthIndicatorService.java @@ -0,0 +1,448 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.health.node; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.node.DiscoveryNodeRole; +import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.health.Diagnosis; +import org.elasticsearch.health.HealthIndicatorDetails; +import org.elasticsearch.health.HealthIndicatorImpact; +import org.elasticsearch.health.HealthIndicatorResult; +import org.elasticsearch.health.HealthIndicatorService; +import org.elasticsearch.health.HealthStatus; +import org.elasticsearch.health.ImpactArea; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class DiskHealthIndicatorService implements HealthIndicatorService { + public static final String NAME = "disk"; + + private static final Logger logger = LogManager.getLogger(DiskHealthIndicatorService.class); + + private final ClusterService clusterService; + + public DiskHealthIndicatorService(ClusterService clusterService) { + this.clusterService = clusterService; + } + + @Override + public String name() { + return NAME; + } + + @Override + public HealthIndicatorResult calculate(boolean explain, HealthInfo healthInfo) { + Map diskHealthInfoMap = healthInfo.diskInfoByNode(); + if (diskHealthInfoMap == null || diskHealthInfoMap.isEmpty()) { + /* + * If there is no disk health info, that either means that a new health node was just elected, or something is seriously + * wrong with health data collection on the health node. 
Either way, we immediately return UNKNOWN. If there are at least + * some health info results then we work with what we have (and log any missing ones at debug level immediately below this). + */ + return createIndicator( + HealthStatus.UNKNOWN, + "No disk usage data.", + HealthIndicatorDetails.EMPTY, + Collections.emptyList(), + Collections.emptyList() + ); + } + ClusterState clusterState = clusterService.state(); + logMissingHealthInfoData(diskHealthInfoMap, clusterState); + + /* + * If there are any index blocks in the cluster state, that makes the overall status automatically red, regardless of the statuses + * returned by the nodes. If there is no cluster block, we just use the merged statuses of the nodes. + */ + Set indicesWithBlock = clusterState.blocks() + .indices() + .entrySet() + .stream() + .filter(entry -> entry.getValue().contains(IndexMetadata.INDEX_READ_ONLY_ALLOW_DELETE_BLOCK)) + .map(Map.Entry::getKey) + .collect(Collectors.toSet()); + boolean clusterHasBlockedIndex = indicesWithBlock.isEmpty() == false; + HealthIndicatorDetails details = getDetails(explain, diskHealthInfoMap, clusterState); + final HealthStatus healthStatusFromNodes = HealthStatus.merge( + diskHealthInfoMap.values().stream().map(DiskHealthInfo::healthStatus) + ); + final HealthStatus healthStatus = clusterHasBlockedIndex ? HealthStatus.RED : healthStatusFromNodes; + + final HealthIndicatorResult healthIndicatorResult; + if (HealthStatus.GREEN.equals(healthStatus)) { + healthIndicatorResult = createIndicator( + healthStatus, + "The cluster has enough available disk space.", + details, + List.of(), + List.of() + ); + } else { + /* + * In this case the status is either RED or YELLOW. So we collect information about red and yellow indices (including indices + * with blocks placed on them), and red and yellow nodes (including those with a blocked index). We then use that + * information to get the symptom, impacts, and diagnoses for the result. 
+ */ + Set nodesWithBlockedIndices = getNodeIdsForIndices(indicesWithBlock, clusterState); + Set nodesReportingRed = getNodeIdsReportingStatus(diskHealthInfoMap, HealthStatus.RED); + Set indicesOnRedNodes = getIndicesForNodes(nodesReportingRed, clusterState); + Set nodesReportingYellow = getNodeIdsReportingStatus(diskHealthInfoMap, HealthStatus.YELLOW); + Set indicesOnYellowNodes = getIndicesForNodes(nodesReportingYellow, clusterState); + Set redDataNodes = getNodesWithDataRole(nodesReportingRed, clusterState); + Set yellowDataNodes = getNodesWithDataRole(nodesReportingYellow, clusterState); + Set redMasterNodes = getNodesWithMasterRole(nodesReportingRed, clusterState); + Set yellowMasterNodes = getNodesWithMasterRole(nodesReportingYellow, clusterState); + Set redNonDataNonMasterNodes = getNodesWithNonDataNonMasterRoles(nodesReportingRed, clusterState); + Set yellowNonDataNonMasterNodes = getNodesWithNonDataNonMasterRoles(nodesReportingYellow, clusterState); + + String symptom = getSymptom( + clusterHasBlockedIndex, + indicesWithBlock, + nodesWithBlockedIndices, + nodesReportingRed, + nodesReportingYellow, + clusterState + ); + List impacts = getImpacts( + indicesWithBlock, + indicesOnRedNodes, + indicesOnYellowNodes, + nodesWithBlockedIndices, + redDataNodes, + yellowDataNodes, + redMasterNodes, + yellowMasterNodes, + redNonDataNonMasterNodes, + yellowNonDataNonMasterNodes + ); + List diagnosisList = getDiagnoses( + indicesWithBlock, + indicesOnRedNodes, + indicesOnYellowNodes, + nodesWithBlockedIndices, + redDataNodes, + yellowDataNodes, + redMasterNodes, + yellowMasterNodes, + redNonDataNonMasterNodes, + yellowNonDataNonMasterNodes + ); + healthIndicatorResult = createIndicator(healthStatus, symptom, details, impacts, diagnosisList); + } + return healthIndicatorResult; + } + + private String getSymptom( + boolean clusterHasBlockedIndex, + Set blockedIndices, + Set nodesWithBlockedIndices, + Set nodesReportingRed, + Set nodesReportingYellow, + ClusterState 
clusterState + ) { + Set allUnhealthyNodes = (Stream.concat( + Stream.concat(nodesWithBlockedIndices.stream(), nodesReportingRed.stream()), + nodesReportingYellow.stream() + )).collect(Collectors.toSet()); + Set allRolesOnUnhealthyNodes = getRolesOnNodes(allUnhealthyNodes, clusterState).stream() + .map(DiscoveryNodeRole::roleName) + .collect(Collectors.toSet()); + final String symptom; + if (clusterHasBlockedIndex && allUnhealthyNodes.isEmpty()) { + // In this case the disk issue has been resolved but the index block has not been automatically removed yet: + symptom = String.format( + Locale.ROOT, + "%d %s blocked and cannot be updated but 0 nodes are currently out of space.", + blockedIndices.size(), + blockedIndices.size() == 1 ? "index is" : "indices are" + ); + } else { + symptom = String.format( + Locale.ROOT, + "%d node%s with roles: [%s] %s out of disk or running low on disk space.", + allUnhealthyNodes.size(), + allUnhealthyNodes.size() == 1 ? "" : "s", + allRolesOnUnhealthyNodes.stream().sorted().collect(Collectors.joining(", ")), + allUnhealthyNodes.size() == 1 ? 
"is" : "are" + ); + } + return symptom; + } + + private List getImpacts( + Set indicesWithBlock, + Set indicesOnRedNodes, + Set indicesOnYellowNodes, + Set nodesWithBlockedIndices, + Set redDataNodes, + Set yellowDataNodes, + Set redMasterNodes, + Set yellowMasterNodes, + Set redNonDataNonMasterNodes, + Set yellowNonDataNonMasterNodes + ) { + List impacts = new ArrayList<>(); + if (indicesWithBlock.isEmpty() == false + || indicesOnRedNodes.isEmpty() == false + || nodesWithBlockedIndices.isEmpty() == false + || redDataNodes.isEmpty() == false) { + impacts.add( + new HealthIndicatorImpact(1, "Cannot insert or update documents in the affected indices.", List.of(ImpactArea.INGEST)) + ); + } else if (indicesOnYellowNodes.isEmpty() == false || yellowDataNodes.isEmpty() == false) { + impacts.add( + new HealthIndicatorImpact( + 1, + "At risk of not being able to insert or update documents in the affected indices.", + List.of(ImpactArea.INGEST) + ) + ); + } + if (redMasterNodes.isEmpty() == false || yellowMasterNodes.isEmpty() == false) { + impacts.add(new HealthIndicatorImpact(2, "Cluster stability might be impaired.", List.of(ImpactArea.DEPLOYMENT_MANAGEMENT))); + } + if (redNonDataNonMasterNodes.isEmpty() == false || yellowNonDataNonMasterNodes.isEmpty() == false) { + impacts.add( + new HealthIndicatorImpact(2, "Some cluster functionality might be unavailable.", List.of(ImpactArea.DEPLOYMENT_MANAGEMENT)) + ); + } + return impacts; + } + + private List getDiagnoses( + Set indicesWithBlock, + Set indicesOnRedNodes, + Set indicesOnYellowNodes, + Set nodesWithBlockedIndices, + Set redDataNodes, + Set yellowDataNodes, + Set redMasterNodes, + Set yellowMasterNodes, + Set redNonDataNonMasterNodes, + Set yellowNonDataNonMasterNodes + ) { + List diagnosisList = new ArrayList<>(); + if (indicesWithBlock.isEmpty() == false + || nodesWithBlockedIndices.isEmpty() == false + || indicesOnRedNodes.isEmpty() == false + || redDataNodes.isEmpty() == false + || 
indicesOnYellowNodes.isEmpty() == false + || yellowDataNodes.isEmpty() == false) { + Set impactedIndices = Stream.concat( + Stream.concat(indicesWithBlock.stream(), indicesOnRedNodes.stream()), + indicesOnYellowNodes.stream() + ).collect(Collectors.toSet()); + Set unhealthyNodes = Stream.concat( + Stream.concat(nodesWithBlockedIndices.stream(), redDataNodes.stream()), + yellowDataNodes.stream() + ).collect(Collectors.toSet()); + diagnosisList.add( + new Diagnosis( + new Diagnosis.Definition( + "free-disk-space-or-add-capacity-data-nodes", + String.format( + Locale.ROOT, + "%d %s reside%s on nodes that have run out of space and writing has been blocked by the system.", + impactedIndices.size(), + impactedIndices.size() == 1 ? "index" : "indices", + impactedIndices.size() == 1 ? "s" : "" + ), + "Enable autoscaling (if applicable), add disk capacity or free up disk space to resolve " + + "this. If you have already taken action please wait for the rebalancing to complete.", + "https://ela.st/free-disk-space-or-add-capacity-data-nodes" + ), + unhealthyNodes.stream().sorted().toList() + ) + ); + } + if (redMasterNodes.isEmpty() == false || yellowMasterNodes.isEmpty() == false) { + diagnosisList.add( + new Diagnosis( + new Diagnosis.Definition( + "free-disk-space-or-add-capacity-master-nodes", + "Disk is almost full.", + "Please add capacity to the current nodes, or replace them with ones with higher capacity.", + "https://ela.st/free-disk-space-or-add-capacity-master-nodes" + ), + Stream.concat(redMasterNodes.stream(), yellowMasterNodes.stream()).sorted().toList() + ) + ); + } + if (redNonDataNonMasterNodes.isEmpty() == false || yellowNonDataNonMasterNodes.isEmpty() == false) { + diagnosisList.add( + new Diagnosis( + new Diagnosis.Definition( + "free-disk-space-or-add-capacity-other-nodes", + "Disk is almost full.", + "Please add capacity to the current nodes, or replace them with ones with higher capacity.", + "https://ela.st/free-disk-space-or-add-capacity-other-nodes" 
+ ), + Stream.concat(redNonDataNonMasterNodes.stream(), yellowNonDataNonMasterNodes.stream()).sorted().toList() + ) + ); + } + return diagnosisList; + } + + // Non-private for unit testing + static Set getNodeIdsReportingStatus(Map diskHealthInfoMap, HealthStatus status) { + return diskHealthInfoMap.entrySet() + .stream() + .filter(entry -> status.equals(entry.getValue().healthStatus())) + .map(Map.Entry::getKey) + .collect(Collectors.toSet()); + } + + // Non-private for unit testing + static Set getRolesOnNodes(Set nodeIds, ClusterState clusterState) { + return clusterState.nodes() + .getNodes() + .values() + .stream() + .filter(node -> nodeIds.contains(node.getId())) + .map(DiscoveryNode::getRoles) + .flatMap(Collection::stream) + .collect(Collectors.toSet()); + } + + // Non-private for unit testing + static Set getNodesWithDataRole(Set nodeIds, ClusterState clusterState) { + return clusterState.nodes() + .getNodes() + .values() + .stream() + .filter(node -> nodeIds.contains(node.getId())) + .filter(node -> node.getRoles().stream().anyMatch(DiscoveryNodeRole::canContainData)) + .map(DiscoveryNode::getId) + .collect(Collectors.toSet()); + } + + // Non-private for unit testing + static Set getNodesWithMasterRole(Set nodeIds, ClusterState clusterState) { + return clusterState.nodes() + .getNodes() + .values() + .stream() + .filter(node -> nodeIds.contains(node.getId())) + .filter(node -> node.getRoles().contains(DiscoveryNodeRole.MASTER_ROLE)) + .map(DiscoveryNode::getId) + .collect(Collectors.toSet()); + } + + // Non-private for unit testing + static Set getNodesWithNonDataNonMasterRoles(Set nodeIds, ClusterState clusterState) { + return clusterState.nodes() + .getNodes() + .values() + .stream() + .filter(node -> nodeIds.contains(node.getId())) + .filter( + node -> node.getRoles() + .stream() + .anyMatch(role -> (role.equals(DiscoveryNodeRole.MASTER_ROLE) || role.canContainData()) == false) + ) + .map(DiscoveryNode::getId) + .collect(Collectors.toSet()); + } + + // 
Non-private for unit testing + static Set getIndicesForNodes(Set nodes, ClusterState clusterState) { + return clusterState.routingTable() + .allShards() + .stream() + .filter(routing -> nodes.contains(routing.currentNodeId())) + .map(routing -> routing.index().getName()) + .collect(Collectors.toSet()); + } + + // Non-private for unit testing + static Set getNodeIdsForIndices(Set indices, ClusterState clusterState) { + return clusterState.routingTable() + .allShards() + .stream() + .filter(routing -> routing.currentNodeId() != null && indices.contains(routing.index().getName())) + .map(ShardRouting::currentNodeId) + .collect(Collectors.toSet()); + } + + /** + * This method logs if any nodes in the cluster state do not have health info results reported. This is logged at debug level and is + * not ordinarily important, but could be useful in tracking down problems where nodes have stopped reporting health node information. + * @param diskHealthInfoMap A map of nodeId to DiskHealthInfo + */ + private void logMissingHealthInfoData(Map diskHealthInfoMap, ClusterState clusterState) { + if (logger.isDebugEnabled()) { + Set nodesInClusterState = new HashSet<>(clusterState.nodes()); + Set nodeIdsInClusterState = nodesInClusterState.stream().map(DiscoveryNode::getId).collect(Collectors.toSet()); + Set nodeIdsInHealthInfo = diskHealthInfoMap.keySet(); + if (nodeIdsInHealthInfo.containsAll(nodeIdsInClusterState) == false) { + String nodesWithMissingData = nodesInClusterState.stream() + .filter(node -> nodeIdsInHealthInfo.contains(node.getId()) == false) + .map(node -> String.format(Locale.ROOT, "{%s / %s}", node.getId(), node.getName())) + .collect(Collectors.joining(", ")); + logger.debug("The following nodes are in the cluster state but not reporting health data: [{}]", nodesWithMissingData); + } + } + } + + private HealthIndicatorDetails getDetails(boolean explain, Map diskHealthInfoMap, ClusterState clusterState) { + if (explain == false) { + return HealthIndicatorDetails.EMPTY; + } + return (builder, 
params) -> { + builder.startObject(); + builder.array("nodes", arrayXContentBuilder -> { + for (Map.Entry entry : diskHealthInfoMap.entrySet()) { + builder.startObject(); + String nodeId = entry.getKey(); + builder.field("node_id", nodeId); + String nodeName = getNameForNodeId(nodeId, clusterState); + if (nodeName != null) { + builder.field("name", nodeName); + } + builder.field("status", entry.getValue().healthStatus()); + DiskHealthInfo.Cause cause = entry.getValue().cause(); + if (cause != null) { + builder.field("cause", entry.getValue().cause()); + } + builder.endObject(); + } + }); + return builder.endObject(); + }; + } + + /** + * Returns the name of the node with the given nodeId, as seen in the cluster state at this moment. The name of a node is optional, + * so if the node does not have a name (or the node with the given nodeId is no longer in the cluster state), null is returned. + * @param nodeId The id of the node whose name is to be returned + * @return The current name of the node, or null if the node is not in the cluster state or does not have a name + */ + @Nullable + private String getNameForNodeId(String nodeId, ClusterState clusterState) { + DiscoveryNode node = clusterState.nodes().get(nodeId); + return node == null ? 
null : node.getName(); + } +} diff --git a/server/src/main/java/org/elasticsearch/node/Node.java b/server/src/main/java/org/elasticsearch/node/Node.java index 3ab344442025..380f3d651a69 100644 --- a/server/src/main/java/org/elasticsearch/node/Node.java +++ b/server/src/main/java/org/elasticsearch/node/Node.java @@ -105,6 +105,7 @@ import org.elasticsearch.gateway.PersistedClusterStateService; import org.elasticsearch.health.HealthIndicatorService; import org.elasticsearch.health.HealthService; import org.elasticsearch.health.metadata.HealthMetadataService; +import org.elasticsearch.health.node.DiskHealthIndicatorService; import org.elasticsearch.health.node.HealthInfoCache; import org.elasticsearch.health.node.LocalHealthMonitor; import org.elasticsearch.health.node.selection.HealthNode; @@ -1164,10 +1165,15 @@ public class Node implements Closeable { List preflightHealthIndicatorServices = Collections.singletonList( new StableMasterHealthIndicatorService(coordinationDiagnosticsService, clusterService) ); - var serverHealthIndicatorServices = List.of( - new RepositoryIntegrityHealthIndicatorService(clusterService), - new ShardsAvailabilityHealthIndicatorService(clusterService, clusterModule.getAllocationService()) + var serverHealthIndicatorServices = new ArrayList<>( + List.of( + new RepositoryIntegrityHealthIndicatorService(clusterService), + new ShardsAvailabilityHealthIndicatorService(clusterService, clusterModule.getAllocationService()) + ) ); + if (HealthNode.isEnabled()) { + serverHealthIndicatorServices.add(new DiskHealthIndicatorService(clusterService)); + } var pluginHealthIndicatorServices = pluginsService.filterPlugins(HealthPlugin.class) .stream() .flatMap(plugin -> plugin.getHealthIndicatorServices().stream()) diff --git a/server/src/test/java/org/elasticsearch/health/node/DiskHealthIndicatorServiceTests.java b/server/src/test/java/org/elasticsearch/health/node/DiskHealthIndicatorServiceTests.java new file mode 100644 index 000000000000..9449e6068539 
--- /dev/null +++ b/server/src/test/java/org/elasticsearch/health/node/DiskHealthIndicatorServiceTests.java @@ -0,0 +1,957 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.health.node; + +import org.elasticsearch.Version; +import org.elasticsearch.cluster.ClusterName; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.block.ClusterBlocks; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.node.DiscoveryNodeRole; +import org.elasticsearch.cluster.node.DiscoveryNodes; +import org.elasticsearch.cluster.routing.RoutingTable; +import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.cluster.routing.ShardRoutingState; +import org.elasticsearch.cluster.routing.TestShardRouting; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; +import org.elasticsearch.health.Diagnosis; +import org.elasticsearch.health.HealthIndicatorImpact; +import org.elasticsearch.health.HealthIndicatorResult; +import org.elasticsearch.health.HealthStatus; +import org.elasticsearch.health.ImpactArea; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xcontent.ToXContent; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xcontent.XContentParser; +import 
org.elasticsearch.xcontent.XContentType; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.lessThanOrEqualTo; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class DiskHealthIndicatorServiceTests extends ESTestCase { + public void testServiceBasics() { + Set discoveryNodes = createNodesWithAllRoles(); + ClusterService clusterService = createClusterService(false, discoveryNodes); + DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService); + { + HealthStatus expectedStatus = HealthStatus.GREEN; + HealthInfo healthInfo = createHealthInfo(expectedStatus, discoveryNodes); + HealthIndicatorResult result = diskHealthIndicatorService.calculate(true, healthInfo); + assertThat(result.status(), equalTo(expectedStatus)); + } + { + HealthStatus expectedStatus = HealthStatus.YELLOW; + HealthInfo healthInfo = createHealthInfo(expectedStatus, discoveryNodes); + HealthIndicatorResult result = diskHealthIndicatorService.calculate(true, healthInfo); + assertThat(result.status(), equalTo(expectedStatus)); + } + { + HealthStatus expectedStatus = HealthStatus.RED; + HealthInfo healthInfo = createHealthInfo(expectedStatus, discoveryNodes); + HealthIndicatorResult result = diskHealthIndicatorService.calculate(true, healthInfo); + assertThat(result.status(), equalTo(expectedStatus)); + } + } + + @SuppressWarnings("unchecked") + public void testGreen() throws IOException { + Set discoveryNodes = createNodesWithAllRoles(); + ClusterService clusterService = 
createClusterService(false, discoveryNodes); + DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService); + HealthStatus expectedStatus = HealthStatus.GREEN; + HealthInfo healthInfo = createHealthInfo(expectedStatus, discoveryNodes); + HealthIndicatorResult result = diskHealthIndicatorService.calculate(true, healthInfo); + assertThat(result.status(), equalTo(expectedStatus)); + assertThat(result.symptom(), equalTo("The cluster has enough available disk space.")); + assertThat(result.impacts().size(), equalTo(0)); + assertThat(result.diagnosisList().size(), equalTo(0)); + Map detailsMap = xContentToMap(result.details()); + assertThat(detailsMap.size(), equalTo(1)); + List> nodeDetails = (List>) detailsMap.get("nodes"); + assertThat(nodeDetails.size(), equalTo(discoveryNodes.size())); + Map nodeIdToName = discoveryNodes.stream().collect(Collectors.toMap(DiscoveryNode::getId, DiscoveryNode::getName)); + for (Map nodeDetail : nodeDetails) { + assertThat(nodeDetail.size(), greaterThanOrEqualTo(3)); + assertThat(nodeDetail.size(), lessThanOrEqualTo(4)); // Could have a cause + String nodeId = nodeDetail.get("node_id"); + assertThat(nodeDetail.get("name"), equalTo(nodeIdToName.get(nodeId))); + assertThat(nodeDetail.get("status"), equalTo("GREEN")); + } + } + + @SuppressWarnings("unchecked") + public void testRedNoBlocksNoIndices() throws IOException { + Set discoveryNodes = createNodesWithAllRoles(); + ClusterService clusterService = createClusterService(false, discoveryNodes); + DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService); + HealthStatus expectedStatus = HealthStatus.RED; + HealthInfo healthInfo = createHealthInfo(expectedStatus, discoveryNodes); + HealthIndicatorResult result = diskHealthIndicatorService.calculate(true, healthInfo); + assertThat(result.status(), equalTo(expectedStatus)); + assertThat(result.symptom(), containsString("1 node with roles: [data")); + 
assertThat(result.symptom(), containsString("] is out of disk or running low on disk space.")); + assertThat(result.impacts().size(), equalTo(3)); + HealthIndicatorImpact impact = result.impacts().get(0); + assertNotNull(impact); + List impactAreas = impact.impactAreas(); + assertThat(impactAreas.size(), equalTo(1)); + assertThat(impactAreas.get(0), equalTo(ImpactArea.INGEST)); + assertThat(impact.severity(), equalTo(1)); + assertThat(impact.impactDescription(), equalTo("Cannot insert or update documents in the affected indices.")); + assertThat(result.diagnosisList().size(), equalTo(3)); + Diagnosis diagnosis = result.diagnosisList().get(0); + List affectedResources = diagnosis.affectedResources(); + assertThat(affectedResources.size(), equalTo(1)); + String expectedRedNodeId = healthInfo.diskInfoByNode() + .entrySet() + .stream() + .filter(entry -> expectedStatus.equals(entry.getValue().healthStatus())) + .map(Map.Entry::getKey) + .findAny() + .orElseThrow(); + assertThat(affectedResources.get(0), equalTo(expectedRedNodeId)); + Map detailsMap = xContentToMap(result.details()); + assertThat(detailsMap.size(), equalTo(1)); + List> nodeDetails = (List>) detailsMap.get("nodes"); + assertThat(nodeDetails.size(), equalTo(discoveryNodes.size())); + Map nodeIdToName = discoveryNodes.stream().collect(Collectors.toMap(DiscoveryNode::getId, DiscoveryNode::getName)); + for (Map nodeDetail : nodeDetails) { + assertThat(nodeDetail.size(), greaterThanOrEqualTo(3)); + assertThat(nodeDetail.size(), lessThanOrEqualTo(4)); // Could have a cause + String nodeId = nodeDetail.get("node_id"); + assertThat(nodeDetail.get("name"), equalTo(nodeIdToName.get(nodeId))); + if (nodeId.equals(expectedRedNodeId)) { + assertThat(nodeDetail.get("status"), equalTo("RED")); + } else { + assertThat(nodeDetail.get("status"), equalTo("GREEN")); + } + } + } + + @SuppressWarnings("unchecked") + public void testRedNoBlocksWithIndices() throws IOException { + /* + * This method tests that we get the 
expected behavior when there are nodes with indices that report RED status and there are no + * blocks in the cluster state. + */ + Set discoveryNodes = createNodesWithAllRoles(); + HealthStatus expectedStatus = HealthStatus.RED; + int numberOfRedNodes = randomIntBetween(1, discoveryNodes.size()); + HealthInfo healthInfo = createHealthInfo(expectedStatus, numberOfRedNodes, discoveryNodes); + Set redNodeIds = healthInfo.diskInfoByNode() + .entrySet() + .stream() + .filter(entry -> entry.getValue().healthStatus().equals(expectedStatus)) + .map(Map.Entry::getKey) + .collect(Collectors.toSet()); + Set nonRedNodeIds = healthInfo.diskInfoByNode() + .entrySet() + .stream() + .filter(entry -> entry.getValue().healthStatus().equals(expectedStatus) == false) + .map(Map.Entry::getKey) + .collect(Collectors.toSet()); + Map> indexNameToNodeIdsMap = new HashMap<>(); + int numberOfIndices = randomIntBetween(1, 1000); + int numberOfRedIndices = randomIntBetween(1, numberOfIndices); + for (int i = 0; i < numberOfIndices; i++) { + String indexName = randomAlphaOfLength(20); + /* + * The following is artificial but useful for making sure the test has the right counts. The first numberOfRedIndices indices + * are always placed on all of the red nodes. All other indices are placed on all of the non red nodes. + */ + if (i < numberOfRedIndices) { + indexNameToNodeIdsMap.put(indexName, redNodeIds); + } else { + indexNameToNodeIdsMap.put(indexName, nonRedNodeIds); + } + } + ClusterService clusterService = createClusterService(Set.of(), discoveryNodes, indexNameToNodeIdsMap); + DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService); + HealthIndicatorResult result = diskHealthIndicatorService.calculate(true, healthInfo); + assertThat(result.status(), equalTo(expectedStatus)); + assertThat( + result.symptom(), + containsString(numberOfRedNodes + " node" + (numberOfRedNodes == 1 ? 
"" : "s") + " with roles: [data") + ); + assertThat(result.symptom(), containsString(" out of disk or running low on disk space.")); + assertThat(result.impacts().size(), equalTo(3)); + HealthIndicatorImpact impact = result.impacts().get(0); + assertNotNull(impact); + List impactAreas = impact.impactAreas(); + assertThat(impactAreas.size(), equalTo(1)); + assertThat(impactAreas.get(0), equalTo(ImpactArea.INGEST)); + assertThat(impact.severity(), equalTo(1)); + assertThat(impact.impactDescription(), equalTo("Cannot insert or update documents in the affected indices.")); + assertThat(result.diagnosisList().size(), equalTo(3)); + Diagnosis diagnosis = result.diagnosisList().get(0); + List affectedResources = diagnosis.affectedResources(); + assertThat(affectedResources.size(), equalTo(numberOfRedNodes)); + assertTrue(affectedResources.containsAll(redNodeIds)); + Map detailsMap = xContentToMap(result.details()); + assertThat(detailsMap.size(), equalTo(1)); + List> nodeDetails = (List>) detailsMap.get("nodes"); + assertThat(nodeDetails.size(), equalTo(discoveryNodes.size())); + Map nodeIdToName = discoveryNodes.stream().collect(Collectors.toMap(DiscoveryNode::getId, DiscoveryNode::getName)); + for (Map nodeDetail : nodeDetails) { + assertThat(nodeDetail.size(), greaterThanOrEqualTo(3)); + assertThat(nodeDetail.size(), lessThanOrEqualTo(4)); // Could have a cause + String nodeId = nodeDetail.get("node_id"); + assertThat(nodeDetail.get("name"), equalTo(nodeIdToName.get(nodeId))); + if (redNodeIds.contains(nodeId)) { + assertThat(nodeDetail.get("status"), equalTo("RED")); + } else { + assertThat(nodeDetail.get("status"), equalTo("GREEN")); + } + } + } + + public void testHasBlockButOtherwiseGreen() { + /* + * Tests when there is an index that has a block on it but the nodes report green (so the lock is probably about to be released). 
+ */ + Set discoveryNodes = createNodesWithAllRoles(); + ClusterService clusterService = createClusterService(true, discoveryNodes); + DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService); + { + HealthStatus expectedStatus = HealthStatus.RED; + HealthInfo healthInfo = createHealthInfo(HealthStatus.GREEN, discoveryNodes); + HealthIndicatorResult result = diskHealthIndicatorService.calculate(true, healthInfo); + assertThat(result.status(), equalTo(expectedStatus)); + assertThat(result.symptom(), equalTo("1 index is blocked and cannot be updated but 0 nodes are currently out of space.")); + } + } + + public void testHasBlockButOtherwiseYellow() { + /* + * Tests when there is an index that has a block on it but the nodes report yellow. + */ + Set discoveryNodes = createNodesWithAllRoles(); + ClusterService clusterService = createClusterService(true, discoveryNodes); + DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService); + HealthStatus expectedStatus = HealthStatus.RED; + int numberOfYellowNodes = randomIntBetween(1, discoveryNodes.size()); + HealthInfo healthInfo = createHealthInfo(HealthStatus.YELLOW, numberOfYellowNodes, discoveryNodes); + HealthIndicatorResult result = diskHealthIndicatorService.calculate(true, healthInfo); + assertThat(result.status(), equalTo(expectedStatus)); + assertThat(result.symptom(), containsString(" out of disk or running low on disk space.")); + } + + public void testHasBlockButOtherwiseRed() { + Set discoveryNodes = createNodesWithAllRoles(); + HealthStatus expectedStatus = HealthStatus.RED; + int numberOfRedNodes = randomIntBetween(1, discoveryNodes.size()); + HealthInfo healthInfo = createHealthInfo(HealthStatus.RED, numberOfRedNodes, discoveryNodes); + Set redNodeIds = healthInfo.diskInfoByNode() + .entrySet() + .stream() + .filter(entry -> entry.getValue().healthStatus().equals(expectedStatus)) + .map(Map.Entry::getKey) + 
.collect(Collectors.toSet()); + Set nonRedNodeIds = healthInfo.diskInfoByNode() + .entrySet() + .stream() + .filter(entry -> entry.getValue().healthStatus().equals(expectedStatus) == false) + .map(Map.Entry::getKey) + .collect(Collectors.toSet()); + Map> indexNameToNodeIdsMap = new HashMap<>(); + int numberOfIndices = randomIntBetween(1, 1000); + Set blockedIndices = new HashSet<>(); + int numberOfRedIndices = randomIntBetween(1, numberOfIndices); + Set redIndices = new HashSet<>(); + Set allUnhealthyNodes = new HashSet<>(); + allUnhealthyNodes.addAll(redNodeIds); + for (int i = 0; i < numberOfIndices; i++) { + String indexName = randomAlphaOfLength(20); + /* + * The following is artificial but useful for making sure the test has the right counts. The first numberOfRedIndices indices + * are always placed on all of the red nodes. All other indices are placed on all of the non red nodes. + */ + if (i < numberOfRedIndices) { + indexNameToNodeIdsMap.put(indexName, redNodeIds); + redIndices.add(indexName); + } else { + indexNameToNodeIdsMap.put(indexName, nonRedNodeIds); + } + if (randomBoolean()) { + blockedIndices.add(indexName); + } + } + ClusterService clusterService = createClusterService(blockedIndices, discoveryNodes, indexNameToNodeIdsMap); + DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService); + HealthIndicatorResult result = diskHealthIndicatorService.calculate(true, healthInfo); + assertThat(result.status(), equalTo(expectedStatus)); + assertThat(result.symptom(), containsString(" out of disk or running low on disk space.")); + for (String index : blockedIndices) { + allUnhealthyNodes.addAll(indexNameToNodeIdsMap.get(index)); + } + assertThat( + result.symptom(), + containsString(allUnhealthyNodes.size() + " node" + (allUnhealthyNodes.size() == 1 ? 
"" : "s") + " with roles: [data") + ); + } + + public void testMissingHealthInfo() { + Set discoveryNodes = createNodesWithAllRoles(); + Set discoveryNodesInClusterState = new HashSet<>(discoveryNodes); + discoveryNodesInClusterState.add( + new DiscoveryNode( + randomAlphaOfLength(30), + UUID.randomUUID().toString(), + buildNewFakeTransportAddress(), + Collections.emptyMap(), + DiscoveryNodeRole.roles(), + Version.CURRENT + ) + ); + ClusterService clusterService = createClusterService(false, discoveryNodesInClusterState); + DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService); + { + HealthInfo healthInfo = HealthInfo.EMPTY_HEALTH_INFO; + HealthIndicatorResult result = diskHealthIndicatorService.calculate(true, healthInfo); + assertThat(result.status(), equalTo(HealthStatus.UNKNOWN)); + } + { + HealthInfo healthInfo = createHealthInfo(HealthStatus.GREEN, discoveryNodes); + HealthIndicatorResult result = diskHealthIndicatorService.calculate(true, healthInfo); + assertThat(result.status(), equalTo(HealthStatus.GREEN)); + } + { + HealthInfo healthInfo = createHealthInfo(HealthStatus.YELLOW, discoveryNodes); + HealthIndicatorResult result = diskHealthIndicatorService.calculate(true, healthInfo); + assertThat(result.status(), equalTo(HealthStatus.YELLOW)); + } + { + HealthInfo healthInfo = createHealthInfo(HealthStatus.RED, discoveryNodes); + HealthIndicatorResult result = diskHealthIndicatorService.calculate(true, healthInfo); + assertThat(result.status(), equalTo(HealthStatus.RED)); + } + } + + public void testMasterNodeProblems() { + Set roles = Set.of( + DiscoveryNodeRole.MASTER_ROLE, + randomFrom( + DiscoveryNodeRole.ML_ROLE, + DiscoveryNodeRole.INGEST_ROLE, + DiscoveryNodeRole.VOTING_ONLY_NODE_ROLE, + DiscoveryNodeRole.REMOTE_CLUSTER_CLIENT_ROLE, + DiscoveryNodeRole.TRANSFORM_ROLE + ) + ); + Set discoveryNodes = createNodes(roles); + ClusterService clusterService = createClusterService(false, discoveryNodes); + 
DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService); + HealthStatus expectedStatus = randomFrom(HealthStatus.RED, HealthStatus.YELLOW); + int numberOfProblemNodes = randomIntBetween(1, discoveryNodes.size()); + HealthInfo healthInfo = createHealthInfo(expectedStatus, numberOfProblemNodes, discoveryNodes); + HealthIndicatorResult result = diskHealthIndicatorService.calculate(true, healthInfo); + assertThat(result.status(), equalTo(expectedStatus)); + assertThat( + result.symptom(), + equalTo( + numberOfProblemNodes + + " node" + + (numberOfProblemNodes == 1 ? "" : "s") + + " with roles: [" + + roles.stream().map(DiscoveryNodeRole::roleName).sorted().collect(Collectors.joining(", ")) + + "] " + + (numberOfProblemNodes == 1 ? "is" : "are") + + " out of disk or running low on disk space." + ) + ); + List impacts = result.impacts(); + assertThat(impacts.size(), equalTo(2)); + assertThat(impacts.get(0).impactDescription(), equalTo("Cluster stability might be impaired.")); + assertThat(impacts.get(0).severity(), equalTo(2)); + assertThat(impacts.get(0).impactAreas(), equalTo(List.of(ImpactArea.DEPLOYMENT_MANAGEMENT))); + assertThat(impacts.get(1).impactDescription(), equalTo("Some cluster functionality might be unavailable.")); + assertThat(impacts.get(1).severity(), equalTo(2)); + assertThat(impacts.get(1).impactAreas(), equalTo(List.of(ImpactArea.DEPLOYMENT_MANAGEMENT))); + + List diagnosisList = result.diagnosisList(); + assertThat(diagnosisList.size(), equalTo(2)); + Diagnosis diagnosis = diagnosisList.get(0); + List affectedResources = diagnosis.affectedResources(); + assertThat(affectedResources.size(), equalTo(numberOfProblemNodes)); + Diagnosis.Definition diagnosisDefinition = diagnosis.definition(); + assertThat(diagnosisDefinition.cause(), equalTo("Disk is almost full.")); + assertThat( + diagnosisDefinition.action(), + equalTo("Please add capacity to the current nodes, or replace them with ones with higher 
capacity.") + ); + } + + public void testNonDataNonMasterNodeProblems() { + Set nonMasterNonDataRoles = Set.of( + DiscoveryNodeRole.ML_ROLE, + DiscoveryNodeRole.INGEST_ROLE, + DiscoveryNodeRole.VOTING_ONLY_NODE_ROLE, + DiscoveryNodeRole.REMOTE_CLUSTER_CLIENT_ROLE, + DiscoveryNodeRole.TRANSFORM_ROLE + ); + Set roles = new HashSet<>( + randomSubsetOf(randomIntBetween(1, nonMasterNonDataRoles.size()), nonMasterNonDataRoles) + ); + Set discoveryNodes = createNodes(roles); + ClusterService clusterService = createClusterService(false, discoveryNodes); + DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService); + HealthStatus expectedStatus = randomFrom(HealthStatus.RED, HealthStatus.YELLOW); + int numberOfProblemNodes = randomIntBetween(1, discoveryNodes.size()); + HealthInfo healthInfo = createHealthInfo(expectedStatus, numberOfProblemNodes, discoveryNodes); + HealthIndicatorResult result = diskHealthIndicatorService.calculate(true, healthInfo); + assertThat(result.status(), equalTo(expectedStatus)); + assertThat( + result.symptom(), + equalTo( + numberOfProblemNodes + + " node" + + (numberOfProblemNodes == 1 ? "" : "s") + + " with roles: [" + + roles.stream().map(DiscoveryNodeRole::roleName).sorted().collect(Collectors.joining(", ")) + + "] " + + (numberOfProblemNodes == 1 ? "is" : "are") + + " out of disk or running low on disk space." 
+ ) + ); + List impacts = result.impacts(); + assertThat(impacts.size(), equalTo(1)); + assertThat(impacts.get(0).impactDescription(), equalTo("Some cluster functionality might be unavailable.")); + assertThat(impacts.get(0).severity(), equalTo(2)); + assertThat(impacts.get(0).impactAreas(), equalTo(List.of(ImpactArea.DEPLOYMENT_MANAGEMENT))); + List diagnosisList = result.diagnosisList(); + assertThat(diagnosisList.size(), equalTo(1)); + Diagnosis diagnosis = diagnosisList.get(0); + List affectedResources = diagnosis.affectedResources(); + assertThat(affectedResources.size(), equalTo(numberOfProblemNodes)); + Diagnosis.Definition diagnosisDefinition = diagnosis.definition(); + assertThat(diagnosisDefinition.cause(), equalTo("Disk is almost full.")); + assertThat( + diagnosisDefinition.action(), + equalTo("Please add capacity to the current nodes, or replace them with ones with higher capacity.") + ); + } + + public void testBlockedIndexWithRedNonDataNodesAndYellowDataNodes() { + /* + * In this test, there are indices with blocks on them, master nodes that report RED, non-data nodes that report RED, and data + * nodes that report YELLOW. We expect the overall status will be RED, with 3 impacts and 3 diagnoses (for the 3 different node + * types experiencing problems). 
+ */ + Set allNonDataNonMasterRoles = Set.of( + DiscoveryNodeRole.ML_ROLE, + DiscoveryNodeRole.INGEST_ROLE, + DiscoveryNodeRole.VOTING_ONLY_NODE_ROLE, + DiscoveryNodeRole.REMOTE_CLUSTER_CLIENT_ROLE, + DiscoveryNodeRole.TRANSFORM_ROLE + ); + Set nonDataNonMasterRoles = new HashSet<>( + randomSubsetOf(randomIntBetween(1, allNonDataNonMasterRoles.size()), allNonDataNonMasterRoles) + ); + Set allDataRoles = Set.of( + DiscoveryNodeRole.DATA_ROLE, + DiscoveryNodeRole.DATA_FROZEN_NODE_ROLE, + DiscoveryNodeRole.DATA_COLD_NODE_ROLE, + DiscoveryNodeRole.DATA_HOT_NODE_ROLE, + DiscoveryNodeRole.DATA_WARM_NODE_ROLE + ); + Set dataRoles = new HashSet<>(randomSubsetOf(randomIntBetween(1, allDataRoles.size()), allDataRoles)); + Set masterDiscoveryNodes = createNodes(Set.of(DiscoveryNodeRole.MASTER_ROLE)); + Set nonDataNonMasterDiscoveryNodes = createNodes(nonDataNonMasterRoles); + Set dataDiscoveryNodes = createNodes(dataRoles); + ClusterService clusterService = createClusterService( + true, + Stream.concat( + masterDiscoveryNodes.stream(), + (Stream.concat(nonDataNonMasterDiscoveryNodes.stream(), dataDiscoveryNodes.stream())) + ).collect(Collectors.toSet()) + ); + DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService); + int numberOfRedMasterNodes = randomIntBetween(1, masterDiscoveryNodes.size()); + int numberOfRedNonDataNonMasterNodes = randomIntBetween(1, nonDataNonMasterDiscoveryNodes.size()); + int numberOfYellowDataNodes = randomIntBetween(1, dataDiscoveryNodes.size()); + HealthInfo healthInfo = createHealthInfo( + HealthStatus.RED, + numberOfRedMasterNodes, + masterDiscoveryNodes, + HealthStatus.RED, + numberOfRedNonDataNonMasterNodes, + nonDataNonMasterDiscoveryNodes, + HealthStatus.YELLOW, + numberOfYellowDataNodes, + dataDiscoveryNodes + ); + HealthIndicatorResult result = diskHealthIndicatorService.calculate(true, healthInfo); + assertThat(result.status(), equalTo(HealthStatus.RED)); + assertThat(result.symptom(), 
containsString(" out of disk or running low on disk space.")); + List impacts = result.impacts(); + assertThat(impacts.size(), equalTo(3)); + assertThat(impacts.get(0).impactDescription(), equalTo("Cannot insert or update documents in the affected indices.")); + assertThat(impacts.get(0).severity(), equalTo(1)); + assertThat(impacts.get(0).impactAreas(), equalTo(List.of(ImpactArea.INGEST))); + assertThat(impacts.get(1).impactDescription(), equalTo("Cluster stability might be impaired.")); + assertThat(impacts.get(1).severity(), equalTo(2)); + assertThat(impacts.get(1).impactAreas(), equalTo(List.of(ImpactArea.DEPLOYMENT_MANAGEMENT))); + assertThat(impacts.get(2).impactDescription(), equalTo("Some cluster functionality might be unavailable.")); + assertThat(impacts.get(2).severity(), equalTo(2)); + assertThat(impacts.get(2).impactAreas(), equalTo(List.of(ImpactArea.DEPLOYMENT_MANAGEMENT))); + + List diagnosisList = result.diagnosisList(); + assertThat(diagnosisList.size(), equalTo(3)); + Diagnosis dataDiagnosis = diagnosisList.get(0); + List dataAffectedResources = dataDiagnosis.affectedResources(); + assertThat(dataAffectedResources.size(), equalTo(numberOfYellowDataNodes)); + Diagnosis.Definition dataDiagnosisDefinition = dataDiagnosis.definition(); + assertThat( + dataDiagnosisDefinition.cause(), + equalTo("1 index resides on nodes that have run out of space and writing has been blocked by the system.") + ); + assertThat( + dataDiagnosisDefinition.action(), + equalTo( + "Enable autoscaling (if applicable), add disk capacity or free up disk space to resolve this. If you have already " + + "taken action please wait for the rebalancing to complete." 
+ ) + ); + + Diagnosis masterDiagnosis = diagnosisList.get(1); + List masterAffectedResources = masterDiagnosis.affectedResources(); + assertThat(masterAffectedResources.size(), equalTo(numberOfRedMasterNodes)); + Diagnosis.Definition masterDiagnosisDefinition = masterDiagnosis.definition(); + assertThat(masterDiagnosisDefinition.cause(), equalTo("Disk is almost full.")); + assertThat( + masterDiagnosisDefinition.action(), + equalTo("Please add capacity to the current nodes, or replace them with ones with higher capacity.") + ); + + Diagnosis nonDataNonMasterDiagnosis = diagnosisList.get(2); + List nonDataNonMasterAffectedResources = nonDataNonMasterDiagnosis.affectedResources(); + assertThat(nonDataNonMasterAffectedResources.size(), equalTo(numberOfRedNonDataNonMasterNodes)); + Diagnosis.Definition nonDataNonMasterDiagnosisDefinition = nonDataNonMasterDiagnosis.definition(); + assertThat(nonDataNonMasterDiagnosisDefinition.cause(), equalTo("Disk is almost full.")); + assertThat( + nonDataNonMasterDiagnosisDefinition.action(), + equalTo("Please add capacity to the current nodes, or replace them with ones with higher capacity.") + ); + } + + public void testGetNodeIdsReportingStatus() { + Set discoveryNodes = createNodesWithAllRoles(); + Map diskInfoByNode = new HashMap<>(discoveryNodes.size()); + Map> statusToNodeIdMap = new HashMap<>(HealthStatus.values().length); + for (DiscoveryNode node : discoveryNodes) { + HealthStatus status = randomFrom(HealthStatus.values()); + final DiskHealthInfo diskHealthInfo = randomBoolean() + ? 
new DiskHealthInfo(status) + : new DiskHealthInfo(status, randomFrom(DiskHealthInfo.Cause.values())); + Set nodeIdsForStatus = statusToNodeIdMap.computeIfAbsent(status, k -> new HashSet<>()); + nodeIdsForStatus.add(node.getId()); + diskInfoByNode.put(node.getId(), diskHealthInfo); + } + for (HealthStatus status : HealthStatus.values()) { + assertThat( + DiskHealthIndicatorService.getNodeIdsReportingStatus(diskInfoByNode, status), + equalTo(statusToNodeIdMap.get(status) == null ? Set.of() : statusToNodeIdMap.get(status)) + ); + } + } + + public void testGetRolesOnNodes() { + Set roles = new HashSet<>( + randomSubsetOf(randomIntBetween(1, DiscoveryNodeRole.roles().size()), DiscoveryNodeRole.roles()) + ); + Set discoveryNodes = createNodes(roles); + ClusterService clusterService = createClusterService(false, discoveryNodes); + Set result = DiskHealthIndicatorService.getRolesOnNodes( + discoveryNodes.stream().map(DiscoveryNode::getId).collect(Collectors.toSet()), + clusterService.state() + ); + assertThat(result, equalTo(roles)); + } + + public void testGetNodesWithDataRole() { + Set nonDataRoles = Set.of( + DiscoveryNodeRole.MASTER_ROLE, + DiscoveryNodeRole.ML_ROLE, + DiscoveryNodeRole.INGEST_ROLE, + DiscoveryNodeRole.VOTING_ONLY_NODE_ROLE, + DiscoveryNodeRole.REMOTE_CLUSTER_CLIENT_ROLE, + DiscoveryNodeRole.TRANSFORM_ROLE + ); + Set dataRoles = Set.of( + DiscoveryNodeRole.DATA_ROLE, + DiscoveryNodeRole.DATA_COLD_NODE_ROLE, + DiscoveryNodeRole.DATA_FROZEN_NODE_ROLE, + DiscoveryNodeRole.DATA_HOT_NODE_ROLE, + DiscoveryNodeRole.DATA_CONTENT_NODE_ROLE, + DiscoveryNodeRole.DATA_WARM_NODE_ROLE + ); + Set nonDataNodes = createNodes( + new HashSet<>(randomSubsetOf(randomIntBetween(1, nonDataRoles.size()), nonDataRoles)) + ); + Set pureDataNodes = createNodes(new HashSet<>(randomSubsetOf(randomIntBetween(1, dataRoles.size()), dataRoles))); + Set mixedNodes = createNodes( + Stream.concat( + randomSubsetOf(randomIntBetween(1, nonDataRoles.size()), nonDataRoles).stream(), + 
randomSubsetOf(randomIntBetween(1, dataRoles.size()), dataRoles).stream() + ).collect(Collectors.toSet()) + ); + Set allNodes = Stream.concat(Stream.concat(nonDataNodes.stream(), pureDataNodes.stream()), mixedNodes.stream()) + .collect(Collectors.toSet()); + ClusterService clusterService = createClusterService(false, allNodes); + assertThat( + DiskHealthIndicatorService.getNodesWithDataRole( + allNodes.stream().map(DiscoveryNode::getId).collect(Collectors.toSet()), + clusterService.state() + ), + equalTo(Stream.concat(pureDataNodes.stream(), mixedNodes.stream()).map(DiscoveryNode::getId).collect(Collectors.toSet())) + ); + } + + public void testGetNodesWithMasterRole() { + Set nonDataRoles = Set.of( + DiscoveryNodeRole.ML_ROLE, + DiscoveryNodeRole.INGEST_ROLE, + DiscoveryNodeRole.VOTING_ONLY_NODE_ROLE, + DiscoveryNodeRole.REMOTE_CLUSTER_CLIENT_ROLE, + DiscoveryNodeRole.TRANSFORM_ROLE, + DiscoveryNodeRole.DATA_ROLE, + DiscoveryNodeRole.DATA_COLD_NODE_ROLE, + DiscoveryNodeRole.DATA_FROZEN_NODE_ROLE, + DiscoveryNodeRole.DATA_HOT_NODE_ROLE, + DiscoveryNodeRole.DATA_CONTENT_NODE_ROLE, + DiscoveryNodeRole.DATA_WARM_NODE_ROLE + ); + Set nonMasterNodes = createNodes( + new HashSet<>(randomSubsetOf(randomIntBetween(1, nonDataRoles.size()), nonDataRoles)) + ); + Set pureMasterNodes = createNodes(Set.of(DiscoveryNodeRole.MASTER_ROLE)); + Set mixedNodes = createNodes( + Stream.concat( + randomSubsetOf(randomIntBetween(1, nonDataRoles.size()), nonDataRoles).stream(), + Stream.of(DiscoveryNodeRole.MASTER_ROLE) + ).collect(Collectors.toSet()) + ); + Set allNodes = Stream.concat(Stream.concat(nonMasterNodes.stream(), pureMasterNodes.stream()), mixedNodes.stream()) + .collect(Collectors.toSet()); + ClusterService clusterService = createClusterService(false, allNodes); + assertThat( + DiskHealthIndicatorService.getNodesWithMasterRole( + allNodes.stream().map(DiscoveryNode::getId).collect(Collectors.toSet()), + clusterService.state() + ), + 
equalTo(Stream.concat(pureMasterNodes.stream(), mixedNodes.stream()).map(DiscoveryNode::getId).collect(Collectors.toSet())) + ); + } + + public void testGetNodesWithNonDataNonMasterRoles() { + Set dataAndMasterRoles = Set.of( + DiscoveryNodeRole.MASTER_ROLE, + DiscoveryNodeRole.DATA_ROLE, + DiscoveryNodeRole.DATA_COLD_NODE_ROLE, + DiscoveryNodeRole.DATA_FROZEN_NODE_ROLE, + DiscoveryNodeRole.DATA_HOT_NODE_ROLE, + DiscoveryNodeRole.DATA_CONTENT_NODE_ROLE, + DiscoveryNodeRole.DATA_WARM_NODE_ROLE + ); + Set nonDataNonMasterRoles = Set.of( + DiscoveryNodeRole.ML_ROLE, + DiscoveryNodeRole.INGEST_ROLE, + DiscoveryNodeRole.VOTING_ONLY_NODE_ROLE, + DiscoveryNodeRole.REMOTE_CLUSTER_CLIENT_ROLE, + DiscoveryNodeRole.TRANSFORM_ROLE + ); + Set dataAndMasterNodes = createNodes( + new HashSet<>(randomSubsetOf(randomIntBetween(1, dataAndMasterRoles.size()), dataAndMasterRoles)) + ); + Set pureNonDataNonMasterNodes = createNodes( + new HashSet<>(randomSubsetOf(randomIntBetween(1, nonDataNonMasterRoles.size()), nonDataNonMasterRoles)) + ); + Set mixedNodes = createNodes( + Stream.concat( + randomSubsetOf(randomIntBetween(1, dataAndMasterRoles.size()), dataAndMasterRoles).stream(), + randomSubsetOf(randomIntBetween(1, nonDataNonMasterRoles.size()), nonDataNonMasterRoles).stream() + ).collect(Collectors.toSet()) + ); + Set allNodes = Stream.concat( + Stream.concat(dataAndMasterNodes.stream(), pureNonDataNonMasterNodes.stream()), + mixedNodes.stream() + ).collect(Collectors.toSet()); + ClusterService clusterService = createClusterService(false, allNodes); + assertThat( + DiskHealthIndicatorService.getNodesWithNonDataNonMasterRoles( + allNodes.stream().map(DiscoveryNode::getId).collect(Collectors.toSet()), + clusterService.state() + ), + equalTo( + Stream.concat(pureNonDataNonMasterNodes.stream(), mixedNodes.stream()).map(DiscoveryNode::getId).collect(Collectors.toSet()) + ) + ); + } + + public void testGetIndicesForNodes() { + Set discoveryNodes = createNodesWithAllRoles(); + 
HealthStatus expectedStatus = HealthStatus.RED; + int numberOfRedNodes = randomIntBetween(1, discoveryNodes.size()); + HealthInfo healthInfo = createHealthInfo(expectedStatus, numberOfRedNodes, discoveryNodes); + Set redNodeIds = healthInfo.diskInfoByNode() + .entrySet() + .stream() + .filter(entry -> entry.getValue().healthStatus().equals(expectedStatus)) + .map(Map.Entry::getKey) + .collect(Collectors.toSet()); + Set nonRedNodeIds = healthInfo.diskInfoByNode() + .entrySet() + .stream() + .filter(entry -> entry.getValue().healthStatus().equals(expectedStatus) == false) + .map(Map.Entry::getKey) + .collect(Collectors.toSet()); + Map> indexNameToNodeIdsMap = new HashMap<>(); + int numberOfIndices = randomIntBetween(1, 1000); + Set redNodeIndices = new HashSet<>(); + Set nonRedNodeIndices = new HashSet<>(); + for (int i = 0; i < numberOfIndices; i++) { + String indexName = randomAlphaOfLength(20); + if (randomBoolean()) { + indexNameToNodeIdsMap.put(indexName, redNodeIds); + redNodeIndices.add(indexName); + } else { + indexNameToNodeIdsMap.put(indexName, nonRedNodeIds); + nonRedNodeIndices.add(indexName); + } + } + ClusterService clusterService = createClusterService(Set.of(), discoveryNodes, indexNameToNodeIdsMap); + assertThat(DiskHealthIndicatorService.getIndicesForNodes(redNodeIds, clusterService.state()), equalTo(redNodeIndices)); + assertThat(DiskHealthIndicatorService.getIndicesForNodes(nonRedNodeIds, clusterService.state()), equalTo(nonRedNodeIndices)); + } + + public void testGetNodeIdsForIndices() { + Set discoveryNodes = createNodesWithAllRoles(); + HealthStatus expectedStatus = HealthStatus.RED; + int numberOfRedNodes = randomIntBetween(1, discoveryNodes.size()); + HealthInfo healthInfo = createHealthInfo(expectedStatus, numberOfRedNodes, discoveryNodes); + Set redNodeIds = healthInfo.diskInfoByNode() + .entrySet() + .stream() + .filter(entry -> entry.getValue().healthStatus().equals(expectedStatus)) + .map(Map.Entry::getKey) + 
.collect(Collectors.toSet()); + Set nonRedNodeIds = healthInfo.diskInfoByNode() + .entrySet() + .stream() + .filter(entry -> entry.getValue().healthStatus().equals(expectedStatus) == false) + .map(Map.Entry::getKey) + .collect(Collectors.toSet()); + Map> indexNameToNodeIdsMap = new HashMap<>(); + int numberOfIndices = randomIntBetween(1, 1000); + Set redNodeIndices = new HashSet<>(); + Set nonRedNodeIndices = new HashSet<>(); + for (int i = 0; i < numberOfIndices; i++) { + String indexName = randomAlphaOfLength(20); + if (randomBoolean()) { + indexNameToNodeIdsMap.put(indexName, redNodeIds); + redNodeIndices.add(indexName); + } else { + indexNameToNodeIdsMap.put(indexName, nonRedNodeIds); + nonRedNodeIndices.add(indexName); + } + } + ClusterService clusterService = createClusterService(Set.of(), discoveryNodes, indexNameToNodeIdsMap); + assertThat(DiskHealthIndicatorService.getNodeIdsForIndices(redNodeIndices, clusterService.state()), equalTo(redNodeIds)); + assertThat(DiskHealthIndicatorService.getNodeIdsForIndices(nonRedNodeIndices, clusterService.state()), equalTo(nonRedNodeIds)); + } + + /** Creates a random set of nodes via createNodes, passing the role set returned by DiscoveryNodeRole.roles(). */ private Set createNodesWithAllRoles() { + return createNodes(DiscoveryNodeRole.roles()); + } + + /** Builds between 1 and 200 DiscoveryNode instances that all share the given roles and Version.CURRENT; each gets a random 30-character string, a random UUID string, a fake transport address and an empty map (presumably name, id, address and attributes, in that order; confirm against the DiscoveryNode constructor). */ private Set createNodes(Set roles) { + int numberOfNodes = randomIntBetween(1, 200); + Set discoveryNodes = new HashSet<>(); + for (int i = 0; i < numberOfNodes; i++) { + discoveryNodes.add( + new DiscoveryNode( + randomAlphaOfLength(30), + UUID.randomUUID().toString(), + buildNewFakeTransportAddress(), + Collections.emptyMap(), + roles, + Version.CURRENT + ) + ); + } + return discoveryNodes; + } + + /** Convenience overload: builds a HealthInfo in which a single node gets expectedStatus (delegates with a count of 1). */ private HealthInfo createHealthInfo(HealthStatus expectedStatus, Set nodes) { + return createHealthInfo(expectedStatus, 1, nodes); + } + + /** Builds a HealthInfo covering every node in nodes: numberOfNodesWithExpectedStatus of them get expectedStatus and the rest get GREEN. The count must not exceed nodes.size() (checked with an assert). */ private HealthInfo createHealthInfo(HealthStatus expectedStatus, int numberOfNodesWithExpectedStatus, Set nodes) { + assert numberOfNodesWithExpectedStatus <= nodes.size(); + Map diskInfoByNode = new HashMap<>(nodes.size()); + 
createHealthInfoForNodes(diskInfoByNode, expectedStatus, numberOfNodesWithExpectedStatus, nodes); + return new HealthInfo(diskInfoByNode); + } + + /* + * This version of the method is similar to the one above, except it applies three different statuses to three different sets of nodes. + */ + private HealthInfo createHealthInfo( + HealthStatus expectedStatus1, + int numberOfNodesWithExpectedStatus1, + Set nodes1, + HealthStatus expectedStatus2, + int numberOfNodesWithExpectedStatus2, + Set nodes2, + HealthStatus expectedStatus3, + int numberOfNodesWithExpectedStatus3, + Set nodes3 + ) { + assert numberOfNodesWithExpectedStatus1 <= nodes1.size(); + assert numberOfNodesWithExpectedStatus2 <= nodes2.size(); + assert numberOfNodesWithExpectedStatus3 <= nodes3.size(); + /* All three groups are written into one map keyed by node id, so a node id that appears in more than one set keeps only the entry written by the last group containing it. */ Map diskInfoByNode = new HashMap<>(); + createHealthInfoForNodes(diskInfoByNode, expectedStatus1, numberOfNodesWithExpectedStatus1, nodes1); + createHealthInfoForNodes(diskInfoByNode, expectedStatus2, numberOfNodesWithExpectedStatus2, nodes2); + createHealthInfoForNodes(diskInfoByNode, expectedStatus3, numberOfNodesWithExpectedStatus3, nodes3); + return new HealthInfo(diskInfoByNode); + } + + /** Adds one DiskHealthInfo per node to diskInfoByNode, keyed by node.getId(). The first numberOfNodesWithExpectedStatus nodes in iteration order get expectedStatus; every later node gets GREEN. In both cases the entry is randomly created with or without a random DiskHealthInfo.Cause. */ private void createHealthInfoForNodes( + Map diskInfoByNode, + HealthStatus expectedStatus, + int numberOfNodesWithExpectedStatus, + Set nodes + ) { + /* NOTE(review): this counter tracks how many nodes have been given expectedStatus; the "NonGreen" and trailing "3" in its name look like a copy-paste leftover, consider renaming. */ int numberWithNonGreenStatus3 = 0; + for (DiscoveryNode node : nodes) { + final DiskHealthInfo diskHealthInfo; + if (numberWithNonGreenStatus3 < numberOfNodesWithExpectedStatus) { + diskHealthInfo = randomBoolean() + ? new DiskHealthInfo(expectedStatus) + : new DiskHealthInfo(expectedStatus, randomFrom(DiskHealthInfo.Cause.values())); + numberWithNonGreenStatus3++; + } else { + /* Quota of expectedStatus nodes is used up: report GREEN. NOTE(review): a GREEN entry may still carry a random Cause; confirm that is intended. */ diskHealthInfo = randomBoolean() + ? 
new DiskHealthInfo(HealthStatus.GREEN) + : new DiskHealthInfo(HealthStatus.GREEN, randomFrom(DiskHealthInfo.Cause.values())); + } + diskInfoByNode.put(node.getId(), diskHealthInfo); + } + } + + /** Convenience overload: one index, which is blocked when blockIndex is true. */ private static ClusterService createClusterService(boolean blockIndex, Set nodes) { + return createClusterService(1, blockIndex ? 1 : 0, nodes); + } + + /** Creates numberOfIndices indices with random 20-character names, none of them mapped to any node, and marks the first numberOfIndicesToBlock of them as blocked before delegating to the overload below. */ private static ClusterService createClusterService(int numberOfIndices, int numberOfIndicesToBlock, Set nodes) { + Map> indexNameToNodeIdsMap = new HashMap<>(); + Set blockedIndices = new HashSet<>(numberOfIndicesToBlock); + for (int i = 0; i < numberOfIndices; i++) { + String indexName = randomAlphaOfLength(20); + /* + * The following effectively makes it so that the index does not exist on any node. That's not realistic, but works out for + * tests where we want for there to be no indices on red/yellow nodes + */ + indexNameToNodeIdsMap.put(indexName, Set.of()); + if (i < numberOfIndicesToBlock) { + blockedIndices.add(indexName); + } + } + return createClusterService(blockedIndices, nodes, indexNameToNodeIdsMap); + } + + /** Returns a mocked ClusterService whose state() yields a ClusterState named "test-cluster". That state holds the given nodes, metadata for every index named in indexNameToNodeIdsMap (1 shard, 1 replica, read-only-allow-delete set to whether the index is in blockedIndices), and a mocked RoutingTable whose allShards() returns one STARTED shard routing per (index, node id) pair in the map. */ private static ClusterService createClusterService( + Set blockedIndices, + Set nodes, + Map> indexNameToNodeIdsMap + ) { + RoutingTable routingTable = mock(RoutingTable.class); + /* The stub returns this same list instance, which is filled in by the loop below. */ List shardRoutings = new ArrayList<>(); + when(routingTable.allShards()).thenReturn(shardRoutings); + + ClusterBlocks.Builder clusterBlocksBuilder = new ClusterBlocks.Builder(); + Map indexMetadataMap = new HashMap<>(); + List clusterBlocksList = new ArrayList<>(); + for (String indexName : indexNameToNodeIdsMap.keySet()) { + boolean blockIndex = blockedIndices.contains(indexName); + IndexMetadata indexMetadata = new IndexMetadata.Builder(indexName).settings( + Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.INDEX_BLOCKS_READ_ONLY_ALLOW_DELETE_SETTING.getKey(), blockIndex) + 
.build() + ).build(); + indexMetadataMap.put(indexMetadata.getIndex().getName(), indexMetadata); + if (blockIndex) { + /* The one builder is reused across iterations, so each ClusterBlocks built here presumably also contains the blocks added for earlier indices. */ ClusterBlocks clusterBlocks = clusterBlocksBuilder.addBlocks(indexMetadata).build(); + clusterBlocksList.add(clusterBlocks); + } + for (String nodeId : indexNameToNodeIdsMap.get(indexName)) { + /* NOTE(review): the shard id is random in [1, 5] although the index is configured with SETTING_NUMBER_OF_SHARDS = 1, and the primary flag is random; presumably neither matters to the code under test, confirm. */ ShardRouting shardRouting = TestShardRouting.newShardRouting( + indexMetadata.getIndex().getName(), + randomIntBetween(1, 5), + nodeId, + randomBoolean(), + ShardRoutingState.STARTED + ); + shardRoutings.add(shardRouting); + } + } + Metadata.Builder metadataBuilder = Metadata.builder(); + metadataBuilder.indices(indexMetadataMap); + DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(); + for (DiscoveryNode node : nodes) { + nodesBuilder.add(node); + } + ClusterState.Builder clusterStateBuilder = ClusterState.builder(new ClusterName("test-cluster")) + .routingTable(routingTable) + .metadata(metadataBuilder.build()) + .nodes(nodesBuilder); + /* NOTE(review): blocks(...) is invoked once per list entry; if it replaces rather than merges, only the last (cumulative) entry takes effect and the list is unnecessary, confirm. */ for (ClusterBlocks clusterBlocks : clusterBlocksList) { + clusterStateBuilder.blocks(clusterBlocks); + } + /* NOTE(review): nodes(nodesBuilder) was already applied in the builder chain above; this second call looks redundant. */ clusterStateBuilder.nodes(nodesBuilder); + ClusterState clusterState = clusterStateBuilder.build(); + var clusterService = mock(ClusterService.class); + when(clusterService.state()).thenReturn(clusterState); + return clusterService; + } + + /** Serializes xcontent to JSON with ToXContent.EMPTY_PARAMS and parses the bytes back, returning the result of parser.map(). NOTE(review): the parser is never closed here; if XContentParser is Closeable, a try-with-resources block would avoid the leak, confirm. */ private Map xContentToMap(ToXContent xcontent) throws IOException { + XContentBuilder builder = XContentFactory.jsonBuilder(); + xcontent.toXContent(builder, ToXContent.EMPTY_PARAMS); + XContentParser parser = XContentType.JSON.xContent() + .createParser(xContentRegistry(), LoggingDeprecationHandler.INSTANCE, BytesReference.bytes(builder).streamInput()); + return parser.map(); + } +}