diff --git a/docs/reference/index-modules.asciidoc b/docs/reference/index-modules.asciidoc index 8bbb122e61e5..21a3e573fcff 100644 --- a/docs/reference/index-modules.asciidoc +++ b/docs/reference/index-modules.asciidoc @@ -139,16 +139,19 @@ specific index module: The number of replicas each primary shard has. Defaults to 1. `index.auto_expand_replicas`:: - - Auto-expand the number of replicas based on the number of data nodes in the cluster. - Set to a dash delimited lower and upper bound (e.g. `0-5`) or use `all` - for the upper bound (e.g. `0-all`). Defaults to `false` (i.e. disabled). - Note that the auto-expanded number of replicas only takes - <> rules into account, but ignores - any other allocation rules such as <> - and <>, and this can lead to the - cluster health becoming `YELLOW` if the applicable rules prevent all the replicas - from being allocated. +Auto-expand the number of replicas based on the number of data nodes in the +cluster. Set to a dash delimited lower and upper bound (e.g. `0-5`) or use `all` +for the upper bound (e.g. `0-all`). Defaults to `false` (i.e. disabled). Note +that the auto-expanded number of replicas only takes +<> rules into account, but +ignores other allocation rules such as <>, and this can lead to the cluster health becoming `YELLOW` if the +applicable rules prevent all the replicas from being allocated. ++ +If the upper bound is `all` then <> and +<> +are ignored for this index. `index.search.idle.after`:: How long a shard can not receive a search or get request until it's considered diff --git a/docs/reference/modules/cluster/shards_allocation.asciidoc b/docs/reference/modules/cluster/shards_allocation.asciidoc index e26e732e3455..34313c2dc6ee 100644 --- a/docs/reference/modules/cluster/shards_allocation.asciidoc +++ b/docs/reference/modules/cluster/shards_allocation.asciidoc @@ -45,7 +45,7 @@ one of the active allocation ids in the cluster state. These should be fast so more initial primary recoveries can happen in parallel on the same node. Defaults to `4`. - +[[cluster-routing-allocation-same-shard-host]] `cluster.routing.allocation.same_shard.host`:: (<>) Allows to perform a check to prevent allocation of multiple instances of diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/AutoExpandReplicas.java b/server/src/main/java/org/elasticsearch/cluster/metadata/AutoExpandReplicas.java index 181281a9f5cb..5689235d78c2 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/AutoExpandReplicas.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/AutoExpandReplicas.java @@ -91,6 +91,10 @@ public final class AutoExpandReplicas { return Math.min(maxReplicas, numDataNodes-1); } + public boolean expandToAllNodes() { + return maxReplicas == Integer.MAX_VALUE; + } + private OptionalInt getDesiredNumberOfReplicas(IndexMetadata indexMetadata, RoutingAllocation allocation) { if (enabled) { int numMatchingDataNodes = 0; diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AwarenessAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AwarenessAllocationDecider.java index fdc43e98d0ca..886436139fc9 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AwarenessAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AwarenessAllocationDecider.java @@ -8,11 +8,6 @@ package org.elasticsearch.cluster.routing.allocation.decider; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.function.Function; - import com.carrotsearch.hppc.ObjectIntHashMap; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.routing.RoutingNode; @@ -24,7 +19,13 @@ import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.settings.Settings; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.function.Function; + import static java.util.Collections.emptyList; +import static org.elasticsearch.cluster.metadata.IndexMetadata.INDEX_AUTO_EXPAND_REPLICAS_SETTING; /** * This {@link AllocationDecider} controls shard allocation based on @@ -118,6 +119,9 @@ public class AwarenessAllocationDecider extends AllocationDecider { "allocation awareness is not enabled, set cluster setting [" + CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.getKey() + "] to enable it"); + private static final Decision YES_AUTO_EXPAND_ALL = Decision.single(Decision.Type.YES, NAME, + "allocation awareness is ignored, this index is set to auto-expand to all nodes"); + private static final Decision YES_ALL_MET = Decision.single(Decision.Type.YES, NAME, "node meets all awareness attribute requirements"); @@ -128,6 +132,11 @@ public class AwarenessAllocationDecider extends AllocationDecider { final boolean debug = allocation.debugDecision(); IndexMetadata indexMetadata = allocation.metadata().getIndexSafe(shardRouting.index()); + + if (INDEX_AUTO_EXPAND_REPLICAS_SETTING.get(indexMetadata.getSettings()).expandToAllNodes()) { + return YES_AUTO_EXPAND_ALL; + } + int shardCount = indexMetadata.getNumberOfReplicas() + 1; // 1 for primary for (String awarenessAttribute : awarenessAttributes) { // the node the shard exists on must be associated with an awareness attribute diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SameShardAllocationDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SameShardAllocationDecider.java index 82254ae4cc96..1f3f995deaf0 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SameShardAllocationDecider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SameShardAllocationDecider.java @@ -17,6 +17,8 @@ import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.settings.Settings; +import static org.elasticsearch.cluster.metadata.IndexMetadata.INDEX_AUTO_EXPAND_REPLICAS_SETTING; + /** * An allocation decider that prevents multiple instances of the same shard to * be allocated on the same {@code node}. @@ -58,6 +60,9 @@ public class SameShardAllocationDecider extends AllocationDecider { private static final Decision YES_NONE_HOLD_COPY = Decision.single(Decision.Type.YES, NAME, "none of the nodes on this host hold a copy of this shard"); + private static final Decision YES_AUTO_EXPAND_ALL = Decision.single(Decision.Type.YES, NAME, + "same-host allocation is ignored, this index is set to auto-expand to all nodes"); + @Override public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { Iterable assignedShards = allocation.routingNodes().assignedShards(shardRouting.shardId()); @@ -66,6 +71,10 @@ public class SameShardAllocationDecider extends AllocationDecider { // if its already a NO decision looking at the node, or we aren't configured to look at the host, return the decision return decision; } + if (INDEX_AUTO_EXPAND_REPLICAS_SETTING.get( + allocation.metadata().getIndexSafe(shardRouting.index()).getSettings()).expandToAllNodes()) { + return YES_AUTO_EXPAND_ALL; + } if (node.node() != null) { for (RoutingNode checkNode : allocation.routingNodes()) { if (checkNode.node() == null) { diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/AutoExpandReplicasTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/AutoExpandReplicasTests.java index ef2011431e46..f5fc9764ff8f 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/AutoExpandReplicasTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/AutoExpandReplicasTests.java @@ -50,16 +50,19 @@ public class AutoExpandReplicasTests extends ESTestCase { assertEquals(0, autoExpandReplicas.getMinReplicas()); assertEquals(5, autoExpandReplicas.getMaxReplicas(8)); assertEquals(2, autoExpandReplicas.getMaxReplicas(3)); + assertFalse(autoExpandReplicas.expandToAllNodes()); autoExpandReplicas = AutoExpandReplicas.SETTING.get(Settings.builder().put("index.auto_expand_replicas", "0-all").build()); assertEquals(0, autoExpandReplicas.getMinReplicas()); assertEquals(5, autoExpandReplicas.getMaxReplicas(6)); assertEquals(2, autoExpandReplicas.getMaxReplicas(3)); + assertTrue(autoExpandReplicas.expandToAllNodes()); autoExpandReplicas = AutoExpandReplicas.SETTING.get(Settings.builder().put("index.auto_expand_replicas", "1-all").build()); assertEquals(1, autoExpandReplicas.getMinReplicas()); assertEquals(5, autoExpandReplicas.getMaxReplicas(6)); assertEquals(2, autoExpandReplicas.getMaxReplicas(3)); + assertTrue(autoExpandReplicas.expandToAllNodes()); } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/AwarenessAllocationTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/AwarenessAllocationTests.java index d8677aa211c4..7a7702c4d767 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/AwarenessAllocationTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/AwarenessAllocationTests.java @@ -11,6 +11,7 @@ package org.elasticsearch.cluster.routing.allocation; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.elasticsearch.Version; +import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.ESAllocationTestCase; import org.elasticsearch.cluster.metadata.IndexMetadata; @@ -34,6 +35,7 @@ import static org.elasticsearch.cluster.routing.ShardRoutingState.INITIALIZING; import static org.elasticsearch.cluster.routing.ShardRoutingState.RELOCATING; import static org.elasticsearch.cluster.routing.ShardRoutingState.STARTED; import static org.elasticsearch.cluster.routing.ShardRoutingState.UNASSIGNED; +import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.sameInstance; @@ -770,8 +772,13 @@ public class AwarenessAllocationTests extends ESAllocationTestCase { logger.info("Building initial routing table for 'testUnassignedShardsWithUnbalancedZones'"); + final Settings.Builder indexSettings = settings(Version.CURRENT); + if (randomBoolean()) { + indexSettings.put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "0-4"); + } + Metadata metadata = Metadata.builder() - .put(IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(1).numberOfReplicas(4)) + .put(IndexMetadata.builder("test").settings(indexSettings).numberOfShards(1).numberOfReplicas(4)) .build(); RoutingTable initialRoutingTable = RoutingTable.builder() @@ -865,4 +872,36 @@ public class AwarenessAllocationTests extends ESAllocationTestCase { assertThat(clusterState.getRoutingNodes().shardsWithState(STARTED).size(), equalTo(2)); assertThat(clusterState.getRoutingNodes().shardsWithState(INITIALIZING).size(), equalTo(0)); } + + public void testDisabledByAutoExpandReplicas() { + final Settings settings = Settings.builder() + .put(AwarenessAllocationDecider.CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.getKey(), "zone") + .build(); + + final AllocationService strategy = createAllocationService(settings); + + final Metadata metadata = Metadata.builder() + .put(IndexMetadata.builder("test").settings(settings(Version.CURRENT) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 99) + .put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "0-all"))) + .build(); + + final ClusterState clusterState = applyStartedShardsUntilNoChange( + ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.get(Settings.EMPTY)) + .metadata(metadata) + .routingTable(RoutingTable.builder() + .addAsNew(metadata.index("test")) + .build()) + .nodes(DiscoveryNodes.builder() + .add(newNode("A-0", singletonMap("zone", "a"))) + .add(newNode("A-1", singletonMap("zone", "a"))) + .add(newNode("A-2", singletonMap("zone", "a"))) + .add(newNode("A-3", singletonMap("zone", "a"))) + .add(newNode("A-4", singletonMap("zone", "a"))) + .add(newNode("B-0", singletonMap("zone", "b"))) + ).build(), strategy); + + assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED), empty()); + } } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/SameShardRoutingTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/SameShardRoutingTests.java index f269bb1ff877..7ef4b8f23a3c 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/SameShardRoutingTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/SameShardRoutingTests.java @@ -13,7 +13,6 @@ import org.apache.logging.log4j.Logger; import org.elasticsearch.Version; import org.elasticsearch.action.support.replication.ClusterStateCreationUtils; import org.elasticsearch.cluster.ClusterInfo; -import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.ESAllocationTestCase; import org.elasticsearch.cluster.metadata.IndexMetadata; @@ -37,8 +36,11 @@ import org.elasticsearch.snapshots.SnapshotShardSizeInfo; import java.util.Collections; import static java.util.Collections.emptyMap; +import static org.elasticsearch.cluster.ClusterName.CLUSTER_NAME_SETTING; import static org.elasticsearch.cluster.routing.ShardRoutingState.INITIALIZING; +import static org.elasticsearch.cluster.routing.ShardRoutingState.UNASSIGNED; import static org.elasticsearch.cluster.routing.allocation.RoutingNodesUtils.numberOfShardsOfType; +import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; public class SameShardRoutingTests extends ESAllocationTestCase { @@ -48,14 +50,19 @@ public class SameShardRoutingTests extends ESAllocationTestCase { AllocationService strategy = createAllocationService( Settings.builder().put(SameShardAllocationDecider.CLUSTER_ROUTING_ALLOCATION_SAME_HOST_SETTING.getKey(), true).build()); + final Settings.Builder indexSettings = settings(Version.CURRENT); + if (randomBoolean()) { + indexSettings.put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "0-1"); + } + Metadata metadata = Metadata.builder() - .put(IndexMetadata.builder("test").settings(settings(Version.CURRENT)).numberOfShards(2).numberOfReplicas(1)) + .put(IndexMetadata.builder("test").settings(indexSettings).numberOfShards(2).numberOfReplicas(1)) .build(); RoutingTable routingTable = RoutingTable.builder() .addAsNew(metadata.index("test")) .build(); - ClusterState clusterState = ClusterState.builder(ClusterName.CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metadata(metadata) + ClusterState clusterState = ClusterState.builder(CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)).metadata(metadata) .routingTable(routingTable).build(); logger.info("--> adding two nodes with the same host"); @@ -88,6 +95,54 @@ public class SameShardRoutingTests extends ESAllocationTestCase { } } + public void testSameHostCheckDisabledByAutoExpandReplicas() { + final AllocationService strategy = createAllocationService( + Settings.builder().put(SameShardAllocationDecider.CLUSTER_ROUTING_ALLOCATION_SAME_HOST_SETTING.getKey(), true).build()); + + final Metadata metadata = Metadata.builder() + .put(IndexMetadata.builder("test").settings(settings(Version.CURRENT) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 99) + .put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "0-all"))) + .build(); + + final DiscoveryNode node1 = new DiscoveryNode( + "node1", + "node1", + "node1", + "test1", + "test1", + buildNewFakeTransportAddress(), + emptyMap(), + MASTER_DATA_ROLES, + Version.CURRENT); + + + final DiscoveryNode node2 = new DiscoveryNode( + "node2", + "node2", + "node2", + "test1", + "test1", + buildNewFakeTransportAddress(), + emptyMap(), + MASTER_DATA_ROLES, + Version.CURRENT); + + final ClusterState clusterState = applyStartedShardsUntilNoChange(ClusterState + .builder(CLUSTER_NAME_SETTING.getDefault(Settings.EMPTY)) + .metadata(metadata) + .routingTable(RoutingTable.builder() + .addAsNew(metadata.index("test")) + .build()) + .nodes( + DiscoveryNodes.builder() + .add(node1) + .add(node2)).build(), strategy); + + assertThat(clusterState.getRoutingNodes().shardsWithState(UNASSIGNED), empty()); + } + public void testForceAllocatePrimaryOnSameNodeNotAllowed() { SameShardAllocationDecider decider = new SameShardAllocationDecider( Settings.EMPTY, new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS));