Set recovery rate for dedicated cold nodes (#68480)

This commit sets the recovery rate for dedicated cold nodes. The goal is
here is enhance performance of recovery in a dedicated cold tier, where
we expect such nodes to be predominantly using searchable snapshots to
back the indices located on them. This commit follows a simple approach
where we increase the recovery rate as a function of the node size, for
nodes that appear to be dedicated cold nodes.
This commit is contained in:
Jason Tedor 2021-02-04 10:36:07 -05:00 committed by GitHub
parent fdb147ad6a
commit 6e94e67ae9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 65 additions and 3 deletions

View file

@ -18,7 +18,21 @@ You can view a list of in-progress and completed recoveries using the
`indices.recovery.max_bytes_per_sec`::
(<<cluster-update-settings,Dynamic>>) Limits total inbound and outbound
recovery traffic for each node. Applies to both peer recoveries as well
as snapshot recoveries (i.e., restores from a snapshot). Defaults to `40mb`.
as snapshot recoveries (i.e., restores from a snapshot). Defaults to `40mb`
unless the node is a <<cold-tier, dedicated cold node>> in which case the
default relates to the total memory available to the node:
.Recovery Rate for Cold Nodes
[options="header"]
|======
|total memory | default value
|<= 4 GB | 40 MB/s
|> 4 GB and <= 8 GB | 60 MB/s
|> 8 GB and <= 16 GB | 90 MB/s
|> 16 GB and <= 32 GB | 125 MB/s
|> 32 GB | 250 MB/s
|======
+
This limit applies to each node separately. If multiple nodes in a cluster
perform recoveries at the same time, the cluster's total recovery traffic may

View file

@ -12,6 +12,8 @@ import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.store.RateLimiter;
import org.apache.lucene.store.RateLimiter.SimpleRateLimiter;
import org.elasticsearch.bootstrap.JavaVersion;
import org.elasticsearch.cluster.node.DiscoveryNodeRole;
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Setting.Property;
@ -19,14 +21,60 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.monitor.os.OsProbe;
import org.elasticsearch.node.NodeRoleSettings;
import java.util.List;
import java.util.stream.Collectors;
public class RecoverySettings {
private static final Logger logger = LogManager.getLogger(RecoverySettings.class);
public static final Setting<ByteSizeValue> INDICES_RECOVERY_MAX_BYTES_PER_SEC_SETTING =
Setting.byteSizeSetting("indices.recovery.max_bytes_per_sec", new ByteSizeValue(40, ByteSizeUnit.MB),
Property.Dynamic, Property.NodeScope);
Setting.byteSizeSetting(
"indices.recovery.max_bytes_per_sec",
s -> {
final ByteSizeValue defaultMaxBytesPerSec = new ByteSizeValue(40, ByteSizeUnit.MB);
final List<DiscoveryNodeRole> roles = NodeRoleSettings.NODE_ROLES_SETTING.get(s);
final List<DiscoveryNodeRole> dataRoles =
roles.stream().filter(DiscoveryNodeRole::canContainData).collect(Collectors.toUnmodifiableList());
if (dataRoles.isEmpty()) {
// if the node is not a data node, this value doesn't matter, use the default
return defaultMaxBytesPerSec.getStringRep();
}
if ((dataRoles.size() > 1 || dataRoles.get(0).roleName().equals("data_cold") == false) ||
roles.contains(DiscoveryNodeRole.MASTER_ROLE)) {
// if the node is not a dedicated cold node, use the default
return defaultMaxBytesPerSec.getStringRep();
}
/*
* Now we are looking at a node that has a single data role, that data role is the cold data role, and the node does not
* have the master role. In this case, we are going to set the recovery size as a function of the memory size. We are making
* an assumption here that the size of the instance is correlated with I/O resources. That is we are assuming that the
* larger the instance, the more disk and networking capacity it has available.
*/
if (JavaVersion.current().compareTo(JavaVersion.parse("14")) < 0) {
// prior to JDK 14, the JDK did not take into consideration container memory limits when reporting total system memory
return defaultMaxBytesPerSec.getStringRep();
}
final ByteSizeValue totalPhysicalMemory = new ByteSizeValue(OsProbe.getInstance().getTotalPhysicalMemorySize());
final ByteSizeValue maxBytesPerSec;
if (totalPhysicalMemory.compareTo(new ByteSizeValue(4, ByteSizeUnit.GB)) <= 0) {
maxBytesPerSec = new ByteSizeValue(40, ByteSizeUnit.MB);
} else if (totalPhysicalMemory.compareTo(new ByteSizeValue(8, ByteSizeUnit.GB)) <= 0) {
maxBytesPerSec = new ByteSizeValue(60, ByteSizeUnit.MB);
} else if (totalPhysicalMemory.compareTo(new ByteSizeValue(16, ByteSizeUnit.GB)) <= 0) {
maxBytesPerSec = new ByteSizeValue(90, ByteSizeUnit.MB);
} else if (totalPhysicalMemory.compareTo(new ByteSizeValue(32, ByteSizeUnit.GB)) <= 0) {
maxBytesPerSec = new ByteSizeValue(125, ByteSizeUnit.MB);
} else {
maxBytesPerSec = new ByteSizeValue(250, ByteSizeUnit.MB);
}
return maxBytesPerSec.getStringRep();
},
Property.Dynamic,
Property.NodeScope);
/**
* Controls the maximum number of file chunk requests that can be sent concurrently from the source node to the target node.