mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-04-22 22:27:47 -04:00
Handle cgroups v2 in OsProbe
(#76883)
Closes #76812. `OsProbe` was only capable of handling cgroup data in the v1 format. However, Debian 11 uses cgroups v2 by default, so Elasticsearch wasn't able to report any cgroup information there. Therefore, add support for the v2 layout.
This commit is contained in:
parent
6c7f6922db
commit
eff6cd69e6
4 changed files with 299 additions and 98 deletions
|
@ -838,7 +838,6 @@ public class DockerTests extends PackagingTestCase {
|
|||
/**
|
||||
* Check that Elasticsearch reports per-node cgroup information.
|
||||
*/
|
||||
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/76812")
|
||||
public void test140CgroupOsStatsAreAvailable() throws Exception {
|
||||
waitForElasticsearch(installation);
|
||||
|
||||
|
|
|
@ -30,25 +30,31 @@ import java.util.Optional;
|
|||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
* The {@link OsProbe} class retrieves information about the physical and swap size of the machine
|
||||
* memory, as well as the system load average and cpu load.
|
||||
*
|
||||
* In some exceptional cases, it's possible the underlying native methods used by
|
||||
* <p>In some exceptional cases, it's possible the underlying native methods used by
|
||||
* {@link #getFreePhysicalMemorySize()}, {@link #getTotalPhysicalMemorySize()},
|
||||
* {@link #getFreeSwapSpaceSize()}, and {@link #getTotalSwapSpaceSize()} can return a
|
||||
* negative value. Because of this, we prevent those methods from returning negative values,
|
||||
* returning 0 instead.
|
||||
*
|
||||
* The OS can report a negative number in a number of cases:
|
||||
* - Non-supported OSes (HP-UX, or AIX)
|
||||
* - A failure of macOS to initialize host statistics
|
||||
* - An OS that does not support the {@code _SC_PHYS_PAGES} or {@code _SC_PAGE_SIZE} flags for the {@code sysconf()} linux kernel call
|
||||
* - An overflow of the product of {@code _SC_PHYS_PAGES} and {@code _SC_PAGE_SIZE}
|
||||
* - An error case retrieving these values from a linux kernel
|
||||
* - A non-standard libc implementation not implementing the required values
|
||||
* For a more exhaustive explanation, see https://github.com/elastic/elasticsearch/pull/42725
|
||||
* <p>The OS can report a negative number in a number of cases:
|
||||
*
|
||||
* <ul>
|
||||
* <li>Non-supported OSes (HP-UX, or AIX)
|
||||
* <li>A failure of macOS to initialize host statistics
|
||||
* <li>An OS that does not support the {@code _SC_PHYS_PAGES} or {@code _SC_PAGE_SIZE} flags for the {@code sysconf()} linux kernel call
|
||||
* <li>An overflow of the product of {@code _SC_PHYS_PAGES} and {@code _SC_PAGE_SIZE}
|
||||
* <li>An error case retrieving these values from a linux kernel
|
||||
* <li>A non-standard libc implementation not implementing the required values
|
||||
* </ul>
|
||||
*
|
||||
* <p>For a more exhaustive explanation, see <a href="https://github.com/elastic/elasticsearch/pull/42725"
|
||||
* >https://github.com/elastic/elasticsearch/pull/42725</a>
|
||||
*/
|
||||
public class OsProbe {
|
||||
|
||||
|
@ -178,7 +184,7 @@ public class OsProbe {
|
|||
final String procLoadAvg = readProcLoadavg();
|
||||
assert procLoadAvg.matches("(\\d+\\.\\d+\\s+){3}\\d+/\\d+\\s+\\d+");
|
||||
final String[] fields = procLoadAvg.split("\\s+");
|
||||
return new double[]{Double.parseDouble(fields[0]), Double.parseDouble(fields[1]), Double.parseDouble(fields[2])};
|
||||
return new double[] { Double.parseDouble(fields[0]), Double.parseDouble(fields[1]), Double.parseDouble(fields[2]) };
|
||||
} catch (final IOException e) {
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("error reading /proc/loadavg", e);
|
||||
|
@ -192,7 +198,7 @@ public class OsProbe {
|
|||
}
|
||||
try {
|
||||
final double oneMinuteLoadAverage = (double) getSystemLoadAverage.invoke(osMxBean);
|
||||
return new double[]{oneMinuteLoadAverage >= 0 ? oneMinuteLoadAverage : -1, -1, -1};
|
||||
return new double[] { oneMinuteLoadAverage >= 0 ? oneMinuteLoadAverage : -1, -1, -1 };
|
||||
} catch (IllegalAccessException | InvocationTargetException e) {
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("error reading one minute load average from operating system", e);
|
||||
|
@ -318,6 +324,23 @@ public class OsProbe {
|
|||
return readSingleLine(PathUtils.get("/sys/fs/cgroup/cpuacct", controlGroup, "cpuacct.usage"));
|
||||
}
|
||||
|
||||
private long[] getCgroupV2CpuLimit(String controlGroup) throws IOException {
|
||||
String entry = readCgroupV2CpuLimit(controlGroup);
|
||||
String[] parts = entry.split("\\s+");
|
||||
assert parts.length == 2 : "Expected 2 fields in [cpu.max]";
|
||||
|
||||
long[] values = new long[2];
|
||||
|
||||
values[0] = "max".equals(parts[0]) ? -1L : Long.parseLong(parts[0]);
|
||||
values[1] = Long.parseLong(parts[1]);
|
||||
return values;
|
||||
}
|
||||
|
||||
@SuppressForbidden(reason = "access /sys/fs/cgroup/cpu.max")
|
||||
String readCgroupV2CpuLimit(String controlGroup) throws IOException {
|
||||
return readSingleLine(PathUtils.get("/sys/fs/cgroup/", controlGroup, "cpu.max"));
|
||||
}
|
||||
|
||||
/**
|
||||
* The total period of time in microseconds for how frequently the Elasticsearch control group's access to CPU resources will be
|
||||
* reallocated.
|
||||
|
@ -454,6 +477,35 @@ public class OsProbe {
|
|||
return readSingleLine(PathUtils.get("/sys/fs/cgroup/memory", controlGroup, "memory.limit_in_bytes"));
|
||||
}
|
||||
|
||||
/**
|
||||
* The maximum amount of user memory (including file cache).
|
||||
* If there is no limit then some Linux versions return the maximum value that can be stored in an
|
||||
* unsigned 64 bit number, and this will overflow a long, hence the result type is <code>String</code>.
|
||||
* (The alternative would have been <code>BigInteger</code> but then it would not be possible to index
|
||||
* the OS stats document into Elasticsearch without losing information, as <code>BigInteger</code> is
|
||||
* not a supported Elasticsearch type.)
|
||||
*
|
||||
* @param controlGroup the control group for the Elasticsearch process for the {@code memory} subsystem
|
||||
* @return the maximum amount of user memory (including file cache)
|
||||
* @throws IOException if an I/O exception occurs reading {@code memory.max} for the control group
|
||||
*/
|
||||
private String getCgroupV2MemoryLimitInBytes(final String controlGroup) throws IOException {
|
||||
return readSysFsCgroupV2MemoryLimitInBytes(controlGroup);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the line from {@code memory.max} for the control group to which the Elasticsearch process belongs for the
|
||||
* {@code memory} subsystem. This line represents the maximum amount of user memory (including file cache).
|
||||
*
|
||||
* @param controlGroup the control group to which the Elasticsearch process belongs for the {@code memory} subsystem
|
||||
* @return the line from {@code memory.max}
|
||||
* @throws IOException if an I/O exception occurs reading {@code memory.max} for the control group
|
||||
*/
|
||||
@SuppressForbidden(reason = "access /sys/fs/cgroup/memory.max")
|
||||
String readSysFsCgroupV2MemoryLimitInBytes(final String controlGroup) throws IOException {
|
||||
return readSingleLine(PathUtils.get("/sys/fs/cgroup/", controlGroup, "memory.max"));
|
||||
}
|
||||
|
||||
/**
|
||||
* The total current memory usage by processes in the cgroup (in bytes).
|
||||
* If there is no limit then some Linux versions return the maximum value that can be stored in an
|
||||
|
@ -483,6 +535,35 @@ public class OsProbe {
|
|||
return readSingleLine(PathUtils.get("/sys/fs/cgroup/memory", controlGroup, "memory.usage_in_bytes"));
|
||||
}
|
||||
|
||||
/**
|
||||
* The total current memory usage by processes in the cgroup (in bytes).
|
||||
* If there is no limit then some Linux versions return the maximum value that can be stored in an
|
||||
* unsigned 64 bit number, and this will overflow a long, hence the result type is <code>String</code>.
|
||||
* (The alternative would have been <code>BigInteger</code> but then it would not be possible to index
|
||||
* the OS stats document into Elasticsearch without losing information, as <code>BigInteger</code> is
|
||||
* not a supported Elasticsearch type.)
|
||||
*
|
||||
* @param controlGroup the control group for the Elasticsearch process for the {@code memory} subsystem
|
||||
* @return the total current memory usage by processes in the cgroup (in bytes)
|
||||
* @throws IOException if an I/O exception occurs reading {@code memory.current} for the control group
|
||||
*/
|
||||
private String getCgroupV2MemoryUsageInBytes(final String controlGroup) throws IOException {
|
||||
return readSysFsCgroupV2MemoryUsageInBytes(controlGroup);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the line from {@code memory.current} for the control group to which the Elasticsearch process belongs for the
|
||||
* {@code memory} subsystem. This line represents the total current memory usage by processes in the cgroup (in bytes).
|
||||
*
|
||||
* @param controlGroup the control group to which the Elasticsearch process belongs for the {@code memory} subsystem
|
||||
* @return the line from {@code memory.current}
|
||||
* @throws IOException if an I/O exception occurs reading {@code memory.current} for the control group
|
||||
*/
|
||||
@SuppressForbidden(reason = "access /sys/fs/cgroup/memory.current")
|
||||
String readSysFsCgroupV2MemoryUsageInBytes(final String controlGroup) throws IOException {
|
||||
return readSingleLine(PathUtils.get("/sys/fs/cgroup/", controlGroup, "memory.current"));
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if cgroup stats are available by checking for the existence of {@code /proc/self/cgroup}, {@code /sys/fs/cgroup/cpu},
|
||||
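* {@code /sys/fs/cgroup/cpuacct} and {@code /sys/fs/cgroup/memory} (cgroups v1), or of {@code /sys/fs/cgroup/cpu.stat} and {@code /sys/fs/cgroup/memory.stat} (cgroups v2).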
|
||||
|
@ -490,20 +571,58 @@ public class OsProbe {
|
|||
* @return {@code true} if the stats are available, otherwise {@code false}
|
||||
*/
|
||||
@SuppressForbidden(reason = "access /proc/self/cgroup, /sys/fs/cgroup/cpu, /sys/fs/cgroup/cpuacct and /sys/fs/cgroup/memory")
|
||||
boolean areCgroupStatsAvailable() {
|
||||
boolean areCgroupStatsAvailable() throws IOException {
|
||||
if (Files.exists(PathUtils.get("/proc/self/cgroup")) == false) {
|
||||
return false;
|
||||
}
|
||||
if (Files.exists(PathUtils.get("/sys/fs/cgroup/cpu")) == false) {
|
||||
return false;
|
||||
|
||||
List<String> lines = readProcSelfCgroup();
|
||||
|
||||
// cgroup v2
|
||||
if (lines.size() == 1 && lines.get(0).startsWith("0::")) {
|
||||
return Stream.of("/sys/fs/cgroup/cpu.stat", "/sys/fs/cgroup/memory.stat").allMatch(path -> Files.exists(PathUtils.get(path)));
|
||||
}
|
||||
if (Files.exists(PathUtils.get("/sys/fs/cgroup/cpuacct")) == false) {
|
||||
return false;
|
||||
|
||||
return Stream.of("/sys/fs/cgroup/cpu", "/sys/fs/cgroup/cpuacct", "/sys/fs/cgroup/memory")
|
||||
.allMatch(path -> Files.exists(PathUtils.get(path)));
|
||||
}
|
||||
|
||||
/**
|
||||
* The CPU statistics for all tasks in the Elasticsearch control group.
|
||||
*
|
||||
* @param controlGroup the control group to which the Elasticsearch process belongs for the {@code cpu} subsystem
|
||||
* @return the CPU statistics
|
||||
* @throws IOException if an I/O exception occurs reading {@code cpu.stat} for the control group
|
||||
*/
|
||||
private Map<String, Long> getCgroupV2CpuStats(String controlGroup) throws IOException {
|
||||
final List<String> lines = readCgroupV2CpuStats(controlGroup);
|
||||
final Map<String, Long> stats = new HashMap<>();
|
||||
|
||||
for (String line : lines) {
|
||||
String[] parts = line.split("\\s+");
|
||||
assert parts.length == 2 : "Corrupt cpu.stat line: [" + line + "]";
|
||||
stats.put(parts[0], Long.parseLong(parts[1]));
|
||||
}
|
||||
if (Files.exists(PathUtils.get("/sys/fs/cgroup/memory")) == false) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
||||
final List<String> expectedKeys = org.elasticsearch.core.List.of(
|
||||
"nr_periods",
|
||||
"nr_throttled",
|
||||
"system_usec",
|
||||
"throttled_usec",
|
||||
"usage_usec",
|
||||
"user_usec"
|
||||
);
|
||||
expectedKeys.forEach(key -> {
|
||||
assert stats.containsKey(key) : key;
|
||||
assert stats.get(key) != -1 : stats.get(key);
|
||||
});
|
||||
|
||||
return stats;
|
||||
}
|
||||
|
||||
@SuppressForbidden(reason = "access /sys/fs/cgroup/cpu.stat")
|
||||
List<String> readCgroupV2CpuStats(final String controlGroup) throws IOException {
|
||||
return Files.readAllLines(PathUtils.get("/sys/fs/cgroup", controlGroup, "cpu.stat"));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -515,45 +634,79 @@ public class OsProbe {
|
|||
try {
|
||||
if (areCgroupStatsAvailable() == false) {
|
||||
return null;
|
||||
} else {
|
||||
final Map<String, String> controllerMap = getControlGroups();
|
||||
assert controllerMap.isEmpty() == false;
|
||||
}
|
||||
|
||||
final String cpuAcctControlGroup = controllerMap.get("cpuacct");
|
||||
final Map<String, String> controllerMap = getControlGroups();
|
||||
assert controllerMap.isEmpty() == false;
|
||||
|
||||
final String cpuAcctControlGroup;
|
||||
final long cgroupCpuAcctUsageNanos;
|
||||
final long cgroupCpuAcctCpuCfsPeriodMicros;
|
||||
final long cgroupCpuAcctCpuCfsQuotaMicros;
|
||||
final String cpuControlGroup;
|
||||
final OsStats.Cgroup.CpuStat cpuStat;
|
||||
final String memoryControlGroup;
|
||||
final String cgroupMemoryLimitInBytes;
|
||||
final String cgroupMemoryUsageInBytes;
|
||||
|
||||
if (controllerMap.size() == 1 && controllerMap.containsKey("")) {
|
||||
// There's a single hierarchy for all controllers
|
||||
cpuControlGroup = cpuAcctControlGroup = memoryControlGroup = controllerMap.get("");
|
||||
|
||||
// `cpuacct` was merged with `cpu` in v2
|
||||
final Map<String, Long> cpuStatsMap = getCgroupV2CpuStats(cpuControlGroup);
|
||||
|
||||
cgroupCpuAcctUsageNanos = cpuStatsMap.get("usage_usec");
|
||||
|
||||
long[] cpuLimits = getCgroupV2CpuLimit(cpuControlGroup);
|
||||
cgroupCpuAcctCpuCfsQuotaMicros = cpuLimits[0];
|
||||
cgroupCpuAcctCpuCfsPeriodMicros = cpuLimits[1];
|
||||
|
||||
cpuStat = new OsStats.Cgroup.CpuStat(
|
||||
cpuStatsMap.get("nr_periods"),
|
||||
cpuStatsMap.get("nr_throttled"),
|
||||
cpuStatsMap.get("throttled_usec")
|
||||
);
|
||||
|
||||
cgroupMemoryLimitInBytes = getCgroupV2MemoryLimitInBytes(memoryControlGroup);
|
||||
cgroupMemoryUsageInBytes = getCgroupV2MemoryUsageInBytes(memoryControlGroup);
|
||||
} else {
|
||||
cpuAcctControlGroup = controllerMap.get("cpuacct");
|
||||
if (cpuAcctControlGroup == null) {
|
||||
logger.debug("no [cpuacct] data found in cgroup stats");
|
||||
return null;
|
||||
}
|
||||
final long cgroupCpuAcctUsageNanos = getCgroupCpuAcctUsageNanos(cpuAcctControlGroup);
|
||||
cgroupCpuAcctUsageNanos = getCgroupCpuAcctUsageNanos(cpuAcctControlGroup);
|
||||
|
||||
final String cpuControlGroup = controllerMap.get("cpu");
|
||||
cpuControlGroup = controllerMap.get("cpu");
|
||||
if (cpuControlGroup == null) {
|
||||
logger.debug("no [cpu] data found in cgroup stats");
|
||||
return null;
|
||||
}
|
||||
final long cgroupCpuAcctCpuCfsPeriodMicros = getCgroupCpuAcctCpuCfsPeriodMicros(cpuControlGroup);
|
||||
final long cgroupCpuAcctCpuCfsQuotaMicros = getCgroupCpuAcctCpuCfsQuotaMicros(cpuControlGroup);
|
||||
final OsStats.Cgroup.CpuStat cpuStat = getCgroupCpuAcctCpuStat(cpuControlGroup);
|
||||
cgroupCpuAcctCpuCfsPeriodMicros = getCgroupCpuAcctCpuCfsPeriodMicros(cpuControlGroup);
|
||||
cgroupCpuAcctCpuCfsQuotaMicros = getCgroupCpuAcctCpuCfsQuotaMicros(cpuControlGroup);
|
||||
cpuStat = getCgroupCpuAcctCpuStat(cpuControlGroup);
|
||||
|
||||
final String memoryControlGroup = controllerMap.get("memory");
|
||||
memoryControlGroup = controllerMap.get("memory");
|
||||
if (memoryControlGroup == null) {
|
||||
logger.debug("no [memory] data found in cgroup stats");
|
||||
return null;
|
||||
}
|
||||
final String cgroupMemoryLimitInBytes = getCgroupMemoryLimitInBytes(memoryControlGroup);
|
||||
final String cgroupMemoryUsageInBytes = getCgroupMemoryUsageInBytes(memoryControlGroup);
|
||||
|
||||
return new OsStats.Cgroup(
|
||||
cpuAcctControlGroup,
|
||||
cgroupCpuAcctUsageNanos,
|
||||
cpuControlGroup,
|
||||
cgroupCpuAcctCpuCfsPeriodMicros,
|
||||
cgroupCpuAcctCpuCfsQuotaMicros,
|
||||
cpuStat,
|
||||
memoryControlGroup,
|
||||
cgroupMemoryLimitInBytes,
|
||||
cgroupMemoryUsageInBytes);
|
||||
cgroupMemoryLimitInBytes = getCgroupMemoryLimitInBytes(memoryControlGroup);
|
||||
cgroupMemoryUsageInBytes = getCgroupMemoryUsageInBytes(memoryControlGroup);
|
||||
}
|
||||
|
||||
return new OsStats.Cgroup(
|
||||
cpuAcctControlGroup,
|
||||
cgroupCpuAcctUsageNanos,
|
||||
cpuControlGroup,
|
||||
cgroupCpuAcctCpuCfsPeriodMicros,
|
||||
cgroupCpuAcctCpuCfsQuotaMicros,
|
||||
cpuStat,
|
||||
memoryControlGroup,
|
||||
cgroupMemoryLimitInBytes,
|
||||
cgroupMemoryUsageInBytes
|
||||
);
|
||||
} catch (final IOException e) {
|
||||
logger.debug("error reading control group stats", e);
|
||||
return null;
|
||||
|
@ -576,13 +729,14 @@ public class OsProbe {
|
|||
|
||||
OsInfo osInfo(long refreshInterval, int allocatedProcessors) throws IOException {
|
||||
return new OsInfo(
|
||||
refreshInterval,
|
||||
Runtime.getRuntime().availableProcessors(),
|
||||
allocatedProcessors,
|
||||
Constants.OS_NAME,
|
||||
getPrettyName(),
|
||||
Constants.OS_ARCH,
|
||||
Constants.OS_VERSION);
|
||||
refreshInterval,
|
||||
Runtime.getRuntime().availableProcessors(),
|
||||
allocatedProcessors,
|
||||
Constants.OS_NAME,
|
||||
getPrettyName(),
|
||||
Constants.OS_ARCH,
|
||||
Constants.OS_VERSION
|
||||
);
|
||||
}
|
||||
|
||||
private String getPrettyName() throws IOException {
|
||||
|
@ -594,11 +748,13 @@ public class OsProbe {
|
|||
* wrapped in single- or double-quotes.
|
||||
*/
|
||||
final List<String> etcOsReleaseLines = readOsRelease();
|
||||
final List<String> prettyNameLines =
|
||||
etcOsReleaseLines.stream().filter(line -> line.startsWith("PRETTY_NAME")).collect(Collectors.toList());
|
||||
final List<String> prettyNameLines = etcOsReleaseLines.stream()
|
||||
.filter(line -> line.startsWith("PRETTY_NAME"))
|
||||
.collect(Collectors.toList());
|
||||
assert prettyNameLines.size() <= 1 : prettyNameLines;
|
||||
final Optional<String> maybePrettyNameLine =
|
||||
prettyNameLines.size() == 1 ? Optional.of(prettyNameLines.get(0)) : Optional.empty();
|
||||
final Optional<String> maybePrettyNameLine = prettyNameLines.size() == 1
|
||||
? Optional.of(prettyNameLines.get(0))
|
||||
: Optional.empty();
|
||||
if (maybePrettyNameLine.isPresent()) {
|
||||
// we trim since some OS contain trailing space, for example, Oracle Linux Server 6.9 has a trailing space after the quote
|
||||
final String trimmedPrettyNameLine = maybePrettyNameLine.get().trim();
|
||||
|
@ -695,11 +851,15 @@ public class OsProbe {
|
|||
return Constants.LINUX && getPrettyName().equals("Debian GNU/Linux 8 (jessie)");
|
||||
}
|
||||
|
||||
OsStats.Cgroup getCgroup(boolean isLinux) {
|
||||
return isLinux ? getCgroup() : null;
|
||||
}
|
||||
|
||||
public OsStats osStats() {
|
||||
final OsStats.Cpu cpu = new OsStats.Cpu(getSystemCpuPercent(), getSystemLoadAverage());
|
||||
final OsStats.Mem mem = new OsStats.Mem(getTotalPhysicalMemorySize(), getFreePhysicalMemorySize());
|
||||
final OsStats.Swap swap = new OsStats.Swap(getTotalSwapSpaceSize(), getFreeSwapSpaceSize());
|
||||
final OsStats.Cgroup cgroup = Constants.LINUX ? getCgroup() : null;
|
||||
final OsStats.Cgroup cgroup = getCgroup(Constants.LINUX);
|
||||
return new OsStats(System.currentTimeMillis(), cpu, mem, swap, cgroup);
|
||||
}
|
||||
|
||||
|
|
|
@ -153,6 +153,13 @@ grant {
|
|||
permission java.io.FilePermission "/sys/fs/cgroup/memory", "read";
|
||||
permission java.io.FilePermission "/sys/fs/cgroup/memory/-", "read";
|
||||
|
||||
// control group v2 stats on linux
|
||||
permission java.io.FilePermission "/sys/fs/cgroup/cpu.max", "read";
|
||||
permission java.io.FilePermission "/sys/fs/cgroup/cpu.stat", "read";
|
||||
permission java.io.FilePermission "/sys/fs/cgroup/memory.current", "read";
|
||||
permission java.io.FilePermission "/sys/fs/cgroup/memory.max", "read";
|
||||
permission java.io.FilePermission "/sys/fs/cgroup/memory.stat", "read";
|
||||
|
||||
// system memory on Linux systems affected by JDK bug (#66629)
|
||||
permission java.io.FilePermission "/proc/meminfo", "read";
|
||||
};
|
||||
|
|
|
@ -43,7 +43,7 @@ public class OsProbeTests extends ESTestCase {
|
|||
final OsProbe osProbe = new OsProbe() {
|
||||
|
||||
@Override
|
||||
List<String> readOsRelease() throws IOException {
|
||||
List<String> readOsRelease() {
|
||||
assert Constants.LINUX : Constants.OS_NAME;
|
||||
if (prettyName != null) {
|
||||
final String quote = randomFrom("\"", "'", "");
|
||||
|
@ -78,8 +78,10 @@ public class OsProbeTests extends ESTestCase {
|
|||
OsStats stats = osProbe.osStats();
|
||||
assertNotNull(stats);
|
||||
assertThat(stats.getTimestamp(), greaterThan(0L));
|
||||
assertThat(stats.getCpu().getPercent(), anyOf(equalTo((short) -1),
|
||||
is(both(greaterThanOrEqualTo((short) 0)).and(lessThanOrEqualTo((short) 100)))));
|
||||
assertThat(
|
||||
stats.getCpu().getPercent(),
|
||||
anyOf(equalTo((short) -1), is(both(greaterThanOrEqualTo((short) 0)).and(lessThanOrEqualTo((short) 100))))
|
||||
);
|
||||
double[] loadAverage = stats.getCpu().getLoadAverage();
|
||||
if (loadAverage != null) {
|
||||
assertThat(loadAverage.length, equalTo(3));
|
||||
|
@ -173,16 +175,14 @@ public class OsProbeTests extends ESTestCase {
|
|||
}
|
||||
|
||||
public void testCgroupProbe() {
|
||||
assumeTrue("test runs on Linux only", Constants.LINUX);
|
||||
|
||||
final boolean areCgroupStatsAvailable = randomBoolean();
|
||||
final int availableCgroupsVersion = randomFrom(0, 1, 2);
|
||||
final String hierarchy = randomAlphaOfLength(16);
|
||||
|
||||
final OsProbe probe = buildStubOsProbe(areCgroupStatsAvailable, hierarchy);
|
||||
final OsProbe probe = buildStubOsProbe(availableCgroupsVersion, hierarchy);
|
||||
|
||||
final OsStats.Cgroup cgroup = probe.osStats().getCgroup();
|
||||
|
||||
if (areCgroupStatsAvailable) {
|
||||
if (availableCgroupsVersion > 0) {
|
||||
assertNotNull(cgroup);
|
||||
assertThat(cgroup.getCpuAcctControlGroup(), equalTo("/" + hierarchy));
|
||||
assertThat(cgroup.getCpuAcctUsageNanos(), equalTo(364869866063112L));
|
||||
|
@ -200,17 +200,14 @@ public class OsProbeTests extends ESTestCase {
|
|||
}
|
||||
|
||||
public void testCgroupProbeWithMissingCpuAcct() {
|
||||
assumeTrue("test runs on Linux only", Constants.LINUX);
|
||||
|
||||
final String hierarchy = randomAlphaOfLength(16);
|
||||
|
||||
// This cgroup data is missing a line about cpuacct
|
||||
List<String> procSelfCgroupLines = getProcSelfGroupLines(hierarchy)
|
||||
.stream()
|
||||
List<String> procSelfCgroupLines = getProcSelfGroupLines(1, hierarchy).stream()
|
||||
.map(line -> line.replaceFirst(",cpuacct", ""))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
final OsProbe probe = buildStubOsProbe(true, hierarchy, procSelfCgroupLines);
|
||||
final OsProbe probe = buildStubOsProbe(1, hierarchy, procSelfCgroupLines);
|
||||
|
||||
final OsStats.Cgroup cgroup = probe.osStats().getCgroup();
|
||||
|
||||
|
@ -218,18 +215,14 @@ public class OsProbeTests extends ESTestCase {
|
|||
}
|
||||
|
||||
public void testCgroupProbeWithMissingCpu() {
|
||||
assumeTrue("test runs on Linux only", Constants.LINUX);
|
||||
|
||||
final String hierarchy = randomAlphaOfLength(16);
|
||||
|
||||
// This cgroup data is missing a line about cpu
|
||||
List<String> procSelfCgroupLines = getProcSelfGroupLines(hierarchy)
|
||||
.stream()
|
||||
List<String> procSelfCgroupLines = getProcSelfGroupLines(1, hierarchy).stream()
|
||||
.map(line -> line.replaceFirst(":cpu,", ":"))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
|
||||
final OsProbe probe = buildStubOsProbe(true, hierarchy, procSelfCgroupLines);
|
||||
final OsProbe probe = buildStubOsProbe(1, hierarchy, procSelfCgroupLines);
|
||||
|
||||
final OsStats.Cgroup cgroup = probe.osStats().getCgroup();
|
||||
|
||||
|
@ -237,17 +230,14 @@ public class OsProbeTests extends ESTestCase {
|
|||
}
|
||||
|
||||
public void testCgroupProbeWithMissingMemory() {
|
||||
assumeTrue("test runs on Linux only", Constants.LINUX);
|
||||
|
||||
final String hierarchy = randomAlphaOfLength(16);
|
||||
|
||||
// This cgroup data is missing a line about memory
|
||||
List<String> procSelfCgroupLines = getProcSelfGroupLines(hierarchy)
|
||||
.stream()
|
||||
List<String> procSelfCgroupLines = getProcSelfGroupLines(1, hierarchy).stream()
|
||||
.filter(line -> line.contains(":memory:") == false)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
final OsProbe probe = buildStubOsProbe(true, hierarchy, procSelfCgroupLines);
|
||||
final OsProbe probe = buildStubOsProbe(1, hierarchy, procSelfCgroupLines);
|
||||
|
||||
final OsStats.Cgroup cgroup = probe.osStats().getCgroup();
|
||||
|
||||
|
@ -255,6 +245,8 @@ public class OsProbeTests extends ESTestCase {
|
|||
}
|
||||
|
||||
public void testGetTotalMemFromProcMeminfo() throws Exception {
|
||||
int cgroupsVersion = randomFrom(1, 2);
|
||||
|
||||
// missing MemTotal line
|
||||
List<String> meminfoLines = Arrays.asList(
|
||||
"MemFree: 8467692 kB",
|
||||
|
@ -265,7 +257,7 @@ public class OsProbeTests extends ESTestCase {
|
|||
"Active: 43637908 kB",
|
||||
"Inactive: 8130280 kB"
|
||||
);
|
||||
OsProbe probe = buildStubOsProbe(true, "", org.elasticsearch.core.List.of(), meminfoLines);
|
||||
OsProbe probe = buildStubOsProbe(cgroupsVersion, "", org.elasticsearch.core.List.of(), meminfoLines);
|
||||
assertThat(probe.getTotalMemFromProcMeminfo(), equalTo(0L));
|
||||
|
||||
// MemTotal line with invalid value
|
||||
|
@ -279,7 +271,7 @@ public class OsProbeTests extends ESTestCase {
|
|||
"Active: 43637908 kB",
|
||||
"Inactive: 8130280 kB"
|
||||
);
|
||||
probe = buildStubOsProbe(true, "", org.elasticsearch.core.List.of(), meminfoLines);
|
||||
probe = buildStubOsProbe(cgroupsVersion, "", org.elasticsearch.core.List.of(), meminfoLines);
|
||||
assertThat(probe.getTotalMemFromProcMeminfo(), equalTo(0L));
|
||||
|
||||
// MemTotal line with invalid unit
|
||||
|
@ -293,7 +285,7 @@ public class OsProbeTests extends ESTestCase {
|
|||
"Active: 43637908 kB",
|
||||
"Inactive: 8130280 kB"
|
||||
);
|
||||
probe = buildStubOsProbe(true, "", org.elasticsearch.core.List.of(), meminfoLines);
|
||||
probe = buildStubOsProbe(cgroupsVersion, "", org.elasticsearch.core.List.of(), meminfoLines);
|
||||
assertThat(probe.getTotalMemFromProcMeminfo(), equalTo(0L));
|
||||
|
||||
// MemTotal line with random valid value
|
||||
|
@ -308,7 +300,7 @@ public class OsProbeTests extends ESTestCase {
|
|||
"Active: 43637908 kB",
|
||||
"Inactive: 8130280 kB"
|
||||
);
|
||||
probe = buildStubOsProbe(true, "", org.elasticsearch.core.List.of(), meminfoLines);
|
||||
probe = buildStubOsProbe(cgroupsVersion, "", org.elasticsearch.core.List.of(), meminfoLines);
|
||||
assertThat(probe.getTotalMemFromProcMeminfo(), equalTo(memTotalInKb * 1024L));
|
||||
}
|
||||
|
||||
|
@ -319,7 +311,13 @@ public class OsProbeTests extends ESTestCase {
|
|||
assertThat(osProbe.getTotalPhysicalMemorySize(), greaterThan(0L));
|
||||
}
|
||||
|
||||
private static List<String> getProcSelfGroupLines(String hierarchy) {
|
||||
private static List<String> getProcSelfGroupLines(int cgroupsVersion, String hierarchy) {
|
||||
// cgroupsVersion == 0 falls through to the v1 lines below; that is harmless because the stub probe then reports cgroup stats as unavailable
|
||||
|
||||
if (cgroupsVersion == 2) {
|
||||
return List.of("0::/" + hierarchy);
|
||||
}
|
||||
|
||||
return Arrays.asList(
|
||||
"10:freezer:/",
|
||||
"9:net_cls,net_prio:/",
|
||||
|
@ -331,32 +329,40 @@ public class OsProbeTests extends ESTestCase {
|
|||
"3:perf_event:/",
|
||||
"2:cpu,cpuacct,cpuset:/" + hierarchy,
|
||||
"1:name=systemd:/user.slice/user-1000.slice/session-2359.scope",
|
||||
"0::/cgroup2");
|
||||
"0::/cgroup2"
|
||||
);
|
||||
}
|
||||
|
||||
private static OsProbe buildStubOsProbe(final boolean areCgroupStatsAvailable, final String hierarchy) {
|
||||
List<String> procSelfCgroupLines = getProcSelfGroupLines(hierarchy);
|
||||
private static OsProbe buildStubOsProbe(final int availableCgroupsVersion, final String hierarchy) {
|
||||
List<String> procSelfCgroupLines = getProcSelfGroupLines(availableCgroupsVersion, hierarchy);
|
||||
|
||||
return buildStubOsProbe(areCgroupStatsAvailable, hierarchy, procSelfCgroupLines);
|
||||
return buildStubOsProbe(availableCgroupsVersion, hierarchy, procSelfCgroupLines);
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a test instance of OsProbe. Methods that ordinarily read from the filesystem are overridden to return values based upon
|
||||
* the arguments to this method.
|
||||
*
|
||||
* @param areCgroupStatsAvailable whether or not cgroup data is available. Normally OsProbe establishes this for itself.
|
||||
* @param availableCgroupsVersion what version of cgroups are available, 1 or 2, or 0 for no cgroups. Normally OsProbe establishes this
|
||||
* for itself.
|
||||
* @param hierarchy a mock value used to generate a cgroup hierarchy.
|
||||
* @param procSelfCgroupLines the lines that will be used as the content of <code>/proc/self/cgroup</code>
|
||||
* @param procMeminfoLines lines that will be used as the content of <code>/proc/meminfo</code>
|
||||
* @return a test instance
|
||||
*/
|
||||
private static OsProbe buildStubOsProbe(
|
||||
final boolean areCgroupStatsAvailable,
|
||||
final int availableCgroupsVersion,
|
||||
final String hierarchy,
|
||||
List<String> procSelfCgroupLines,
|
||||
List<String> procMeminfoLines
|
||||
) {
|
||||
return new OsProbe() {
|
||||
@Override
|
||||
OsStats.Cgroup getCgroup(boolean isLinux) {
|
||||
// Pretend we're always on Linux so that we can run the cgroup tests
|
||||
return super.getCgroup(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
List<String> readProcSelfCgroup() {
|
||||
return procSelfCgroupLines;
|
||||
|
@ -382,10 +388,7 @@ public class OsProbeTests extends ESTestCase {
|
|||
|
||||
@Override
|
||||
List<String> readSysFsCgroupCpuAcctCpuStat(String controlGroup) {
|
||||
return Arrays.asList(
|
||||
"nr_periods 17992",
|
||||
"nr_throttled 1311",
|
||||
"throttled_time 139298645489");
|
||||
return Arrays.asList("nr_periods 17992", "nr_throttled 1311", "throttled_time 139298645489");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -403,18 +406,50 @@ public class OsProbeTests extends ESTestCase {
|
|||
|
||||
@Override
|
||||
boolean areCgroupStatsAvailable() {
|
||||
return areCgroupStatsAvailable;
|
||||
return availableCgroupsVersion > 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
List<String> readProcMeminfo() throws IOException {
|
||||
List<String> readProcMeminfo() {
|
||||
return procMeminfoLines;
|
||||
}
|
||||
|
||||
@Override
|
||||
String readSysFsCgroupV2MemoryLimitInBytes(String controlGroup) {
|
||||
assertThat(controlGroup, equalTo("/" + hierarchy));
|
||||
// This is the highest value that can be stored in an unsigned 64 bit number, hence too big for long
|
||||
return "18446744073709551615";
|
||||
}
|
||||
|
||||
@Override
|
||||
String readSysFsCgroupV2MemoryUsageInBytes(String controlGroup) {
|
||||
assertThat(controlGroup, equalTo("/" + hierarchy));
|
||||
return "4796416";
|
||||
}
|
||||
|
||||
@Override
|
||||
List<String> readCgroupV2CpuStats(String controlGroup) {
|
||||
assertThat(controlGroup, equalTo("/" + hierarchy));
|
||||
return List.of(
|
||||
"usage_usec 364869866063112",
|
||||
"user_usec 34636",
|
||||
"system_usec 9896",
|
||||
"nr_periods 17992",
|
||||
"nr_throttled 1311",
|
||||
"throttled_usec 139298645489"
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
String readCgroupV2CpuLimit(String controlGroup) {
|
||||
assertThat(controlGroup, equalTo("/" + hierarchy));
|
||||
return "50000 100000";
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private static OsProbe buildStubOsProbe(
|
||||
final boolean areCgroupStatsAvailable,
|
||||
final int availableCgroupsVersion,
|
||||
final String hierarchy,
|
||||
List<String> procSelfCgroupLines
|
||||
) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue