HealthPeriodicLogger disabled by default (#97722)

Co-authored-by: Matt Culbreth <matt.culbreth@elastic.co>
This commit is contained in:
Mary Gouseti 2023-07-18 10:18:23 +03:00 committed by GitHub
parent 82cc93b998
commit 9aed799b51
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 127 additions and 70 deletions

View file

@ -45,6 +45,10 @@ comprise its local health such as its disk usage.
`health.ilm.max_retries_per_step`::
(<<cluster-update-settings,Dynamic>>) The minimum amount of times an index has retried by an {ilm-init} step before it is considered stagnant. Defaults to `100`
`health.periodic_logger.enabled`::
(<<cluster-update-settings,Dynamic>>) Enables the health periodic logger, which logs the health statuses of each health indicator along with the top level one as observed by the Health API.
Defaults to `false`.
`health.periodic_logger.poll_interval`::
(<<cluster-update-settings,Dynamic>>, <<time-units, time unit value>>) How often {es} logs the health status of the cluster and of each health indicator as observed by the Health API.
Defaults to `60s` (60 seconds).

View file

@ -568,7 +568,8 @@ public final class ClusterSettings extends AbstractScopedSettings {
TcpTransport.isUntrustedRemoteClusterEnabled() ? RemoteClusterPortSettings.TCP_NO_DELAY : null,
TcpTransport.isUntrustedRemoteClusterEnabled() ? RemoteClusterPortSettings.TCP_REUSE_ADDRESS : null,
TcpTransport.isUntrustedRemoteClusterEnabled() ? RemoteClusterPortSettings.TCP_SEND_BUFFER_SIZE : null,
HealthPeriodicLogger.HEALTH_PERIODIC_LOGGER_POLL_INTERVAL_SETTING,
HealthPeriodicLogger.POLL_INTERVAL_SETTING,
HealthPeriodicLogger.ENABLED_SETTING,
DataStreamLifecycle.isEnabled() ? DataStreamLifecycle.CLUSTER_LIFECYCLE_DEFAULT_ROLLOVER_SETTING : null,
IndicesClusterStateService.SHARD_LOCK_RETRY_INTERVAL_SETTING,
IndicesClusterStateService.SHARD_LOCK_RETRY_TIMEOUT_SETTING,

View file

@ -42,15 +42,21 @@ import java.util.concurrent.atomic.AtomicBoolean;
public class HealthPeriodicLogger implements ClusterStateListener, Closeable, SchedulerEngine.Listener {
public static final String HEALTH_FIELD_PREFIX = "elasticsearch.health";
public static final String HEALTH_PERIODIC_LOGGER_POLL_INTERVAL = "health.periodic_logger.poll_interval";
public static final Setting<TimeValue> HEALTH_PERIODIC_LOGGER_POLL_INTERVAL_SETTING = Setting.timeSetting(
HEALTH_PERIODIC_LOGGER_POLL_INTERVAL,
public static final Setting<TimeValue> POLL_INTERVAL_SETTING = Setting.timeSetting(
"health.periodic_logger.poll_interval",
TimeValue.timeValueSeconds(60),
TimeValue.timeValueSeconds(15),
Setting.Property.Dynamic,
Setting.Property.NodeScope
);
public static final Setting<Boolean> ENABLED_SETTING = Setting.boolSetting(
"health.periodic_logger.enabled",
false,
Setting.Property.Dynamic,
Setting.Property.NodeScope
);
/**
* Name constant for the job HealthService schedules
*/
@ -71,6 +77,7 @@ public class HealthPeriodicLogger implements ClusterStateListener, Closeable, Sc
private final SetOnce<SchedulerEngine> scheduler = new SetOnce<>();
private volatile TimeValue pollInterval;
private volatile boolean enabled;
private static final Logger logger = LogManager.getLogger(HealthPeriodicLogger.class);
@ -89,16 +96,19 @@ public class HealthPeriodicLogger implements ClusterStateListener, Closeable, Sc
this.client = client;
this.healthService = healthService;
this.clock = Clock.systemUTC();
this.pollInterval = HEALTH_PERIODIC_LOGGER_POLL_INTERVAL_SETTING.get(settings);
this.pollInterval = POLL_INTERVAL_SETTING.get(settings);
this.enabled = ENABLED_SETTING.get(settings);
}
/**
* Initializer method to avoid the publication of a self reference in the constructor.
*/
public void init() {
if (this.enabled) {
clusterService.addListener(this);
clusterService.getClusterSettings()
.addSettingsUpdateConsumer(HEALTH_PERIODIC_LOGGER_POLL_INTERVAL_SETTING, this::updatePollInterval);
}
clusterService.getClusterSettings().addSettingsUpdateConsumer(ENABLED_SETTING, this::enable);
clusterService.getClusterSettings().addSettingsUpdateConsumer(POLL_INTERVAL_SETTING, this::updatePollInterval);
}
@Override
@ -137,7 +147,7 @@ public class HealthPeriodicLogger implements ClusterStateListener, Closeable, Sc
@Override
public void triggered(SchedulerEngine.Event event) {
if (event.getJobName().equals(HEALTH_PERIODIC_LOGGER_JOB_NAME)) {
if (event.getJobName().equals(HEALTH_PERIODIC_LOGGER_JOB_NAME) && this.enabled) {
this.tryToLogHealth();
}
}
@ -229,6 +239,10 @@ public class HealthPeriodicLogger implements ClusterStateListener, Closeable, Sc
return;
}
if (this.enabled == false) {
return;
}
// don't schedule the job if the node is shutting down
if (isClusterServiceStoppedOrClosed()) {
logger.trace(
@ -257,6 +271,17 @@ public class HealthPeriodicLogger implements ClusterStateListener, Closeable, Sc
}
}
private void enable(boolean enabled) {
this.enabled = enabled;
if (enabled) {
clusterService.addListener(this);
maybeScheduleJob();
} else {
clusterService.removeListener(this);
maybeCancelJob();
}
}
private void updatePollInterval(TimeValue newInterval) {
this.pollInterval = newInterval;
maybeScheduleJob();

View file

@ -24,8 +24,8 @@ import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.scheduler.SchedulerEngine;
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.MockLogAppender;
import org.elasticsearch.threadpool.TestThreadPool;
@ -34,7 +34,6 @@ import org.junit.After;
import org.junit.Before;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
@ -62,7 +61,12 @@ public class HealthPeriodicLoggerTests extends ESTestCase {
private ClusterService clusterService;
private HealthPeriodicLogger testHealthPeriodicLogger;
private ClusterService testClusterService;
private ClusterSettings clusterSettings;
private final DiscoveryNode node1 = DiscoveryNodeUtils.builder("node_1").roles(Set.of(DiscoveryNodeRole.MASTER_ROLE)).build();
private final DiscoveryNode node2 = DiscoveryNodeUtils.builder("node_2")
.roles(Set.of(DiscoveryNodeRole.MASTER_ROLE, DiscoveryNodeRole.DATA_ROLE))
.build();
private ClusterState stateWithLocalHealthNode;
private NodeClient getTestClient() {
return mock(NodeClient.class);
@ -75,22 +79,15 @@ public class HealthPeriodicLoggerTests extends ESTestCase {
@Before
public void setupServices() {
threadPool = new TestThreadPool(getTestName());
Set<Setting<?>> builtInClusterSettings = new HashSet<>(ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
builtInClusterSettings.add(HealthPeriodicLogger.HEALTH_PERIODIC_LOGGER_POLL_INTERVAL_SETTING);
ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, builtInClusterSettings);
this.clusterService = createClusterService(this.threadPool, clusterSettings);
stateWithLocalHealthNode = ClusterStateCreationUtils.state(node2, node1, node2, new DiscoveryNode[] { node1, node2 });
this.clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
this.clusterService = createClusterService(stateWithLocalHealthNode, this.threadPool, clusterSettings);
this.client = getTestClient();
}
@After
public void cleanup() {
clusterService.close();
if (testClusterService != null) {
testClusterService.close();
}
if (testHealthPeriodicLogger != null) {
testHealthPeriodicLogger.close();
}
@ -98,18 +95,6 @@ public class HealthPeriodicLoggerTests extends ESTestCase {
client.close();
}
private List<HealthIndicatorResult> getTestIndicatorResults() {
var networkLatency = new HealthIndicatorResult("network_latency", GREEN, null, null, null, null);
var slowTasks = new HealthIndicatorResult("slow_task_assignment", YELLOW, null, null, null, null);
var shardsAvailable = new HealthIndicatorResult("shards_availability", GREEN, null, null, null, null);
return List.of(networkLatency, slowTasks, shardsAvailable);
}
private String makeHealthStatusString(String key) {
return String.format(Locale.ROOT, "%s.%s.status", HealthPeriodicLogger.HEALTH_FIELD_PREFIX, key);
}
public void testConvertToLoggedFields() {
var results = getTestIndicatorResults();
var overallStatus = HealthStatus.merge(results.stream().map(HealthIndicatorResult::status));
@ -137,52 +122,76 @@ public class HealthPeriodicLoggerTests extends ESTestCase {
public void testHealthNodeIsSelected() {
HealthService testHealthService = this.getMockedHealthService();
// create a cluster topology
final DiscoveryNode node1 = DiscoveryNodeUtils.builder("node_1").roles(Set.of(DiscoveryNodeRole.MASTER_ROLE)).build();
final DiscoveryNode node2 = DiscoveryNodeUtils.builder("node_2")
.roles(Set.of(DiscoveryNodeRole.MASTER_ROLE, DiscoveryNodeRole.DATA_ROLE))
.build();
ClusterState current = ClusterStateCreationUtils.state(node2, node1, node2, new DiscoveryNode[] { node1, node2 });
testClusterService = createClusterService(current, this.threadPool);
testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, testClusterService, client, testHealthService);
testHealthPeriodicLogger.init();
testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(clusterService, testHealthService, true);
// test that it knows that it's not initially the health node
assertFalse(testHealthPeriodicLogger.isHealthNode);
// trigger a cluster change and recheck
testHealthPeriodicLogger.clusterChanged(new ClusterChangedEvent("test", current, ClusterState.EMPTY_STATE));
testHealthPeriodicLogger.clusterChanged(new ClusterChangedEvent("test", stateWithLocalHealthNode, ClusterState.EMPTY_STATE));
assertTrue(testHealthPeriodicLogger.isHealthNode);
}
public void testJobScheduling() {
HealthService testHealthService = this.getMockedHealthService();
// create a cluster topology
final DiscoveryNode node1 = DiscoveryNodeUtils.builder("node_1").roles(Set.of(DiscoveryNodeRole.MASTER_ROLE)).build();
final DiscoveryNode node2 = DiscoveryNodeUtils.builder("node_2")
.roles(Set.of(DiscoveryNodeRole.MASTER_ROLE, DiscoveryNodeRole.DATA_ROLE))
.build();
ClusterState current = ClusterStateCreationUtils.state(node2, node1, node2, new DiscoveryNode[] { node1, node2 });
testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(clusterService, testHealthService, true);
testClusterService = createClusterService(current, this.threadPool);
testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, testClusterService, client, testHealthService);
testHealthPeriodicLogger.init();
testHealthPeriodicLogger.clusterChanged(new ClusterChangedEvent("test", current, ClusterState.EMPTY_STATE));
testHealthPeriodicLogger.clusterChanged(new ClusterChangedEvent("test", stateWithLocalHealthNode, ClusterState.EMPTY_STATE));
assertTrue(testHealthPeriodicLogger.isHealthNode);
SchedulerEngine scheduler = testHealthPeriodicLogger.getScheduler();
assertNotNull(scheduler);
assertTrue(scheduler.scheduledJobIds().contains(HealthPeriodicLogger.HEALTH_PERIODIC_LOGGER_JOB_NAME));
ClusterState noHealthNode = ClusterStateCreationUtils.state(node2, node1, new DiscoveryNode[] { node1, node2 });
testHealthPeriodicLogger.clusterChanged(new ClusterChangedEvent("test", noHealthNode, current));
testHealthPeriodicLogger.clusterChanged(new ClusterChangedEvent("test", noHealthNode, stateWithLocalHealthNode));
assertFalse(testHealthPeriodicLogger.isHealthNode);
assertFalse(scheduler.scheduledJobIds().contains(HealthPeriodicLogger.HEALTH_PERIODIC_LOGGER_JOB_NAME));
}
public void testEnabled() {
HealthService testHealthService = this.getMockedHealthService();
testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(clusterService, testHealthService, true);
testHealthPeriodicLogger.clusterChanged(new ClusterChangedEvent("test", stateWithLocalHealthNode, ClusterState.EMPTY_STATE));
assertTrue(testHealthPeriodicLogger.isHealthNode);
// disable it and then verify that the job is gone
{
this.clusterSettings.applySettings(Settings.builder().put(HealthPeriodicLogger.ENABLED_SETTING.getKey(), false).build());
assertFalse(
testHealthPeriodicLogger.getScheduler().scheduledJobIds().contains(HealthPeriodicLogger.HEALTH_PERIODIC_LOGGER_JOB_NAME)
);
}
// enable it and then verify that the job is created
{
this.clusterSettings.applySettings(Settings.builder().put(HealthPeriodicLogger.ENABLED_SETTING.getKey(), true).build());
assertTrue(
testHealthPeriodicLogger.getScheduler().scheduledJobIds().contains(HealthPeriodicLogger.HEALTH_PERIODIC_LOGGER_JOB_NAME)
);
}
}
public void testUpdatePollInterval() {
HealthService testHealthService = this.getMockedHealthService();
testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(clusterService, testHealthService, false);
assertNull(testHealthPeriodicLogger.getScheduler());
testHealthPeriodicLogger.clusterChanged(new ClusterChangedEvent("test", stateWithLocalHealthNode, ClusterState.EMPTY_STATE));
assertTrue(testHealthPeriodicLogger.isHealthNode);
// verify that updating the polling interval doesn't schedule the job
{
this.clusterSettings.applySettings(
Settings.builder().put(HealthPeriodicLogger.POLL_INTERVAL_SETTING.getKey(), TimeValue.timeValueSeconds(15)).build()
);
assertNull(testHealthPeriodicLogger.getScheduler());
}
}
public void testTriggeredJobCallsTryToLogHealth() throws Exception {
AtomicBoolean listenerCalled = new AtomicBoolean(false);
AtomicBoolean failureCalled = new AtomicBoolean(false);
@ -199,8 +208,7 @@ public class HealthPeriodicLoggerTests extends ESTestCase {
};
HealthService testHealthService = this.getMockedHealthService();
testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, clusterService, client, testHealthService);
testHealthPeriodicLogger.init();
testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(this.clusterService, testHealthService, true);
HealthPeriodicLogger spyHealthPeriodicLogger = spy(testHealthPeriodicLogger);
spyHealthPeriodicLogger.isHealthNode = true;
@ -221,8 +229,7 @@ public class HealthPeriodicLoggerTests extends ESTestCase {
HealthService testHealthService = this.getMockedHealthService();
testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, clusterService, client, testHealthService);
testHealthPeriodicLogger.init();
testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(this.clusterService, testHealthService, true);
HealthPeriodicLogger spyHealthPeriodicLogger = spy(testHealthPeriodicLogger);
spyHealthPeriodicLogger.isHealthNode = true;
@ -266,8 +273,7 @@ public class HealthPeriodicLoggerTests extends ESTestCase {
return null;
}).when(testHealthService).getHealth(any(), isNull(), anyBoolean(), anyInt(), any());
testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, clusterService, client, testHealthService);
testHealthPeriodicLogger.init();
testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(this.clusterService, testHealthService, true);
HealthPeriodicLogger spyHealthPeriodicLogger = spy(testHealthPeriodicLogger);
spyHealthPeriodicLogger.isHealthNode = true;
@ -301,8 +307,7 @@ public class HealthPeriodicLoggerTests extends ESTestCase {
return null;
}).when(testHealthService).getHealth(any(), isNull(), anyBoolean(), anyInt(), any());
testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, clusterService, client, testHealthService);
testHealthPeriodicLogger.init();
testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(this.clusterService, testHealthService, true);
HealthPeriodicLogger spyHealthPeriodicLogger = spy(testHealthPeriodicLogger);
spyHealthPeriodicLogger.isHealthNode = true;
@ -329,8 +334,7 @@ public class HealthPeriodicLoggerTests extends ESTestCase {
HealthService testHealthService = this.getMockedHealthService();
testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, clusterService, client, testHealthService);
testHealthPeriodicLogger.init();
testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(this.clusterService, testHealthService, true);
HealthPeriodicLogger spyHealthPeriodicLogger = spy(testHealthPeriodicLogger);
spyHealthPeriodicLogger.isHealthNode = true;
@ -398,8 +402,7 @@ public class HealthPeriodicLoggerTests extends ESTestCase {
HealthService testHealthService = this.getMockedHealthService();
testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, clusterService, client, testHealthService);
testHealthPeriodicLogger.init();
testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(this.clusterService, testHealthService, true);
HealthPeriodicLogger spyHealthPeriodicLogger = spy(testHealthPeriodicLogger);
spyHealthPeriodicLogger.isHealthNode = true;
@ -420,4 +423,28 @@ public class HealthPeriodicLoggerTests extends ESTestCase {
}
private List<HealthIndicatorResult> getTestIndicatorResults() {
var networkLatency = new HealthIndicatorResult("network_latency", GREEN, null, null, null, null);
var slowTasks = new HealthIndicatorResult("slow_task_assignment", YELLOW, null, null, null, null);
var shardsAvailable = new HealthIndicatorResult("shards_availability", GREEN, null, null, null, null);
return List.of(networkLatency, slowTasks, shardsAvailable);
}
private String makeHealthStatusString(String key) {
return String.format(Locale.ROOT, "%s.%s.status", HealthPeriodicLogger.HEALTH_FIELD_PREFIX, key);
}
private HealthPeriodicLogger createAndInitHealthPeriodicLogger(
ClusterService clusterService,
HealthService testHealthService,
boolean enabled
) {
testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, clusterService, this.client, testHealthService);
testHealthPeriodicLogger.init();
if (enabled) {
clusterSettings.applySettings(Settings.builder().put(HealthPeriodicLogger.ENABLED_SETTING.getKey(), true).build());
}
return testHealthPeriodicLogger;
}
}