From 9aed799b51f53c0113d5bfc7ad3ab467a55fa080 Mon Sep 17 00:00:00 2001 From: Mary Gouseti Date: Tue, 18 Jul 2023 10:18:23 +0300 Subject: [PATCH] HealthPeriodicLogger disabled by default (#97722) Co-authored-by: Matt Culbreth --- .../health-diagnostic-settings.asciidoc | 4 + .../common/settings/ClusterSettings.java | 3 +- .../health/HealthPeriodicLogger.java | 41 ++++- .../health/HealthPeriodicLoggerTests.java | 149 +++++++++++------- 4 files changed, 127 insertions(+), 70 deletions(-) diff --git a/docs/reference/settings/health-diagnostic-settings.asciidoc b/docs/reference/settings/health-diagnostic-settings.asciidoc index e453e91acde5..9d39aee1e721 100644 --- a/docs/reference/settings/health-diagnostic-settings.asciidoc +++ b/docs/reference/settings/health-diagnostic-settings.asciidoc @@ -45,6 +45,10 @@ comprise its local health such as its disk usage. `health.ilm.max_retries_per_step`:: (<>) The minimum amount of times an index has retried by an {ilm-init} step before it is considered stagnant. Defaults to `100` +`health.periodic_logger.enabled`:: +(<>) Enables the health periodic logger, which logs the health statuses of each health indicator along with the top level one as observed by the Health API. +Defaults to `false`. + `health.periodic_logger.poll_interval`:: (<>, <>) How often {es} logs the health status of the cluster and of each health indicator as observed by the Health API. Defaults to `60s` (60 seconds). diff --git a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java index e21850d60f05..9159ecaa0d77 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java @@ -568,7 +568,8 @@ public final class ClusterSettings extends AbstractScopedSettings { TcpTransport.isUntrustedRemoteClusterEnabled() ? RemoteClusterPortSettings.TCP_NO_DELAY : null, TcpTransport.isUntrustedRemoteClusterEnabled() ? RemoteClusterPortSettings.TCP_REUSE_ADDRESS : null, TcpTransport.isUntrustedRemoteClusterEnabled() ? RemoteClusterPortSettings.TCP_SEND_BUFFER_SIZE : null, - HealthPeriodicLogger.HEALTH_PERIODIC_LOGGER_POLL_INTERVAL_SETTING, + HealthPeriodicLogger.POLL_INTERVAL_SETTING, + HealthPeriodicLogger.ENABLED_SETTING, DataStreamLifecycle.isEnabled() ? DataStreamLifecycle.CLUSTER_LIFECYCLE_DEFAULT_ROLLOVER_SETTING : null, IndicesClusterStateService.SHARD_LOCK_RETRY_INTERVAL_SETTING, IndicesClusterStateService.SHARD_LOCK_RETRY_TIMEOUT_SETTING, diff --git a/server/src/main/java/org/elasticsearch/health/HealthPeriodicLogger.java b/server/src/main/java/org/elasticsearch/health/HealthPeriodicLogger.java index 681d3477944e..4d50764aa0cc 100644 --- a/server/src/main/java/org/elasticsearch/health/HealthPeriodicLogger.java +++ b/server/src/main/java/org/elasticsearch/health/HealthPeriodicLogger.java @@ -42,15 +42,21 @@ import java.util.concurrent.atomic.AtomicBoolean; public class HealthPeriodicLogger implements ClusterStateListener, Closeable, SchedulerEngine.Listener { public static final String HEALTH_FIELD_PREFIX = "elasticsearch.health"; - public static final String HEALTH_PERIODIC_LOGGER_POLL_INTERVAL = "health.periodic_logger.poll_interval"; - public static final Setting HEALTH_PERIODIC_LOGGER_POLL_INTERVAL_SETTING = Setting.timeSetting( - HEALTH_PERIODIC_LOGGER_POLL_INTERVAL, + public static final Setting POLL_INTERVAL_SETTING = Setting.timeSetting( + "health.periodic_logger.poll_interval", TimeValue.timeValueSeconds(60), TimeValue.timeValueSeconds(15), Setting.Property.Dynamic, Setting.Property.NodeScope ); + public static final Setting ENABLED_SETTING = Setting.boolSetting( + "health.periodic_logger.enabled", + false, + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + /** * Name constant for the job HealthService schedules */ @@ -71,6 +77,7 @@ public class HealthPeriodicLogger implements ClusterStateListener, Closeable, Sc private final SetOnce scheduler = new SetOnce<>(); private volatile TimeValue pollInterval; + private volatile boolean enabled; private static final Logger logger = LogManager.getLogger(HealthPeriodicLogger.class); @@ -89,16 +96,19 @@ public class HealthPeriodicLogger implements ClusterStateListener, Closeable, Sc this.client = client; this.healthService = healthService; this.clock = Clock.systemUTC(); - this.pollInterval = HEALTH_PERIODIC_LOGGER_POLL_INTERVAL_SETTING.get(settings); + this.pollInterval = POLL_INTERVAL_SETTING.get(settings); + this.enabled = ENABLED_SETTING.get(settings); } /** * Initializer method to avoid the publication of a self reference in the constructor. */ public void init() { - clusterService.addListener(this); - clusterService.getClusterSettings() - .addSettingsUpdateConsumer(HEALTH_PERIODIC_LOGGER_POLL_INTERVAL_SETTING, this::updatePollInterval); + if (this.enabled) { + clusterService.addListener(this); + } + clusterService.getClusterSettings().addSettingsUpdateConsumer(ENABLED_SETTING, this::enable); + clusterService.getClusterSettings().addSettingsUpdateConsumer(POLL_INTERVAL_SETTING, this::updatePollInterval); } @Override @@ -137,7 +147,7 @@ public class HealthPeriodicLogger implements ClusterStateListener, Closeable, Sc @Override public void triggered(SchedulerEngine.Event event) { - if (event.getJobName().equals(HEALTH_PERIODIC_LOGGER_JOB_NAME)) { + if (event.getJobName().equals(HEALTH_PERIODIC_LOGGER_JOB_NAME) && this.enabled) { this.tryToLogHealth(); } } @@ -229,6 +239,10 @@ public class HealthPeriodicLogger implements ClusterStateListener, Closeable, Sc return; } + if (this.enabled == false) { + return; + } + // don't schedule the job if the node is shutting down if (isClusterServiceStoppedOrClosed()) { logger.trace( @@ -257,6 +271,17 @@ public class HealthPeriodicLogger implements ClusterStateListener, Closeable, Sc } } + private void enable(boolean enabled) { + this.enabled = enabled; + if (enabled) { + clusterService.addListener(this); + maybeScheduleJob(); + } else { + clusterService.removeListener(this); + maybeCancelJob(); + } + } + private void updatePollInterval(TimeValue newInterval) { this.pollInterval = newInterval; maybeScheduleJob(); diff --git a/server/src/test/java/org/elasticsearch/health/HealthPeriodicLoggerTests.java b/server/src/test/java/org/elasticsearch/health/HealthPeriodicLoggerTests.java index 9b21266a6128..846ed3e3021a 100644 --- a/server/src/test/java/org/elasticsearch/health/HealthPeriodicLoggerTests.java +++ b/server/src/test/java/org/elasticsearch/health/HealthPeriodicLoggerTests.java @@ -24,8 +24,8 @@ import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.scheduler.SchedulerEngine; import org.elasticsearch.common.settings.ClusterSettings; -import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.TimeValue; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.MockLogAppender; import org.elasticsearch.threadpool.TestThreadPool; @@ -34,7 +34,6 @@ import org.junit.After; import org.junit.Before; import java.util.ArrayList; -import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.Map; @@ -62,7 +61,12 @@ public class HealthPeriodicLoggerTests extends ESTestCase { private ClusterService clusterService; private HealthPeriodicLogger testHealthPeriodicLogger; - private ClusterService testClusterService; + private ClusterSettings clusterSettings; + private final DiscoveryNode node1 = DiscoveryNodeUtils.builder("node_1").roles(Set.of(DiscoveryNodeRole.MASTER_ROLE)).build(); + private final DiscoveryNode node2 = DiscoveryNodeUtils.builder("node_2") + .roles(Set.of(DiscoveryNodeRole.MASTER_ROLE, DiscoveryNodeRole.DATA_ROLE)) + .build(); + private ClusterState stateWithLocalHealthNode; private NodeClient getTestClient() { return mock(NodeClient.class); @@ -75,22 +79,15 @@ public class HealthPeriodicLoggerTests extends ESTestCase { @Before public void setupServices() { threadPool = new TestThreadPool(getTestName()); - - Set> builtInClusterSettings = new HashSet<>(ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); - builtInClusterSettings.add(HealthPeriodicLogger.HEALTH_PERIODIC_LOGGER_POLL_INTERVAL_SETTING); - ClusterSettings clusterSettings = new ClusterSettings(Settings.EMPTY, builtInClusterSettings); - this.clusterService = createClusterService(this.threadPool, clusterSettings); - + stateWithLocalHealthNode = ClusterStateCreationUtils.state(node2, node1, node2, new DiscoveryNode[] { node1, node2 }); + this.clusterSettings = new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + this.clusterService = createClusterService(stateWithLocalHealthNode, this.threadPool, clusterSettings); this.client = getTestClient(); - } @After public void cleanup() { clusterService.close(); - if (testClusterService != null) { - testClusterService.close(); - } if (testHealthPeriodicLogger != null) { testHealthPeriodicLogger.close(); } @@ -98,18 +95,6 @@ public class HealthPeriodicLoggerTests extends ESTestCase { client.close(); } - private List getTestIndicatorResults() { - var networkLatency = new HealthIndicatorResult("network_latency", GREEN, null, null, null, null); - var slowTasks = new HealthIndicatorResult("slow_task_assignment", YELLOW, null, null, null, null); - var shardsAvailable = new HealthIndicatorResult("shards_availability", GREEN, null, null, null, null); - - return List.of(networkLatency, slowTasks, shardsAvailable); - } - - private String makeHealthStatusString(String key) { - return String.format(Locale.ROOT, "%s.%s.status", HealthPeriodicLogger.HEALTH_FIELD_PREFIX, key); - } - public void testConvertToLoggedFields() { var results = getTestIndicatorResults(); var overallStatus = HealthStatus.merge(results.stream().map(HealthIndicatorResult::status)); @@ -137,52 +122,76 @@ public class HealthPeriodicLoggerTests extends ESTestCase { public void testHealthNodeIsSelected() { HealthService testHealthService = this.getMockedHealthService(); - - // create a cluster topology - final DiscoveryNode node1 = DiscoveryNodeUtils.builder("node_1").roles(Set.of(DiscoveryNodeRole.MASTER_ROLE)).build(); - final DiscoveryNode node2 = DiscoveryNodeUtils.builder("node_2") - .roles(Set.of(DiscoveryNodeRole.MASTER_ROLE, DiscoveryNodeRole.DATA_ROLE)) - .build(); - ClusterState current = ClusterStateCreationUtils.state(node2, node1, node2, new DiscoveryNode[] { node1, node2 }); - - testClusterService = createClusterService(current, this.threadPool); - testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, testClusterService, client, testHealthService); - testHealthPeriodicLogger.init(); + testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(clusterService, testHealthService, true); // test that it knows that it's not initially the health node assertFalse(testHealthPeriodicLogger.isHealthNode); // trigger a cluster change and recheck - testHealthPeriodicLogger.clusterChanged(new ClusterChangedEvent("test", current, ClusterState.EMPTY_STATE)); + testHealthPeriodicLogger.clusterChanged(new ClusterChangedEvent("test", stateWithLocalHealthNode, ClusterState.EMPTY_STATE)); assertTrue(testHealthPeriodicLogger.isHealthNode); } public void testJobScheduling() { HealthService testHealthService = this.getMockedHealthService(); - // create a cluster topology - final DiscoveryNode node1 = DiscoveryNodeUtils.builder("node_1").roles(Set.of(DiscoveryNodeRole.MASTER_ROLE)).build(); - final DiscoveryNode node2 = DiscoveryNodeUtils.builder("node_2") - .roles(Set.of(DiscoveryNodeRole.MASTER_ROLE, DiscoveryNodeRole.DATA_ROLE)) - .build(); - ClusterState current = ClusterStateCreationUtils.state(node2, node1, node2, new DiscoveryNode[] { node1, node2 }); + testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(clusterService, testHealthService, true); - testClusterService = createClusterService(current, this.threadPool); - testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, testClusterService, client, testHealthService); - testHealthPeriodicLogger.init(); - - testHealthPeriodicLogger.clusterChanged(new ClusterChangedEvent("test", current, ClusterState.EMPTY_STATE)); + testHealthPeriodicLogger.clusterChanged(new ClusterChangedEvent("test", stateWithLocalHealthNode, ClusterState.EMPTY_STATE)); assertTrue(testHealthPeriodicLogger.isHealthNode); SchedulerEngine scheduler = testHealthPeriodicLogger.getScheduler(); + assertNotNull(scheduler); assertTrue(scheduler.scheduledJobIds().contains(HealthPeriodicLogger.HEALTH_PERIODIC_LOGGER_JOB_NAME)); ClusterState noHealthNode = ClusterStateCreationUtils.state(node2, node1, new DiscoveryNode[] { node1, node2 }); - testHealthPeriodicLogger.clusterChanged(new ClusterChangedEvent("test", noHealthNode, current)); + testHealthPeriodicLogger.clusterChanged(new ClusterChangedEvent("test", noHealthNode, stateWithLocalHealthNode)); assertFalse(testHealthPeriodicLogger.isHealthNode); assertFalse(scheduler.scheduledJobIds().contains(HealthPeriodicLogger.HEALTH_PERIODIC_LOGGER_JOB_NAME)); } + public void testEnabled() { + HealthService testHealthService = this.getMockedHealthService(); + testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(clusterService, testHealthService, true); + + testHealthPeriodicLogger.clusterChanged(new ClusterChangedEvent("test", stateWithLocalHealthNode, ClusterState.EMPTY_STATE)); + assertTrue(testHealthPeriodicLogger.isHealthNode); + + // disable it and then verify that the job is gone + { + this.clusterSettings.applySettings(Settings.builder().put(HealthPeriodicLogger.ENABLED_SETTING.getKey(), false).build()); + assertFalse( + testHealthPeriodicLogger.getScheduler().scheduledJobIds().contains(HealthPeriodicLogger.HEALTH_PERIODIC_LOGGER_JOB_NAME) + ); + } + + // enable it and then verify that the job is created + { + this.clusterSettings.applySettings(Settings.builder().put(HealthPeriodicLogger.ENABLED_SETTING.getKey(), true).build()); + assertTrue( + testHealthPeriodicLogger.getScheduler().scheduledJobIds().contains(HealthPeriodicLogger.HEALTH_PERIODIC_LOGGER_JOB_NAME) + ); + } + } + + public void testUpdatePollInterval() { + HealthService testHealthService = this.getMockedHealthService(); + + testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(clusterService, testHealthService, false); + assertNull(testHealthPeriodicLogger.getScheduler()); + + testHealthPeriodicLogger.clusterChanged(new ClusterChangedEvent("test", stateWithLocalHealthNode, ClusterState.EMPTY_STATE)); + assertTrue(testHealthPeriodicLogger.isHealthNode); + + // verify that updating the polling interval doesn't schedule the job + { + this.clusterSettings.applySettings( + Settings.builder().put(HealthPeriodicLogger.POLL_INTERVAL_SETTING.getKey(), TimeValue.timeValueSeconds(15)).build() + ); + assertNull(testHealthPeriodicLogger.getScheduler()); + } + } + public void testTriggeredJobCallsTryToLogHealth() throws Exception { AtomicBoolean listenerCalled = new AtomicBoolean(false); AtomicBoolean failureCalled = new AtomicBoolean(false); @@ -199,8 +208,7 @@ public class HealthPeriodicLoggerTests extends ESTestCase { }; HealthService testHealthService = this.getMockedHealthService(); - testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, clusterService, client, testHealthService); - testHealthPeriodicLogger.init(); + testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(this.clusterService, testHealthService, true); HealthPeriodicLogger spyHealthPeriodicLogger = spy(testHealthPeriodicLogger); spyHealthPeriodicLogger.isHealthNode = true; @@ -221,8 +229,7 @@ public class HealthPeriodicLoggerTests extends ESTestCase { HealthService testHealthService = this.getMockedHealthService(); - testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, clusterService, client, testHealthService); - testHealthPeriodicLogger.init(); + testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(this.clusterService, testHealthService, true); HealthPeriodicLogger spyHealthPeriodicLogger = spy(testHealthPeriodicLogger); spyHealthPeriodicLogger.isHealthNode = true; @@ -266,8 +273,7 @@ public class HealthPeriodicLoggerTests extends ESTestCase { return null; }).when(testHealthService).getHealth(any(), isNull(), anyBoolean(), anyInt(), any()); - testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, clusterService, client, testHealthService); - testHealthPeriodicLogger.init(); + testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(this.clusterService, testHealthService, true); HealthPeriodicLogger spyHealthPeriodicLogger = spy(testHealthPeriodicLogger); spyHealthPeriodicLogger.isHealthNode = true; @@ -301,8 +307,7 @@ public class HealthPeriodicLoggerTests extends ESTestCase { return null; }).when(testHealthService).getHealth(any(), isNull(), anyBoolean(), anyInt(), any()); - testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, clusterService, client, testHealthService); - testHealthPeriodicLogger.init(); + testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(this.clusterService, testHealthService, true); HealthPeriodicLogger spyHealthPeriodicLogger = spy(testHealthPeriodicLogger); spyHealthPeriodicLogger.isHealthNode = true; @@ -329,8 +334,7 @@ public class HealthPeriodicLoggerTests extends ESTestCase { HealthService testHealthService = this.getMockedHealthService(); - testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, clusterService, client, testHealthService); - testHealthPeriodicLogger.init(); + testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(this.clusterService, testHealthService, true); HealthPeriodicLogger spyHealthPeriodicLogger = spy(testHealthPeriodicLogger); spyHealthPeriodicLogger.isHealthNode = true; @@ -398,8 +402,7 @@ public class HealthPeriodicLoggerTests extends ESTestCase { HealthService testHealthService = this.getMockedHealthService(); - testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, clusterService, client, testHealthService); - testHealthPeriodicLogger.init(); + testHealthPeriodicLogger = createAndInitHealthPeriodicLogger(this.clusterService, testHealthService, true); HealthPeriodicLogger spyHealthPeriodicLogger = spy(testHealthPeriodicLogger); spyHealthPeriodicLogger.isHealthNode = true; @@ -420,4 +423,28 @@ public class HealthPeriodicLoggerTests extends ESTestCase { } + private List getTestIndicatorResults() { + var networkLatency = new HealthIndicatorResult("network_latency", GREEN, null, null, null, null); + var slowTasks = new HealthIndicatorResult("slow_task_assignment", YELLOW, null, null, null, null); + var shardsAvailable = new HealthIndicatorResult("shards_availability", GREEN, null, null, null, null); + + return List.of(networkLatency, slowTasks, shardsAvailable); + } + + private String makeHealthStatusString(String key) { + return String.format(Locale.ROOT, "%s.%s.status", HealthPeriodicLogger.HEALTH_FIELD_PREFIX, key); + } + + private HealthPeriodicLogger createAndInitHealthPeriodicLogger( + ClusterService clusterService, + HealthService testHealthService, + boolean enabled + ) { + testHealthPeriodicLogger = new HealthPeriodicLogger(Settings.EMPTY, clusterService, this.client, testHealthService); + testHealthPeriodicLogger.init(); + if (enabled) { + clusterSettings.applySettings(Settings.builder().put(HealthPeriodicLogger.ENABLED_SETTING.getKey(), true).build()); + } + return testHealthPeriodicLogger; + } }