Return 200 OK response code for a cluster health timeout (#78968)

Returning 408 for a cluster health timeout was deprecated in #78180 and backported to 7.x in #78940

Now we can do a breaking change in 8.0 respecting the user choice to run ES in 7.x compatible mode via the REST Compatibility layer.

Fixes #70849
This commit is contained in:
Artem Prigoda 2021-11-06 19:46:27 +01:00 committed by GitHub
parent 733381bed2
commit a2c3daead1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 110 additions and 38 deletions

View file

@ -334,14 +334,9 @@ public class ClusterClientIT extends ESRestHighLevelClientTestCase {
assertThat(response, notNullValue());
assertThat(response.isTimedOut(), equalTo(true));
assertThat(response.status(), equalTo(RestStatus.REQUEST_TIMEOUT));
assertThat(response.status(), equalTo(RestStatus.OK));
assertThat(response.getStatus(), equalTo(ClusterHealthStatus.RED));
assertNoIndices(response);
assertCriticalWarnings(
"The HTTP status code for a cluster health timeout will be changed from 408 to 200 in a "
+ "future version. Set the [return_200_for_cluster_health_timeout] query parameter to [true] to suppress this message and "
+ "opt in to the future behaviour now."
);
}
public void testRemoteInfo() throws Exception {

20
docs/changelog/78968.yaml Normal file
View file

@ -0,0 +1,20 @@
pr: 78968
summary: HTTP Status code has changed for the Cluster Health API in case of a server timeout
area: CRUD
type: breaking
issues: []
breaking:
title: HTTP Status code has changed for the Cluster Health API in case of a server timeout
area: API
details: |-
The cluster health API includes options for waiting
for certain health conditions to be satisfied. If the requested conditions are
not satisfied within a timeout then ES will send back a normal response
including the field `"timed_out": true`. In earlier versions it would also use
the HTTP response code `408 Request timeout` if the request timed out, and `200
OK` otherwise. The `408 Request timeout` response code is not appropriate for
this situation, so from version 8.0.0 ES will use the response code `200 OK`
for both cases.
impact: |-
To detect a server timeout, check the `timed_out` field of the JSON response.
notable: true

View file

@ -32,4 +32,21 @@ time out.
*Impact* +
Do not set `cluster.join.timeout` in your `elasticsearch.yml` file.
====
.HTTP Status code has changed for the Cluster Health API in case of a server timeout.
[%collapsible]
====
*Details* +
The {ref}/cluster-health.html[cluster health API] includes options for waiting
for certain health conditions to be satisfied. If the requested conditions are
not satisfied within a timeout then {es} will send back a normal response
including the field `"timed_out": true`. In earlier versions it would also use
the HTTP response code `408 Request timeout` if the request timed out, and `200
OK` otherwise. The `408 Request timeout` response code is not appropriate for
this situation, so from version 8.0.0 {es} will use the response code `200 OK`
for both cases.
*Impact* +
To detect a server timeout, check the `timed_out` field of the JSON response.
====
// end::notable-breaking-changes[]

View file

@ -1,7 +1,9 @@
---
"cluster health request timeout on waiting for nodes":
- skip:
version: " - 8.0.99"
reason: "Set for 7.99.99 when back-ported to 8.0"
- do:
catch: request_timeout
cluster.health:
wait_for_nodes: 10
timeout: 1ms
@ -19,8 +21,10 @@
---
"cluster health request timeout waiting for active shards":
- skip:
version: " - 8.0.99"
reason: "Set for 7.99.99 when back-ported to 8.0"
- do:
catch: request_timeout
cluster.health:
timeout: 1ms
wait_for_active_shards: 5

View file

@ -16,11 +16,10 @@ import org.elasticsearch.cluster.health.ClusterStateHealth;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.xcontent.StatusToXContentObject;
import org.elasticsearch.core.RestApiVersion;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.rest.action.search.RestSearchAction;
import org.elasticsearch.xcontent.ConstructingObjectParser;
import org.elasticsearch.xcontent.ObjectParser;
import org.elasticsearch.xcontent.ParseField;
@ -57,7 +56,6 @@ public class ClusterHealthResponse extends ActionResponse implements StatusToXCo
private static final String INITIALIZING_SHARDS = "initializing_shards";
private static final String UNASSIGNED_SHARDS = "unassigned_shards";
private static final String INDICES = "indices";
private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(RestSearchAction.class);
private static final ConstructingObjectParser<ClusterHealthResponse, Void> PARSER = new ConstructingObjectParser<>(
"cluster_health_response",
@ -122,12 +120,6 @@ public class ClusterHealthResponse extends ActionResponse implements StatusToXCo
XContentParser parser,
Void context,
String index) -> ClusterIndexHealth.innerFromXContent(parser, index);
static final String ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY = "return_200_for_cluster_health_timeout";
static final String CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG = "The HTTP status code for a cluster health timeout "
+ "will be changed from 408 to 200 in a future version. Set the ["
+ ES_CLUSTER_HEALTH_REQUEST_TIMEOUT_200_KEY
+ "] "
+ "query parameter to [true] to suppress this message and opt in to the future behaviour now.";
static {
// ClusterStateHealth fields
@ -351,15 +343,16 @@ public class ClusterHealthResponse extends ActionResponse implements StatusToXCo
@Override
public RestStatus status() {
if (isTimedOut() == false) {
return RestStatus.OK;
}
if (return200ForClusterHealthTimeout) {
return RestStatus.OK;
} else {
deprecationLogger.compatibleCritical("cluster_health_request_timeout", CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG);
return status(RestApiVersion.current());
}
@Override
public RestStatus status(RestApiVersion restApiVersion) {
// Legacy behaviour
if (isTimedOut() && restApiVersion == RestApiVersion.V_7 && return200ForClusterHealthTimeout == false) {
return RestStatus.REQUEST_TIMEOUT;
}
return RestStatus.OK;
}
@Override

View file

@ -7,6 +7,7 @@
*/
package org.elasticsearch.common.xcontent;
import org.elasticsearch.core.RestApiVersion;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.xcontent.ToXContentObject;
@ -20,4 +21,8 @@ public interface StatusToXContentObject extends ToXContentObject {
* Returns the REST status to make sure it is returned correctly
*/
RestStatus status();
default RestStatus status(RestApiVersion restApiVersion) {
return status();
}
}

View file

@ -44,7 +44,7 @@ public class RestStatusToXContentListener<Response extends StatusToXContentObjec
public RestResponse buildResponse(Response response, XContentBuilder builder) throws Exception {
assert response.isFragment() == false; // would be nice if we could make default methods final
response.toXContent(builder, channel.request());
RestResponse restResponse = new BytesRestResponse(response.status(), builder);
RestResponse restResponse = new BytesRestResponse(response.status(builder.getRestApiVersion()), builder);
if (RestStatus.CREATED == restResponse.status()) {
final String location = extractLocation.apply(response);
if (location != null) {

View file

@ -15,9 +15,12 @@ import org.elasticsearch.client.node.NodeClient;
import org.elasticsearch.cluster.health.ClusterHealthStatus;
import org.elasticsearch.common.Priority;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.logging.DeprecationCategory;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.rest.BaseRestHandler;
import org.elasticsearch.rest.RestRequest;
import org.elasticsearch.rest.action.RestStatusToXContentListener;
import org.elasticsearch.rest.action.search.RestSearchAction;
import java.io.IOException;
import java.util.Collections;
@ -30,6 +33,12 @@ import static org.elasticsearch.rest.RestRequest.Method.GET;
public class RestClusterHealthAction extends BaseRestHandler {
private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(RestSearchAction.class);
private static final String RETURN_200_FOR_CLUSTER_HEALTH_TIMEOUT = "return_200_for_cluster_health_timeout";
private static final String CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG = "the ["
+ RETURN_200_FOR_CLUSTER_HEALTH_TIMEOUT
+ "] parameter is deprecated and will be removed in a future release.";
@Override
public List<Route> routes() {
return List.of(new Route(GET, "/_cluster/health"), new Route(GET, "/_cluster/health/{index}"));
@ -81,9 +90,15 @@ public class RestClusterHealthAction extends BaseRestHandler {
if (request.param("wait_for_events") != null) {
clusterHealthRequest.waitForEvents(Priority.valueOf(request.param("wait_for_events").toUpperCase(Locale.ROOT)));
}
clusterHealthRequest.return200ForClusterHealthTimeout(
request.paramAsBoolean("return_200_for_cluster_health_timeout", clusterHealthRequest.doesReturn200ForClusterHealthTimeout())
);
String return200ForClusterHealthTimeout = request.param(RETURN_200_FOR_CLUSTER_HEALTH_TIMEOUT);
if (return200ForClusterHealthTimeout != null) {
deprecationLogger.warn(
DeprecationCategory.API,
"cluster_health_request_timeout",
CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG
);
}
clusterHealthRequest.return200ForClusterHealthTimeout(Boolean.parseBoolean(return200ForClusterHealthTimeout));
return clusterHealthRequest;
}

View file

@ -20,6 +20,7 @@ import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.RestApiVersion;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.test.AbstractSerializingTestCase;
@ -43,16 +44,11 @@ import static org.hamcrest.Matchers.lessThanOrEqualTo;
public class ClusterHealthResponsesTests extends AbstractSerializingTestCase<ClusterHealthResponse> {
private final ClusterHealthRequest.Level level = randomFrom(ClusterHealthRequest.Level.values());
public void testIsTimeout() {
public void testIsTimeoutReturns200ByDefault() {
ClusterHealthResponse res = new ClusterHealthResponse("", Strings.EMPTY_ARRAY, ClusterState.EMPTY_STATE, false);
for (int i = 0; i < 5; i++) {
res.setTimedOut(randomBoolean());
if (res.isTimedOut()) {
assertEquals(RestStatus.REQUEST_TIMEOUT, res.status());
assertCriticalWarnings(ClusterHealthResponse.CLUSTER_HEALTH_REQUEST_TIMEOUT_DEPRECATION_MSG);
} else {
assertEquals(RestStatus.OK, res.status());
}
assertEquals(RestStatus.OK, res.status(RestApiVersion.V_8));
}
}
@ -60,7 +56,27 @@ public class ClusterHealthResponsesTests extends AbstractSerializingTestCase<Clu
ClusterHealthResponse res = new ClusterHealthResponse("", Strings.EMPTY_ARRAY, ClusterState.EMPTY_STATE, true);
for (int i = 0; i < 5; i++) {
res.setTimedOut(randomBoolean());
assertEquals(RestStatus.OK, res.status());
assertEquals(RestStatus.OK, res.status(RestApiVersion.V_8));
}
}
public void testTimeoutReturns200InIfOptedInV7CompatibilityMode() {
ClusterHealthResponse res = new ClusterHealthResponse("", Strings.EMPTY_ARRAY, ClusterState.EMPTY_STATE, true);
for (int i = 0; i < 5; i++) {
res.setTimedOut(randomBoolean());
assertEquals(RestStatus.OK, res.status(RestApiVersion.V_7));
}
}
public void testTimeoutReturns408InV7CompatibilityMode() {
ClusterHealthResponse res = new ClusterHealthResponse("", Strings.EMPTY_ARRAY, ClusterState.EMPTY_STATE, false);
for (int i = 0; i < 5; i++) {
res.setTimedOut(randomBoolean());
if (res.isTimedOut()) {
assertEquals(RestStatus.REQUEST_TIMEOUT, res.status(RestApiVersion.V_7));
} else {
assertEquals(RestStatus.OK, res.status(RestApiVersion.V_7));
}
}
}

View file

@ -8,6 +8,7 @@
package org.elasticsearch.rest.action.admin.cluster;
import org.apache.logging.log4j.Level;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthRequest;
import org.elasticsearch.action.support.ActiveShardCount;
import org.elasticsearch.cluster.health.ClusterHealthStatus;
@ -68,6 +69,14 @@ public class RestClusterHealthActionTests extends ESTestCase {
assertThat(clusterHealthRequest.waitForNodes(), equalTo(waitForNodes));
assertThat(clusterHealthRequest.waitForEvents(), equalTo(waitForEvents));
assertThat(clusterHealthRequest.doesReturn200ForClusterHealthTimeout(), equalTo(requestTimeout200));
assertWarnings(
true,
new DeprecationWarning(
Level.WARN,
"the [return_200_for_cluster_health_timeout] parameter is deprecated and will be removed in a future release."
)
);
}
private FakeRestRequest buildRestRequest(Map<String, String> params) {

View file

@ -126,7 +126,6 @@ teardown:
# this is a hacky way to sleep for 5s, since we will never have 10 nodes
- do:
catch: request_timeout
cluster.health:
wait_for_nodes: 10
timeout: "5s"
@ -286,7 +285,6 @@ teardown:
# this is a hacky way to sleep for 5s, since we will never have 10 nodes
- do:
catch: request_timeout
cluster.health:
wait_for_nodes: 10
timeout: "5s"