mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-06-28 17:34:17 -04:00
Propagate status codes from shard failures appropriately (#118016)
* Return 502 if the underlying error is `NodeNotConnectedException` * Traverse through the cause stack trace and check for `NodeNotConnectedException` and undo `status()` override in `NodeDisconnectedException` * Rewrite `while` condition * Fix: precommit * Let status codes propagate rather than walking the stacktrace explicitly Walking the stacktrace explicitly and looking for a specific error (node connection-related errors in this case) is a workaround rather than a proper fix. Instead, let the status codes propagate all the way to the top so that they can be reported as-is. * Fix: unused import * Fix null deref * Do not map descendants of `ConnectTransportException` to `502` * Fix: precommit * Do not account for 4xx status codes 4xx status codes are not likely to appear along with 5xx status codes. As a result, we do not need to account for them when looking at shard failures' status codes. * Remove unnecessary `switch` case In reference to the previous commit: this case is no longer needed. * Rewrite code comment * Address review comments * [CI] Auto commit changes from spotless * Update docs/changelog/118016.yaml --------- Co-authored-by: elasticsearchmachine <infra-root+elasticsearchmachine@elastic.co>
This commit is contained in:
parent
c662aecac3
commit
22990df7f2
3 changed files with 74 additions and 7 deletions
6
docs/changelog/118016.yaml
Normal file
6
docs/changelog/118016.yaml
Normal file
|
@ -0,0 +1,6 @@
|
|||
pr: 118016
|
||||
summary: Propagate status codes from shard failures appropriately
|
||||
area: Search
|
||||
type: enhancement
|
||||
issues:
|
||||
- 118482
|
|
@ -73,15 +73,21 @@ public class SearchPhaseExecutionException extends ElasticsearchException {
|
|||
// on coordinator node. so get the status from cause instead of returning SERVICE_UNAVAILABLE blindly
|
||||
return getCause() == null ? RestStatus.SERVICE_UNAVAILABLE : ExceptionsHelper.status(getCause());
|
||||
}
|
||||
RestStatus status = shardFailures[0].status();
|
||||
if (shardFailures.length > 1) {
|
||||
for (int i = 1; i < shardFailures.length; i++) {
|
||||
if (shardFailures[i].status().getStatus() >= RestStatus.INTERNAL_SERVER_ERROR.getStatus()) {
|
||||
status = shardFailures[i].status();
|
||||
}
|
||||
RestStatus status = null;
|
||||
for (ShardSearchFailure shardFailure : shardFailures) {
|
||||
RestStatus shardStatus = shardFailure.status();
|
||||
int statusCode = shardStatus.getStatus();
|
||||
|
||||
// Return if it's an error that can be retried.
|
||||
// These currently take precedence over other status code(s).
|
||||
if (statusCode >= 502 && statusCode <= 504) {
|
||||
return shardStatus;
|
||||
} else if (statusCode >= 500) {
|
||||
status = shardStatus;
|
||||
}
|
||||
}
|
||||
return status;
|
||||
|
||||
return status == null ? shardFailures[0].status() : status;
|
||||
}
|
||||
|
||||
public ShardSearchFailure[] shardFailures() {
|
||||
|
|
|
@ -22,6 +22,7 @@ import org.elasticsearch.indices.InvalidIndexTemplateException;
|
|||
import org.elasticsearch.rest.RestStatus;
|
||||
import org.elasticsearch.search.SearchShardTarget;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
import org.elasticsearch.transport.NodeDisconnectedException;
|
||||
import org.elasticsearch.xcontent.ToXContent;
|
||||
import org.elasticsearch.xcontent.XContent;
|
||||
import org.elasticsearch.xcontent.XContentParser;
|
||||
|
@ -31,6 +32,7 @@ import java.io.IOException;
|
|||
|
||||
import static org.hamcrest.CoreMatchers.hasItem;
|
||||
import static org.hamcrest.Matchers.hasSize;
|
||||
import static org.hamcrest.Matchers.is;
|
||||
|
||||
public class SearchPhaseExecutionExceptionTests extends ESTestCase {
|
||||
|
||||
|
@ -168,4 +170,57 @@ public class SearchPhaseExecutionExceptionTests extends ESTestCase {
|
|||
|
||||
assertEquals(actual.status(), RestStatus.BAD_REQUEST);
|
||||
}
|
||||
|
||||
public void testOnlyWithCodesThatDoNotRequirePrecedence() {
|
||||
int pickedIndex = randomIntBetween(0, 1);
|
||||
|
||||
// Pick one of these exceptions randomly.
|
||||
var searchExceptions = new ElasticsearchException[] {
|
||||
new ElasticsearchException("simulated"),
|
||||
new NodeDisconnectedException(null, "unused message", "unused action", null) };
|
||||
|
||||
// Status codes that map to searchExceptions.
|
||||
var expectedStatusCodes = new RestStatus[] { RestStatus.INTERNAL_SERVER_ERROR, RestStatus.BAD_GATEWAY };
|
||||
|
||||
ShardSearchFailure shardFailure1 = new ShardSearchFailure(
|
||||
searchExceptions[pickedIndex],
|
||||
new SearchShardTarget("nodeID", new ShardId("someIndex", "someUUID", 1), null)
|
||||
);
|
||||
|
||||
ShardSearchFailure shardFailure2 = new ShardSearchFailure(
|
||||
searchExceptions[pickedIndex],
|
||||
new SearchShardTarget("nodeID", new ShardId("someIndex", "someUUID", 2), null)
|
||||
);
|
||||
|
||||
SearchPhaseExecutionException ex = new SearchPhaseExecutionException(
|
||||
"search",
|
||||
"all shards failed",
|
||||
new ShardSearchFailure[] { shardFailure1, shardFailure2 }
|
||||
);
|
||||
|
||||
assertThat(ex.status(), is(expectedStatusCodes[pickedIndex]));
|
||||
}
|
||||
|
||||
public void testWithRetriableCodesThatTakePrecedence() {
|
||||
// Maps to a 500.
|
||||
ShardSearchFailure shardFailure1 = new ShardSearchFailure(
|
||||
new ElasticsearchException("simulated"),
|
||||
new SearchShardTarget("nodeID", new ShardId("someIndex", "someUUID", 1), null)
|
||||
);
|
||||
|
||||
// Maps to a 502.
|
||||
ShardSearchFailure shardFailure2 = new ShardSearchFailure(
|
||||
new NodeDisconnectedException(null, "unused message", "unused action", null),
|
||||
new SearchShardTarget("nodeID", new ShardId("someIndex", "someUUID", 2), null)
|
||||
);
|
||||
|
||||
SearchPhaseExecutionException ex = new SearchPhaseExecutionException(
|
||||
"search",
|
||||
"all shards failed",
|
||||
new ShardSearchFailure[] { shardFailure1, shardFailure2 }
|
||||
);
|
||||
|
||||
// The 502 takes precedence over 500.
|
||||
assertThat(ex.status(), is(RestStatus.BAD_GATEWAY));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue