mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-06-29 01:44:36 -04:00
Graph: fix race condition in timeout (#88946)
Previously `graph` checked whether the request had timed out, then spent some time doing work, then passed the remaining timeout on to the next request — over and over again. It is quite possible for the request not to have timed out at the first check but to have timed out by the second one. This manifests as the timeout sent to the next hop being a negative number of milliseconds, which we don't allow. This commit fixes the race by moving the timeout check to the same spot where the remaining time is read for setting the timeout on the next request — we simply check whether it's `> 0` to detect a timeout. This does keep the request running slightly longer after it has officially timed out, but only long enough to prepare the next layer of request — usually microseconds — which should be fine. Closes #55396
This commit is contained in:
parent
f1071cab36
commit
09d00259f4
3 changed files with 23 additions and 28 deletions
6
docs/changelog/88946.yaml
Normal file
6
docs/changelog/88946.yaml
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
pr: 88946
|
||||||
|
summary: "Graph: fix race condition in timeout"
|
||||||
|
area: Graph
|
||||||
|
type: bug
|
||||||
|
issues:
|
||||||
|
- 55396
|
|
@ -222,19 +222,18 @@ public class GraphTests extends ESSingleNodeTestCase {
|
||||||
assertNull("Elvis is a 3rd tier connection so should not be returned here", response.getVertex(Vertex.createId("people", "elvis")));
|
assertNull("Elvis is a 3rd tier connection so should not be returned here", response.getVertex(Vertex.createId("people", "elvis")));
|
||||||
}
|
}
|
||||||
|
|
||||||
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/55396")
|
|
||||||
public void testTimedoutQueryCrawl() {
|
public void testTimedoutQueryCrawl() {
|
||||||
GraphExploreRequestBuilder grb = new GraphExploreRequestBuilder(client(), GraphExploreAction.INSTANCE).setIndices("test");
|
GraphExploreRequestBuilder grb = new GraphExploreRequestBuilder(client(), GraphExploreAction.INSTANCE).setIndices("test");
|
||||||
grb.setTimeout(TimeValue.timeValueMillis(400));
|
grb.setTimeout(TimeValue.timeValueMillis(400));
|
||||||
Hop hop1 = grb.createNextHop(QueryBuilders.termQuery("description", "beatles"));
|
Hop hop1 = grb.createNextHop(QueryBuilders.termQuery("description", "beatles"));
|
||||||
hop1.addVertexRequest("people").size(10).minDocCount(1); // members of beatles
|
hop1.addVertexRequest("people").size(10).minDocCount(1); // members of beatles
|
||||||
// 00s friends of beatles
|
|
||||||
grb.createNextHop(QueryBuilders.termQuery("decade", "00s")).addVertexRequest("people").size(100).minDocCount(1);
|
|
||||||
// A query that should cause a timeout
|
// A query that should cause a timeout
|
||||||
ScriptQueryBuilder timeoutQuery = QueryBuilders.scriptQuery(
|
ScriptQueryBuilder timeoutQuery = QueryBuilders.scriptQuery(
|
||||||
new Script(ScriptType.INLINE, "mockscript", "graph_timeout", Collections.emptyMap())
|
new Script(ScriptType.INLINE, "mockscript", "graph_timeout", Collections.emptyMap())
|
||||||
);
|
);
|
||||||
grb.createNextHop(timeoutQuery).addVertexRequest("people").size(100).minDocCount(1);
|
grb.createNextHop(timeoutQuery).addVertexRequest("people").size(100).minDocCount(1);
|
||||||
|
// 00s friends of beatles
|
||||||
|
grb.createNextHop(QueryBuilders.termQuery("decade", "00s")).addVertexRequest("people").size(100).minDocCount(1);
|
||||||
|
|
||||||
GraphExploreResponse response = grb.get();
|
GraphExploreResponse response = grb.get();
|
||||||
assertTrue(response.isTimedOut());
|
assertTrue(response.isTimedOut());
|
||||||
|
|
|
@ -6,6 +6,8 @@
|
||||||
*/
|
*/
|
||||||
package org.elasticsearch.xpack.graph.action;
|
package org.elasticsearch.xpack.graph.action;
|
||||||
|
|
||||||
|
import org.apache.logging.log4j.LogManager;
|
||||||
|
import org.apache.logging.log4j.Logger;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.PriorityQueue;
|
import org.apache.lucene.util.PriorityQueue;
|
||||||
|
@ -61,13 +63,13 @@ import java.util.Map.Entry;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.SortedSet;
|
import java.util.SortedSet;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Performs a series of elasticsearch queries and aggregations to explore
|
* Performs a series of elasticsearch queries and aggregations to explore
|
||||||
* connected terms in a single index.
|
* connected terms in a single index.
|
||||||
*/
|
*/
|
||||||
public class TransportGraphExploreAction extends HandledTransportAction<GraphExploreRequest, GraphExploreResponse> {
|
public class TransportGraphExploreAction extends HandledTransportAction<GraphExploreRequest, GraphExploreResponse> {
|
||||||
|
private static final Logger logger = LogManager.getLogger(TransportGraphExploreAction.class);
|
||||||
|
|
||||||
private final ThreadPool threadPool;
|
private final ThreadPool threadPool;
|
||||||
private final NodeClient client;
|
private final NodeClient client;
|
||||||
|
@ -115,7 +117,6 @@ public class TransportGraphExploreAction extends HandledTransportAction<GraphExp
|
||||||
private final ActionListener<GraphExploreResponse> listener;
|
private final ActionListener<GraphExploreResponse> listener;
|
||||||
|
|
||||||
private final long startTime;
|
private final long startTime;
|
||||||
private final AtomicBoolean timedOut;
|
|
||||||
private volatile ShardOperationFailedException[] shardFailures;
|
private volatile ShardOperationFailedException[] shardFailures;
|
||||||
private Map<VertexId, Vertex> vertices = new HashMap<>();
|
private Map<VertexId, Vertex> vertices = new HashMap<>();
|
||||||
private Map<ConnectionId, Connection> connections = new HashMap<>();
|
private Map<ConnectionId, Connection> connections = new HashMap<>();
|
||||||
|
@ -128,7 +129,6 @@ public class TransportGraphExploreAction extends HandledTransportAction<GraphExp
|
||||||
this.request = request;
|
this.request = request;
|
||||||
this.listener = listener;
|
this.listener = listener;
|
||||||
this.startTime = threadPool.relativeTimeInMillis();
|
this.startTime = threadPool.relativeTimeInMillis();
|
||||||
this.timedOut = new AtomicBoolean(false);
|
|
||||||
this.shardFailures = ShardSearchFailure.EMPTY_ARRAY;
|
this.shardFailures = ShardSearchFailure.EMPTY_ARRAY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -173,16 +173,11 @@ public class TransportGraphExploreAction extends HandledTransportAction<GraphExp
|
||||||
* connections
|
* connections
|
||||||
*/
|
*/
|
||||||
synchronized void expand() {
|
synchronized void expand() {
|
||||||
if (hasTimedOut()) {
|
|
||||||
timedOut.set(true);
|
|
||||||
listener.onResponse(buildResponse());
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
Map<String, Set<Vertex>> lastHopFindings = hopFindings.get(currentHopNumber);
|
Map<String, Set<Vertex>> lastHopFindings = hopFindings.get(currentHopNumber);
|
||||||
if ((currentHopNumber >= (request.getHopNumbers() - 1)) || (lastHopFindings == null) || (lastHopFindings.size() == 0)) {
|
if ((currentHopNumber >= (request.getHopNumbers() - 1)) || (lastHopFindings == null) || (lastHopFindings.size() == 0)) {
|
||||||
// Either we gathered no leads from the last hop or we have
|
// Either we gathered no leads from the last hop or we have
|
||||||
// reached the final hop
|
// reached the final hop
|
||||||
listener.onResponse(buildResponse());
|
listener.onResponse(buildResponse(false));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
Hop lastHop = request.getHop(currentHopNumber);
|
Hop lastHop = request.getHop(currentHopNumber);
|
||||||
|
@ -318,16 +313,22 @@ public class TransportGraphExploreAction extends HandledTransportAction<GraphExp
|
||||||
// Execute the search
|
// Execute the search
|
||||||
SearchSourceBuilder source = new SearchSourceBuilder().query(rootBool).aggregation(sampleAgg).size(0);
|
SearchSourceBuilder source = new SearchSourceBuilder().query(rootBool).aggregation(sampleAgg).size(0);
|
||||||
if (request.timeout() != null) {
|
if (request.timeout() != null) {
|
||||||
source.timeout(TimeValue.timeValueMillis(timeRemainingMillis()));
|
// Actual resolution of timer is granularity of the interval
|
||||||
|
// configured globally for updating estimated time.
|
||||||
|
long timeRemainingMillis = startTime + request.timeout().millis() - threadPool.relativeTimeInMillis();
|
||||||
|
if (timeRemainingMillis <= 0) {
|
||||||
|
listener.onResponse(buildResponse(true));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
source.timeout(TimeValue.timeValueMillis(timeRemainingMillis));
|
||||||
}
|
}
|
||||||
searchRequest.source(source);
|
searchRequest.source(source);
|
||||||
|
|
||||||
// System.out.println(source);
|
|
||||||
logger.trace("executing expansion graph search request");
|
logger.trace("executing expansion graph search request");
|
||||||
client.search(searchRequest, new ActionListener.Delegating<>(listener) {
|
client.search(searchRequest, new ActionListener.Delegating<>(listener) {
|
||||||
@Override
|
@Override
|
||||||
public void onResponse(SearchResponse searchResponse) {
|
public void onResponse(SearchResponse searchResponse) {
|
||||||
// System.out.println(searchResponse);
|
|
||||||
addShardFailures(searchResponse.getShardFailures());
|
addShardFailures(searchResponse.getShardFailures());
|
||||||
|
|
||||||
ArrayList<Connection> newConnections = new ArrayList<Connection>();
|
ArrayList<Connection> newConnections = new ArrayList<Connection>();
|
||||||
|
@ -676,7 +677,6 @@ public class TransportGraphExploreAction extends HandledTransportAction<GraphExp
|
||||||
source.timeout(request.timeout());
|
source.timeout(request.timeout());
|
||||||
}
|
}
|
||||||
searchRequest.source(source);
|
searchRequest.source(source);
|
||||||
// System.out.println(source);
|
|
||||||
logger.trace("executing initial graph search request");
|
logger.trace("executing initial graph search request");
|
||||||
client.search(searchRequest, new ActionListener.Delegating<>(listener) {
|
client.search(searchRequest, new ActionListener.Delegating<>(listener) {
|
||||||
@Override
|
@Override
|
||||||
|
@ -774,16 +774,6 @@ public class TransportGraphExploreAction extends HandledTransportAction<GraphExp
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean hasTimedOut() {
|
|
||||||
return request.timeout() != null && (timeRemainingMillis() <= 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
long timeRemainingMillis() {
|
|
||||||
// Actual resolution of timer is granularity of the interval
|
|
||||||
// configured globally for updating estimated time.
|
|
||||||
return (startTime + request.timeout().millis()) - threadPool.relativeTimeInMillis();
|
|
||||||
}
|
|
||||||
|
|
||||||
void addShardFailures(ShardOperationFailedException[] failures) {
|
void addShardFailures(ShardOperationFailedException[] failures) {
|
||||||
if (CollectionUtils.isEmpty(failures) == false) {
|
if (CollectionUtils.isEmpty(failures) == false) {
|
||||||
ShardOperationFailedException[] duplicates = new ShardOperationFailedException[shardFailures.length + failures.length];
|
ShardOperationFailedException[] duplicates = new ShardOperationFailedException[shardFailures.length + failures.length];
|
||||||
|
@ -793,9 +783,9 @@ public class TransportGraphExploreAction extends HandledTransportAction<GraphExp
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected GraphExploreResponse buildResponse() {
|
protected GraphExploreResponse buildResponse(boolean timedOut) {
|
||||||
long took = threadPool.relativeTimeInMillis() - startTime;
|
long took = threadPool.relativeTimeInMillis() - startTime;
|
||||||
return new GraphExploreResponse(took, timedOut.get(), shardFailures, vertices, connections, request.returnDetailedInfo());
|
return new GraphExploreResponse(took, timedOut, shardFailures, vertices, connections, request.returnDetailedInfo());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue