From e4c80c94e94deb759cf245daf725dfd80086b74a Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Thu, 15 Sep 2016 11:57:05 -0400 Subject: [PATCH] Convert more search docs to CONSOLE `profile.asciidoc` now runs all of its command but it doesn't validate all of the results. Writing the validation is time consuming so I only did some of it. --- docs/reference/search/profile.asciidoc | 174 +++++++++++++------- docs/reference/search/request/sort.asciidoc | 18 +- 2 files changed, 129 insertions(+), 63 deletions(-) diff --git a/docs/reference/search/profile.asciidoc b/docs/reference/search/profile.asciidoc index e2c22caf6f42..0a03b322858e 100644 --- a/docs/reference/search/profile.asciidoc +++ b/docs/reference/search/profile.asciidoc @@ -17,13 +17,17 @@ Any `_search` request can be profiled by adding a top-level `profile` parameter: [source,js] -------------------------------------------------- -curl -XGET 'localhost:9200/_search' -d '{ +GET /_search +{ "profile": true,<1> "query" : { - "match" : { "message" : "search test" } + "match" : { "message" : "message number" } } -}' +} -------------------------------------------------- +// CONSOLE +// TEST[setup:twitter] + <1> Setting the top-level `profile` parameter to `true` will enable profiling for the search @@ -40,70 +44,70 @@ This will yield the following result: "failed": 0 }, "hits": { - "total": 3, - "max_score": 1.078072, - "hits": [ ... ] <1> + "total": 4, + "max_score": 0.5093388, + "hits": [...] <1> }, "profile": { "shards": [ { - "id": "[2aE02wS1R8q_QFnYu6vDVQ][test][1]", + "id": "[2aE02wS1R8q_QFnYu6vDVQ][twitter][1]", "searches": [ { "query": [ { "type": "BooleanQuery", - "description": "message:search message:test", - "time": "15.52889800ms", + "description": "message:message message:number", + "time": "1.873811000ms", "breakdown": { - "score": 6352, - "score_count": 1, - "build_scorer": 1800776, + "score": 51306, + "score_count": 4, + "build_scorer": 2935582, "build_scorer_count": 1, "match": 0, "match_count": 0, - "create_weight": 667400, + "create_weight": 919297, "create_weight_count": 1, - "next_doc": 10563, - "next_doc_count": 2, + "next_doc": 53876, + "next_doc_count": 5, "advance": 0, "advance_count": 0 }, "children": [ { "type": "TermQuery", - "description": "message:search", - "time": "4.938855000ms", + "description": "message:message", + "time": "0.3919430000ms", "breakdown": { - "score": 0, - "score_count": 0, - "build_scorer": 3230, + "score": 28776, + "score_count": 4, + "build_scorer": 784451, "build_scorer_count": 1, "match": 0, "match_count": 0, - "create_weight": 415612, + "create_weight": 1669564, "create_weight_count": 1, - "next_doc": 0, - "next_doc_count": 0, + "next_doc": 10111, + "next_doc_count": 5, "advance": 0, "advance_count": 0 } }, { "type": "TermQuery", - "description": "message:test", - "time": "0.5016660000ms", + "description": "message:number", + "time": "0.2106820000ms", "breakdown": { - "score": 5014, - "score_count": 1, - "build_scorer": 1689333, + "score": 4552, + "score_count": 4, + "build_scorer": 42602, "build_scorer_count": 1, "match": 0, "match_count": 0, - "create_weight": 166587, + "create_weight": 89323, "create_weight_count": 1, - "next_doc": 5542, - "next_doc_count": 2, + "next_doc": 2852, + "next_doc_count": 5, "advance": 0, "advance_count": 0 } @@ -111,21 +115,44 @@ This will yield the following result: ] } ], - "rewrite_time": 870954, + "rewrite_time": 51443, "collector": [ { "name": "SimpleTopScoreDocCollector", "reason": "search_top_hits", - "time": "0.009783000000ms" + "time": 
"0.06989100000ms" } ] } - ] + ], + "aggregations": [] } ] } } -------------------------------------------------- +// TESTRESPONSE[s/"took": 25/"took": $body.took/] +// TESTRESPONSE[s/"hits": \[...\]/"hits": $body.hits.hits/] +// TESTRESPONSE[s/"id": "\[2aE02wS1R8q_QFnYu6vDVQ\]\[twitter\]\[1\]"/"id": $body.profile.shards.0.id/] +// TESTRESPONSE[s/"rewrite_time": 51443/"rewrite_time": $body.profile.shards.0.searches.0.rewrite_time/] +// TESTRESPONSE[s/"score": 51306/"score": $body.profile.shards.0.searches.0.query.0.breakdown.score/] +// TESTRESPONSE[s/"time": "1.873811000ms"/"time": $body.profile.shards.0.searches.0.query.0.time/] +// TESTRESPONSE[s/"build_scorer": 2935582/"build_scorer": $body.profile.shards.0.searches.0.query.0.breakdown.build_scorer/] +// TESTRESPONSE[s/"create_weight": 919297/"create_weight": $body.profile.shards.0.searches.0.query.0.breakdown.create_weight/] +// TESTRESPONSE[s/"next_doc": 53876/"next_doc": $body.profile.shards.0.searches.0.query.0.breakdown.next_doc/] +// TESTRESPONSE[s/"time": "0.3919430000ms"/"time": $body.profile.shards.0.searches.0.query.0.children.0.time/] +// TESTRESPONSE[s/"score": 28776/"score": $body.profile.shards.0.searches.0.query.0.children.0.breakdown.score/] +// TESTRESPONSE[s/"build_scorer": 784451/"build_scorer": $body.profile.shards.0.searches.0.query.0.children.0.breakdown.build_scorer/] +// TESTRESPONSE[s/"create_weight": 1669564/"create_weight": $body.profile.shards.0.searches.0.query.0.children.0.breakdown.create_weight/] +// TESTRESPONSE[s/"next_doc": 10111/"next_doc": $body.profile.shards.0.searches.0.query.0.children.0.breakdown.next_doc/] +// TESTRESPONSE[s/"time": "0.2106820000ms"/"time": $body.profile.shards.0.searches.0.query.0.children.1.time/] +// TESTRESPONSE[s/"score": 4552/"score": $body.profile.shards.0.searches.0.query.0.children.1.breakdown.score/] +// TESTRESPONSE[s/"build_scorer": 42602/"build_scorer": $body.profile.shards.0.searches.0.query.0.children.1.breakdown.build_scorer/] +// TESTRESPONSE[s/"create_weight": 89323/"create_weight": $body.profile.shards.0.searches.0.query.0.children.1.breakdown.create_weight/] +// TESTRESPONSE[s/"next_doc": 2852/"next_doc": $body.profile.shards.0.searches.0.query.0.children.1.breakdown.next_doc/] +// TESTRESPONSE[s/"time": "0.06989100000ms"/"time": $body.profile.shards.0.searches.0.collector.0.time/] +// Sorry for this mess.... + <1> Search results are returned, but were omitted here for brevity Even for a simple query, the response is relatively complicated. Let's break it down piece-by-piece before moving @@ -139,11 +166,11 @@ First, the overall structure of the profile response is as follows: "profile": { "shards": [ { - "id": "[2aE02wS1R8q_QFnYu6vDVQ][test][1]", <1> + "id": "[2aE02wS1R8q_QFnYu6vDVQ][twitter][1]", <1> "searches": [ { "query": [...], <2> - "rewrite_time": 870954, <3> + "rewrite_time": 51443, <3> "collector": [...] 
<4> } ], @@ -153,6 +180,12 @@ First, the overall structure of the profile response is as follows: } } -------------------------------------------------- +// TESTRESPONSE[s/"profile": /"took": $body.took, "timed_out": $body.timed_out, "_shards": $body._shards, "hits": $body.hits, "profile": /] +// TESTRESPONSE[s/"id": "\[2aE02wS1R8q_QFnYu6vDVQ\]\[twitter\]\[1\]"/"id": $body.profile.shards.0.id/] +// TESTRESPONSE[s/"query": \[...\]/"query": $body.profile.shards.0.searches.0.query/] +// TESTRESPONSE[s/"rewrite_time": 51443/"rewrite_time": $body.profile.shards.0.searches.0.rewrite_time/] +// TESTRESPONSE[s/"collector": \[...\]/"collector": $body.profile.shards.0.searches.0.collector/] +// TESTRESPONSE[s/"aggregations": \[...\]/"aggregations": []/] <1> A profile is returned for each shard that participated in the response, and is identified by a unique ID <2> Each profile contains a section which holds details about the query execution @@ -195,33 +228,38 @@ the `advance` phase of that query is the cause, for example. The `query` section contains detailed timing of the query tree executed by Lucene on a particular shard. The overall structure of this query tree will resemble your original Elasticsearch query, but may be slightly (or sometimes very) different. It will also use similar but not always identical naming. Using our previous -`term` query example, let's analyze the `query` section: +`match` query example, let's analyze the `query` section: [source,js] -------------------------------------------------- "query": [ { "type": "BooleanQuery", - "description": "message:search message:test", - "time": "15.52889800ms", + "description": "message:message message:number", + "time": "1.873811000ms", "breakdown": {...}, <1> "children": [ { "type": "TermQuery", - "description": "message:search", - "time": "4.938855000ms", + "description": "message:message", + "time": "0.3919430000ms", "breakdown": {...} }, { "type": "TermQuery", - "description": "message:test", - "time": "0.5016660000ms", + "description": "message:number", + "time": "0.2106820000ms", "breakdown": {...} } ] } ] -------------------------------------------------- +// TESTRESPONSE[s/^/{\n"took": $body.took,\n"timed_out": $body.timed_out,\n"_shards": $body._shards,\n"hits": $body.hits,\n"profile": {\n"shards": [ {\n"id": "$body.profile.shards.0.id",\n"searches": [{\n/] +// TESTRESPONSE[s/]$/],"rewrite_time": $body.profile.shards.0.searches.0.rewrite_time, "collector": $body.profile.shards.0.searches.0.collector}], "aggregations": []}]}}/] +// TESTRESPONSE[s/"time": "1.873811000ms",\n.+"breakdown": \{...\}/"time": $body.profile.shards.0.searches.0.query.0.time, "breakdown": $body.profile.shards.0.searches.0.query.0.breakdown/] +// TESTRESPONSE[s/"time": "0.3919430000ms",\n.+"breakdown": \{...\}/"time": $body.profile.shards.0.searches.0.query.0.children.0.time, "breakdown": $body.profile.shards.0.searches.0.query.0.children.0.breakdown/] +// TESTRESPONSE[s/"time": "0.2106820000ms",\n.+"breakdown": \{...\}/"time": $body.profile.shards.0.searches.0.query.0.children.1.time, "breakdown": $body.profile.shards.0.searches.0.query.0.children.1.breakdown/] <1> The breakdown timings are omitted for simplicity Based on the profile structure, we can see that our `match` query was rewritten by Lucene into a BooleanQuery with two @@ -245,20 +283,27 @@ The `"breakdown"` component lists detailed timing statistics about low-level Luc [source,js] -------------------------------------------------- "breakdown": { - "score": 5014, - "score_count": 1, - 
"build_scorer": 1689333, - "build_scorer_count": 1, - "match": 0, - "match_count": 0, - "create_weight": 166587, - "create_weight_count": 1, - "next_doc": 5542, - "next_doc_count": 2, - "advance": 0, - "advance_count": 0 + "score": 51306, + "score_count": 4, + "build_scorer": 2935582, + "build_scorer_count": 1, + "match": 0, + "match_count": 0, + "create_weight": 919297, + "create_weight_count": 1, + "next_doc": 53876, + "next_doc_count": 5, + "advance": 0, + "advance_count": 0 } -------------------------------------------------- +// TESTRESPONSE[s/^/{\n"took": $body.took,\n"timed_out": $body.timed_out,\n"_shards": $body._shards,\n"hits": $body.hits,\n"profile": {\n"shards": [ {\n"id": "$body.profile.shards.0.id",\n"searches": [{\n"query": [{\n"type": "BooleanQuery",\n"description": "message:message message:number",\n"time": $body.profile.shards.0.searches.0.query.0.time,/] +// TESTRESPONSE[s/}$/},\n"children": $body.profile.shards.0.searches.0.query.0.children}],\n"rewrite_time": $body.profile.shards.0.searches.0.rewrite_time, "collector": $body.profile.shards.0.searches.0.collector}], "aggregations": []}]}}/] +// TESTRESPONSE[s/"score": 51306/"score": $body.profile.shards.0.searches.0.query.0.breakdown.score/] +// TESTRESPONSE[s/"time": "1.873811000ms"/"time": $body.profile.shards.0.searches.0.query.0.time/] +// TESTRESPONSE[s/"build_scorer": 2935582/"build_scorer": $body.profile.shards.0.searches.0.query.0.breakdown.build_scorer/] +// TESTRESPONSE[s/"create_weight": 919297/"create_weight": $body.profile.shards.0.searches.0.query.0.breakdown.create_weight/] +// TESTRESPONSE[s/"next_doc": 53876/"next_doc": $body.profile.shards.0.searches.0.query.0.breakdown.next_doc/] Timings are listed in wall-clock nanoseconds and are not normalized at all. All caveats about the overall `"time"` apply here. The intention of the breakdown is to give you a feel for A) what machinery in Lucene is @@ -348,10 +393,13 @@ Looking at the previous example: { "name": "SimpleTopScoreDocCollector", "reason": "search_top_hits", - "time": "2.206529000ms" + "time": "0.06989100000ms" } ] -------------------------------------------------- +// TESTRESPONSE[s/^/{\n"took": $body.took,\n"timed_out": $body.timed_out,\n"_shards": $body._shards,\n"hits": $body.hits,\n"profile": {\n"shards": [ {\n"id": "$body.profile.shards.0.id",\n"searches": [{\n"query": $body.profile.shards.0.searches.0.query,\n"rewrite_time": $body.profile.shards.0.searches.0.rewrite_time,/] +// TESTRESPONSE[s/]$/]}], "aggregations": []}]}}/] +// TESTRESPONSE[s/"time": "0.06989100000ms"/"time": $body.profile.shards.0.searches.0.collector.0.time/] We see a single collector named `SimpleTopScoreDocCollector`. This is the default "scoring and sorting" Collector used by Elasticsearch. The `"reason"` field attempts to give a plain english description of the class name. 
The @@ -473,6 +521,8 @@ GET /test/_search } } -------------------------------------------------- +// CONSOLE +// TEST[s/^/PUT test\n/] This example has: @@ -509,7 +559,7 @@ And the response: "create_weight_count": 1, "build_scorer": 377872, "build_scorer_count": 1, - "advance": 0 + "advance": 0, "advance_count": 0 } }, @@ -528,7 +578,7 @@ And the response: "create_weight_count": 1, "build_scorer": 112551, "build_scorer_count": 1, - "advance": 0 + "advance": 0, "advance_count": 0 } } @@ -578,7 +628,7 @@ And the response: "create_weight_count": 1, "build_scorer": 38310, "build_scorer_count": 1, - "advance": 0 + "advance": 0, "advance_count": 0 } } @@ -640,7 +690,7 @@ the following example aggregations request: [source,js] -------------------------------------------------- -curl -XGET "http://localhost:9200/house-prices/_search" -d' +GET /house-prices/_search { "profile": true, "size": 0, @@ -658,8 +708,10 @@ curl -XGET "http://localhost:9200/house-prices/_search" -d' } } } -}' +} -------------------------------------------------- +// CONSOLE +// TEST[s/^/PUT house-prices\n/] Which yields the following aggregation profile output diff --git a/docs/reference/search/request/sort.asciidoc b/docs/reference/search/request/sort.asciidoc index 85c5d1e675fe..d3bbe283a2de 100644 --- a/docs/reference/search/request/sort.asciidoc +++ b/docs/reference/search/request/sort.asciidoc @@ -351,9 +351,23 @@ Multiple geo points can be passed as an array containing any `geo_point` format, [source,js] -------------------------------------------------- -"pin.location" : [[-70, 40], [-71, 42]] -"pin.location" : [{"lat": 40, "lon": -70}, {"lat": 42, "lon": -71}] +GET /_search +{ + "sort" : [ + { + "_geo_distance" : { + "pin.location" : [[-70, 40], [-71, 42]], + "order" : "asc", + "unit" : "km" + } + } + ], + "query" : { + "term" : { "user" : "kimchy" } + } +} -------------------------------------------------- +// CONSOLE and so forth.
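The object-based `geo_point` form shown in the removed inline snippet still applies when sorting by distance to multiple points. As a sketch only (assuming the same `pin.location` field and `user` query as in the converted example above, and not part of this patch), the request could equally be written with `lat`/`lon` objects:

[source,js]
--------------------------------------------------
GET /_search
{
  "sort" : [
    {
      "_geo_distance" : {
        "pin.location" : [{"lat": 40, "lon": -70}, {"lat": 42, "lon": -71}],
        "order" : "asc",
        "unit" : "km"
      }
    }
  ],
  "query" : {
    "term" : { "user" : "kimchy" }
  }
}
--------------------------------------------------
// CONSOLE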