[chapter]
[[getting-started]]
= Quick start

This guide helps you learn how to:

* Install and run {es} in a test environment
* Add data to {es}
* Search and sort data
* Extract fields from unstructured content during a search

[discrete]
[[run-elasticsearch]]
=== Step 1. Run {es}

The simplest way to set up {es} is to create a managed deployment with {ess} on
{ecloud}. If you prefer to manage your own test environment, you can install and
run {es} using Docker.

include::{es-repo-dir}/tab-widgets/code.asciidoc[]
include::{es-repo-dir}/tab-widgets/quick-start-install-widget.asciidoc[]

[discrete]
[[send-requests-to-elasticsearch]]
=== Step 2. Send requests to {es}

You send data and other requests to {es} using REST APIs. This lets you interact
with {es} using any client that sends HTTP requests, such as
https://curl.se[curl]. You can also use {kib}'s console to send requests to
{es}.

include::{es-repo-dir}/tab-widgets/api-call-widget.asciidoc[]

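Once you're connected, you can verify that {es} is responding with a simple
request. This minimal sketch calls the cluster root endpoint, which returns
basic information about your cluster, such as its name and version:

[source,console]
----
GET /
----
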
[discrete]
[[add-data]]
=== Step 3. Add data

You add data to {es} as JSON objects called documents. {es} stores these
documents in searchable indices.

For time series data, such as logs and metrics, you typically add documents to a
data stream made up of multiple auto-generated backing indices.

A data stream requires an index template that matches its name. {es} uses this
template to configure the stream's backing indices. Documents sent to a data
stream must have a `@timestamp` field.

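You can inspect such a template before indexing anything. As a sketch, this
assumes the built-in `logs` index template, which matches the `logs-*-*`
pattern used in the next step:

[source,console]
----
GET _index_template/logs
----
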
[discrete]
[[add-single-document]]
==== Add a single document

Submit the following indexing request to add a single log entry to the
`logs-my_app-default` data stream. Since `logs-my_app-default` doesn't exist, the
request automatically creates it using the built-in `logs-*-*` index template.

[source,console]
----
POST logs-my_app-default/_doc
{
  "@timestamp": "2099-05-06T16:21:15.000Z",
  "event": {
    "original": "192.0.2.42 - - [06/May/2099:16:21:15 +0000] \"GET /images/bg.jpg HTTP/1.0\" 200 24736"
  }
}
----
// TEST[s/_doc/_doc?refresh=wait_for/]

The response includes metadata that {es} generates for the document:

* The backing `_index` that contains the document. {es} automatically generates
the names of backing indices.
* A unique `_id` for the document within the index.

[source,console-result]
----
{
  "_index": ".ds-logs-my_app-default-2099-05-06-000001",
  "_id": "gl5MJXMBMk1dGnErnBW8",
  "_version": 1,
  "result": "created",
  "_shards": {
    "total": 2,
    "successful": 1,
    "failed": 0
  },
  "_seq_no": 0,
  "_primary_term": 1
}
----
// TESTRESPONSE[s/"_index": ".ds-logs-my_app-default-2099-05-06-000001"/"_index": $body._index/]
// TESTRESPONSE[s/"_id": "gl5MJXMBMk1dGnErnBW8"/"_id": $body._id/]

[discrete]
[[add-multiple-documents]]
==== Add multiple documents

To add multiple documents in one request, use the bulk API. Bulk data must be
newline-delimited JSON (NDJSON). Each line must end in a newline character
(`\n`), including the last line.

[source,console]
----
PUT logs-my_app-default/_bulk
{ "create": { } }
{ "@timestamp": "2099-05-07T16:24:32.000Z", "event": { "original": "192.0.2.242 - - [07/May/2099:16:24:32 +0000] \"GET /images/hm_nbg.jpg HTTP/1.0\" 304 0" } }
{ "create": { } }
{ "@timestamp": "2099-05-08T16:25:42.000Z", "event": { "original": "192.0.2.255 - - [08/May/2099:16:25:42 +0000] \"GET /favicon.ico HTTP/1.0\" 200 3638" } }
----
// TEST[continued]
// TEST[s/_bulk/_bulk?refresh=wait_for/]

[discrete]
[[qs-search-data]]
=== Step 4. Search data

Indexed documents are available for search in near real-time. To search your
data stream, use the search API.

The following search matches all log entries in `logs-my_app-default` and
sorts them by `@timestamp` in descending order.

[source,console]
----
GET logs-my_app-default/_search
{
  "query": {
    "match_all": { }
  },
  "sort": [
    {
      "@timestamp": "desc"
    }
  ]
}
----
// TEST[continued]

By default, the `hits` section of the response includes up to the first 10
documents that match the search. The `_source` of each hit contains the original
JSON object submitted during indexing.

[source,console-result]
----
{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 3,
      "relation": "eq"
    },
    "max_score": null,
    "hits": [
      {
        "_index": ".ds-logs-my_app-default-2099-05-06-000001",
        "_id": "PdjWongB9KPnaVm2IyaL",
        "_score": null,
        "_source": {
          "@timestamp": "2099-05-08T16:25:42.000Z",
          "event": {
            "original": "192.0.2.255 - - [08/May/2099:16:25:42 +0000] \"GET /favicon.ico HTTP/1.0\" 200 3638"
          }
        },
        "sort": [
          4081940742000
        ]
      },
      ...
    ]
  }
}
----
// TESTRESPONSE[s/"took": 2/"took": $body.took/]
// TESTRESPONSE[s/"_index": ".ds-logs-my_app-default-2099-05-06-000001"/"_index": $body.hits.hits.0._index/]
// TESTRESPONSE[s/"_id": "PdjWongB9KPnaVm2IyaL"/"_id": $body.hits.hits.0._id/]
// TESTRESPONSE[s/\.\.\./$body.hits.hits.1,$body.hits.hits.2/]

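To retrieve a different number of hits than the default 10, set the `size`
parameter. A minimal sketch that returns only the most recent log entry:

[source,console]
----
GET logs-my_app-default/_search
{
  "size": 1,
  "sort": [
    {
      "@timestamp": "desc"
    }
  ]
}
----
// TEST[continued]
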
[discrete]
[[get-specific-fields]]
==== Get specific fields

Parsing the entire `_source` is unwieldy for large documents. To exclude it from
the response, set the `_source` parameter to `false`. Instead, use the `fields`
parameter to retrieve the fields you want.

[source,console]
----
GET logs-my_app-default/_search
{
  "query": {
    "match_all": { }
  },
  "fields": [
    "@timestamp"
  ],
  "_source": false,
  "sort": [
    {
      "@timestamp": "desc"
    }
  ]
}
----
// TEST[continued]

The response contains each hit's `fields` values as a flat array.

[source,console-result]
----
{
  "took": 8,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": {
      "value": 3,
      "relation": "eq"
    },
    "max_score": null,
    "hits": [
      {
        "_index": ".ds-logs-my_app-default-2099-05-06-000001",
        "_id": "PdjWongB9KPnaVm2IyaL",
        "_score": null,
        "fields": {
          "@timestamp": [
            "2099-05-08T16:25:42.000Z"
          ]
        },
        "sort": [
          4081940742000
        ]
      },
      ...
    ]
  }
}
----
// TESTRESPONSE[s/"took": 8/"took": $body.took/]
// TESTRESPONSE[s/"_index": ".ds-logs-my_app-default-2099-05-06-000001"/"_index": $body.hits.hits.0._index/]
// TESTRESPONSE[s/"_id": "PdjWongB9KPnaVm2IyaL"/"_id": $body.hits.hits.0._id/]
// TESTRESPONSE[s/\.\.\./$body.hits.hits.1,$body.hits.hits.2/]

[discrete]
[[search-date-range]]
==== Search a date range

To search across a specific time or IP range, use a `range` query.

[source,console]
----
GET logs-my_app-default/_search
{
  "query": {
    "range": {
      "@timestamp": {
        "gte": "2099-05-05",
        "lt": "2099-05-08"
      }
    }
  },
  "fields": [
    "@timestamp"
  ],
  "_source": false,
  "sort": [
    {
      "@timestamp": "desc"
    }
  ]
}
----
// TEST[continued]

You can use date math to define relative time ranges. The following query
searches for data from the past day, which won't match any log entries in
`logs-my_app-default`.

[source,console]
----
GET logs-my_app-default/_search
{
  "query": {
    "range": {
      "@timestamp": {
        "gte": "now-1d/d",
        "lt": "now/d"
      }
    }
  },
  "fields": [
    "@timestamp"
  ],
  "_source": false,
  "sort": [
    {
      "@timestamp": "desc"
    }
  ]
}
----
// TEST[continued]

[discrete]
[[extract-fields]]
==== Extract fields from unstructured content

You can extract <<runtime-search-request,runtime fields>> from unstructured
content, such as log messages, during a search.

Use the following search to extract the `source.ip` runtime field from
`event.original`. To include it in the response, add `source.ip` to the `fields`
parameter.

[source,console]
----
GET logs-my_app-default/_search
{
  "runtime_mappings": {
    "source.ip": {
      "type": "ip",
      "script": """
        String sourceip = grok('%{IPORHOST:sourceip} .*').extract(doc["event.original"].value)?.sourceip;
        if (sourceip != null) emit(sourceip);
      """
    }
  },
  "query": {
    "range": {
      "@timestamp": {
        "gte": "2099-05-05",
        "lt": "2099-05-08"
      }
    }
  },
  "fields": [
    "@timestamp",
    "source.ip"
  ],
  "_source": false,
  "sort": [
    {
      "@timestamp": "desc"
    }
  ]
}
----
// TEST[continued]

[discrete]
[[combine-queries]]
==== Combine queries

You can use the `bool` query to combine multiple queries. The following search
combines two `range` queries: one on `@timestamp` and one on the `source.ip`
runtime field.

[source,console]
----
GET logs-my_app-default/_search
{
  "runtime_mappings": {
    "source.ip": {
      "type": "ip",
      "script": """
        String sourceip = grok('%{IPORHOST:sourceip} .*').extract(doc["event.original"].value)?.sourceip;
        if (sourceip != null) emit(sourceip);
      """
    }
  },
  "query": {
    "bool": {
      "filter": [
        {
          "range": {
            "@timestamp": {
              "gte": "2099-05-05",
              "lt": "2099-05-08"
            }
          }
        },
        {
          "range": {
            "source.ip": {
              "gte": "192.0.2.0",
              "lte": "192.0.2.240"
            }
          }
        }
      ]
    }
  },
  "fields": [
    "@timestamp",
    "source.ip"
  ],
  "_source": false,
  "sort": [
    {
      "@timestamp": "desc"
    }
  ]
}
----
// TEST[continued]

[discrete]
[[aggregate-data]]
==== Aggregate data

Use aggregations to summarize data as metrics, statistics, or other analytics.

The following search uses an aggregation to calculate the
`average_response_size` using the `http.response.body.bytes` runtime field. The
aggregation only runs on documents that match the `query`.

[source,console]
----
GET logs-my_app-default/_search
{
  "runtime_mappings": {
    "http.response.body.bytes": {
      "type": "long",
      "script": """
        String bytes = grok('%{COMMONAPACHELOG}').extract(doc["event.original"].value)?.bytes;
        if (bytes != null) emit(Integer.parseInt(bytes));
      """
    }
  },
  "aggs": {
    "average_response_size": {
      "avg": {
        "field": "http.response.body.bytes"
      }
    }
  },
  "query": {
    "bool": {
      "filter": [
        {
          "range": {
            "@timestamp": {
              "gte": "2099-05-05",
              "lt": "2099-05-08"
            }
          }
        }
      ]
    }
  },
  "fields": [
    "@timestamp",
    "http.response.body.bytes"
  ],
  "_source": false,
  "sort": [
    {
      "@timestamp": "desc"
    }
  ]
}
----
// TEST[continued]

The response's `aggregations` object contains aggregation results.

[source,console-result]
----
{
  ...
  "aggregations": {
    "average_response_size": {
      "value": 12368.0
    }
  }
}
----
// TESTRESPONSE[s/\.\.\./"took": "$body.took", "timed_out": false, "_shards": "$body._shards", "hits": "$body.hits",/]

[discrete]
[[explore-more-search-options]]
==== Explore more search options

To keep exploring, index more data to your data stream and check out
<<common-search-options>>.

[discrete]
[[clean-up]]
=== Step 5. Clean up

When you're done, delete your test data stream and its backing indices.

[source,console]
----
DELETE _data_stream/logs-my_app-default
----
// TEST[continued]

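To confirm the deletion, you can request the data stream again. This check,
sketched here, should now return a `404` error:

[source,console]
----
GET _data_stream/logs-my_app-default
----
// TEST[continued]
// TEST[catch:missing]
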
You can also delete your test deployment.

include::{es-repo-dir}/tab-widgets/quick-start-cleanup-widget.asciidoc[]

[discrete]
[[whats-next]]
=== What's next?

* Get the most out of your time series data by setting up data tiers and
{ilm-init}. See <<use-elasticsearch-for-time-series-data>>.

* Use {fleet} and {agent} to collect logs and metrics directly from your data
sources and send them to {es}. See the
{fleet-guide}/fleet-quick-start.html[{fleet} quick start guide].

* Use {kib} to explore, visualize, and manage your {es} data. See the
{kibana-ref}/get-started.html[{kib} quick start guide].