mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-04-25 07:37:19 -04:00
269 lines
7.5 KiB
Text
269 lines
7.5 KiB
Text
[[common-log-format-example]]
|
||
== Example: Parse logs in the Common Log Format
|
||
++++
|
||
<titleabbrev>Example: Parse logs</titleabbrev>
|
||
++++
|
||
|
||
In this tutorial, you’ll use an <<ingest,ingest pipeline>> to parse
|
||
server logs in the {wikipedia}/Common_Log_Format[Common Log Format] before
|
||
indexing. Before starting, check the <<ingest-prerequisites,prerequisites>> for
|
||
ingest pipelines.
|
||
|
||
The logs you want to parse look similar to this:
|
||
|
||
[source,log]
|
||
----
|
||
212.87.37.154 - - [30/May/2099:16:21:15 +0000] "GET /favicon.ico HTTP/1.1"
|
||
200 3638 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6)
|
||
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36"
|
||
----
|
||
// NOTCONSOLE
|
||
|
||
These logs contain a timestamp, IP address, and user agent. You want to give
|
||
these three items their own field in {es} for faster searches and
|
||
visualizations. You also want to know where the request is coming from.
|
||
|
||
. In {kib}, open the main menu and click **Stack Management** > **Ingest Node
|
||
Pipelines**.
|
||
+
|
||
[role="screenshot"]
|
||
image::images/ingest/ingest-pipeline-list.png[Kibana's Ingest Node Pipelines list view,align="center"]
|
||
|
||
. Click **Create pipeline**.
|
||
. Provide a name and description for the pipeline.
|
||
. Add a <<grok-processor,grok processor>> to parse the log message:
|
||
|
||
.. Click **Add a processor** and select the **Grok** processor type.
|
||
.. Set **Field** to `message` and **Patterns** to the following
|
||
<<grok-basics,grok pattern>>:
|
||
+
|
||
[source,grok]
|
||
----
|
||
%{IPORHOST:source.ip} %{USER:user.id} %{USER:user.name} \[%{HTTPDATE:@timestamp}\] "%{WORD:http.request.method} %{DATA:url.original} HTTP/%{NUMBER:http.version}" %{NUMBER:http.response.status_code:int} (?:-|%{NUMBER:http.response.body.bytes:int}) %{QS:http.request.referrer} %{QS:user_agent}
|
||
----
|
||
// NOTCONSOLE
|
||
+
|
||
.. Click **Add** to save the processor.
|
||
.. Set the processor description to `Extract fields from 'message'`.
|
||
|
||
. Add processors for the timestamp, IP address, and user agent fields. Configure
|
||
the processors as follows:
|
||
+
|
||
--
|
||
|
||
[options="header"]
|
||
|====
|
||
| Processor type | Field | Additional options | Description
|
||
|
||
| <<date-processor,**Date**>>
|
||
| `@timestamp`
|
||
| **Formats**: `dd/MMM/yyyy:HH:mm:ss Z`
|
||
| `Format '@timestamp' as 'dd/MMM/yyyy:HH:mm:ss Z'`
|
||
|
||
| <<geoip-processor,**GeoIP**>>
|
||
| `source.ip`
|
||
| **Target field**: `source.geo`
|
||
| `Add 'source.geo' GeoIP data for 'source.ip'`
|
||
|
||
| <<user-agent-processor,**User agent**>>
|
||
| `user_agent`
|
||
|
|
||
| `Extract fields from 'user_agent'`
|
||
|====
|
||
|
||
Your form should look similar to this:
|
||
|
||
[role="screenshot"]
|
||
image::images/ingest/ingest-pipeline-processor.png[Processors for Ingest Node Pipelines,align="center"]
|
||
|
||
The four processors will run sequentially: +
|
||
Grok > Date > GeoIP > User agent +
|
||
You can reorder processors using the arrow icons.
|
||
|
||
Alternatively, you can click the **Import processors** link and define the
|
||
processors as JSON:
|
||
|
||
[source,js]
|
||
----
|
||
{
|
||
include::common-log-format-example.asciidoc[tag=common-log-pipeline]
|
||
}
|
||
----
|
||
// NOTCONSOLE
|
||
|
||
////
|
||
[source,console]
|
||
----
|
||
PUT _ingest/pipeline/my-pipeline
|
||
{
|
||
// tag::common-log-pipeline[]
|
||
"processors": [
|
||
{
|
||
"grok": {
|
||
"description": "Extract fields from 'message'",
|
||
"field": "message",
|
||
"patterns": ["%{IPORHOST:source.ip} %{USER:user.id} %{USER:user.name} \\[%{HTTPDATE:@timestamp}\\] \"%{WORD:http.request.method} %{DATA:url.original} HTTP/%{NUMBER:http.version}\" %{NUMBER:http.response.status_code:int} (?:-|%{NUMBER:http.response.body.bytes:int}) %{QS:http.request.referrer} %{QS:user_agent}"]
|
||
}
|
||
},
|
||
{
|
||
"date": {
|
||
"description": "Format '@timestamp' as 'dd/MMM/yyyy:HH:mm:ss Z'",
|
||
"field": "@timestamp",
|
||
"formats": [ "dd/MMM/yyyy:HH:mm:ss Z" ]
|
||
}
|
||
},
|
||
{
|
||
"geoip": {
|
||
"description": "Add 'source.geo' GeoIP data for 'source.ip'",
|
||
"field": "source.ip",
|
||
"target_field": "source.geo"
|
||
}
|
||
},
|
||
{
|
||
"user_agent": {
|
||
"description": "Extract fields from 'user_agent'",
|
||
"field": "user_agent"
|
||
}
|
||
}
|
||
]
|
||
// end::common-log-pipeline[]
|
||
}
|
||
----
|
||
////
|
||
--
|
||
|
||
. To test the pipeline, click **Add documents**.
|
||
|
||
. In the **Documents** tab, provide a sample document for testing:
|
||
+
|
||
[source,js]
|
||
----
|
||
[
|
||
{
|
||
"_source": {
|
||
"message": "212.87.37.154 - - [05/May/2099:16:21:15 +0000] \"GET /favicon.ico HTTP/1.1\" 200 3638 \"-\" \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36\""
|
||
}
|
||
}
|
||
]
|
||
----
|
||
// NOTCONSOLE
|
||
|
||
. Click **Run the pipeline** and verify the pipeline worked as expected.
|
||
|
||
. If everything looks correct, close the panel, and then click **Create
|
||
pipeline**.
|
||
+
|
||
You’re now ready to index the log data into a <<data-streams,data stream>>.
|
||
|
||
. Create an <<index-templates,index template>> with
|
||
<<create-index-template,data stream enabled>>.
|
||
+
|
||
[source,console]
|
||
----
|
||
PUT _index_template/my-data-stream-template
|
||
{
|
||
"index_patterns": [ "my-data-stream*" ],
|
||
"data_stream": { },
|
||
"priority": 500
|
||
}
|
||
----
|
||
// TEST[continued]
|
||
|
||
. Index a document with the pipeline you created.
|
||
+
|
||
[source,console]
|
||
----
|
||
POST my-data-stream/_doc?pipeline=my-pipeline
|
||
{
|
||
"message": "212.87.37.154 - - [05/May/2099:16:21:15 +0000] \"GET /favicon.ico HTTP/1.1\" 200 3638 \"-\" \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36\""
|
||
}
|
||
----
|
||
// TEST[s/my-pipeline/my-pipeline&refresh=wait_for/]
|
||
// TEST[continued]
|
||
|
||
. To verify, search the data stream to retrieve the document. The following
|
||
search uses <<common-options-response-filtering,`filter_path`>> to return only
|
||
the <<mapping-source-field,document source>>.
|
||
+
|
||
--
|
||
[source,console]
|
||
----
|
||
GET my-data-stream/_search?filter_path=hits.hits._source
|
||
----
|
||
// TEST[continued]
|
||
|
||
The API returns:
|
||
|
||
[source,console-result]
|
||
----
|
||
{
|
||
"hits": {
|
||
"hits": [
|
||
{
|
||
"_source": {
|
||
"@timestamp": "2099-05-05T16:21:15.000Z",
|
||
"http": {
|
||
"request": {
|
||
"referrer": "\"-\"",
|
||
"method": "GET"
|
||
},
|
||
"response": {
|
||
"status_code": 200,
|
||
"body": {
|
||
"bytes": 3638
|
||
}
|
||
},
|
||
"version": "1.1"
|
||
},
|
||
"source": {
|
||
"ip": "212.87.37.154",
|
||
"geo": {
|
||
"continent_name": "Europe",
|
||
"region_iso_code": "DE-BE",
|
||
"city_name": "Berlin",
|
||
"country_iso_code": "DE",
|
||
"country_name": "Germany",
|
||
"region_name": "Land Berlin",
|
||
"location": {
|
||
"lon": 13.4978,
|
||
"lat": 52.411
|
||
}
|
||
}
|
||
},
|
||
"message": "212.87.37.154 - - [05/May/2099:16:21:15 +0000] \"GET /favicon.ico HTTP/1.1\" 200 3638 \"-\" \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36\"",
|
||
"url": {
|
||
"original": "/favicon.ico"
|
||
},
|
||
"user": {
|
||
"name": "-",
|
||
"id": "-"
|
||
},
|
||
"user_agent": {
|
||
"original": "\"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36\"",
|
||
"os": {
|
||
"name": "Mac OS X",
|
||
"version": "10.11.6",
|
||
"full": "Mac OS X 10.11.6"
|
||
},
|
||
"name": "Chrome",
|
||
"device": {
|
||
"name": "Mac"
|
||
},
|
||
"version": "52.0.2743.116"
|
||
}
|
||
}
|
||
}
|
||
]
|
||
}
|
||
}
|
||
----
|
||
--
|
||
|
||
////
|
||
[source,console]
|
||
----
|
||
DELETE _data_stream/*
|
||
DELETE _index_template/*
|
||
----
|
||
// TEST[continued]
|
||
////
|