mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-04-25 07:37:19 -04:00
* Remove `es-test-dir` book-scoped variable * Remove `plugins-examples-dir` book-scoped variable * Remove `:dependencies-dir:` and `:xes-repo-dir:` book-scoped variables - In `index.asciidoc`, two variables (`:dependencies-dir:` and `:xes-repo-dir:`) were removed. - In `sql/index.asciidoc`, the `:sql-tests:` path was updated to fuller path - In `esql/index.asciidoc`, the `:esql-tests:` path was updated idem * Replace `es-repo-dir` with `es-ref-dir` * Move `:include-xpack: true` to few files that use it, remove from index.asciidoc
292 lines
11 KiB
Text
292 lines
11 KiB
Text
[role="xpack"]
|
|
[[find-message-structure]]
|
|
= Find messages structure API
|
|
|
|
Finds the structure of a list of text messages.
|
|
|
|
[discrete]
|
|
[[find-message-structure-request]]
|
|
== {api-request-title}
|
|
|
|
`GET _text_structure/find_message_structure` +
|
|
`POST _text_structure/find_message_structure`
|
|
|
|
[discrete]
|
|
[[find-message-structure-prereqs]]
|
|
== {api-prereq-title}
|
|
|
|
* If the {es} {security-features} are enabled, you must have `monitor_text_structure` or
|
|
`monitor` cluster privileges to use this API. See
|
|
<<security-privileges>>.
|
|
|
|
[discrete]
|
|
[[find-message-structure-desc]]
|
|
== {api-description-title}
|
|
|
|
This API provides a starting point for ingesting data into {es} in a format that
|
|
is suitable for subsequent use with other {stack} functionality. Use this
|
|
API in preference to `find_structure` when your input text has already been
|
|
split up into separate messages by some other process.
|
|
|
|
The response from the API contains:
|
|
|
|
* Sample messages.
|
|
* Statistics that reveal the most common values for all fields detected within
|
|
the text and basic numeric statistics for numeric fields.
|
|
* Information about the structure of the text, which is useful when you write
|
|
ingest configurations to index it or similarly formatted text.
|
|
* Appropriate mappings for an {es} index, which you could use to ingest the text.
|
|
|
|
All this information can be calculated by the structure finder with no guidance.
|
|
However, you can optionally override some of the decisions about the text
|
|
structure by specifying one or more query parameters.
|
|
|
|
Details of the output can be seen in the <<find-message-structure-examples,examples>>.
|
|
|
|
If the structure finder produces unexpected results,
|
|
specify the `explain` query parameter and an `explanation` will appear in
|
|
the response. The explanation helps you determine why the returned structure was
|
|
chosen.
|
|
|
|
[discrete]
|
|
[[find-message-structure-query-parms]]
|
|
== {api-query-parms-title}
|
|
|
|
include::{es-ref-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-column-names]
|
|
include::{es-ref-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-delimiter]
|
|
include::{es-ref-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-explain]
|
|
include::{es-ref-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-format]
|
|
include::{es-ref-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-grok-pattern]
|
|
include::{es-ref-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-ecs-compatibility]
|
|
include::{es-ref-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-quote]
|
|
include::{es-ref-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-should-trim-fields]
|
|
include::{es-ref-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-timeout]
|
|
include::{es-ref-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-timestamp-field]
|
|
include::{es-ref-dir}/text-structure/apis/find-structure-shared.asciidoc[tag=param-timestamp-format]
|
|
|
|
[discrete]
|
|
[[find-message-structure-request-body]]
|
|
== {api-request-body-title}
|
|
|
|
`messages`::
|
|
(Required, array of strings)
|
|
The list of messages you want to analyze.
|
|
|
|
[discrete]
|
|
[[find-message-structure-examples]]
|
|
== {api-examples-title}
|
|
|
|
[discrete]
|
|
[[find-message-structure-example]]
|
|
=== Analyzing Elasticsearch log files
|
|
|
|
Suppose you have a list of {es} log messages.
|
|
You can send it to the `find_message_structure` endpoint as follows:
|
|
|
|
[source,console]
|
|
----
|
|
POST _text_structure/find_message_structure
|
|
{
|
|
"messages": [
|
|
"[2024-03-05T10:52:36,256][INFO ][o.a.l.u.VectorUtilPanamaProvider] [laptop] Java vector incubator API enabled; uses preferredBitSize=128",
|
|
"[2024-03-05T10:52:41,038][INFO ][o.e.p.PluginsService ] [laptop] loaded module [repository-url]",
|
|
"[2024-03-05T10:52:41,042][INFO ][o.e.p.PluginsService ] [laptop] loaded module [rest-root]",
|
|
"[2024-03-05T10:52:41,043][INFO ][o.e.p.PluginsService ] [laptop] loaded module [x-pack-core]",
|
|
"[2024-03-05T10:52:41,043][INFO ][o.e.p.PluginsService ] [laptop] loaded module [x-pack-redact]",
|
|
"[2024-03-05T10:52:41,043][INFO ][o.e.p.PluginsService ] [laptop] loaded module [ingest-user-agent]",
|
|
"[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService ] [laptop] loaded module [x-pack-monitoring]",
|
|
"[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService ] [laptop] loaded module [repository-s3]",
|
|
"[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService ] [laptop] loaded module [x-pack-analytics]",
|
|
"[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService ] [laptop] loaded module [x-pack-ent-search]",
|
|
"[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService ] [laptop] loaded module [x-pack-autoscaling]",
|
|
"[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService ] [laptop] loaded module [lang-painless]]",
|
|
"[2024-03-05T10:52:41,059][INFO ][o.e.p.PluginsService ] [laptop] loaded module [lang-expression]",
|
|
"[2024-03-05T10:52:41,059][INFO ][o.e.p.PluginsService ] [laptop] loaded module [x-pack-eql]",
|
|
"[2024-03-05T10:52:43,291][INFO ][o.e.e.NodeEnvironment ] [laptop] heap size [16gb], compressed ordinary object pointers [true]",
|
|
"[2024-03-05T10:52:46,098][INFO ][o.e.x.s.Security ] [laptop] Security is enabled",
|
|
"[2024-03-05T10:52:47,227][INFO ][o.e.x.p.ProfilingPlugin ] [laptop] Profiling is enabled",
|
|
"[2024-03-05T10:52:47,259][INFO ][o.e.x.p.ProfilingPlugin ] [laptop] profiling index templates will not be installed or reinstalled",
|
|
"[2024-03-05T10:52:47,755][INFO ][o.e.i.r.RecoverySettings ] [laptop] using rate limit [40mb] with [default=40mb, read=0b, write=0b, max=0b]",
|
|
"[2024-03-05T10:52:47,787][INFO ][o.e.d.DiscoveryModule ] [laptop] using discovery type [multi-node] and seed hosts providers [settings]",
|
|
"[2024-03-05T10:52:49,188][INFO ][o.e.n.Node ] [laptop] initialized",
|
|
"[2024-03-05T10:52:49,199][INFO ][o.e.n.Node ] [laptop] starting ..."
|
|
]
|
|
}
|
|
----
|
|
// TEST
|
|
|
|
If the request does not encounter errors, you receive the following result:
|
|
|
|
[source,console-result]
|
|
----
|
|
{
|
|
"num_lines_analyzed" : 22,
|
|
"num_messages_analyzed" : 22,
|
|
"sample_start" : "[2024-03-05T10:52:36,256][INFO ][o.a.l.u.VectorUtilPanamaProvider] [laptop] Java vector incubator API enabled; uses preferredBitSize=128\n[2024-03-05T10:52:41,038][INFO ][o.e.p.PluginsService ] [laptop] loaded module [repository-url]\n",
|
|
"charset" : "UTF-8",
|
|
"format" : "semi_structured_text",
|
|
"multiline_start_pattern" : "^\\[\\b\\d{4}-\\d{2}-\\d{2}[T ]\\d{2}:\\d{2}",
|
|
"grok_pattern" : "\\[%{TIMESTAMP_ISO8601:timestamp}\\]\\[%{LOGLEVEL:loglevel} \\]\\[.*",
|
|
"ecs_compatibility" : "disabled",
|
|
"timestamp_field" : "timestamp",
|
|
"joda_timestamp_formats" : [
|
|
"ISO8601"
|
|
],
|
|
"java_timestamp_formats" : [
|
|
"ISO8601"
|
|
],
|
|
"need_client_timezone" : true,
|
|
"mappings" : {
|
|
"properties" : {
|
|
"@timestamp" : {
|
|
"type" : "date"
|
|
},
|
|
"loglevel" : {
|
|
"type" : "keyword"
|
|
},
|
|
"message" : {
|
|
"type" : "text"
|
|
}
|
|
}
|
|
},
|
|
"ingest_pipeline" : {
|
|
"description" : "Ingest pipeline created by text structure finder",
|
|
"processors" : [
|
|
{
|
|
"grok" : {
|
|
"field" : "message",
|
|
"patterns" : [
|
|
"\\[%{TIMESTAMP_ISO8601:timestamp}\\]\\[%{LOGLEVEL:loglevel} \\]\\[.*"
|
|
],
|
|
"ecs_compatibility" : "disabled"
|
|
}
|
|
},
|
|
{
|
|
"date" : {
|
|
"field" : "timestamp",
|
|
"timezone" : "{{ event.timezone }}",
|
|
"formats" : [
|
|
"ISO8601"
|
|
]
|
|
}
|
|
},
|
|
{
|
|
"remove" : {
|
|
"field" : "timestamp"
|
|
}
|
|
}
|
|
]
|
|
},
|
|
"field_stats" : {
|
|
"loglevel" : {
|
|
"count" : 22,
|
|
"cardinality" : 1,
|
|
"top_hits" : [
|
|
{
|
|
"value" : "INFO",
|
|
"count" : 22
|
|
}
|
|
]
|
|
},
|
|
"message" : {
|
|
"count" : 22,
|
|
"cardinality" : 22,
|
|
"top_hits" : [
|
|
{
|
|
"value" : "[2024-03-05T10:52:36,256][INFO ][o.a.l.u.VectorUtilPanamaProvider] [laptop] Java vector incubator API enabled; uses preferredBitSize=128",
|
|
"count" : 1
|
|
},
|
|
{
|
|
"value" : "[2024-03-05T10:52:41,038][INFO ][o.e.p.PluginsService ] [laptop] loaded module [repository-url]",
|
|
"count" : 1
|
|
},
|
|
{
|
|
"value" : "[2024-03-05T10:52:41,042][INFO ][o.e.p.PluginsService ] [laptop] loaded module [rest-root]",
|
|
"count" : 1
|
|
},
|
|
{
|
|
"value" : "[2024-03-05T10:52:41,043][INFO ][o.e.p.PluginsService ] [laptop] loaded module [ingest-user-agent]",
|
|
"count" : 1
|
|
},
|
|
{
|
|
"value" : "[2024-03-05T10:52:41,043][INFO ][o.e.p.PluginsService ] [laptop] loaded module [x-pack-core]",
|
|
"count" : 1
|
|
},
|
|
{
|
|
"value" : "[2024-03-05T10:52:41,043][INFO ][o.e.p.PluginsService ] [laptop] loaded module [x-pack-redact]",
|
|
"count" : 1
|
|
},
|
|
{
|
|
"value" : "[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService ] [laptop] loaded module [lang-painless]]",
|
|
"count" : 1
|
|
},
|
|
{
|
|
"value" : "[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService ] [laptop] loaded module [repository-s3]",
|
|
"count" : 1
|
|
},
|
|
{
|
|
"value" : "[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService ] [laptop] loaded module [x-pack-analytics]",
|
|
"count" : 1
|
|
},
|
|
{
|
|
"value" : "[2024-03-05T10:52:41,044][INFO ][o.e.p.PluginsService ] [laptop] loaded module [x-pack-autoscaling]",
|
|
"count" : 1
|
|
}
|
|
]
|
|
},
|
|
"timestamp" : {
|
|
"count" : 22,
|
|
"cardinality" : 14,
|
|
"earliest" : "2024-03-05T10:52:36,256",
|
|
"latest" : "2024-03-05T10:52:49,199",
|
|
"top_hits" : [
|
|
{
|
|
"value" : "2024-03-05T10:52:41,044",
|
|
"count" : 6
|
|
},
|
|
{
|
|
"value" : "2024-03-05T10:52:41,043",
|
|
"count" : 3
|
|
},
|
|
{
|
|
"value" : "2024-03-05T10:52:41,059",
|
|
"count" : 2
|
|
},
|
|
{
|
|
"value" : "2024-03-05T10:52:36,256",
|
|
"count" : 1
|
|
},
|
|
{
|
|
"value" : "2024-03-05T10:52:41,038",
|
|
"count" : 1
|
|
},
|
|
{
|
|
"value" : "2024-03-05T10:52:41,042",
|
|
"count" : 1
|
|
},
|
|
{
|
|
"value" : "2024-03-05T10:52:43,291",
|
|
"count" : 1
|
|
},
|
|
{
|
|
"value" : "2024-03-05T10:52:46,098",
|
|
"count" : 1
|
|
},
|
|
{
|
|
"value" : "2024-03-05T10:52:47,227",
|
|
"count" : 1
|
|
},
|
|
{
|
|
"value" : "2024-03-05T10:52:47,259",
|
|
"count" : 1
|
|
}
|
|
]
|
|
}
|
|
}
|
|
}
|
|
----
|
|
// TESTRESPONSE
|
|
|
|
For a detailed description of the response format, or for additional examples
|
|
on ingesting delimited text (such as CSV) or newline-delimited JSON, refer to the
|
|
<<find-structure-examples,examples of the find text structure endpoint>>.
|