From 693807a6d39050f685686f95f4acfc0e249f330f Mon Sep 17 00:00:00 2001
From: James Rodewig <40268737+jrodewig@users.noreply.github.com>
Date: Wed, 31 Mar 2021 09:57:47 -0400
Subject: [PATCH] [DOCS] Fix double spaces (#71082)

---
 TESTING.asciidoc | 8 +-
 docs/community-clients/index.asciidoc | 2 +-
 .../high-level/document/term-vectors.asciidoc | 2 +-
 .../high-level/indices/analyze.asciidoc | 2 +-
 .../high-level/indices/freeze_index.asciidoc | 2 +-
 .../high-level/indices/get_settings.asciidoc | 2 +-
 .../high-level/indices/open_index.asciidoc | 2 +-
 .../indices/unfreeze_index.asciidoc | 2 +-
 .../high-level/licensing/start-basic.asciidoc | 2 +-
 .../high-level/licensing/start-trial.asciidoc | 2 +-
 .../rollup/get_rollup_caps.asciidoc | 2 +-
 .../rollup/get_rollup_index_caps.asciidoc | 4 +-
 .../security/has-privileges.asciidoc | 2 +-
 .../snapshot/delete_snapshot.asciidoc | 2 +-
 .../low-level/configuration.asciidoc | 4 +-
 docs/java-rest/low-level/sniffer.asciidoc | 2 +-
 ...ainless-bucket-script-agg-context.asciidoc | 4 +-
 ...nless-bucket-selector-agg-context.asciidoc | 4 +-
 .../painless-metric-agg-init-context.asciidoc | 2 +-
 .../painless-metric-agg-map-context.asciidoc | 2 +-
 .../painless-score-context.asciidoc | 4 +-
 .../painless-similarity-context.asciidoc | 10 +-
 .../painless-sort-context.asciidoc | 4 +-
 .../painless-weight-context.asciidoc | 8 +-
 .../painless-casting.asciidoc | 2 +-
 .../painless-functions.asciidoc | 2 +-
 .../painless-literals.asciidoc | 4 +-
 .../painless-operators-general.asciidoc | 6 +-
 .../painless-operators-numeric.asciidoc | 6 +-
 .../painless-operators.asciidoc | 4 +-
 .../painless-types.asciidoc | 4 +-
 docs/plugins/analysis-icu.asciidoc | 18 ++--
 docs/plugins/analysis-kuromoji.asciidoc | 8 +-
 docs/plugins/analysis-phonetic.asciidoc | 6 +-
 docs/plugins/analysis-stempel.asciidoc | 2 +-
 docs/plugins/authors.asciidoc | 4 +-
 docs/plugins/discovery-azure-classic.asciidoc | 2 +-
 docs/plugins/discovery-gce.asciidoc | 2 +-
 docs/plugins/mapper-annotated-text.asciidoc | 2 +-
 docs/plugins/plugin-script.asciidoc | 4 +-
 docs/plugins/redirects.asciidoc | 2 +-
 docs/plugins/repository-gcs.asciidoc | 4 +-
 docs/plugins/repository-s3.asciidoc | 10 +-
 .../repository-shared-settings.asciidoc | 2 +-
 docs/reference/aggregations.asciidoc | 2 +-
 .../autodatehistogram-aggregation.asciidoc | 2 +-
 .../bucket/composite-aggregation.asciidoc | 4 +-
 .../bucket/datehistogram-aggregation.asciidoc | 2 +-
 .../bucket/filters-aggregation.asciidoc | 2 +-
 .../bucket/histogram-aggregation.asciidoc | 4 +-
 .../bucket/range-field-note.asciidoc | 6 +-
 .../bucket/rare-terms-aggregation.asciidoc | 54 +++++------
 .../significantterms-aggregation.asciidoc | 8 +-
 .../significanttext-aggregation.asciidoc | 4 +-
 .../bucket/terms-aggregation.asciidoc | 14 +--
 .../metrics/boxplot-aggregation.asciidoc | 8 +-
 .../metrics/cardinality-aggregation.asciidoc | 4 +-
 .../extendedstats-aggregation.asciidoc | 8 +-
 .../metrics/percentile-aggregation.asciidoc | 34 +++----
 .../percentile-rank-aggregation.asciidoc | 6 +-
 .../metrics/t-test-aggregation.asciidoc | 2 +-
 .../metrics/weighted-avg-aggregation.asciidoc | 12 +--
 docs/reference/aggregations/pipeline.asciidoc | 18 ++--
 ...umulative-cardinality-aggregation.asciidoc | 10 +-
 .../pipeline/derivative-aggregation.asciidoc | 2 +-
 .../pipeline/movfn-aggregation.asciidoc | 80 ++++++++--------
 .../moving-percentiles-aggregation.asciidoc | 6 +-
 .../percentiles-bucket-aggregation.asciidoc | 2 +-
 .../pipeline/serial-diff-aggregation.asciidoc | 10 +-
 docs/reference/analysis/analyzers.asciidoc | 4 +-
.../analysis/analyzers/configuring.asciidoc | 8 +- .../analyzers/custom-analyzer.asciidoc | 2 +- .../analyzers/fingerprint-analyzer.asciidoc | 10 +- .../analysis/analyzers/lang-analyzer.asciidoc | 4 +- .../analyzers/pattern-analyzer.asciidoc | 4 +- .../analyzers/standard-analyzer.asciidoc | 4 +- .../analysis/analyzers/stop-analyzer.asciidoc | 6 +- docs/reference/analysis/anatomy.asciidoc | 8 +- docs/reference/analysis/charfilters.asciidoc | 2 +- .../charfilters/mapping-charfilter.asciidoc | 2 +- .../multiplexer-tokenfilter.asciidoc | 6 +- .../synonym-graph-tokenfilter.asciidoc | 12 +-- .../tokenfilters/synonym-tokenfilter.asciidoc | 8 +- docs/reference/analysis/tokenizers.asciidoc | 6 +- .../tokenizers/edgengram-tokenizer.asciidoc | 12 +-- .../tokenizers/keyword-tokenizer.asciidoc | 8 +- .../tokenizers/lowercase-tokenizer.asciidoc | 2 +- .../tokenizers/ngram-tokenizer.asciidoc | 14 +-- .../pathhierarchy-tokenizer.asciidoc | 12 +-- .../tokenizers/pattern-tokenizer.asciidoc | 4 +- docs/reference/api-conventions.asciidoc | 4 +- docs/reference/cat.asciidoc | 8 +- docs/reference/cat/nodes.asciidoc | 2 +- .../cluster/allocation-explain.asciidoc | 2 +- .../cluster/nodes-hot-threads.asciidoc | 2 +- docs/reference/cluster/nodes-info.asciidoc | 2 +- docs/reference/cluster/nodes-stats.asciidoc | 2 +- docs/reference/cluster/tasks.asciidoc | 2 +- .../cluster/voting-exclusions.asciidoc | 2 +- docs/reference/commands/node-tool.asciidoc | 6 +- docs/reference/commands/shard-tool.asciidoc | 2 +- .../data-streams/data-streams.asciidoc | 2 +- .../promote-data-stream-api.asciidoc | 2 +- docs/reference/docs/data-replication.asciidoc | 2 +- docs/reference/docs/delete-by-query.asciidoc | 2 +- docs/reference/docs/get.asciidoc | 2 +- docs/reference/docs/index_.asciidoc | 2 +- docs/reference/docs/termvectors.asciidoc | 2 +- docs/reference/docs/update-by-query.asciidoc | 2 +- docs/reference/docs/update.asciidoc | 4 +- docs/reference/eql/functions.asciidoc | 2 +- docs/reference/ilm/ilm-skip-rollover.asciidoc | 2 +- docs/reference/index-modules.asciidoc | 8 +- .../index-modules/allocation/delayed.asciidoc | 4 +- .../allocation/prioritization.asciidoc | 2 +- .../allocation/total_shards.asciidoc | 4 +- docs/reference/index-modules/merge.asciidoc | 4 +- docs/reference/indices/clone-index.asciidoc | 2 +- .../reference/indices/rollover-index.asciidoc | 4 +- docs/reference/indices/shrink-index.asciidoc | 4 +- docs/reference/indices/split-index.asciidoc | 6 +- docs/reference/ingest/enrich.asciidoc | 6 +- .../ingest/processors/dissect.asciidoc | 2 +- .../reference/ingest/processors/sort.asciidoc | 2 +- docs/reference/intro.asciidoc | 2 +- .../licensing/delete-license.asciidoc | 2 +- docs/reference/licensing/start-basic.asciidoc | 2 +- .../mapping/dynamic/field-mapping.asciidoc | 2 +- .../mapping/dynamic/templates.asciidoc | 4 +- docs/reference/mapping/fields.asciidoc | 2 +- .../mapping/fields/field-names-field.asciidoc | 2 +- .../mapping/fields/index-field.asciidoc | 2 +- .../mapping/fields/routing-field.asciidoc | 6 +- .../mapping/fields/source-field.asciidoc | 4 +- docs/reference/mapping/params/coerce.asciidoc | 4 +- .../mapping/params/doc-values.asciidoc | 2 +- .../reference/mapping/params/dynamic.asciidoc | 2 +- .../reference/mapping/params/enabled.asciidoc | 6 +- docs/reference/mapping/params/format.asciidoc | 2 +- .../mapping/params/ignore-malformed.asciidoc | 4 +- .../mapping/params/index-phrases.asciidoc | 6 +- .../mapping/params/index-prefixes.asciidoc | 8 +- docs/reference/mapping/params/meta.asciidoc | 2 +- 
.../mapping/params/multi-fields.asciidoc | 8 +- docs/reference/mapping/params/norms.asciidoc | 2 +- .../mapping/params/null-value.asciidoc | 6 +- .../params/position-increment-gap.asciidoc | 2 +- .../mapping/params/properties.asciidoc | 4 +- docs/reference/mapping/params/store.asciidoc | 6 +- .../mapping/removal_of_types.asciidoc | 2 +- docs/reference/mapping/types.asciidoc | 2 +- docs/reference/mapping/types/alias.asciidoc | 2 +- docs/reference/mapping/types/array.asciidoc | 14 +-- docs/reference/mapping/types/boolean.asciidoc | 4 +- docs/reference/mapping/types/date.asciidoc | 6 +- .../mapping/types/geo-shape.asciidoc | 10 +- .../mapping/types/histogram.asciidoc | 2 +- docs/reference/mapping/types/keyword.asciidoc | 4 +- docs/reference/mapping/types/nested.asciidoc | 8 +- docs/reference/mapping/types/numeric.asciidoc | 10 +- docs/reference/mapping/types/object.asciidoc | 2 +- .../mapping/types/percolator.asciidoc | 2 +- docs/reference/mapping/types/shape.asciidoc | 4 +- docs/reference/mapping/types/text.asciidoc | 12 +-- .../mapping/types/token-count.asciidoc | 2 +- .../reference/mapping/types/wildcard.asciidoc | 4 +- docs/reference/migration/migrate_8_0.asciidoc | 2 +- .../migration/migrate_8_0/mappings.asciidoc | 10 +- .../anomaly-detection/apis/close-job.asciidoc | 2 +- .../anomaly-detection/apis/flush-job.asciidoc | 2 +- .../apis/get-ml-info.asciidoc | 4 +- .../apis/get-overall-buckets.asciidoc | 2 +- .../apis/start-datafeed.asciidoc | 4 +- .../apis/put-dfanalytics.asciidoc | 6 +- docs/reference/ml/ml-shared.asciidoc | 2 +- .../modules/cluster/disk_allocator.asciidoc | 4 +- docs/reference/modules/cluster/misc.asciidoc | 6 +- .../cluster/shards_allocation.asciidoc | 10 +- .../modules/discovery/bootstrapping.asciidoc | 2 +- .../discovery/discovery-settings.asciidoc | 2 +- .../modules/discovery/discovery.asciidoc | 4 +- .../modules/discovery/publishing.asciidoc | 2 +- .../modules/discovery/quorums.asciidoc | 2 +- .../modules/indices/indexing_buffer.asciidoc | 6 +- .../modules/indices/request_cache.asciidoc | 8 +- docs/reference/modules/node.asciidoc | 6 +- docs/reference/modules/threadpool.asciidoc | 2 +- .../configuring-metricbeat.asciidoc | 4 +- docs/reference/query-dsl.asciidoc | 2 +- docs/reference/query-dsl/bool-query.asciidoc | 6 +- .../query-dsl/compound-queries.asciidoc | 6 +- .../query-dsl/function-score-query.asciidoc | 2 +- .../query-dsl/intervals-query.asciidoc | 4 +- .../query-dsl/joining-queries.asciidoc | 2 +- .../query-dsl/multi-match-query.asciidoc | 26 +++--- .../query-dsl/percolate-query.asciidoc | 2 +- .../query-dsl/query-string-syntax.asciidoc | 10 +- .../query-dsl/query_filter_context.asciidoc | 4 +- .../query-dsl/special-queries.asciidoc | 2 +- docs/reference/redirects.asciidoc | 4 +- docs/reference/rest-api/common-parms.asciidoc | 2 +- docs/reference/rollup/apis/get-job.asciidoc | 2 +- .../rollup/apis/rollup-caps.asciidoc | 8 +- .../rollup/apis/rollup-index-caps.asciidoc | 4 +- docs/reference/rollup/index.asciidoc | 2 +- docs/reference/rollup/overview.asciidoc | 40 ++++---- .../rollup/rollup-agg-limitations.asciidoc | 2 +- .../rollup/rollup-getting-started.asciidoc | 42 ++++----- .../rollup/rollup-search-limitations.asciidoc | 32 +++---- .../rollup/understanding-groups.asciidoc | 22 ++--- docs/reference/scripting/fields.asciidoc | 6 +- docs/reference/scripting/security.asciidoc | 6 +- docs/reference/search/multi-search.asciidoc | 4 +- docs/reference/search/profile.asciidoc | 92 +++++++++---------- .../reference/search/search-template.asciidoc | 2 +- 
.../collapse-search-results.asciidoc | 4 +- .../filter-search-results.asciidoc | 14 +-- .../search-your-data/highlighting.asciidoc | 18 ++-- .../paginate-search-results.asciidoc | 6 +- .../sort-search-results.asciidoc | 2 +- docs/reference/search/search.asciidoc | 2 +- .../suggesters/completion-suggest.asciidoc | 6 +- .../search/suggesters/phrase-suggest.asciidoc | 10 +- .../settings/monitoring-settings.asciidoc | 2 +- .../settings/notification-settings.asciidoc | 2 +- .../settings/security-settings.asciidoc | 20 ++-- .../setup/bootstrap-checks-xes.asciidoc | 2 +- .../reference/setup/bootstrap-checks.asciidoc | 2 +- docs/reference/setup/install.asciidoc | 4 +- docs/reference/setup/install/deb.asciidoc | 2 +- docs/reference/setup/install/docker.asciidoc | 6 +- .../setup/install/etc-elasticsearch.asciidoc | 2 +- .../setup/install/next-steps.asciidoc | 2 +- docs/reference/setup/install/rpm.asciidoc | 4 +- docs/reference/setup/install/targz.asciidoc | 4 +- docs/reference/setup/install/windows.asciidoc | 4 +- .../setup/install/zip-windows.asciidoc | 20 ++-- docs/reference/setup/sysconfig.asciidoc | 6 +- .../setup/sysconfig/configuring.asciidoc | 4 +- .../sysconfig/executable-jna-tmpdir.asciidoc | 2 +- .../setup/sysconfig/file-descriptors.asciidoc | 2 +- docs/reference/setup/sysconfig/swap.asciidoc | 4 +- .../setup/sysconfig/virtual-memory.asciidoc | 6 +- .../apis/create-snapshot-api.asciidoc | 2 +- .../apis/repo-analysis-api.asciidoc | 2 +- .../monitor-snapshot-restore.asciidoc | 2 +- docs/reference/sql/endpoints/jdbc.asciidoc | 2 +- .../sql/endpoints/odbc/configuration.asciidoc | 2 +- docs/reference/sql/functions/math.asciidoc | 2 +- .../transform/apis/put-transform.asciidoc | 2 +- .../transform/transforms-at-scale.asciidoc | 2 +- .../upgrade/disable-shard-alloc.asciidoc | 2 +- .../upgrade/rolling_upgrade.asciidoc | 4 +- docs/reference/upgrade/upgrade-node.asciidoc | 2 +- docs/resiliency/index.asciidoc | 44 ++++----- .../rest-api-spec/test/README.asciidoc | 12 +-- .../rest-api/security/create-users.asciidoc | 2 +- .../rest-api/watcher/query-watches.asciidoc | 2 +- .../en/security/auditing/event-types.asciidoc | 8 +- ...onfiguring-active-directory-realm.asciidoc | 2 +- .../configuring-pki-realm.asciidoc | 4 +- .../authentication/ldap-realm.asciidoc | 2 +- .../authentication/oidc-guide.asciidoc | 6 +- .../security/authentication/realms.asciidoc | 2 +- .../authentication/saml-guide.asciidoc | 12 +-- .../custom-authorization.asciidoc | 8 +- .../authorization/managing-roles.asciidoc | 6 +- .../authorization/privileges.asciidoc | 2 +- .../authorization/set-security-user.asciidoc | 2 +- .../cross-cluster.asciidoc | 4 +- .../docs/en/security/configuring-es.asciidoc | 2 +- .../get-started-kibana-users.asciidoc | 2 +- .../en/security/get-started-security.asciidoc | 4 +- x-pack/docs/en/security/limitations.asciidoc | 2 +- .../security-basic-setup.asciidoc | 8 +- .../docs/en/security/troubleshooting.asciidoc | 2 +- x-pack/docs/en/watcher/actions/index.asciidoc | 2 +- x-pack/docs/en/watcher/actions/jira.asciidoc | 2 +- x-pack/docs/en/watcher/actions/slack.asciidoc | 2 +- .../en/watcher/how-watcher-works.asciidoc | 2 +- x-pack/docs/en/watcher/input/http.asciidoc | 2 +- .../docs/en/rest-api/idp-saml-init.asciidoc | 2 +- 282 files changed, 834 insertions(+), 834 deletions(-) diff --git a/TESTING.asciidoc b/TESTING.asciidoc index ebf1633e1d18..0d97b537d4eb 100644 --- a/TESTING.asciidoc +++ b/TESTING.asciidoc @@ -92,7 +92,7 @@ password: `elastic-password`. === Test case filtering. 
-You can run a single test, provided that you specify the Gradle project. See the documentation on +You can run a single test, provided that you specify the Gradle project. See the documentation on https://docs.gradle.org/current/userguide/userguide_single.html#simple_name_pattern[simple name pattern filtering]. Run a single test case in the `server` project: @@ -385,13 +385,13 @@ vagrant plugin install vagrant-cachier . You can run all of the OS packaging tests with `./gradlew packagingTest`. This task includes our legacy `bats` tests. To run only the OS tests that are written in Java, run `.gradlew distroTest`, will cause Gradle to build the tar, -zip, and deb packages and all the plugins. It will then run the tests on every +zip, and deb packages and all the plugins. It will then run the tests on every available system. This will take a very long time. + Fortunately, the various systems under test have their own Gradle tasks under `qa/os`. To find the systems tested, do a listing of the `qa/os` directory. To find out what packaging combinations can be tested on a system, run -the `tasks` task. For example: +the `tasks` task. For example: + ---------------------------------- ./gradlew :qa:os:ubuntu-1804:tasks @@ -558,7 +558,7 @@ fetching the latest from the remote. == Testing in FIPS 140-2 mode -We have a CI matrix job that periodically runs all our tests with the JVM configured +We have a CI matrix job that periodically runs all our tests with the JVM configured to be FIPS 140-2 compliant with the use of the BouncyCastle FIPS approved Security Provider. FIPS 140-2 imposes certain requirements that affect how our tests should be set up or what can be tested. This section summarizes what one needs to take into consideration so that diff --git a/docs/community-clients/index.asciidoc b/docs/community-clients/index.asciidoc index 363eb8f296db..4a7266aad055 100644 --- a/docs/community-clients/index.asciidoc +++ b/docs/community-clients/index.asciidoc @@ -150,7 +150,7 @@ Also see the {client}/php-api/current/index.html[official Elasticsearch PHP clie * https://github.com/nervetattoo/elasticsearch[elasticsearch] PHP client. -* https://github.com/madewithlove/elasticsearcher[elasticsearcher] Agnostic lightweight package on top of the Elasticsearch PHP client. Its main goal is to allow for easier structuring of queries and indices in your application. It does not want to hide or replace functionality of the Elasticsearch PHP client. +* https://github.com/madewithlove/elasticsearcher[elasticsearcher] Agnostic lightweight package on top of the Elasticsearch PHP client. Its main goal is to allow for easier structuring of queries and indices in your application. It does not want to hide or replace functionality of the Elasticsearch PHP client. [[python]] == Python diff --git a/docs/java-rest/high-level/document/term-vectors.asciidoc b/docs/java-rest/high-level/document/term-vectors.asciidoc index 36c7553d4885..65bb1eb0675f 100644 --- a/docs/java-rest/high-level/document/term-vectors.asciidoc +++ b/docs/java-rest/high-level/document/term-vectors.asciidoc @@ -51,7 +51,7 @@ offsets. payloads. <6> Set `filterSettings` to filter the terms that can be returned based on their tf-idf scores. -<7> Set `perFieldAnalyzer` to specify a different analyzer than +<7> Set `perFieldAnalyzer` to specify a different analyzer than the one that the field has. <8> Set `realtime` to `false` (default is `true`) to retrieve term vectors near realtime. 
diff --git a/docs/java-rest/high-level/indices/analyze.asciidoc b/docs/java-rest/high-level/indices/analyze.asciidoc index 9464394fd1eb..de3ac07542f7 100644 --- a/docs/java-rest/high-level/indices/analyze.asciidoc +++ b/docs/java-rest/high-level/indices/analyze.asciidoc @@ -20,7 +20,7 @@ The simplest version uses a built-in analyzer: include-tagged::{doc-tests-file}[{api}-builtin-request] --------------------------------------------------- <1> A built-in analyzer -<2> The text to include. Multiple strings are treated as a multi-valued field +<2> The text to include. Multiple strings are treated as a multi-valued field You can configure a custom analyzer: ["source","java",subs="attributes,callouts,macros"] diff --git a/docs/java-rest/high-level/indices/freeze_index.asciidoc b/docs/java-rest/high-level/indices/freeze_index.asciidoc index 2a26fe8bcd47..c3773aee80c3 100644 --- a/docs/java-rest/high-level/indices/freeze_index.asciidoc +++ b/docs/java-rest/high-level/indices/freeze_index.asciidoc @@ -38,7 +38,7 @@ include-tagged::{doc-tests-file}[{api}-request-masterTimeout] -------------------------------------------------- include-tagged::{doc-tests-file}[{api}-request-waitForActiveShards] -------------------------------------------------- -<1> The number of active shard copies to wait for before the freeze index API +<1> The number of active shard copies to wait for before the freeze index API returns a response, as an `ActiveShardCount` ["source","java",subs="attributes,callouts,macros"] diff --git a/docs/java-rest/high-level/indices/get_settings.asciidoc b/docs/java-rest/high-level/indices/get_settings.asciidoc index d0d30f257284..9eb7ec5099ea 100644 --- a/docs/java-rest/high-level/indices/get_settings.asciidoc +++ b/docs/java-rest/high-level/indices/get_settings.asciidoc @@ -25,7 +25,7 @@ The following arguments can optionally be provided: -------------------------------------------------- include-tagged::{doc-tests-file}[{api}-request-names] -------------------------------------------------- -<1> One or more settings that be the only settings retrieved. If unset, all settings will be retrieved +<1> One or more settings that be the only settings retrieved. 
If unset, all settings will be retrieved ["source","java",subs="attributes,callouts,macros"] -------------------------------------------------- diff --git a/docs/java-rest/high-level/indices/open_index.asciidoc b/docs/java-rest/high-level/indices/open_index.asciidoc index 84f038e154a4..7d0b042ffa07 100644 --- a/docs/java-rest/high-level/indices/open_index.asciidoc +++ b/docs/java-rest/high-level/indices/open_index.asciidoc @@ -43,7 +43,7 @@ include-tagged::{doc-tests-file}[{api}-request-waitForActiveShards] -------------------------------------------------- <1> The number of active shard copies to wait for before the open index API returns a response, as an `int` -<2> The number of active shard copies to wait for before the open index API +<2> The number of active shard copies to wait for before the open index API returns a response, as an `ActiveShardCount` ["source","java",subs="attributes,callouts,macros"] diff --git a/docs/java-rest/high-level/indices/unfreeze_index.asciidoc b/docs/java-rest/high-level/indices/unfreeze_index.asciidoc index 27e98581f0c7..03a4d16c9c57 100644 --- a/docs/java-rest/high-level/indices/unfreeze_index.asciidoc +++ b/docs/java-rest/high-level/indices/unfreeze_index.asciidoc @@ -37,7 +37,7 @@ include-tagged::{doc-tests-file}[{api}-request-masterTimeout] -------------------------------------------------- include-tagged::{doc-tests-file}[{api}-request-waitForActiveShards] -------------------------------------------------- -<1> The number of active shard copies to wait for before the unfreeze index API +<1> The number of active shard copies to wait for before the unfreeze index API returns a response, as an `ActiveShardCount` ["source","java",subs="attributes,callouts,macros"] diff --git a/docs/java-rest/high-level/licensing/start-basic.asciidoc b/docs/java-rest/high-level/licensing/start-basic.asciidoc index 3ff50cfd2db6..30f2c51a1c13 100644 --- a/docs/java-rest/high-level/licensing/start-basic.asciidoc +++ b/docs/java-rest/high-level/licensing/start-basic.asciidoc @@ -20,7 +20,7 @@ license started. If it was not started, it returns an error message describing why. Acknowledgement messages may also be returned if this API was called without -the `acknowledge` flag set to `true`. In this case you need to display the +the `acknowledge` flag set to `true`. In this case you need to display the messages to the end user and if they agree, resubmit the request with the `acknowledge` flag set to `true`. Please note that the response will still return a 200 return code even if it requires an acknowledgement. So, it is diff --git a/docs/java-rest/high-level/licensing/start-trial.asciidoc b/docs/java-rest/high-level/licensing/start-trial.asciidoc index 0f198a391f07..30c75e10f0a1 100644 --- a/docs/java-rest/high-level/licensing/start-trial.asciidoc +++ b/docs/java-rest/high-level/licensing/start-trial.asciidoc @@ -23,7 +23,7 @@ license started. If it was not started, it returns an error message describing why. Acknowledgement messages may also be returned if this API was called without -the `acknowledge` flag set to `true`. In this case you need to display the +the `acknowledge` flag set to `true`. In this case you need to display the messages to the end user and if they agree, resubmit the request with the `acknowledge` flag set to `true`. Please note that the response will still return a 200 return code even if it requires an acknowledgement. 
So, it is diff --git a/docs/java-rest/high-level/rollup/get_rollup_caps.asciidoc b/docs/java-rest/high-level/rollup/get_rollup_caps.asciidoc index f4c9240f7810..681ea25a18f8 100644 --- a/docs/java-rest/high-level/rollup/get_rollup_caps.asciidoc +++ b/docs/java-rest/high-level/rollup/get_rollup_caps.asciidoc @@ -40,7 +40,7 @@ include-tagged::{doc-tests-file}[x-pack-{api}-execute] The returned +{response}+ holds lists and maps of values which correspond to the capabilities of the target index/index pattern (what jobs were configured for the pattern, where the data is stored, what -aggregations are available, etc). It provides essentially the same data as the original job configuration, +aggregations are available, etc). It provides essentially the same data as the original job configuration, just presented in a different manner. For example, if we had created a job with the following config: diff --git a/docs/java-rest/high-level/rollup/get_rollup_index_caps.asciidoc b/docs/java-rest/high-level/rollup/get_rollup_index_caps.asciidoc index 2e08409d1e2f..06d546fb3c58 100644 --- a/docs/java-rest/high-level/rollup/get_rollup_index_caps.asciidoc +++ b/docs/java-rest/high-level/rollup/get_rollup_index_caps.asciidoc @@ -10,7 +10,7 @@ experimental::[] The Get Rollup Index Capabilities API allows the user to determine if a concrete index or index pattern contains -stored rollup jobs and data. If it contains data stored from rollup jobs, the capabilities of those jobs +stored rollup jobs and data. If it contains data stored from rollup jobs, the capabilities of those jobs are returned. The API accepts a `GetRollupIndexCapsRequest` object as a request and returns a `GetRollupIndexCapsResponse`. [id="{upid}-x-pack-{api}-request"] @@ -40,7 +40,7 @@ include-tagged::{doc-tests-file}[x-pack-{api}-execute] The returned +{response}+ holds lists and maps of values which correspond to the capabilities of the rollup index/index pattern (what jobs are stored in the index, their capabilities, what -aggregations are available, etc). Because multiple jobs can be stored in one index, the +aggregations are available, etc). Because multiple jobs can be stored in one index, the response may include several jobs with different configurations. The capabilities are essentially the same as the original job configuration, just presented in a different diff --git a/docs/java-rest/high-level/security/has-privileges.asciidoc b/docs/java-rest/high-level/security/has-privileges.asciidoc index 7c5f09a171ce..dfd92be6837f 100644 --- a/docs/java-rest/high-level/security/has-privileges.asciidoc +++ b/docs/java-rest/high-level/security/has-privileges.asciidoc @@ -62,7 +62,7 @@ if the privilege was not part of the request). A `Map>>>` where each key is the name of an application (as specified in the +{request}+). For each application, the value is a `Map` keyed by resource name, with -each value being another `Map` from privilege name to a `Boolean`. +each value being another `Map` from privilege name to a `Boolean`. The `Boolean` value is `true` if the user has that privilege on that resource for that application, and `false` otherwise. 
+ diff --git a/docs/java-rest/high-level/snapshot/delete_snapshot.asciidoc b/docs/java-rest/high-level/snapshot/delete_snapshot.asciidoc index a594db5b6025..2f770e35333b 100644 --- a/docs/java-rest/high-level/snapshot/delete_snapshot.asciidoc +++ b/docs/java-rest/high-level/snapshot/delete_snapshot.asciidoc @@ -34,7 +34,7 @@ include-tagged::{doc-tests}/SnapshotClientDocumentationIT.java[delete-snapshot-e [[java-rest-high-snapshot-delete-snapshot-async]] ==== Asynchronous Execution -The asynchronous execution of a delete snapshot request requires both the +The asynchronous execution of a delete snapshot request requires both the `DeleteSnapshotRequest` instance and an `ActionListener` instance to be passed to the asynchronous method: diff --git a/docs/java-rest/low-level/configuration.asciidoc b/docs/java-rest/low-level/configuration.asciidoc index d368d8362f09..b112de6af843 100644 --- a/docs/java-rest/low-level/configuration.asciidoc +++ b/docs/java-rest/low-level/configuration.asciidoc @@ -150,7 +150,7 @@ should be consulted: https://hc.apache.org/httpcomponents-asyncclient-4.1.x/ . NOTE: If your application runs under the security manager you might be subject to the JVM default policies of caching positive hostname resolutions -indefinitely and negative hostname resolutions for ten seconds. If the resolved +indefinitely and negative hostname resolutions for ten seconds. If the resolved addresses of the hosts to which you are connecting the client to vary with time then you might want to modify the default JVM behavior. These can be modified by adding @@ -184,6 +184,6 @@ whenever none of the nodes from the preferred rack is available. WARNING: Node selectors that do not consistently select the same set of nodes will make round-robin behaviour unpredictable and possibly unfair. The -preference example above is fine as it reasons about availability of nodes +preference example above is fine as it reasons about availability of nodes which already affects the predictability of round-robin. Node selection should not depend on other external factors or round-robin will not work properly. diff --git a/docs/java-rest/low-level/sniffer.asciidoc b/docs/java-rest/low-level/sniffer.asciidoc index 84f1510bae43..54485cb7af87 100644 --- a/docs/java-rest/low-level/sniffer.asciidoc +++ b/docs/java-rest/low-level/sniffer.asciidoc @@ -97,7 +97,7 @@ include-tagged::{doc-tests}/SnifferDocumentation.java[sniff-on-failure] failure, but an additional sniffing round is also scheduled sooner than usual, by default one minute after the failure, assuming that things will go back to normal and we want to detect that as soon as possible. Said interval can be -customized at `Sniffer` creation time through the `setSniffAfterFailureDelayMillis` +customized at `Sniffer` creation time through the `setSniffAfterFailureDelayMillis` method. Note that this last configuration parameter has no effect in case sniffing on failure is not enabled like explained above. <3> Set the `Sniffer` instance to the failure listener diff --git a/docs/painless/painless-contexts/painless-bucket-script-agg-context.asciidoc b/docs/painless/painless-contexts/painless-bucket-script-agg-context.asciidoc index 584646680101..5f0dc32305a3 100644 --- a/docs/painless/painless-contexts/painless-bucket-script-agg-context.asciidoc +++ b/docs/painless/painless-contexts/painless-bucket-script-agg-context.asciidoc @@ -24,7 +24,7 @@ The standard <> is available. To run this example, first follow the steps in <>. 
-The painless context in a `bucket_script` aggregation provides a `params` map. This map contains both +The painless context in a `bucket_script` aggregation provides a `params` map. This map contains both user-specified custom values, as well as the values from other aggregations specified in the `buckets_path` property. @@ -36,7 +36,7 @@ and adds the user-specified base_cost to the result: (params.max - params.min) + params.base_cost -------------------------------------------------- -Note that the values are extracted from the `params` map. In context, the aggregation looks like this: +Note that the values are extracted from the `params` map. In context, the aggregation looks like this: [source,console] -------------------------------------------------- diff --git a/docs/painless/painless-contexts/painless-bucket-selector-agg-context.asciidoc b/docs/painless/painless-contexts/painless-bucket-selector-agg-context.asciidoc index 1c257cf2e72b..fb7b24240e36 100644 --- a/docs/painless/painless-contexts/painless-bucket-selector-agg-context.asciidoc +++ b/docs/painless/painless-contexts/painless-bucket-selector-agg-context.asciidoc @@ -26,7 +26,7 @@ The standard <> is available. To run this example, first follow the steps in <>. -The painless context in a `bucket_selector` aggregation provides a `params` map. This map contains both +The painless context in a `bucket_selector` aggregation provides a `params` map. This map contains both user-specified custom values, as well as the values from other aggregations specified in the `buckets_path` property. @@ -41,7 +41,7 @@ params.max + params.base_cost > 10 -------------------------------------------------- Note that the values are extracted from the `params` map. The script is in the form of an expression -that returns `true` or `false`. In context, the aggregation looks like this: +that returns `true` or `false`. In context, the aggregation looks like this: [source,console] -------------------------------------------------- diff --git a/docs/painless/painless-contexts/painless-metric-agg-init-context.asciidoc b/docs/painless/painless-contexts/painless-metric-agg-init-context.asciidoc index 78ebac79c65e..2d40fcf427a4 100644 --- a/docs/painless/painless-contexts/painless-metric-agg-init-context.asciidoc +++ b/docs/painless/painless-contexts/painless-metric-agg-init-context.asciidoc @@ -19,7 +19,7 @@ full metric aggregation. *Side Effects* `state` (`Map`):: - Add values to this `Map` to for use in a map. Additional values must + Add values to this `Map` to for use in a map. Additional values must be of the type `Map`, `List`, `String` or primitive. *Return* diff --git a/docs/painless/painless-contexts/painless-metric-agg-map-context.asciidoc b/docs/painless/painless-contexts/painless-metric-agg-map-context.asciidoc index 485d4da8439d..4c7ef36ddace 100644 --- a/docs/painless/painless-contexts/painless-metric-agg-map-context.asciidoc +++ b/docs/painless/painless-contexts/painless-metric-agg-map-context.asciidoc @@ -32,7 +32,7 @@ part of a full metric aggregation. primitive. The same `state` `Map` is shared between all aggregated documents on a given shard. If an initialization script is provided as part of the aggregation then values added from the initialization script are - available. If no combine script is specified, values must be + available. If no combine script is specified, values must be directly stored in `state` in a usable form. If no combine script and no <> are specified, the `state` values are used as the result. 
diff --git a/docs/painless/painless-contexts/painless-score-context.asciidoc b/docs/painless/painless-contexts/painless-score-context.asciidoc index 608764cf3dad..72fc86b0f9b7 100644 --- a/docs/painless/painless-contexts/painless-score-context.asciidoc +++ b/docs/painless/painless-contexts/painless-score-context.asciidoc @@ -11,8 +11,8 @@ score to documents returned from a query. User-defined parameters passed in as part of the query. `doc` (`Map`, read-only):: - Contains the fields of the current document. For single-valued fields, - the value can be accessed via `doc['fieldname'].value`. For multi-valued + Contains the fields of the current document. For single-valued fields, + the value can be accessed via `doc['fieldname'].value`. For multi-valued fields, this returns the first value; other values can be accessed via `doc['fieldname'].get(index)` diff --git a/docs/painless/painless-contexts/painless-similarity-context.asciidoc b/docs/painless/painless-contexts/painless-similarity-context.asciidoc index e48da21195dd..1e73860ec8da 100644 --- a/docs/painless/painless-contexts/painless-similarity-context.asciidoc +++ b/docs/painless/painless-contexts/painless-similarity-context.asciidoc @@ -11,19 +11,19 @@ documents in a query. The weight as calculated by a <> `query.boost` (`float`, read-only):: - The boost value if provided by the query. If this is not provided the + The boost value if provided by the query. If this is not provided the value is `1.0f`. `field.docCount` (`long`, read-only):: The number of documents that have a value for the current field. `field.sumDocFreq` (`long`, read-only):: - The sum of all terms that exist for the current field. If this is not + The sum of all terms that exist for the current field. If this is not available the value is `-1`. `field.sumTotalTermFreq` (`long`, read-only):: The sum of occurrences in the index for all the terms that exist in the - current field. If this is not available the value is `-1`. + current field. If this is not available the value is `-1`. `term.docFreq` (`long`, read-only):: The number of documents that contain the current term in the index. @@ -32,7 +32,7 @@ documents in a query. The total occurrences of the current term in the index. `doc.length` (`long`, read-only):: - The number of tokens the current document has in the current field. This + The number of tokens the current document has in the current field. This is decoded from the stored {ref}/norms.html[norms] and may be approximate for long fields @@ -45,7 +45,7 @@ Note that the `query`, `field`, and `term` variables are also available to the there, as they are constant for all documents. For queries that contain multiple terms, the script is called once for each -term with that term's calculated weight, and the results are summed. Note that some +term with that term's calculated weight, and the results are summed. Note that some terms might have a `doc.freq` value of `0` on a document, for example if a query uses synonyms. diff --git a/docs/painless/painless-contexts/painless-sort-context.asciidoc b/docs/painless/painless-contexts/painless-sort-context.asciidoc index fbcc85448caf..84b3e9ec135a 100644 --- a/docs/painless/painless-contexts/painless-sort-context.asciidoc +++ b/docs/painless/painless-contexts/painless-sort-context.asciidoc @@ -10,8 +10,8 @@ Use a Painless script to User-defined parameters passed in as part of the query. `doc` (`Map`, read-only):: - Contains the fields of the current document. 
For single-valued fields, - the value can be accessed via `doc['fieldname'].value`. For multi-valued + Contains the fields of the current document. For single-valued fields, + the value can be accessed via `doc['fieldname'].value`. For multi-valued fields, this returns the first value; other values can be accessed via `doc['fieldname'].get(index)` diff --git a/docs/painless/painless-contexts/painless-weight-context.asciidoc b/docs/painless/painless-contexts/painless-weight-context.asciidoc index 44438a1225ea..47b9df0e7cb6 100644 --- a/docs/painless/painless-contexts/painless-weight-context.asciidoc +++ b/docs/painless/painless-contexts/painless-weight-context.asciidoc @@ -3,7 +3,7 @@ Use a Painless script to create a {ref}/index-modules-similarity.html[weight] for use in a -<>. The weight makes up the +<>. The weight makes up the part of the similarity calculation that is independent of the document being scored, and so can be built up front and cached. @@ -12,19 +12,19 @@ Queries that contain multiple terms calculate a separate weight for each term. *Variables* `query.boost` (`float`, read-only):: - The boost value if provided by the query. If this is not provided the + The boost value if provided by the query. If this is not provided the value is `1.0f`. `field.docCount` (`long`, read-only):: The number of documents that have a value for the current field. `field.sumDocFreq` (`long`, read-only):: - The sum of all terms that exist for the current field. If this is not + The sum of all terms that exist for the current field. If this is not available the value is `-1`. `field.sumTotalTermFreq` (`long`, read-only):: The sum of occurrences in the index for all the terms that exist in the - current field. If this is not available the value is `-1`. + current field. If this is not available the value is `-1`. `term.docFreq` (`long`, read-only):: The number of documents that contain the current term in the index. diff --git a/docs/painless/painless-lang-spec/painless-casting.asciidoc b/docs/painless/painless-lang-spec/painless-casting.asciidoc index 25e7e345ba0f..48a82734507e 100644 --- a/docs/painless/painless-lang-spec/painless-casting.asciidoc +++ b/docs/painless/painless-lang-spec/painless-casting.asciidoc @@ -4,7 +4,7 @@ A cast converts the value of an original type to the equivalent value of a target type. An implicit cast infers the target type and automatically occurs during certain <>. An explicit cast specifies -the target type and forcefully occurs as its own operation. Use the `cast +the target type and forcefully occurs as its own operation. Use the `cast operator '()'` to specify an explicit cast. Refer to the <> for a quick reference on all diff --git a/docs/painless/painless-lang-spec/painless-functions.asciidoc b/docs/painless/painless-lang-spec/painless-functions.asciidoc index 20f3e821f1ed..535f3b94ea30 100644 --- a/docs/painless/painless-lang-spec/painless-functions.asciidoc +++ b/docs/painless/painless-lang-spec/painless-functions.asciidoc @@ -8,7 +8,7 @@ to repeat its specific task. A parameter is a named type value available as a function specifies zero-to-many parameters, and when a function is called a value is specified per parameter. An argument is a value passed into a function at the point of call. A function specifies a return type value, though if the -type is <> then no value is returned. Any non-void type return +type is <> then no value is returned. Any non-void type return value is available for use within an <> or is discarded otherwise. 
diff --git a/docs/painless/painless-lang-spec/painless-literals.asciidoc b/docs/painless/painless-lang-spec/painless-literals.asciidoc index f2e584963804..99c93dc858ad 100644 --- a/docs/painless/painless-lang-spec/painless-literals.asciidoc +++ b/docs/painless/painless-lang-spec/painless-literals.asciidoc @@ -11,7 +11,7 @@ Use an integer literal to specify an integer type value in decimal, octal, or hex notation of a <> `int`, `long`, `float`, or `double`. Use the following single letter designations to specify the primitive type: `l` or `L` for `long`, `f` or `F` for `float`, and `d` or `D` -for `double`. If not specified, the type defaults to `int`. Use `0` as a prefix +for `double`. If not specified, the type defaults to `int`. Use `0` as a prefix to specify an integer literal as octal, and use `0x` or `0X` as a prefix to specify an integer literal as hex. @@ -86,7 +86,7 @@ EXPONENT: ( [eE] [+\-]? [0-9]+ ); Use a string literal to specify a <> value with either single-quotes or double-quotes. Use a `\"` token to include a double-quote as part of a double-quoted string literal. Use a `\'` token to -include a single-quote as part of a single-quoted string literal. Use a `\\` +include a single-quote as part of a single-quoted string literal. Use a `\\` token to include a backslash as part of any string literal. *Grammar* diff --git a/docs/painless/painless-lang-spec/painless-operators-general.asciidoc b/docs/painless/painless-lang-spec/painless-operators-general.asciidoc index 6c17e36b3fc8..14e59e44d643 100644 --- a/docs/painless/painless-lang-spec/painless-operators-general.asciidoc +++ b/docs/painless/painless-lang-spec/painless-operators-general.asciidoc @@ -76,7 +76,7 @@ int z = add(1, 2); <2> ==== Cast An explicit cast converts the value of an original type to the equivalent value -of a target type forcefully as an operation. Use the `cast operator '()'` to +of a target type forcefully as an operation. Use the `cast operator '()'` to specify an explicit cast. Refer to <> for more information. @@ -85,7 +85,7 @@ information. A conditional consists of three expressions. The first expression is evaluated with an expected boolean result type. If the first expression evaluates to true -then the second expression will be evaluated. If the first expression evaluates +then the second expression will be evaluated. If the first expression evaluates to false then the third expression will be evaluated. The second and third expressions will be <> if the evaluated values are not the same type. Use the `conditional operator '? :'` as a shortcut to avoid the need @@ -254,7 +254,7 @@ V = (T)(V op expression); The table below shows the available operators for use in a compound assignment. Each operator follows the casting/promotion rules according to their regular -definition. For numeric operations there is an extra implicit cast when +definition. For numeric operations there is an extra implicit cast when necessary to return the promoted numeric type value to the original numeric type value of the variable/field and can result in data loss. 
diff --git a/docs/painless/painless-lang-spec/painless-operators-numeric.asciidoc b/docs/painless/painless-lang-spec/painless-operators-numeric.asciidoc index 1b08d9c3361c..f145dca19bc1 100644 --- a/docs/painless/painless-lang-spec/painless-operators-numeric.asciidoc +++ b/docs/painless/painless-lang-spec/painless-operators-numeric.asciidoc @@ -668,7 +668,7 @@ def y = x/2; <2> ==== Remainder Use the `remainder operator '%'` to calculate the REMAINDER for division -between two numeric type values. Rules for NaN values and division by zero follow the JVM +between two numeric type values. Rules for NaN values and division by zero follow the JVM specification. *Errors* @@ -809,7 +809,7 @@ def y = x+2; <2> ==== Subtraction Use the `subtraction operator '-'` to SUBTRACT a right-hand side numeric type -value from a left-hand side numeric type value. Rules for resultant overflow +value from a left-hand side numeric type value. Rules for resultant overflow and NaN values follow the JVM specification. *Errors* @@ -955,7 +955,7 @@ def y = x << 1; <2> Use the `right shift operator '>>'` to SHIFT higher order bits to lower order bits in a left-hand side integer type value by the distance specified in a -right-hand side integer type value. The highest order bit of the left-hand side +right-hand side integer type value. The highest order bit of the left-hand side integer type value is preserved. *Errors* diff --git a/docs/painless/painless-lang-spec/painless-operators.asciidoc b/docs/painless/painless-lang-spec/painless-operators.asciidoc index b105f4ef6faa..47e086e88d90 100644 --- a/docs/painless/painless-lang-spec/painless-operators.asciidoc +++ b/docs/painless/painless-lang-spec/painless-operators.asciidoc @@ -2,10 +2,10 @@ === Operators An operator is the most basic action that can be taken to evaluate values in a -script. An expression is one-to-many consecutive operations. Precedence is the +script. An expression is one-to-many consecutive operations. Precedence is the order in which an operator will be evaluated relative to another operator. Associativity is the direction within an expression in which a specific operator -is evaluated. The following table lists all available operators: +is evaluated. The following table lists all available operators: [cols="<6,<3,^3,^2,^4"] |==== diff --git a/docs/painless/painless-lang-spec/painless-types.asciidoc b/docs/painless/painless-lang-spec/painless-types.asciidoc index 4a9c6d9f3dd4..fca5fed1b12c 100644 --- a/docs/painless/painless-lang-spec/painless-types.asciidoc +++ b/docs/painless/painless-lang-spec/painless-types.asciidoc @@ -259,7 +259,7 @@ during operations. Declare a `def` type <> or access a `def` type member field (from a reference type instance), and assign it any type of value for evaluation during later operations. The default value for a newly-declared -`def` type variable is `null`. A `def` type variable or method/function +`def` type variable is `null`. A `def` type variable or method/function parameter can change the type it represents during the compilation and evaluation of a script. @@ -400,7 +400,7 @@ range `[2, d]` where `d >= 2`, each element within each dimension in the range `[1, d-1]` is also an array type. The element type of each dimension, `n`, is an array type with the number of dimensions equal to `d-n`. For example, consider `int[][][]` with 3 dimensions. Each element in the 3rd dimension, `d-3`, is the -primitive type `int`. Each element in the 2nd dimension, `d-2`, is the array +primitive type `int`. 
Each element in the 2nd dimension, `d-2`, is the array type `int[]`. And each element in the 1st dimension, `d-1` is the array type `int[][]`. diff --git a/docs/plugins/analysis-icu.asciidoc b/docs/plugins/analysis-icu.asciidoc index a8041e471001..ac5a32a3b535 100644 --- a/docs/plugins/analysis-icu.asciidoc +++ b/docs/plugins/analysis-icu.asciidoc @@ -12,7 +12,7 @@ transliteration. ================================================ From time to time, the ICU library receives updates such as adding new -characters and emojis, and improving collation (sort) orders. These changes +characters and emojis, and improving collation (sort) orders. These changes may or may not affect search and sort orders, depending on which characters sets you are using. @@ -38,11 +38,11 @@ The following parameters are accepted: `method`:: - Normalization method. Accepts `nfkc`, `nfc` or `nfkc_cf` (default) + Normalization method. Accepts `nfkc`, `nfc` or `nfkc_cf` (default) `mode`:: - Normalization mode. Accepts `compose` (default) or `decompose`. + Normalization mode. Accepts `compose` (default) or `decompose`. [[analysis-icu-normalization-charfilter]] ==== ICU Normalization Character Filter @@ -52,7 +52,7 @@ http://userguide.icu-project.org/transforms/normalization[here]. It registers itself as the `icu_normalizer` character filter, which is available to all indices without any further configuration. The type of normalization can be specified with the `name` parameter, which accepts `nfc`, -`nfkc`, and `nfkc_cf` (default). Set the `mode` parameter to `decompose` to +`nfkc`, and `nfkc_cf` (default). Set the `mode` parameter to `decompose` to convert `nfc` to `nfd` or `nfkc` to `nfkd` respectively: Which letters are normalized can be controlled by specifying the @@ -328,7 +328,7 @@ PUT icu_sample [WARNING] ====== -This token filter has been deprecated since Lucene 5.0. Please use +This token filter has been deprecated since Lucene 5.0. Please use <>. ====== @@ -404,7 +404,7 @@ The following parameters are accepted by `icu_collation_keyword` fields: `null_value`:: Accepts a string value which is substituted for any explicit `null` - values. Defaults to `null`, which means the field is treated as missing. + values. Defaults to `null`, which means the field is treated as missing. {ref}/ignore-above.html[`ignore_above`]:: @@ -434,7 +434,7 @@ The strength property determines the minimum level of difference considered significant during comparison. Possible values are : `primary`, `secondary`, `tertiary`, `quaternary` or `identical`. See the https://icu-project.org/apiref/icu4j/com/ibm/icu/text/Collator.html[ICU Collation documentation] -for a more detailed explanation for each value. Defaults to `tertiary` +for a more detailed explanation for each value. Defaults to `tertiary` unless otherwise specified in the collation. `decomposition`:: @@ -483,7 +483,7 @@ Single character or contraction. Controls what is variable for `alternate`. `hiragana_quaternary_mode`:: -Possible values: `true` or `false`. Distinguishing between Katakana and +Possible values: `true` or `false`. Distinguishing between Katakana and Hiragana characters in `quaternary` strength. @@ -495,7 +495,7 @@ case mapping, normalization, transliteration and bidirectional text handling. You can define which transformation you want to apply with the `id` parameter (defaults to `Null`), and specify text direction with the `dir` parameter -which accepts `forward` (default) for LTR and `reverse` for RTL. 
Custom +which accepts `forward` (default) for LTR and `reverse` for RTL. Custom rulesets are not yet supported. For example: diff --git a/docs/plugins/analysis-kuromoji.asciidoc b/docs/plugins/analysis-kuromoji.asciidoc index 4b1f3408882f..75bd6cc446d0 100644 --- a/docs/plugins/analysis-kuromoji.asciidoc +++ b/docs/plugins/analysis-kuromoji.asciidoc @@ -103,7 +103,7 @@ The `kuromoji_tokenizer` accepts the following settings: -- The tokenization mode determines how the tokenizer handles compound and -unknown words. It can be set to: +unknown words. It can be set to: `normal`:: @@ -403,11 +403,11 @@ form in either katakana or romaji. It accepts the following setting: `use_romaji`:: - Whether romaji reading form should be output instead of katakana. Defaults to `false`. + Whether romaji reading form should be output instead of katakana. Defaults to `false`. When using the pre-defined `kuromoji_readingform` filter, `use_romaji` is set to `true`. The default when defining a custom `kuromoji_readingform`, however, -is `false`. The only reason to use the custom form is if you need the +is `false`. The only reason to use the custom form is if you need the katakana reading form: [source,console] @@ -521,7 +521,7 @@ GET kuromoji_sample/_analyze The `ja_stop` token filter filters out Japanese stopwords (`_japanese_`), and any other custom stopwords specified by the user. This filter only supports -the predefined `_japanese_` stopwords list. If you want to use a different +the predefined `_japanese_` stopwords list. If you want to use a different predefined list, then use the {ref}/analysis-stop-tokenfilter.html[`stop` token filter] instead. diff --git a/docs/plugins/analysis-phonetic.asciidoc b/docs/plugins/analysis-phonetic.asciidoc index 1f43862bac82..a7a4883b46e0 100644 --- a/docs/plugins/analysis-phonetic.asciidoc +++ b/docs/plugins/analysis-phonetic.asciidoc @@ -16,7 +16,7 @@ The `phonetic` token filter takes the following settings: `encoder`:: - Which phonetic encoder to use. Accepts `metaphone` (default), + Which phonetic encoder to use. Accepts `metaphone` (default), `double_metaphone`, `soundex`, `refined_soundex`, `caverphone1`, `caverphone2`, `cologne`, `nysiis`, `koelnerphonetik`, `haasephonetik`, `beider_morse`, `daitch_mokotoff`. @@ -24,7 +24,7 @@ The `phonetic` token filter takes the following settings: `replace`:: Whether or not the original token should be replaced by the phonetic - token. Accepts `true` (default) and `false`. Not supported by + token. Accepts `true` (default) and `false`. Not supported by `beider_morse` encoding. [source,console] @@ -81,7 +81,7 @@ supported: `max_code_len`:: - The maximum length of the emitted metaphone token. Defaults to `4`. + The maximum length of the emitted metaphone token. Defaults to `4`. [discrete] ===== Beider Morse settings diff --git a/docs/plugins/analysis-stempel.asciidoc b/docs/plugins/analysis-stempel.asciidoc index 54118945ab3e..0cbba0451e7d 100644 --- a/docs/plugins/analysis-stempel.asciidoc +++ b/docs/plugins/analysis-stempel.asciidoc @@ -46,7 +46,7 @@ PUT /stempel_example The `polish_stop` token filter filters out Polish stopwords (`_polish_`), and any other custom stopwords specified by the user. This filter only supports -the predefined `_polish_` stopwords list. If you want to use a different +the predefined `_polish_` stopwords list. If you want to use a different predefined list, then use the {ref}/analysis-stop-tokenfilter.html[`stop` token filter] instead. 
diff --git a/docs/plugins/authors.asciidoc b/docs/plugins/authors.asciidoc index 76a0588ceadf..e908b8f8692f 100644 --- a/docs/plugins/authors.asciidoc +++ b/docs/plugins/authors.asciidoc @@ -14,7 +14,7 @@ The Elasticsearch repository contains examples of: * a https://github.com/elastic/elasticsearch/tree/master/plugins/examples/script-expert-scoring[Java plugin] which contains a script plugin. -These examples provide the bare bones needed to get started. For more +These examples provide the bare bones needed to get started. For more information about how to write a plugin, we recommend looking at the plugins listed in this documentation for inspiration. @@ -74,7 +74,7 @@ in the presence of plugins with the incorrect `elasticsearch.version`. === Testing your plugin When testing a Java plugin, it will only be auto-loaded if it is in the -`plugins/` directory. Use `bin/elasticsearch-plugin install file:///path/to/your/plugin` +`plugins/` directory. Use `bin/elasticsearch-plugin install file:///path/to/your/plugin` to install your plugin for testing. You may also load your plugin within the test framework for integration tests. diff --git a/docs/plugins/discovery-azure-classic.asciidoc b/docs/plugins/discovery-azure-classic.asciidoc index b7a94ea60e27..b9933fc65d82 100644 --- a/docs/plugins/discovery-azure-classic.asciidoc +++ b/docs/plugins/discovery-azure-classic.asciidoc @@ -130,7 +130,7 @@ discovery: We will expose here one strategy which is to hide our Elasticsearch cluster from outside. With this strategy, only VMs behind the same virtual port can talk to each -other. That means that with this mode, you can use Elasticsearch unicast +other. That means that with this mode, you can use Elasticsearch unicast discovery to build a cluster, using the Azure API to retrieve information about your nodes. diff --git a/docs/plugins/discovery-gce.asciidoc b/docs/plugins/discovery-gce.asciidoc index 94d73c0bc4f6..617a59375629 100644 --- a/docs/plugins/discovery-gce.asciidoc +++ b/docs/plugins/discovery-gce.asciidoc @@ -416,7 +416,7 @@ gcloud config set project es-cloud [[discovery-gce-usage-tips-permissions]] ===== Machine Permissions -If you have created a machine without the correct permissions, you will see `403 unauthorized` error messages. To change machine permission on an existing instance, first stop the instance then Edit. Scroll down to `Access Scopes` to change permission. The other way to alter these permissions is to delete the instance (NOT THE DISK). Then create another with the correct permissions. +If you have created a machine without the correct permissions, you will see `403 unauthorized` error messages. To change machine permission on an existing instance, first stop the instance then Edit. Scroll down to `Access Scopes` to change permission. The other way to alter these permissions is to delete the instance (NOT THE DISK). Then create another with the correct permissions. Creating machines with gcloud:: + diff --git a/docs/plugins/mapper-annotated-text.asciidoc b/docs/plugins/mapper-annotated-text.asciidoc index 9307b6aaefe1..157fe538bb08 100644 --- a/docs/plugins/mapper-annotated-text.asciidoc +++ b/docs/plugins/mapper-annotated-text.asciidoc @@ -293,7 +293,7 @@ The annotated highlighter is based on the `unified` highlighter and supports the settings but does not use the `pre_tags` or `post_tags` parameters. 
Rather than using html-like markup such as `cat` the annotated highlighter uses the same markdown-like syntax used for annotations and injects a key=value annotation where `_hit_term` -is the key and the matched search term is the value e.g. +is the key and the matched search term is the value e.g. The [cat](_hit_term=cat) sat on the [mat](sku3578) diff --git a/docs/plugins/plugin-script.asciidoc b/docs/plugins/plugin-script.asciidoc index 46e472fc50e1..a93102bf0406 100644 --- a/docs/plugins/plugin-script.asciidoc +++ b/docs/plugins/plugin-script.asciidoc @@ -231,7 +231,7 @@ user for confirmation before continuing with installation. When running the plugin install script from another program (e.g. install automation scripts), the plugin script should detect that it is not being called from the console and skip the confirmation response, automatically -granting all requested permissions. If console detection fails, then batch +granting all requested permissions. If console detection fails, then batch mode can be forced by specifying `-b` or `--batch` as follows: [source,shell] @@ -243,7 +243,7 @@ sudo bin/elasticsearch-plugin install --batch [pluginname] === Custom config directory If your `elasticsearch.yml` config file is in a custom location, you will need -to specify the path to the config file when using the `plugin` script. You +to specify the path to the config file when using the `plugin` script. You can do this as follows: [source,sh] diff --git a/docs/plugins/redirects.asciidoc b/docs/plugins/redirects.asciidoc index b0cedb8f97ae..a5de5c9cd993 100644 --- a/docs/plugins/redirects.asciidoc +++ b/docs/plugins/redirects.asciidoc @@ -6,7 +6,7 @@ The following pages have moved or been deleted. [role="exclude",id="discovery-multicast"] === Multicast Discovery Plugin -The `multicast-discovery` plugin has been removed. Instead, configure networking +The `multicast-discovery` plugin has been removed. Instead, configure networking using unicast (see {ref}/modules-network.html[Network settings]) or using one of the <>. diff --git a/docs/plugins/repository-gcs.asciidoc b/docs/plugins/repository-gcs.asciidoc index b6862355d070..25b62df042e1 100644 --- a/docs/plugins/repository-gcs.asciidoc +++ b/docs/plugins/repository-gcs.asciidoc @@ -57,7 +57,7 @@ this configuration (such as Compute Engine, Kubernetes Engine or App Engine). You have to obtain and provide https://cloud.google.com/iam/docs/overview#service_account[service account credentials] manually. -For detailed information about generating JSON service account files, see the https://cloud.google.com/storage/docs/authentication?hl=en#service_accounts[Google Cloud documentation]. +For detailed information about generating JSON service account files, see the https://cloud.google.com/storage/docs/authentication?hl=en#service_accounts[Google Cloud documentation]. Note that the PKCS12 format is not supported by this plugin. Here is a summary of the steps: @@ -88,7 +88,7 @@ A JSON service account file looks like this: ---- // NOTCONSOLE -To provide this file to the plugin, it must be stored in the {ref}/secure-settings.html[Elasticsearch keystore]. You must +To provide this file to the plugin, it must be stored in the {ref}/secure-settings.html[Elasticsearch keystore]. You must add a `file` setting with the name `gcs.client.NAME.credentials_file` using the `add-file` subcommand. `NAME` is the name of the client configuration for the repository. 
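As a rough illustration of the `add-file` step described above, assuming the implicit `default` client and a purely hypothetical local path, the keystore command might look like this:

[source,sh]
--------------------------------------------------
# Client name and path are placeholders; adjust to your own setup.
bin/elasticsearch-keystore add-file gcs.client.default.credentials_file /path/to/service-account.json
--------------------------------------------------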
The implicit client name is `default`, but a different client name can be specified in the diff --git a/docs/plugins/repository-s3.asciidoc b/docs/plugins/repository-s3.asciidoc index c8e9ba7e2d9c..6a92b599688f 100644 --- a/docs/plugins/repository-s3.asciidoc +++ b/docs/plugins/repository-s3.asciidoc @@ -312,7 +312,7 @@ include::repository-shared-settings.asciidoc[] https://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl[S3 canned ACLs] : `private`, `public-read`, `public-read-write`, `authenticated-read`, `log-delivery-write`, `bucket-owner-read`, - `bucket-owner-full-control`. Defaults to `private`. You could specify a + `bucket-owner-full-control`. Defaults to `private`. You could specify a canned ACL using the `canned_acl` setting. When the S3 repository creates buckets and objects, it adds the canned ACL into the buckets and objects. @@ -324,8 +324,8 @@ include::repository-shared-settings.asciidoc[] Changing this setting on an existing repository only affects the storage class for newly created objects, resulting in a mixed usage of storage classes. Additionally, S3 Lifecycle Policies can be used to manage - the storage class of existing objects. Due to the extra complexity with the - Glacier class lifecycle, it is not currently supported by the plugin. For + the storage class of existing objects. Due to the extra complexity with the + Glacier class lifecycle, it is not currently supported by the plugin. For more information about the different classes, see https://docs.aws.amazon.com/AmazonS3/latest/dev/storage-class-intro.html[AWS Storage Classes Guide] @@ -335,9 +335,9 @@ documented below is considered deprecated, and will be removed in a future version. In addition to the above settings, you may also specify all non-secure client -settings in the repository settings. In this case, the client settings found in +settings in the repository settings. In this case, the client settings found in the repository settings will be merged with those of the named client used by -the repository. Conflicts between client and repository settings are resolved +the repository. Conflicts between client and repository settings are resolved by the repository settings taking precedence over client settings. For example: diff --git a/docs/plugins/repository-shared-settings.asciidoc b/docs/plugins/repository-shared-settings.asciidoc index 13c2716c52d9..2a4753abee45 100644 --- a/docs/plugins/repository-shared-settings.asciidoc +++ b/docs/plugins/repository-shared-settings.asciidoc @@ -9,4 +9,4 @@ `readonly`:: - Makes repository read-only. Defaults to `false`. + Makes repository read-only. Defaults to `false`. diff --git a/docs/reference/aggregations.asciidoc b/docs/reference/aggregations.asciidoc index 57298e337eef..472c028b0c87 100644 --- a/docs/reference/aggregations.asciidoc +++ b/docs/reference/aggregations.asciidoc @@ -28,7 +28,7 @@ other aggregations instead of documents or fields. === Run an aggregation You can run aggregations as part of a <> by specifying the <>'s `aggs` parameter. The -following search runs a +following search runs a <> on `my-field`: diff --git a/docs/reference/aggregations/bucket/autodatehistogram-aggregation.asciidoc b/docs/reference/aggregations/bucket/autodatehistogram-aggregation.asciidoc index 40ea68af27e4..1e0669722d02 100644 --- a/docs/reference/aggregations/bucket/autodatehistogram-aggregation.asciidoc +++ b/docs/reference/aggregations/bucket/autodatehistogram-aggregation.asciidoc @@ -110,7 +110,7 @@ buckets requested. 
==== Time Zone -Date-times are stored in Elasticsearch in UTC. By default, all bucketing and +Date-times are stored in Elasticsearch in UTC. By default, all bucketing and rounding is also done in UTC. The `time_zone` parameter can be used to indicate that bucketing should use a different time zone. diff --git a/docs/reference/aggregations/bucket/composite-aggregation.asciidoc b/docs/reference/aggregations/bucket/composite-aggregation.asciidoc index 16a5250ab026..3d3331dbba2f 100644 --- a/docs/reference/aggregations/bucket/composite-aggregation.asciidoc +++ b/docs/reference/aggregations/bucket/composite-aggregation.asciidoc @@ -291,7 +291,7 @@ GET /_search *Time Zone* -Date-times are stored in Elasticsearch in UTC. By default, all bucketing and +Date-times are stored in Elasticsearch in UTC. By default, all bucketing and rounding is also done in UTC. The `time_zone` parameter can be used to indicate that bucketing should use a different time zone. @@ -853,7 +853,7 @@ GET /_search The composite agg is not currently compatible with pipeline aggregations, nor does it make sense in most cases. E.g. due to the paging nature of composite aggs, a single logical partition (one day for example) might be spread -over multiple pages. Since pipeline aggregations are purely post-processing on the final list of buckets, +over multiple pages. Since pipeline aggregations are purely post-processing on the final list of buckets, running something like a derivative on a composite page could lead to inaccurate results as it is only taking into account a "partial" result on that page. diff --git a/docs/reference/aggregations/bucket/datehistogram-aggregation.asciidoc b/docs/reference/aggregations/bucket/datehistogram-aggregation.asciidoc index 29449d22b0ec..328c230071cb 100644 --- a/docs/reference/aggregations/bucket/datehistogram-aggregation.asciidoc +++ b/docs/reference/aggregations/bucket/datehistogram-aggregation.asciidoc @@ -51,7 +51,7 @@ This behavior has been deprecated in favor of two new, explicit fields: `calenda and `fixed_interval`. By forcing a choice between calendar and intervals up front, the semantics of the interval -are clear to the user immediately and there is no ambiguity. The old `interval` field +are clear to the user immediately and there is no ambiguity. The old `interval` field will be removed in the future. ================================== diff --git a/docs/reference/aggregations/bucket/filters-aggregation.asciidoc b/docs/reference/aggregations/bucket/filters-aggregation.asciidoc index d76344de013f..f7cbf231f7fd 100644 --- a/docs/reference/aggregations/bucket/filters-aggregation.asciidoc +++ b/docs/reference/aggregations/bucket/filters-aggregation.asciidoc @@ -92,7 +92,7 @@ GET logs/_search // TEST[continued] The filtered buckets are returned in the same order as provided in the -request. The response for this example would be: +request. The response for this example would be: [source,console-result] -------------------------------------------------- diff --git a/docs/reference/aggregations/bucket/histogram-aggregation.asciidoc b/docs/reference/aggregations/bucket/histogram-aggregation.asciidoc index 747a8720a115..db397ead3e22 100644 --- a/docs/reference/aggregations/bucket/histogram-aggregation.asciidoc +++ b/docs/reference/aggregations/bucket/histogram-aggregation.asciidoc @@ -19,7 +19,7 @@ bucket_key = Math.floor((value - offset) / interval) * interval + offset -------------------------------------------------- For range values, a document can fall into multiple buckets. 
The first bucket is computed from the lower -bound of the range in the same way as a bucket for a single value is computed. The final bucket is computed in the same +bound of the range in the same way as a bucket for a single value is computed. The final bucket is computed in the same way from the upper bound of the range, and the range is counted in all buckets in between and including those two. The `interval` must be a positive decimal, while the `offset` must be a decimal in `[0, interval)` @@ -183,7 +183,7 @@ POST /sales/_search?size=0 -------------------------------------------------- // TEST[setup:sales] -When aggregating ranges, buckets are based on the values of the returned documents. This means the response may include +When aggregating ranges, buckets are based on the values of the returned documents. This means the response may include buckets outside of a query's range. For example, if your query looks for values greater than 100, and you have a range covering 50 to 150, and an interval of 50, that document will land in 3 buckets - 50, 100, and 150. In general, it's best to think of the query and aggregation steps as independent - the query selects a set of documents, and then the diff --git a/docs/reference/aggregations/bucket/range-field-note.asciidoc b/docs/reference/aggregations/bucket/range-field-note.asciidoc index c3e2c2de885e..bb18bd2afd44 100644 --- a/docs/reference/aggregations/bucket/range-field-note.asciidoc +++ b/docs/reference/aggregations/bucket/range-field-note.asciidoc @@ -6,7 +6,7 @@ Since a range represents multiple values, running a bucket aggregation over a range field can result in the same document landing in multiple buckets. This can lead to surprising behavior, such as the sum of bucket counts being higher -than the number of matched documents. For example, consider the following +than the number of matched documents. For example, consider the following index: [source, console] -------------------------------------------------- @@ -184,7 +184,7 @@ calculated over the ranges of all matching documents. // TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] Depending on the use case, a `CONTAINS` query could limit the documents to only -those that fall entirely in the queried range. In this example, the one -document would not be included and the aggregation would be empty. Filtering +those that fall entirely in the queried range. In this example, the one +document would not be included and the aggregation would be empty. Filtering the buckets after the aggregation is also an option, for use cases where the document should be counted but the out of bounds data can be safely ignored. diff --git a/docs/reference/aggregations/bucket/rare-terms-aggregation.asciidoc b/docs/reference/aggregations/bucket/rare-terms-aggregation.asciidoc index 4ff962a4e741..525672be11d1 100644 --- a/docs/reference/aggregations/bucket/rare-terms-aggregation.asciidoc +++ b/docs/reference/aggregations/bucket/rare-terms-aggregation.asciidoc @@ -5,9 +5,9 @@ ++++ A multi-bucket value source based aggregation which finds "rare" terms -- terms that are at the long-tail -of the distribution and are not frequent. Conceptually, this is like a `terms` aggregation that is -sorted by `_count` ascending. As noted in the <>, -actually ordering a `terms` agg by count ascending has unbounded error. Instead, you should use the `rare_terms` +of the distribution and are not frequent. 
Conceptually, this is like a `terms` aggregation that is +sorted by `_count` ascending. As noted in the <>, +actually ordering a `terms` agg by count ascending has unbounded error. Instead, you should use the `rare_terms` aggregation ////////////////////////// @@ -78,7 +78,7 @@ A `rare_terms` aggregation looks like this in isolation: |Parameter Name |Description |Required |Default Value |`field` |The field we wish to find rare terms in |Required | |`max_doc_count` |The maximum number of documents a term should appear in. |Optional |`1` -|`precision` |The precision of the internal CuckooFilters. Smaller precision leads to +|`precision` |The precision of the internal CuckooFilters. Smaller precision leads to better approximation, but higher memory usage. Cannot be smaller than `0.00001` |Optional |`0.01` |`include` |Terms that should be included in the aggregation|Optional | |`exclude` |Terms that should be excluded from the aggregation|Optional | @@ -124,7 +124,7 @@ Response: // TESTRESPONSE[s/\.\.\.//] In this example, the only bucket that we see is the "swing" bucket, because it is the only term that appears in -one document. If we increase the `max_doc_count` to `2`, we'll see some more buckets: +one document. If we increase the `max_doc_count` to `2`, we'll see some more buckets: [source,console,id=rare-terms-aggregation-max-doc-count-example] -------------------------------------------------- @@ -169,27 +169,27 @@ This now shows the "jazz" term which has a `doc_count` of 2": [[search-aggregations-bucket-rare-terms-aggregation-max-doc-count]] ==== Maximum document count -The `max_doc_count` parameter is used to control the upper bound of document counts that a term can have. There -is not a size limitation on the `rare_terms` agg like `terms` agg has. This means that terms -which match the `max_doc_count` criteria will be returned. The aggregation functions in this manner to avoid +The `max_doc_count` parameter is used to control the upper bound of document counts that a term can have. There +is not a size limitation on the `rare_terms` agg like `terms` agg has. This means that terms +which match the `max_doc_count` criteria will be returned. The aggregation functions in this manner to avoid the order-by-ascending issues that afflict the `terms` aggregation. -This does, however, mean that a large number of results can be returned if chosen incorrectly. +This does, however, mean that a large number of results can be returned if chosen incorrectly. To limit the danger of this setting, the maximum `max_doc_count` is 100. [[search-aggregations-bucket-rare-terms-aggregation-max-buckets]] ==== Max Bucket Limit The Rare Terms aggregation is more liable to trip the `search.max_buckets` soft limit than other aggregations due -to how it works. The `max_bucket` soft-limit is evaluated on a per-shard basis while the aggregation is collecting -results. It is possible for a term to be "rare" on a shard but become "not rare" once all the shard results are -merged together. This means that individual shards tend to collect more buckets than are truly rare, because -they only have their own local view. This list is ultimately pruned to the correct, smaller list of rare +to how it works. The `max_bucket` soft-limit is evaluated on a per-shard basis while the aggregation is collecting +results. It is possible for a term to be "rare" on a shard but become "not rare" once all the shard results are +merged together. 
This means that individual shards tend to collect more buckets than are truly rare, because +they only have their own local view. This list is ultimately pruned to the correct, smaller list of rare terms on the coordinating node... but a shard may have already tripped the `max_buckets` soft limit and aborted the request. When aggregating on fields that have potentially many "rare" terms, you may need to increase the `max_buckets` soft -limit. Alternatively, you might need to find a way to filter the results to return fewer rare values (smaller time +limit. Alternatively, you might need to find a way to filter the results to return fewer rare values (smaller time span, filter by category, etc), or re-evaluate your definition of "rare" (e.g. if something appears 100,000 times, is it truly "rare"?) @@ -197,8 +197,8 @@ appears 100,000 times, is it truly "rare"?) ==== Document counts are approximate The naive way to determine the "rare" terms in a dataset is to place all the values in a map, incrementing counts -as each document is visited, then return the bottom `n` rows. This does not scale beyond even modestly sized data -sets. A sharded approach where only the "top n" values are retained from each shard (ala the `terms` aggregation) +as each document is visited, then return the bottom `n` rows. This does not scale beyond even modestly sized data +sets. A sharded approach where only the "top n" values are retained from each shard (ala the `terms` aggregation) fails because the long-tail nature of the problem means it is impossible to find the "top n" bottom values without simply collecting all the values from all shards. @@ -208,16 +208,16 @@ Instead, the Rare Terms aggregation uses a different approximate algorithm: 2. Each addition occurrence of the term increments a counter in the map 3. If the counter > the `max_doc_count` threshold, the term is removed from the map and placed in a https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf[CuckooFilter] -4. The CuckooFilter is consulted on each term. If the value is inside the filter, it is known to be above the +4. The CuckooFilter is consulted on each term. If the value is inside the filter, it is known to be above the threshold already and skipped. -After execution, the map of values is the map of "rare" terms under the `max_doc_count` threshold. This map and CuckooFilter -are then merged with all other shards. If there are terms that are greater than the threshold (or appear in -a different shard's CuckooFilter) the term is removed from the merged list. The final map of values is returned +After execution, the map of values is the map of "rare" terms under the `max_doc_count` threshold. This map and CuckooFilter +are then merged with all other shards. If there are terms that are greater than the threshold (or appear in +a different shard's CuckooFilter) the term is removed from the merged list. The final map of values is returned to the user as the "rare" terms. CuckooFilters have the possibility of returning false positives (they can say a value exists in their collection when -it actually does not). Since the CuckooFilter is being used to see if a term is over threshold, this means a false positive +it actually does not). Since the CuckooFilter is being used to see if a term is over threshold, this means a false positive from the CuckooFilter will mistakenly say a value is common when it is not (and thus exclude it from it final list of buckets). 
Practically, this means the aggregations exhibits false-negative behavior since the filter is being used "in reverse" of how people generally think of approximate set membership sketches. @@ -230,14 +230,14 @@ Proceedings of the 10th ACM International on Conference on emerging Networking E ==== Precision Although the internal CuckooFilter is approximate in nature, the false-negative rate can be controlled with a -`precision` parameter. This allows the user to trade more runtime memory for more accurate results. +`precision` parameter. This allows the user to trade more runtime memory for more accurate results. The default precision is `0.001`, and the smallest (e.g. most accurate and largest memory overhead) is `0.00001`. Below are some charts which demonstrate how the accuracy of the aggregation is affected by precision and number of distinct terms. The X-axis shows the number of distinct values the aggregation has seen, and the Y-axis shows the percent error. -Each line series represents one "rarity" condition (ranging from one rare item to 100,000 rare items). For example, +Each line series represents one "rarity" condition (ranging from one rare item to 100,000 rare items). For example, the orange "10" line means ten of the values were "rare" (`doc_count == 1`), out of 1-20m distinct values (where the rest of the values had `doc_count > 1`) @@ -258,14 +258,14 @@ degrades in a controlled, linear fashion as the number of distinct values increa The default precision of `0.001` has a memory profile of `1.748⁻⁶ * n` bytes, where `n` is the number of distinct values the aggregation has seen (it can also be roughly eyeballed, e.g. 20 million unique values is about -30mb of memory). The memory usage is linear to the number of distinct values regardless of which precision is chosen, +30mb of memory). The memory usage is linear to the number of distinct values regardless of which precision is chosen, the precision only affects the slope of the memory profile as seen in this chart: image:images/rare_terms/memory.png[] For comparison, an equivalent terms aggregation at 20 million buckets would be roughly `20m * 69b == ~1.38gb` (with 69 bytes being a very optimistic estimate of an empty bucket cost, far lower than what -the circuit breaker accounts for). So although the `rare_terms` agg is relatively heavy, it is still orders of +the circuit breaker accounts for). So although the `rare_terms` agg is relatively heavy, it is still orders of magnitude smaller than the equivalent terms aggregation ==== Filtering Values @@ -347,9 +347,9 @@ GET /_search ==== Nested, RareTerms, and scoring sub-aggregations The RareTerms aggregation has to operate in `breadth_first` mode, since it needs to prune terms as doc count thresholds -are breached. This requirement means the RareTerms aggregation is incompatible with certain combinations of aggregations +are breached. This requirement means the RareTerms aggregation is incompatible with certain combinations of aggregations that require `depth_first`. In particular, scoring sub-aggregations that are inside a `nested` force the entire aggregation tree to run -in `depth_first` mode. This will throw an exception since RareTerms is unable to process `depth_first`. +in `depth_first` mode. This will throw an exception since RareTerms is unable to process `depth_first`. 
As a concrete example, if `rare_terms` aggregation is the child of a `nested` aggregation, and one of the child aggregations of `rare_terms` needs document scores (like a `top_hits` aggregation), this will throw an exception. \ No newline at end of file diff --git a/docs/reference/aggregations/bucket/significantterms-aggregation.asciidoc b/docs/reference/aggregations/bucket/significantterms-aggregation.asciidoc index da48b8ad2109..6a5e01d5f28f 100644 --- a/docs/reference/aggregations/bucket/significantterms-aggregation.asciidoc +++ b/docs/reference/aggregations/bucket/significantterms-aggregation.asciidoc @@ -305,7 +305,7 @@ If there is the equivalent of a `match_all` query or no query criteria providing top-most aggregation - in this scenario the _foreground_ set is exactly the same as the _background_ set and so there is no difference in document frequencies to observe and from which to make sensible suggestions. -Another consideration is that the significant_terms aggregation produces many candidate results at shard level +Another consideration is that the significant_terms aggregation produces many candidate results at shard level that are only later pruned on the reducing node once all statistics from all shards are merged. As a result, it can be inefficient and costly in terms of RAM to embed large child aggregations under a significant_terms aggregation that later discards many candidate terms. It is advisable in these cases to perform two searches - the first to provide a rationalized list of @@ -374,7 +374,7 @@ Chi square behaves like mutual information and can be configured with the same p ===== Google normalized distance -Google normalized distance as described in "The Google Similarity Distance", Cilibrasi and Vitanyi, 2007 (https://arxiv.org/pdf/cs/0412098v3.pdf) can be used as significance score by adding the parameter +Google normalized distance as described in "The Google Similarity Distance", Cilibrasi and Vitanyi, 2007 (https://arxiv.org/pdf/cs/0412098v3.pdf) can be used as significance score by adding the parameter [source,js] -------------------------------------------------- @@ -448,13 +448,13 @@ size buckets was not returned). To ensure better accuracy a multiple of the final `size` is used as the number of terms to request from each shard (`2 * (size * 1.5 + 10)`). To take manual control of this setting the `shard_size` parameter -can be used to control the volumes of candidate terms produced by each shard. +can be used to control the volumes of candidate terms produced by each shard. Low-frequency terms can turn out to be the most interesting ones once all results are combined so the significant_terms aggregation can produce higher-quality results when the `shard_size` parameter is set to values significantly higher than the `size` setting. This ensures that a bigger volume of promising candidate terms are given a consolidated review by the reducing node before the final selection. Obviously large candidate term lists -will cause extra network traffic and RAM usage so this is quality/cost trade off that needs to be balanced. If `shard_size` is set to -1 (the default) then `shard_size` will be automatically estimated based on the number of shards and the `size` parameter. +will cause extra network traffic and RAM usage so this is quality/cost trade off that needs to be balanced. If `shard_size` is set to -1 (the default) then `shard_size` will be automatically estimated based on the number of shards and the `size` parameter. 
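For instance, a sketch of raising `shard_size` explicitly on a `significant_terms` request could look like the following; the query and field names are made up for illustration:

[source,console]
--------------------------------------------------
GET /_search
{
  "query": { "match": { "content": "elasticsearch" } },
  "aggs": {
    "significant_tags": {
      "significant_terms": {
        "field": "tags", <1>
        "size": 10,
        "shard_size": 200
      }
    }
  }
}
--------------------------------------------------
<1> `content` and `tags` are hypothetical fields; `shard_size: 200` asks each shard for a larger pool of candidate terms than the final `size` of 10.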
NOTE: `shard_size` cannot be smaller than `size` (as it doesn't make much sense). When it is, Elasticsearch will diff --git a/docs/reference/aggregations/bucket/significanttext-aggregation.asciidoc b/docs/reference/aggregations/bucket/significanttext-aggregation.asciidoc index 3a7443c6bdc3..22f582f90594 100644 --- a/docs/reference/aggregations/bucket/significanttext-aggregation.asciidoc +++ b/docs/reference/aggregations/bucket/significanttext-aggregation.asciidoc @@ -367,13 +367,13 @@ size buckets was not returned). To ensure better accuracy a multiple of the final `size` is used as the number of terms to request from each shard (`2 * (size * 1.5 + 10)`). To take manual control of this setting the `shard_size` parameter -can be used to control the volumes of candidate terms produced by each shard. +can be used to control the volumes of candidate terms produced by each shard. Low-frequency terms can turn out to be the most interesting ones once all results are combined so the significant_terms aggregation can produce higher-quality results when the `shard_size` parameter is set to values significantly higher than the `size` setting. This ensures that a bigger volume of promising candidate terms are given a consolidated review by the reducing node before the final selection. Obviously large candidate term lists -will cause extra network traffic and RAM usage so this is quality/cost trade off that needs to be balanced. If `shard_size` is set to -1 (the default) then `shard_size` will be automatically estimated based on the number of shards and the `size` parameter. +will cause extra network traffic and RAM usage so this is quality/cost trade off that needs to be balanced. If `shard_size` is set to -1 (the default) then `shard_size` will be automatically estimated based on the number of shards and the `size` parameter. NOTE: `shard_size` cannot be smaller than `size` (as it doesn't make much sense). When it is, elasticsearch will diff --git a/docs/reference/aggregations/bucket/terms-aggregation.asciidoc b/docs/reference/aggregations/bucket/terms-aggregation.asciidoc index 76906458f73f..ee0ddaa6b17d 100644 --- a/docs/reference/aggregations/bucket/terms-aggregation.asciidoc +++ b/docs/reference/aggregations/bucket/terms-aggregation.asciidoc @@ -136,7 +136,7 @@ The higher the requested `size` is, the more accurate the results will be, but a compute the final results (both due to bigger priority queues that are managed on a shard level and due to bigger data transfers between the nodes and the client). -The `shard_size` parameter can be used to minimize the extra work that comes with bigger requested `size`. When defined, +The `shard_size` parameter can be used to minimize the extra work that comes with bigger requested `size`. When defined, it will determine how many terms the coordinating node will request from each shard. Once all the shards responded, the coordinating node will then reduce them to a final result which will be based on the `size` parameter - this way, one can increase the accuracy of the returned terms and avoid the overhead of streaming a big list of buckets back to @@ -191,7 +191,7 @@ determined and is given a value of -1 to indicate this. ==== Order The order of the buckets can be customized by setting the `order` parameter. By default, the buckets are ordered by -their `doc_count` descending. It is possible to change this behaviour as documented below: +their `doc_count` descending. 
It is possible to change this behaviour as documented below: WARNING: Sorting by ascending `_count` or by sub aggregation is discouraged as it increases the <> on document counts. @@ -283,7 +283,7 @@ GET /_search ======================================= <> are run during the -reduce phase after all other aggregations have already completed. For this +reduce phase after all other aggregations have already completed. For this reason, they cannot be used for ordering. ======================================= @@ -606,10 +606,10 @@ WARNING: Partitions cannot be used together with an `exclude` parameter. ==== Multi-field terms aggregation The `terms` aggregation does not support collecting terms from multiple fields -in the same document. The reason is that the `terms` agg doesn't collect the +in the same document. The reason is that the `terms` agg doesn't collect the string term values themselves, but rather uses <> -to produce a list of all of the unique values in the field. Global ordinals +to produce a list of all of the unique values in the field. Global ordinals results in an important performance boost which would not be possible across multiple fields. @@ -618,7 +618,7 @@ multiple fields: <>:: -Use a script to retrieve terms from multiple fields. This disables the global +Use a script to retrieve terms from multiple fields. This disables the global ordinals optimization and will be slower than collecting terms from a single field, but it gives you the flexibility to implement this option at search time. @@ -627,7 +627,7 @@ time. If you know ahead of time that you want to collect the terms from two or more fields, then use `copy_to` in your mapping to create a new dedicated field at -index time which contains the values from both fields. You can aggregate on +index time which contains the values from both fields. You can aggregate on this single field, which will benefit from the global ordinals optimization. <>:: diff --git a/docs/reference/aggregations/metrics/boxplot-aggregation.asciidoc b/docs/reference/aggregations/metrics/boxplot-aggregation.asciidoc index a4c2c4273726..542a8de728da 100644 --- a/docs/reference/aggregations/metrics/boxplot-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/boxplot-aggregation.asciidoc @@ -68,15 +68,15 @@ The response will look like this: -------------------------------------------------- // TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] -In this case, the lower and upper whisker values are equal to the min and max. In general, these values are the 1.5 * -IQR range, which is to say the nearest values to `q1 - (1.5 * IQR)` and `q3 + (1.5 * IQR)`. Since this is an approximation, the given values -may not actually be observed values from the data, but should be within a reasonable error bound of them. While the Boxplot aggregation +In this case, the lower and upper whisker values are equal to the min and max. In general, these values are the 1.5 * +IQR range, which is to say the nearest values to `q1 - (1.5 * IQR)` and `q3 + (1.5 * IQR)`. Since this is an approximation, the given values +may not actually be observed values from the data, but should be within a reasonable error bound of them. While the Boxplot aggregation doesn't directly return outlier points, you can check if `lower > min` or `upper < max` to see if outliers exist on either side, and then query for them directly. ==== Script -The boxplot metric supports scripting. 
For example, if our load times +The boxplot metric supports scripting. For example, if our load times are in milliseconds but we want values calculated in seconds, we could use a script to convert them on-the-fly: diff --git a/docs/reference/aggregations/metrics/cardinality-aggregation.asciidoc b/docs/reference/aggregations/metrics/cardinality-aggregation.asciidoc index 409af6cc7dfc..b146a134cb64 100644 --- a/docs/reference/aggregations/metrics/cardinality-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/cardinality-aggregation.asciidoc @@ -152,8 +152,8 @@ public static void main(String[] args) { image:images/cardinality_error.png[] For all 3 thresholds, counts have been accurate up to the configured threshold. -Although not guaranteed, this is likely to be the case. Accuracy in practice depends -on the dataset in question. In general, most datasets show consistently good +Although not guaranteed, this is likely to be the case. Accuracy in practice depends +on the dataset in question. In general, most datasets show consistently good accuracy. Also note that even with a threshold as low as 100, the error remains very low (1-6% as seen in the above graph) even when counting millions of items. diff --git a/docs/reference/aggregations/metrics/extendedstats-aggregation.asciidoc b/docs/reference/aggregations/metrics/extendedstats-aggregation.asciidoc index c6283c3867f7..254b2a225ce9 100644 --- a/docs/reference/aggregations/metrics/extendedstats-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/extendedstats-aggregation.asciidoc @@ -63,7 +63,7 @@ The name of the aggregation (`grades_stats` above) also serves as the key by whi ==== Standard Deviation Bounds By default, the `extended_stats` metric will return an object called `std_deviation_bounds`, which provides an interval of plus/minus two standard -deviations from the mean. This can be a useful way to visualize variance of your data. If you want a different boundary, for example +deviations from the mean. This can be a useful way to visualize variance of your data. If you want a different boundary, for example three standard deviations, you can set `sigma` in the request: [source,console] @@ -84,7 +84,7 @@ GET /exams/_search // TEST[setup:exams] <1> `sigma` controls how many standard deviations +/- from the mean should be displayed -`sigma` can be any non-negative double, meaning you can request non-integer values such as `1.5`. A value of `0` is valid, but will simply +`sigma` can be any non-negative double, meaning you can request non-integer values such as `1.5`. A value of `0` is valid, but will simply return the average for both `upper` and `lower` bounds. The `upper` and `lower` bounds are calculated as population metrics so they are always the same as `upper_population` and @@ -93,8 +93,8 @@ The `upper` and `lower` bounds are calculated as population metrics so they are .Standard Deviation and Bounds require normality [NOTE] ===== -The standard deviation and its bounds are displayed by default, but they are not always applicable to all data-sets. Your data must -be normally distributed for the metrics to make sense. The statistics behind standard deviations assumes normally distributed data, so +The standard deviation and its bounds are displayed by default, but they are not always applicable to all data-sets. Your data must +be normally distributed for the metrics to make sense. 
The statistics behind standard deviations assumes normally distributed data, so if your data is skewed heavily left or right, the value returned will be misleading. ===== diff --git a/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc b/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc index 460d7c05906c..4ba5263aefac 100644 --- a/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/percentile-aggregation.asciidoc @@ -10,19 +10,19 @@ generated by a provided script or extracted from specific numeric or <> in the documents. Percentiles show the point at which a certain percentage of observed values -occur. For example, the 95th percentile is the value which is greater than 95% +occur. For example, the 95th percentile is the value which is greater than 95% of the observed values. -Percentiles are often used to find outliers. In normal distributions, the +Percentiles are often used to find outliers. In normal distributions, the 0.13th and 99.87th percentiles represents three standard deviations from the -mean. Any data which falls outside three standard deviations is often considered +mean. Any data which falls outside three standard deviations is often considered an anomaly. When a range of percentiles are retrieved, they can be used to estimate the data distribution and determine if the data is skewed, bimodal, etc. -Assume your data consists of website load times. The average and median -load times are not overly useful to an administrator. The max may be interesting, +Assume your data consists of website load times. The average and median +load times are not overly useful to an administrator. The max may be interesting, but it can be easily skewed by a single slow response. Let's look at a range of percentiles representing load time: @@ -45,7 +45,7 @@ GET latency/_search <1> The field `load_time` must be a numeric field By default, the `percentile` metric will generate a range of -percentiles: `[ 1, 5, 25, 50, 75, 95, 99 ]`. The response will look like this: +percentiles: `[ 1, 5, 25, 50, 75, 95, 99 ]`. The response will look like this: [source,console-result] -------------------------------------------------- @@ -70,7 +70,7 @@ percentiles: `[ 1, 5, 25, 50, 75, 95, 99 ]`. The response will look like this: // TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] As you can see, the aggregation will return a calculated value for each percentile -in the default range. If we assume response times are in milliseconds, it is +in the default range. If we assume response times are in milliseconds, it is immediately obvious that the webpage normally loads in 10-725ms, but occasionally spikes to 945-985ms. @@ -164,7 +164,7 @@ Response: ==== Script -The percentile metric supports scripting. For example, if our load times +The percentile metric supports scripting. For example, if our load times are in milliseconds but we want percentiles calculated in seconds, we could use a script to convert them on-the-fly: @@ -220,12 +220,12 @@ GET latency/_search [[search-aggregations-metrics-percentile-aggregation-approximation]] ==== Percentiles are (usually) approximate -There are many different algorithms to calculate percentiles. The naive -implementation simply stores all the values in a sorted array. To find the 50th +There are many different algorithms to calculate percentiles. The naive +implementation simply stores all the values in a sorted array. 
To find the 50th percentile, you simply find the value that is at `my_array[count(my_array) * 0.5]`. Clearly, the naive implementation does not scale -- the sorted array grows -linearly with the number of values in your dataset. To calculate percentiles +linearly with the number of values in your dataset. To calculate percentiles across potentially billions of values in an Elasticsearch cluster, _approximate_ percentiles are calculated. @@ -235,12 +235,12 @@ https://github.com/tdunning/t-digest/blob/master/docs/t-digest-paper/histo.pdf[C When using this metric, there are a few guidelines to keep in mind: -- Accuracy is proportional to `q(1-q)`. This means that extreme percentiles (e.g. 99%) +- Accuracy is proportional to `q(1-q)`. This means that extreme percentiles (e.g. 99%) are more accurate than less extreme percentiles, such as the median - For small sets of values, percentiles are highly accurate (and potentially 100% accurate if the data is small enough). - As the quantity of values in a bucket grows, the algorithm begins to approximate -the percentiles. It is effectively trading accuracy for memory savings. The +the percentiles. It is effectively trading accuracy for memory savings. The exact level of inaccuracy is difficult to generalize, since it depends on your data distribution and volume of data being aggregated @@ -291,18 +291,18 @@ GET latency/_search // tag::t-digest[] The TDigest algorithm uses a number of "nodes" to approximate percentiles -- the more nodes available, the higher the accuracy (and large memory footprint) proportional -to the volume of data. The `compression` parameter limits the maximum number of +to the volume of data. The `compression` parameter limits the maximum number of nodes to `20 * compression`. Therefore, by increasing the compression value, you can increase the accuracy of -your percentiles at the cost of more memory. Larger compression values also +your percentiles at the cost of more memory. Larger compression values also make the algorithm slower since the underlying tree data structure grows in size, -resulting in more expensive operations. The default compression value is +resulting in more expensive operations. The default compression value is `100`. A "node" uses roughly 32 bytes of memory, so under worst-case scenarios (large amount of data which arrives sorted and in-order) the default settings will produce a -TDigest roughly 64KB in size. In practice data tends to be more random and +TDigest roughly 64KB in size. In practice data tends to be more random and the TDigest will use less memory. // end::t-digest[] diff --git a/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc b/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc index 6a76226fde3b..9395b8097dea 100644 --- a/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/percentile-rank-aggregation.asciidoc @@ -17,10 +17,10 @@ regarding approximation and memory use of the percentile ranks aggregation ================================================== Percentile rank show the percentage of observed values which are below certain -value. For example, if a value is greater than or equal to 95% of the observed values +value. For example, if a value is greater than or equal to 95% of the observed values it is said to be at the 95th percentile rank. -Assume your data consists of website load times. You may have a service agreement that +Assume your data consists of website load times. 
You may have a service agreement that 95% of page loads complete within 500ms and 99% of page loads complete within 600ms. Let's look at a range of percentiles representing load time: @@ -120,7 +120,7 @@ Response: ==== Script -The percentile rank metric supports scripting. For example, if our load times +The percentile rank metric supports scripting. For example, if our load times are in milliseconds but we want to specify values in seconds, we could use a script to convert them on-the-fly: diff --git a/docs/reference/aggregations/metrics/t-test-aggregation.asciidoc b/docs/reference/aggregations/metrics/t-test-aggregation.asciidoc index 969ce2510418..1156abda3d82 100644 --- a/docs/reference/aggregations/metrics/t-test-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/t-test-aggregation.asciidoc @@ -142,7 +142,7 @@ indices, the term filter on the <> field can be us ==== Script -The `t_test` metric supports scripting. For example, if we need to adjust out load times for the before values, we could use +The `t_test` metric supports scripting. For example, if we need to adjust out load times for the before values, we could use a script to recalculate them on-the-fly: [source,console] diff --git a/docs/reference/aggregations/metrics/weighted-avg-aggregation.asciidoc b/docs/reference/aggregations/metrics/weighted-avg-aggregation.asciidoc index c9955571b5eb..698a76d8d8de 100644 --- a/docs/reference/aggregations/metrics/weighted-avg-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/weighted-avg-aggregation.asciidoc @@ -7,8 +7,8 @@ A `single-value` metrics aggregation that computes the weighted average of numeric values that are extracted from the aggregated documents. These values can be extracted either from specific numeric fields in the documents, or provided by a script. -When calculating a regular average, each datapoint has an equal "weight" ... it contributes equally to the final value. Weighted averages, -on the other hand, weight each datapoint differently. The amount that each datapoint contributes to the final value is extracted from the +When calculating a regular average, each datapoint has an equal "weight" ... it contributes equally to the final value. Weighted averages, +on the other hand, weight each datapoint differently. The amount that each datapoint contributes to the final value is extracted from the document, or provided by a script. As a formula, a weighted average is the `∑(value * weight) / ∑(weight)` @@ -35,7 +35,7 @@ The `value` and `weight` objects have per-field specific configuration: |Parameter Name |Description |Required |Default Value |`field` | The field that values should be extracted from |Required | |`missing` | A value to use if the field is missing entirely |Optional | -|`script` | A script which provides the values for the document. This is mutually exclusive with `field` |Optional +|`script` | A script which provides the values for the document. This is mutually exclusive with `field` |Optional |=== [[weight-params]] @@ -45,7 +45,7 @@ The `value` and `weight` objects have per-field specific configuration: |Parameter Name |Description |Required |Default Value |`field` | The field that weights should be extracted from |Required | |`missing` | A weight to use if the field is missing entirely |Optional | -|`script` | A script which provides the weights for the document. This is mutually exclusive with `field` |Optional +|`script` | A script which provides the weights for the document. 
This is mutually exclusive with `field` |Optional |=== @@ -91,7 +91,7 @@ Which yields a response like: // TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] -While multiple values-per-field are allowed, only one weight is allowed. If the aggregation encounters +While multiple values-per-field are allowed, only one weight is allowed. If the aggregation encounters a document that has more than one weight (e.g. the weight field is a multi-valued field) it will throw an exception. If you have this situation, you will need to specify a `script` for the weight field, and use the script to combine the multiple values into a single value to be used. @@ -147,7 +147,7 @@ The aggregation returns `2.0` as the result, which matches what we would expect ==== Script -Both the value and the weight can be derived from a script, instead of a field. As a simple example, the following +Both the value and the weight can be derived from a script, instead of a field. As a simple example, the following will add one to the grade and weight in the document using a script: [source,console] diff --git a/docs/reference/aggregations/pipeline.asciidoc b/docs/reference/aggregations/pipeline.asciidoc index c89cfeb7043d..0f56b48830b5 100644 --- a/docs/reference/aggregations/pipeline.asciidoc +++ b/docs/reference/aggregations/pipeline.asciidoc @@ -19,7 +19,7 @@ parameter to indicate the paths to the required metrics. The syntax for defining <> section below. Pipeline aggregations cannot have sub-aggregations but depending on the type it can reference another pipeline in the `buckets_path` -allowing pipeline aggregations to be chained. For example, you can chain together two derivatives to calculate the second derivative +allowing pipeline aggregations to be chained. For example, you can chain together two derivatives to calculate the second derivative (i.e. a derivative of a derivative). NOTE: Because pipeline aggregations only add to the output, when chaining pipeline aggregations the output of each pipeline aggregation @@ -29,7 +29,7 @@ will be included in the final output. [discrete] === `buckets_path` Syntax -Most pipeline aggregations require another aggregation as their input. The input aggregation is defined via the `buckets_path` +Most pipeline aggregations require another aggregation as their input. The input aggregation is defined via the `buckets_path` parameter, which follows a specific format: // https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form @@ -77,7 +77,7 @@ POST /_search <2> The `buckets_path` refers to the metric via a relative path `"the_sum"` `buckets_path` is also used for Sibling pipeline aggregations, where the aggregation is "next" to a series of buckets -instead of embedded "inside" them. For example, the `max_bucket` aggregation uses the `buckets_path` to specify +instead of embedded "inside" them. For example, the `max_bucket` aggregation uses the `buckets_path` to specify a metric embedded inside a sibling aggregation: [source,console,id=buckets-path-sibling-example] @@ -112,7 +112,7 @@ POST /_search `sales_per_month` date histogram. If a Sibling pipeline agg references a multi-bucket aggregation, such as a `terms` agg, it also has the option to -select specific keys from the multi-bucket. For example, a `bucket_script` could select two specific buckets (via +select specific keys from the multi-bucket. 
For example, a `bucket_script` could select two specific buckets (via their bucket keys) to perform the calculation: [source,console,id=buckets-path-specific-bucket-example] @@ -160,8 +160,8 @@ instead of fetching all the buckets from `sale_type` aggregation [discrete] === Special Paths -Instead of pathing to a metric, `buckets_path` can use a special `"_count"` path. This instructs -the pipeline aggregation to use the document count as its input. For example, a derivative can be calculated +Instead of pathing to a metric, `buckets_path` can use a special `"_count"` path. This instructs +the pipeline aggregation to use the document count as its input. For example, a derivative can be calculated on the document count of each bucket, instead of a specific metric: [source,console,id=buckets-path-count-example] @@ -246,7 +246,7 @@ may be referred to as: [discrete] === Dealing with gaps in the data -Data in the real world is often noisy and sometimes contains *gaps* -- places where data simply doesn't exist. This can +Data in the real world is often noisy and sometimes contains *gaps* -- places where data simply doesn't exist. This can occur for a variety of reasons, the most common being: * Documents falling into a bucket do not contain a required field @@ -256,11 +256,11 @@ Some pipeline aggregations have specific requirements that must be met (e.g. a d first value because there is no previous value, HoltWinters moving average need "warmup" data to begin calculating, etc) Gap policies are a mechanism to inform the pipeline aggregation about the desired behavior when "gappy" or missing -data is encountered. All pipeline aggregations accept the `gap_policy` parameter. There are currently two gap policies +data is encountered. All pipeline aggregations accept the `gap_policy` parameter. There are currently two gap policies to choose from: _skip_:: - This option treats missing data as if the bucket does not exist. It will skip the bucket and continue + This option treats missing data as if the bucket does not exist. It will skip the bucket and continue calculating using the next available value. _insert_zeros_:: diff --git a/docs/reference/aggregations/pipeline/cumulative-cardinality-aggregation.asciidoc b/docs/reference/aggregations/pipeline/cumulative-cardinality-aggregation.asciidoc index 70f907b7cda0..32439c9def72 100644 --- a/docs/reference/aggregations/pipeline/cumulative-cardinality-aggregation.asciidoc +++ b/docs/reference/aggregations/pipeline/cumulative-cardinality-aggregation.asciidoc @@ -11,8 +11,8 @@ aggregation. The specified metric must be a cardinality aggregation and the encl must have `min_doc_count` set to `0` (default for `histogram` aggregations). The `cumulative_cardinality` agg is useful for finding "total new items", like the number of new visitors to your -website each day. A regular cardinality aggregation will tell you how many unique visitors came each day, but doesn't -differentiate between "new" or "repeat" visitors. The Cumulative Cardinality aggregation can be used to determine +website each day. A regular cardinality aggregation will tell you how many unique visitors came each day, but doesn't +differentiate between "new" or "repeat" visitors. The Cumulative Cardinality aggregation can be used to determine how many of each day's unique visitors are "new". 
==== Syntax @@ -128,14 +128,14 @@ And the following may be the response: Note how the second day, `2019-01-02`, has two distinct users but the `total_new_users` metric generated by the -cumulative pipeline agg only increments to three. This means that only one of the two users that day were -new, the other had already been seen in the previous day. This happens again on the third day, where only +cumulative pipeline agg only increments to three. This means that only one of the two users that day were +new, the other had already been seen in the previous day. This happens again on the third day, where only one of three users is completely new. ==== Incremental cumulative cardinality The `cumulative_cardinality` agg will show you the total, distinct count since the beginning of the time period -being queried. Sometimes, however, it is useful to see the "incremental" count. Meaning, how many new users +being queried. Sometimes, however, it is useful to see the "incremental" count. Meaning, how many new users are added each day, rather than the total cumulative count. This can be accomplished by adding a `derivative` aggregation to our query: diff --git a/docs/reference/aggregations/pipeline/derivative-aggregation.asciidoc b/docs/reference/aggregations/pipeline/derivative-aggregation.asciidoc index 92ea053f01af..f0aa790e4930 100644 --- a/docs/reference/aggregations/pipeline/derivative-aggregation.asciidoc +++ b/docs/reference/aggregations/pipeline/derivative-aggregation.asciidoc @@ -226,7 +226,7 @@ second derivative ==== Units The derivative aggregation allows the units of the derivative values to be specified. This returns an extra field in the response -`normalized_value` which reports the derivative value in the desired x-axis units. In the below example we calculate the derivative +`normalized_value` which reports the derivative value in the desired x-axis units. In the below example we calculate the derivative of the total sales per month but ask for the derivative of the sales as in the units of sales per day: [source,console] diff --git a/docs/reference/aggregations/pipeline/movfn-aggregation.asciidoc b/docs/reference/aggregations/pipeline/movfn-aggregation.asciidoc index 51afcf8c94eb..b5f131553191 100644 --- a/docs/reference/aggregations/pipeline/movfn-aggregation.asciidoc +++ b/docs/reference/aggregations/pipeline/movfn-aggregation.asciidoc @@ -5,7 +5,7 @@ ++++ Given an ordered series of data, the Moving Function aggregation will slide a window across the data and allow the user to specify a custom -script that is executed on each window of data. For convenience, a number of common functions are predefined such as min/max, moving averages, +script that is executed on each window of data. For convenience, a number of common functions are predefined such as min/max, moving averages, etc. ==== Syntax @@ -36,7 +36,7 @@ A `moving_fn` aggregation looks like this in isolation: |`shift` |<> of window position. |Optional | 0 |=== -`moving_fn` aggregations must be embedded inside of a `histogram` or `date_histogram` aggregation. They can be +`moving_fn` aggregations must be embedded inside of a `histogram` or `date_histogram` aggregation. They can be embedded like any other metric aggregation: [source,console] @@ -69,11 +69,11 @@ POST /_search // TEST[setup:sales] <1> A `date_histogram` named "my_date_histo" is constructed on the "timestamp" field, with one-day intervals -<2> A `sum` metric is used to calculate the sum of a field. 
This could be any numeric metric (sum, min, max, etc) +<2> A `sum` metric is used to calculate the sum of a field. This could be any numeric metric (sum, min, max, etc) <3> Finally, we specify a `moving_fn` aggregation which uses "the_sum" metric as its input. -Moving averages are built by first specifying a `histogram` or `date_histogram` over a field. You can then optionally -add numeric metrics, such as a `sum`, inside of that histogram. Finally, the `moving_fn` is embedded inside the histogram. +Moving averages are built by first specifying a `histogram` or `date_histogram` over a field. You can then optionally +add numeric metrics, such as a `sum`, inside of that histogram. Finally, the `moving_fn` is embedded inside the histogram. The `buckets_path` parameter is then used to "point" at one of the sibling metrics inside of the histogram (see <> for a description of the syntax for `buckets_path`. @@ -134,9 +134,9 @@ An example response from the above aggregation may look like: ==== Custom user scripting -The Moving Function aggregation allows the user to specify any arbitrary script to define custom logic. The script is invoked each time a -new window of data is collected. These values are provided to the script in the `values` variable. The script should then perform some -kind of calculation and emit a single `double` as the result. Emitting `null` is not permitted, although `NaN` and +/- `Inf` are allowed. +The Moving Function aggregation allows the user to specify any arbitrary script to define custom logic. The script is invoked each time a +new window of data is collected. These values are provided to the script in the `values` variable. The script should then perform some +kind of calculation and emit a single `double` as the result. Emitting `null` is not permitted, although `NaN` and +/- `Inf` are allowed. For example, this script will simply return the first value from the window, or `NaN` if no values are available: @@ -195,7 +195,7 @@ For convenience, a number of functions have been prebuilt and are available insi - `holt()` - `holtWinters()` -The functions are available from the `MovingFunctions` namespace. E.g. `MovingFunctions.max()` +The functions are available from the `MovingFunctions` namespace. E.g. `MovingFunctions.max()` ===== max Function @@ -284,7 +284,7 @@ POST /_search ===== sum Function This function accepts a collection of doubles and returns the sum of the values in that window. `null` and `NaN` values are ignored; -the sum is only calculated over the real values. If the window is empty, or all values are `null`/`NaN`, `0.0` is returned as the result. +the sum is only calculated over the real values. If the window is empty, or all values are `null`/`NaN`, `0.0` is returned as the result. [[sum-params]] .`sum(double[] values)` Parameters @@ -326,7 +326,7 @@ POST /_search ===== stdDev Function This function accepts a collection of doubles and average, then returns the standard deviation of the values in that window. -`null` and `NaN` values are ignored; the sum is only calculated over the real values. If the window is empty, or all values are +`null` and `NaN` values are ignored; the sum is only calculated over the real values. If the window is empty, or all values are `null`/`NaN`, `0.0` is returned as the result. [[stddev-params]] @@ -368,17 +368,17 @@ POST /_search // TEST[setup:sales] The `avg` parameter must be provided to the standard deviation function because different styles of averages can be computed on the window -(simple, linearly weighted, etc). 
The various moving averages that are detailed below can be used to calculate the average for the +(simple, linearly weighted, etc). The various moving averages that are detailed below can be used to calculate the average for the standard deviation function. ===== unweightedAvg Function -The `unweightedAvg` function calculates the sum of all values in the window, then divides by the size of the window. It is effectively -a simple arithmetic mean of the window. The simple moving average does not perform any time-dependent weighting, which means +The `unweightedAvg` function calculates the sum of all values in the window, then divides by the size of the window. It is effectively +a simple arithmetic mean of the window. The simple moving average does not perform any time-dependent weighting, which means the values from a `simple` moving average tend to "lag" behind the real data. `null` and `NaN` values are ignored; the average is only calculated over the real values. If the window is empty, or all values are -`null`/`NaN`, `NaN` is returned as the result. This means that the count used in the average calculation is count of non-`null`,non-`NaN` +`null`/`NaN`, `NaN` is returned as the result. This means that the count used in the average calculation is count of non-`null`,non-`NaN` values. [[unweightedavg-params]] @@ -421,7 +421,7 @@ POST /_search ==== linearWeightedAvg Function The `linearWeightedAvg` function assigns a linear weighting to points in the series, such that "older" datapoints (e.g. those at -the beginning of the window) contribute a linearly less amount to the total average. The linear weighting helps reduce +the beginning of the window) contribute a linearly less amount to the total average. The linear weighting helps reduce the "lag" behind the data's mean, since older points have less influence. If the window is empty, or all values are `null`/`NaN`, `NaN` is returned as the result. @@ -467,13 +467,13 @@ POST /_search The `ewma` function (aka "single-exponential") is similar to the `linearMovAvg` function, except older data-points become exponentially less important, -rather than linearly less important. The speed at which the importance decays can be controlled with an `alpha` -setting. Small values make the weight decay slowly, which provides greater smoothing and takes into account a larger -portion of the window. Larger values make the weight decay quickly, which reduces the impact of older values on the -moving average. This tends to make the moving average track the data more closely but with less smoothing. +rather than linearly less important. The speed at which the importance decays can be controlled with an `alpha` +setting. Small values make the weight decay slowly, which provides greater smoothing and takes into account a larger +portion of the window. Larger values make the weight decay quickly, which reduces the impact of older values on the +moving average. This tends to make the moving average track the data more closely but with less smoothing. `null` and `NaN` values are ignored; the average is only calculated over the real values. If the window is empty, or all values are -`null`/`NaN`, `NaN` is returned as the result. This means that the count used in the average calculation is count of non-`null`,non-`NaN` +`null`/`NaN`, `NaN` is returned as the result. This means that the count used in the average calculation is count of non-`null`,non-`NaN` values. 
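As a minimal sketch of calling one of these prebuilt functions (the index, `date` and `price` fields, window size, and `alpha` value are illustrative), `MovingFunctions.ewma` can be used directly as the `moving_fn` script:

[source,console]
--------------------------------------------------
POST /_search
{
  "size": 0,
  "aggs": {
    "my_date_histo": {
      "date_histogram": {
        "field": "date",
        "calendar_interval": "1M"
      },
      "aggs": {
        "the_sum": {
          "sum": { "field": "price" }
        },
        "the_movavg": {
          "moving_fn": {
            "buckets_path": "the_sum",
            "window": 10,
            "script": "MovingFunctions.ewma(values, 0.3)" <1>
          }
        }
      }
    }
  }
}
--------------------------------------------------
<1> `values` is the window of data handed to the script; `0.3` is the `alpha` decay setting described above.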
[[ewma-params]] @@ -518,18 +518,18 @@ POST /_search ==== holt Function The `holt` function (aka "double exponential") incorporates a second exponential term which -tracks the data's trend. Single exponential does not perform well when the data has an underlying linear trend. The +tracks the data's trend. Single exponential does not perform well when the data has an underlying linear trend. The double exponential model calculates two values internally: a "level" and a "trend". -The level calculation is similar to `ewma`, and is an exponentially weighted view of the data. The difference is +The level calculation is similar to `ewma`, and is an exponentially weighted view of the data. The difference is that the previously smoothed value is used instead of the raw value, which allows it to stay close to the original series. The trend calculation looks at the difference between the current and last value (e.g. the slope, or trend, of the -smoothed data). The trend value is also exponentially weighted. +smoothed data). The trend value is also exponentially weighted. Values are produced by multiplying the level and trend components. `null` and `NaN` values are ignored; the average is only calculated over the real values. If the window is empty, or all values are -`null`/`NaN`, `NaN` is returned as the result. This means that the count used in the average calculation is count of non-`null`,non-`NaN` +`null`/`NaN`, `NaN` is returned as the result. This means that the count used in the average calculation is count of non-`null`,non-`NaN` values. [[holt-params]] @@ -572,26 +572,26 @@ POST /_search // TEST[setup:sales] In practice, the `alpha` value behaves very similarly in `holtMovAvg` as `ewmaMovAvg`: small values produce more smoothing -and more lag, while larger values produce closer tracking and less lag. The value of `beta` is often difficult -to see. Small values emphasize long-term trends (such as a constant linear trend in the whole series), while larger +and more lag, while larger values produce closer tracking and less lag. The value of `beta` is often difficult +to see. Small values emphasize long-term trends (such as a constant linear trend in the whole series), while larger values emphasize short-term trends. ==== holtWinters Function The `holtWinters` function (aka "triple exponential") incorporates a third exponential term which -tracks the seasonal aspect of your data. This aggregation therefore smooths based on three components: "level", "trend" +tracks the seasonal aspect of your data. This aggregation therefore smooths based on three components: "level", "trend" and "seasonality". The level and trend calculation is identical to `holt` The seasonal calculation looks at the difference between the current point, and the point one period earlier. -Holt-Winters requires a little more handholding than the other moving averages. You need to specify the "periodicity" -of your data: e.g. if your data has cyclic trends every 7 days, you would set `period = 7`. Similarly if there was -a monthly trend, you would set it to `30`. There is currently no periodicity detection, although that is planned +Holt-Winters requires a little more handholding than the other moving averages. You need to specify the "periodicity" +of your data: e.g. if your data has cyclic trends every 7 days, you would set `period = 7`. Similarly if there was +a monthly trend, you would set it to `30`. There is currently no periodicity detection, although that is planned for future enhancements. 
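Putting the periodicity and window requirements together, a hedged sketch (the period of `5`, the smoothing parameters, and the field names are illustrative, not the page's own example) might guard the `holtWinters` call until enough data is in the window:

[source,console]
--------------------------------------------------
POST /_search
{
  "size": 0,
  "aggs": {
    "my_date_histo": {
      "date_histogram": {
        "field": "date",
        "calendar_interval": "1M"
      },
      "aggs": {
        "the_sum": {
          "sum": { "field": "price" }
        },
        "the_movavg": {
          "moving_fn": {
            "buckets_path": "the_sum",
            "window": 10, <1>
            "script": "return values.length >= 5*2 ? MovingFunctions.holtWinters(values, 0.3, 0.1, 0.3, 5, false) : Double.NaN" <2>
          }
        }
      }
    }
  }
}
--------------------------------------------------
<1> With a period of `5`, the window must be at least `2 * 5 = 10` buckets.
<2> The guard emits `NaN` until two full periods of data are available, since the algorithm does not backcast.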
`null` and `NaN` values are ignored; the average is only calculated over the real values. If the window is empty, or all values are -`null`/`NaN`, `NaN` is returned as the result. This means that the count used in the average calculation is count of non-`null`,non-`NaN` +`null`/`NaN`, `NaN` is returned as the result. This means that the count used in the average calculation is count of non-`null`,non-`NaN` values. [[holtwinters-params]] @@ -638,20 +638,20 @@ POST /_search [WARNING] ====== -Multiplicative Holt-Winters works by dividing each data point by the seasonal value. This is problematic if any of -your data is zero, or if there are gaps in the data (since this results in a divid-by-zero). To combat this, the -`mult` Holt-Winters pads all values by a very small amount (1*10^-10^) so that all values are non-zero. This affects -the result, but only minimally. If your data is non-zero, or you prefer to see `NaN` when zero's are encountered, +Multiplicative Holt-Winters works by dividing each data point by the seasonal value. This is problematic if any of +your data is zero, or if there are gaps in the data (since this results in a divid-by-zero). To combat this, the +`mult` Holt-Winters pads all values by a very small amount (1*10^-10^) so that all values are non-zero. This affects +the result, but only minimally. If your data is non-zero, or you prefer to see `NaN` when zero's are encountered, you can disable this behavior with `pad: false` ====== ===== "Cold Start" -Unfortunately, due to the nature of Holt-Winters, it requires two periods of data to "bootstrap" the algorithm. This -means that your `window` must always be *at least* twice the size of your period. An exception will be thrown if it -isn't. It also means that Holt-Winters will not emit a value for the first `2 * period` buckets; the current algorithm +Unfortunately, due to the nature of Holt-Winters, it requires two periods of data to "bootstrap" the algorithm. This +means that your `window` must always be *at least* twice the size of your period. An exception will be thrown if it +isn't. It also means that Holt-Winters will not emit a value for the first `2 * period` buckets; the current algorithm does not backcast. -You'll notice in the above example we have an `if ()` statement checking the size of values. This is checking to make sure +You'll notice in the above example we have an `if ()` statement checking the size of values. This is checking to make sure we have two periods worth of data (`5 * 2`, where 5 is the period specified in the `holtWintersMovAvg` function) before calling the holt-winters function. diff --git a/docs/reference/aggregations/pipeline/moving-percentiles-aggregation.asciidoc b/docs/reference/aggregations/pipeline/moving-percentiles-aggregation.asciidoc index 50b099d8e162..10208510a672 100644 --- a/docs/reference/aggregations/pipeline/moving-percentiles-aggregation.asciidoc +++ b/docs/reference/aggregations/pipeline/moving-percentiles-aggregation.asciidoc @@ -37,7 +37,7 @@ A `moving_percentiles` aggregation looks like this in isolation: |`shift` |<> of window position. |Optional | 0 |=== -`moving_percentiles` aggregations must be embedded inside of a `histogram` or `date_histogram` aggregation. They can be +`moving_percentiles` aggregations must be embedded inside of a `histogram` or `date_histogram` aggregation. They can be embedded like any other metric aggregation: [source,console] @@ -75,8 +75,8 @@ POST /_search <2> A `percentile` metric is used to calculate the percentiles of a field. 
<3> Finally, we specify a `moving_percentiles` aggregation which uses "the_percentile" sketch as its input. -Moving percentiles are built by first specifying a `histogram` or `date_histogram` over a field. You then add -a percentile metric inside of that histogram. Finally, the `moving_percentiles` is embedded inside the histogram. +Moving percentiles are built by first specifying a `histogram` or `date_histogram` over a field. You then add +a percentile metric inside of that histogram. Finally, the `moving_percentiles` is embedded inside the histogram. The `buckets_path` parameter is then used to "point" at the percentiles aggregation inside of the histogram (see <> for a description of the syntax for `buckets_path`). diff --git a/docs/reference/aggregations/pipeline/percentiles-bucket-aggregation.asciidoc b/docs/reference/aggregations/pipeline/percentiles-bucket-aggregation.asciidoc index d3a492536ef0..a899dfe45a58 100644 --- a/docs/reference/aggregations/pipeline/percentiles-bucket-aggregation.asciidoc +++ b/docs/reference/aggregations/pipeline/percentiles-bucket-aggregation.asciidoc @@ -130,5 +130,5 @@ interpolate between data points. The percentiles are calculated exactly and is not an approximation (unlike the Percentiles Metric). This means the implementation maintains an in-memory, sorted list of your data to compute the percentiles, before discarding the -data. You may run into memory pressure issues if you attempt to calculate percentiles over many millions of +data. You may run into memory pressure issues if you attempt to calculate percentiles over many millions of data-points in a single `percentiles_bucket`. diff --git a/docs/reference/aggregations/pipeline/serial-diff-aggregation.asciidoc b/docs/reference/aggregations/pipeline/serial-diff-aggregation.asciidoc index 7ef83e1814ac..8b8ce8c6eece 100644 --- a/docs/reference/aggregations/pipeline/serial-diff-aggregation.asciidoc +++ b/docs/reference/aggregations/pipeline/serial-diff-aggregation.asciidoc @@ -13,10 +13,10 @@ next. Single periods are useful for removing constant, linear trends. Single periods are also useful for transforming data into a stationary series. In this example, the Dow Jones is plotted over ~250 days. The raw data is not stationary, which would make it difficult to use with some techniques. -By calculating the first-difference, we de-trend the data (e.g. remove a constant, linear trend). We can see that the +By calculating the first-difference, we de-trend the data (e.g. remove a constant, linear trend). We can see that the data becomes a stationary series (e.g. the first difference is randomly distributed around zero, and doesn't seem to exhibit any pattern/behavior). The transformation reveals that the dataset is following a random-walk; the value is the -previous value +/- a random amount. This insight allows selection of further tools for analysis. +previous value +/- a random amount. This insight allows selection of further tools for analysis. [[serialdiff_dow]] .Dow Jones plotted and made stationary with first-differencing @@ -93,10 +93,10 @@ POST /_search -------------------------------------------------- <1> A `date_histogram` named "my_date_histo" is constructed on the "timestamp" field, with one-day intervals -<2> A `sum` metric is used to calculate the sum of a field. This could be any metric (sum, min, max, etc) +<2> A `sum` metric is used to calculate the sum of a field. 
This could be any metric (sum, min, max, etc) <3> Finally, we specify a `serial_diff` aggregation which uses "the_sum" metric as its input. -Serial differences are built by first specifying a `histogram` or `date_histogram` over a field. You can then optionally -add normal metrics, such as a `sum`, inside of that histogram. Finally, the `serial_diff` is embedded inside the histogram. +Serial differences are built by first specifying a `histogram` or `date_histogram` over a field. You can then optionally +add normal metrics, such as a `sum`, inside of that histogram. Finally, the `serial_diff` is embedded inside the histogram. The `buckets_path` parameter is then used to "point" at one of the sibling metrics inside of the histogram (see <> for a description of the syntax for `buckets_path`. diff --git a/docs/reference/analysis/analyzers.asciidoc b/docs/reference/analysis/analyzers.asciidoc index 15e8fb435f24..1aacbf62a6d6 100644 --- a/docs/reference/analysis/analyzers.asciidoc +++ b/docs/reference/analysis/analyzers.asciidoc @@ -13,12 +13,12 @@ lowercases terms, and supports removing stop words. <>:: The `simple` analyzer divides text into terms whenever it encounters a -character which is not a letter. It lowercases all terms. +character which is not a letter. It lowercases all terms. <>:: The `whitespace` analyzer divides text into terms whenever it encounters any -whitespace character. It does not lowercase terms. +whitespace character. It does not lowercase terms. <>:: diff --git a/docs/reference/analysis/analyzers/configuring.asciidoc b/docs/reference/analysis/analyzers/configuring.asciidoc index c848004c4f03..0a9b682a7121 100644 --- a/docs/reference/analysis/analyzers/configuring.asciidoc +++ b/docs/reference/analysis/analyzers/configuring.asciidoc @@ -1,8 +1,8 @@ [[configuring-analyzers]] === Configuring built-in analyzers -The built-in analyzers can be used directly without any configuration. Some -of them, however, support configuration options to alter their behaviour. For +The built-in analyzers can be used directly without any configuration. Some +of them, however, support configuration options to alter their behaviour. For instance, the <> can be configured to support a list of stop words: @@ -53,10 +53,10 @@ POST my-index-000001/_analyze <1> We define the `std_english` analyzer to be based on the `standard` analyzer, but configured to remove the pre-defined list of English stopwords. <2> The `my_text` field uses the `standard` analyzer directly, without - any configuration. No stop words will be removed from this field. + any configuration. No stop words will be removed from this field. The resulting terms are: `[ the, old, brown, cow ]` <3> The `my_text.english` field uses the `std_english` analyzer, so - English stop words will be removed. The resulting terms are: + English stop words will be removed. The resulting terms are: `[ old, brown, cow ]` diff --git a/docs/reference/analysis/analyzers/custom-analyzer.asciidoc b/docs/reference/analysis/analyzers/custom-analyzer.asciidoc index 88ed9df36ac9..f2808d4c4ff0 100644 --- a/docs/reference/analysis/analyzers/custom-analyzer.asciidoc +++ b/docs/reference/analysis/analyzers/custom-analyzer.asciidoc @@ -38,7 +38,7 @@ The `custom` analyzer accepts the following parameters: When indexing an array of text values, Elasticsearch inserts a fake "gap" between the last term of one value and the first term of the next value to ensure that a phrase query doesn't match two terms from different array - elements. Defaults to `100`. 
See <> for more. + elements. Defaults to `100`. See <> for more. [discrete] === Example configuration diff --git a/docs/reference/analysis/analyzers/fingerprint-analyzer.asciidoc b/docs/reference/analysis/analyzers/fingerprint-analyzer.asciidoc index f66acb452c41..9c6fc89a1c98 100644 --- a/docs/reference/analysis/analyzers/fingerprint-analyzer.asciidoc +++ b/docs/reference/analysis/analyzers/fingerprint-analyzer.asciidoc @@ -9,7 +9,7 @@ https://github.com/OpenRefine/OpenRefine/wiki/Clustering-In-Depth#fingerprint[fi which is used by the OpenRefine project to assist in clustering. Input text is lowercased, normalized to remove extended characters, sorted, -deduplicated and concatenated into a single token. If a stopword list is +deduplicated and concatenated into a single token. If a stopword list is configured, stop words will also be removed. [discrete] @@ -59,17 +59,17 @@ The `fingerprint` analyzer accepts the following parameters: [horizontal] `separator`:: - The character to use to concatenate the terms. Defaults to a space. + The character to use to concatenate the terms. Defaults to a space. `max_output_size`:: - The maximum token size to emit. Defaults to `255`. Tokens larger than + The maximum token size to emit. Defaults to `255`. Tokens larger than this size will be discarded. `stopwords`:: - A pre-defined stop words list like `_english_` or an array containing a - list of stop words. Defaults to `_none_`. + A pre-defined stop words list like `_english_` or an array containing a + list of stop words. Defaults to `_none_`. `stopwords_path`:: diff --git a/docs/reference/analysis/analyzers/lang-analyzer.asciidoc b/docs/reference/analysis/analyzers/lang-analyzer.asciidoc index e6c08fe4da17..45cb725492f0 100644 --- a/docs/reference/analysis/analyzers/lang-analyzer.asciidoc +++ b/docs/reference/analysis/analyzers/lang-analyzer.asciidoc @@ -55,7 +55,7 @@ more details. ===== Excluding words from stemming The `stem_exclusion` parameter allows you to specify an array -of lowercase words that should not be stemmed. Internally, this +of lowercase words that should not be stemmed. Internally, this functionality is implemented by adding the <> with the `keywords` set to the value of the `stem_exclusion` parameter. @@ -427,7 +427,7 @@ PUT /catalan_example ===== `cjk` analyzer NOTE: You may find that `icu_analyzer` in the ICU analysis plugin works better -for CJK text than the `cjk` analyzer. Experiment with your text and queries. +for CJK text than the `cjk` analyzer. Experiment with your text and queries. The `cjk` analyzer could be reimplemented as a `custom` analyzer as follows: diff --git a/docs/reference/analysis/analyzers/pattern-analyzer.asciidoc b/docs/reference/analysis/analyzers/pattern-analyzer.asciidoc index 7327cee996f3..92c293795a3d 100644 --- a/docs/reference/analysis/analyzers/pattern-analyzer.asciidoc +++ b/docs/reference/analysis/analyzers/pattern-analyzer.asciidoc @@ -159,8 +159,8 @@ The `pattern` analyzer accepts the following parameters: `stopwords`:: - A pre-defined stop words list like `_english_` or an array containing a - list of stop words. Defaults to `_none_`. + A pre-defined stop words list like `_english_` or an array containing a + list of stop words. Defaults to `_none_`. 
`stopwords_path`:: diff --git a/docs/reference/analysis/analyzers/standard-analyzer.asciidoc b/docs/reference/analysis/analyzers/standard-analyzer.asciidoc index 459d10983418..ea079b871818 100644 --- a/docs/reference/analysis/analyzers/standard-analyzer.asciidoc +++ b/docs/reference/analysis/analyzers/standard-analyzer.asciidoc @@ -132,8 +132,8 @@ The `standard` analyzer accepts the following parameters: `stopwords`:: - A pre-defined stop words list like `_english_` or an array containing a - list of stop words. Defaults to `_none_`. + A pre-defined stop words list like `_english_` or an array containing a + list of stop words. Defaults to `_none_`. `stopwords_path`:: diff --git a/docs/reference/analysis/analyzers/stop-analyzer.asciidoc b/docs/reference/analysis/analyzers/stop-analyzer.asciidoc index 5dc65268c7b7..0a156cca1add 100644 --- a/docs/reference/analysis/analyzers/stop-analyzer.asciidoc +++ b/docs/reference/analysis/analyzers/stop-analyzer.asciidoc @@ -5,7 +5,7 @@ ++++ The `stop` analyzer is the same as the <> -but adds support for removing stop words. It defaults to using the +but adds support for removing stop words. It defaults to using the `_english_` stop words. [discrete] @@ -111,8 +111,8 @@ The `stop` analyzer accepts the following parameters: [horizontal] `stopwords`:: - A pre-defined stop words list like `_english_` or an array containing a - list of stop words. Defaults to `_english_`. + A pre-defined stop words list like `_english_` or an array containing a + list of stop words. Defaults to `_english_`. `stopwords_path`:: diff --git a/docs/reference/analysis/anatomy.asciidoc b/docs/reference/analysis/anatomy.asciidoc index 22e7ffda667d..f01a22ec4e6e 100644 --- a/docs/reference/analysis/anatomy.asciidoc +++ b/docs/reference/analysis/anatomy.asciidoc @@ -14,7 +14,7 @@ combined to define new <> analyzers. ==== Character filters A _character filter_ receives the original text as a stream of characters and -can transform the stream by adding, removing, or changing characters. For +can transform the stream by adding, removing, or changing characters. For instance, a character filter could be used to convert Hindu-Arabic numerals (٠‎١٢٣٤٥٦٧٨‎٩‎) into their Arabic-Latin equivalents (0123456789), or to strip HTML elements like `` from the stream. @@ -25,10 +25,10 @@ which are applied in order. [[analyzer-anatomy-tokenizer]] ==== Tokenizer -A _tokenizer_ receives a stream of characters, breaks it up into individual +A _tokenizer_ receives a stream of characters, breaks it up into individual _tokens_ (usually individual words), and outputs a stream of _tokens_. For instance, a <> tokenizer breaks -text into tokens whenever it sees any whitespace. It would convert the text +text into tokens whenever it sees any whitespace. It would convert the text `"Quick brown fox!"` into the terms `[Quick, brown, fox!]`. The tokenizer is also responsible for recording the order or _position_ of @@ -41,7 +41,7 @@ An analyzer must have *exactly one* <>. ==== Token filters A _token filter_ receives the token stream and may add, remove, or change -tokens. For example, a <> token +tokens. 
For example, a <> token filter converts all tokens to lowercase, a <> token filter removes common words (_stop words_) like `the` from the token stream, and a diff --git a/docs/reference/analysis/charfilters.asciidoc b/docs/reference/analysis/charfilters.asciidoc index 97fe4fd266b3..93054cf8e618 100644 --- a/docs/reference/analysis/charfilters.asciidoc +++ b/docs/reference/analysis/charfilters.asciidoc @@ -5,7 +5,7 @@ _Character filters_ are used to preprocess the stream of characters before it is passed to the <>. A character filter receives the original text as a stream of characters and -can transform the stream by adding, removing, or changing characters. For +can transform the stream by adding, removing, or changing characters. For instance, a character filter could be used to convert Hindu-Arabic numerals (٠‎١٢٣٤٥٦٧٨‎٩‎) into their Arabic-Latin equivalents (0123456789), or to strip HTML elements like `` from the stream. diff --git a/docs/reference/analysis/charfilters/mapping-charfilter.asciidoc b/docs/reference/analysis/charfilters/mapping-charfilter.asciidoc index b1f2f0900dcb..ecb73164e6a9 100644 --- a/docs/reference/analysis/charfilters/mapping-charfilter.asciidoc +++ b/docs/reference/analysis/charfilters/mapping-charfilter.asciidoc @@ -4,7 +4,7 @@ Mapping ++++ -The `mapping` character filter accepts a map of keys and values. Whenever it +The `mapping` character filter accepts a map of keys and values. Whenever it encounters a string of characters that is the same as a key, it replaces them with the value associated with that key. diff --git a/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc index 178f42d3b9fe..7c0014e17010 100644 --- a/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/multiplexer-tokenfilter.asciidoc @@ -5,7 +5,7 @@ ++++ A token filter of type `multiplexer` will emit multiple tokens at the same position, -each version of the token having been run through a different filter. Identical +each version of the token having been run through a different filter. Identical output tokens at the same position will be removed. WARNING: If the incoming token stream has duplicate tokens, then these will also be @@ -14,8 +14,8 @@ removed by the multiplexer [discrete] === Options [horizontal] -filters:: a list of token filters to apply to incoming tokens. These can be any - token filters defined elsewhere in the index mappings. Filters can be chained +filters:: a list of token filters to apply to incoming tokens. These can be any + token filters defined elsewhere in the index mappings. Filters can be chained using a comma-delimited string, so for example `"lowercase, porter_stem"` would apply the `lowercase` filter and then the `porter_stem` filter to a single token. diff --git a/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc index bc288fbf720e..bb53a9d71874 100644 --- a/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc @@ -8,14 +8,14 @@ The `synonym_graph` token filter allows to easily handle synonyms, including multi-word synonyms correctly during the analysis process. In order to properly handle multi-word synonyms this token filter -creates a <> during processing. For more +creates a <> during processing. 
For more information on this topic and its various complexities, please read the http://blog.mikemccandless.com/2012/04/lucenes-tokenstreams-are-actually.html[Lucene's TokenStreams are actually graphs] blog post. ["NOTE",id="synonym-graph-index-note"] =============================== This token filter is designed to be used as part of a search analyzer -only. If you want to apply synonyms during indexing please use the +only. If you want to apply synonyms during indexing please use the standard <>. =============================== @@ -179,13 +179,13 @@ as well. ==== Parsing synonym files Elasticsearch will use the token filters preceding the synonym filter -in a tokenizer chain to parse the entries in a synonym file. So, for example, if a +in a tokenizer chain to parse the entries in a synonym file. So, for example, if a synonym filter is placed after a stemmer, then the stemmer will also be applied -to the synonym entries. Because entries in the synonym map cannot have stacked -positions, some token filters may cause issues here. Token filters that produce +to the synonym entries. Because entries in the synonym map cannot have stacked +positions, some token filters may cause issues here. Token filters that produce multiple versions of a token may choose which version of the token to emit when parsing synonyms, e.g. `asciifolding` will only produce the folded version of the -token. Others, e.g. `multiplexer`, `word_delimiter_graph` or `ngram` will throw an +token. Others, e.g. `multiplexer`, `word_delimiter_graph` or `ngram` will throw an error. If you need to build analyzers that include both multi-token filters and synonym diff --git a/docs/reference/analysis/tokenfilters/synonym-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/synonym-tokenfilter.asciidoc index 77cf7f371dfd..525885c940bc 100644 --- a/docs/reference/analysis/tokenfilters/synonym-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/synonym-tokenfilter.asciidoc @@ -170,13 +170,13 @@ as well. === Parsing synonym files Elasticsearch will use the token filters preceding the synonym filter -in a tokenizer chain to parse the entries in a synonym file. So, for example, if a +in a tokenizer chain to parse the entries in a synonym file. So, for example, if a synonym filter is placed after a stemmer, then the stemmer will also be applied -to the synonym entries. Because entries in the synonym map cannot have stacked -positions, some token filters may cause issues here. Token filters that produce +to the synonym entries. Because entries in the synonym map cannot have stacked +positions, some token filters may cause issues here. Token filters that produce multiple versions of a token may choose which version of the token to emit when parsing synonyms, e.g. `asciifolding` will only produce the folded version of the -token. Others, e.g. `multiplexer`, `word_delimiter_graph` or `ngram` will throw an +token. Others, e.g. `multiplexer`, `word_delimiter_graph` or `ngram` will throw an error. 
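To illustrate how preceding token filters affect synonym parsing (the index, filter, and analyzer names and the inline synonym list are hypothetical), placing `lowercase` before the synonym filter means the synonym entries themselves are lowercased when they are parsed:

[source,console]
--------------------------------------------------
PUT /my-index-000001
{
  "settings": {
    "analysis": {
      "filter": {
        "my_synonyms": {
          "type": "synonym_graph",
          "synonyms": [ "Laptop, Notebook" ] <1>
        }
      },
      "analyzer": {
        "my_search_analyzer": { <2>
          "tokenizer": "standard",
          "filter": [ "lowercase", "my_synonyms" ]
        }
      }
    }
  }
}
--------------------------------------------------
<1> Because `lowercase` precedes `my_synonyms` in the chain, the entries are parsed as `laptop` and `notebook`.
<2> `synonym_graph` is intended for search analyzers only, so this analyzer would be applied at search time.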
If you need to build analyzers that include both multi-token filters and synonym diff --git a/docs/reference/analysis/tokenizers.asciidoc b/docs/reference/analysis/tokenizers.asciidoc index fa47c05e3a00..38e4ebfcabc3 100644 --- a/docs/reference/analysis/tokenizers.asciidoc +++ b/docs/reference/analysis/tokenizers.asciidoc @@ -1,10 +1,10 @@ [[analysis-tokenizers]] == Tokenizer reference -A _tokenizer_ receives a stream of characters, breaks it up into individual +A _tokenizer_ receives a stream of characters, breaks it up into individual _tokens_ (usually individual words), and outputs a stream of _tokens_. For instance, a <> tokenizer breaks -text into tokens whenever it sees any whitespace. It would convert the text +text into tokens whenever it sees any whitespace. It would convert the text `"Quick brown fox!"` into the terms `[Quick, brown, fox!]`. The tokenizer is also responsible for recording the following: @@ -90,7 +90,7 @@ text: <>:: The `keyword` tokenizer is a ``noop'' tokenizer that accepts whatever text it -is given and outputs the exact same text as a single term. It can be combined +is given and outputs the exact same text as a single term. It can be combined with token filters like <> to normalise the analysed terms. diff --git a/docs/reference/analysis/tokenizers/edgengram-tokenizer.asciidoc b/docs/reference/analysis/tokenizers/edgengram-tokenizer.asciidoc index 74b5e7d4434c..030732b99734 100644 --- a/docs/reference/analysis/tokenizers/edgengram-tokenizer.asciidoc +++ b/docs/reference/analysis/tokenizers/edgengram-tokenizer.asciidoc @@ -14,7 +14,7 @@ Edge N-Grams are useful for _search-as-you-type_ queries. TIP: When you need _search-as-you-type_ for text which has a widely known order, such as movie or song titles, the <> is a much more efficient -choice than edge N-grams. Edge N-grams have the advantage when trying to +choice than edge N-grams. Edge N-grams have the advantage when trying to autocomplete words that can appear in any order. [discrete] @@ -67,7 +67,7 @@ The above sentence would produce the following terms: [ Q, Qu ] --------------------------- -NOTE: These default gram lengths are almost entirely useless. You need to +NOTE: These default gram lengths are almost entirely useless. You need to configure the `edge_ngram` before using it. [discrete] @@ -76,19 +76,19 @@ configure the `edge_ngram` before using it. The `edge_ngram` tokenizer accepts the following parameters: `min_gram`:: - Minimum length of characters in a gram. Defaults to `1`. + Minimum length of characters in a gram. Defaults to `1`. `max_gram`:: + -- -Maximum length of characters in a gram. Defaults to `2`. +Maximum length of characters in a gram. Defaults to `2`. See <>. -- `token_chars`:: - Character classes that should be included in a token. Elasticsearch + Character classes that should be included in a token. Elasticsearch will split on characters that don't belong to the classes specified. Defaults to `[]` (keep all characters). + @@ -106,7 +106,7 @@ Character classes may be any of the following: Custom characters that should be treated as part of a token. For example, setting this to `+-_` will make the tokenizer treat the plus, minus and - underscore sign as part of a token. + underscore sign as part of a token. 
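Since the default gram lengths of `1` and `2` are rarely useful on their own, here is a minimal sketch of a configured `edge_ngram` tokenizer (the index, analyzer, and tokenizer names, and the specific lengths, are illustrative):

[source,console]
--------------------------------------------------
PUT /my-autocomplete-index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_autocomplete": {
          "type": "custom",
          "tokenizer": "my_edge_ngram"
        }
      },
      "tokenizer": {
        "my_edge_ngram": {
          "type": "edge_ngram",
          "min_gram": 2, <1>
          "max_gram": 10,
          "token_chars": [ "letter", "digit" ] <2>
        }
      }
    }
  }
}
--------------------------------------------------
<1> Grams between 2 and 10 characters are emitted from the start of each token.
<2> Only letters and digits are kept inside tokens; every other character acts as a separator.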
[discrete] [[max-gram-limits]] diff --git a/docs/reference/analysis/tokenizers/keyword-tokenizer.asciidoc b/docs/reference/analysis/tokenizers/keyword-tokenizer.asciidoc index c4ee77458d83..53782f1907ba 100644 --- a/docs/reference/analysis/tokenizers/keyword-tokenizer.asciidoc +++ b/docs/reference/analysis/tokenizers/keyword-tokenizer.asciidoc @@ -4,8 +4,8 @@ Keyword ++++ -The `keyword` tokenizer is a ``noop'' tokenizer that accepts whatever text it -is given and outputs the exact same text as a single term. It can be combined +The `keyword` tokenizer is a ``noop'' tokenizer that accepts whatever text it +is given and outputs the exact same text as a single term. It can be combined with token filters to normalise output, e.g. lower-casing email addresses. [discrete] @@ -104,6 +104,6 @@ The `keyword` tokenizer accepts the following parameters: `buffer_size`:: The number of characters read into the term buffer in a single pass. - Defaults to `256`. The term buffer will grow by this size until all the - text has been consumed. It is advisable not to change this setting. + Defaults to `256`. The term buffer will grow by this size until all the + text has been consumed. It is advisable not to change this setting. diff --git a/docs/reference/analysis/tokenizers/lowercase-tokenizer.asciidoc b/docs/reference/analysis/tokenizers/lowercase-tokenizer.asciidoc index ffe44292c52b..5a38313fb5d9 100644 --- a/docs/reference/analysis/tokenizers/lowercase-tokenizer.asciidoc +++ b/docs/reference/analysis/tokenizers/lowercase-tokenizer.asciidoc @@ -7,7 +7,7 @@ The `lowercase` tokenizer, like the <> breaks text into terms whenever it encounters a character which is not a letter, but it also -lowercases all terms. It is functionally equivalent to the +lowercases all terms. It is functionally equivalent to the <> combined with the <>, but is more efficient as it performs both steps in a single pass. diff --git a/docs/reference/analysis/tokenizers/ngram-tokenizer.asciidoc b/docs/reference/analysis/tokenizers/ngram-tokenizer.asciidoc index cd7f2fb7c74e..0c244734a483 100644 --- a/docs/reference/analysis/tokenizers/ngram-tokenizer.asciidoc +++ b/docs/reference/analysis/tokenizers/ngram-tokenizer.asciidoc @@ -175,14 +175,14 @@ The `ngram` tokenizer accepts the following parameters: [horizontal] `min_gram`:: - Minimum length of characters in a gram. Defaults to `1`. + Minimum length of characters in a gram. Defaults to `1`. `max_gram`:: - Maximum length of characters in a gram. Defaults to `2`. + Maximum length of characters in a gram. Defaults to `2`. `token_chars`:: - Character classes that should be included in a token. Elasticsearch + Character classes that should be included in a token. Elasticsearch will split on characters that don't belong to the classes specified. Defaults to `[]` (keep all characters). + @@ -200,12 +200,12 @@ Character classes may be any of the following: Custom characters that should be treated as part of a token. For example, setting this to `+-_` will make the tokenizer treat the plus, minus and - underscore sign as part of a token. + underscore sign as part of a token. TIP: It usually makes sense to set `min_gram` and `max_gram` to the same -value. The smaller the length, the more documents will match but the lower -the quality of the matches. The longer the length, the more specific the -matches. A tri-gram (length `3`) is a good place to start. +value. The smaller the length, the more documents will match but the lower +the quality of the matches. 
The longer the length, the more specific the +matches. A tri-gram (length `3`) is a good place to start. The index level setting `index.max_ngram_diff` controls the maximum allowed difference between `max_gram` and `min_gram`. diff --git a/docs/reference/analysis/tokenizers/pathhierarchy-tokenizer.asciidoc b/docs/reference/analysis/tokenizers/pathhierarchy-tokenizer.asciidoc index 321d33d6f7ce..293ee15d8f45 100644 --- a/docs/reference/analysis/tokenizers/pathhierarchy-tokenizer.asciidoc +++ b/docs/reference/analysis/tokenizers/pathhierarchy-tokenizer.asciidoc @@ -69,7 +69,7 @@ The `path_hierarchy` tokenizer accepts the following parameters: [horizontal] `delimiter`:: - The character to use as the path separator. Defaults to `/`. + The character to use as the path separator. Defaults to `/`. `replacement`:: An optional replacement character to use for the delimiter. @@ -77,20 +77,20 @@ The `path_hierarchy` tokenizer accepts the following parameters: `buffer_size`:: The number of characters read into the term buffer in a single pass. - Defaults to `1024`. The term buffer will grow by this size until all the - text has been consumed. It is advisable not to change this setting. + Defaults to `1024`. The term buffer will grow by this size until all the + text has been consumed. It is advisable not to change this setting. `reverse`:: - If set to `true`, emits the tokens in reverse order. Defaults to `false`. + If set to `true`, emits the tokens in reverse order. Defaults to `false`. `skip`:: - The number of initial tokens to skip. Defaults to `0`. + The number of initial tokens to skip. Defaults to `0`. [discrete] === Example configuration In this example, we configure the `path_hierarchy` tokenizer to split on `-` -characters, and to replace them with `/`. The first two tokens are skipped: +characters, and to replace them with `/`. The first two tokens are skipped: [source,console] ---------------------------- diff --git a/docs/reference/analysis/tokenizers/pattern-tokenizer.asciidoc b/docs/reference/analysis/tokenizers/pattern-tokenizer.asciidoc index 112ba92bf599..75866dff7430 100644 --- a/docs/reference/analysis/tokenizers/pattern-tokenizer.asciidoc +++ b/docs/reference/analysis/tokenizers/pattern-tokenizer.asciidoc @@ -116,7 +116,7 @@ The `pattern` tokenizer accepts the following parameters: `group`:: - Which capture group to extract as tokens. Defaults to `-1` (split). + Which capture group to extract as tokens. Defaults to `-1` (split). [discrete] === Example configuration @@ -194,7 +194,7 @@ The above example produces the following terms: --------------------------- In the next example, we configure the `pattern` tokenizer to capture values -enclosed in double quotes (ignoring embedded escaped quotes `\"`). The regex +enclosed in double quotes (ignoring embedded escaped quotes `\"`). The regex itself looks like this: "((?:\\"|[^"]|\\")*)" diff --git a/docs/reference/api-conventions.asciidoc b/docs/reference/api-conventions.asciidoc index 5e195a960457..357e7476071e 100644 --- a/docs/reference/api-conventions.asciidoc +++ b/docs/reference/api-conventions.asciidoc @@ -199,7 +199,7 @@ Statistics are returned in a format suitable for humans The human readable values can be turned off by adding `?human=false` to the query string. This makes sense when the stats results are being consumed by a monitoring tool, rather than intended for human -consumption. The default for the `human` flag is +consumption. The default for the `human` flag is `false`. 
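For example (the target endpoint is illustrative; the flag behaves the same way on other stats APIs), a monitoring tool would typically request raw numeric values rather than human-formatted strings:

[source,console]
--------------------------------------------------
GET /_cluster/stats?human=false
--------------------------------------------------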
[[date-math]] @@ -499,7 +499,7 @@ of supporting the native JSON number types. ==== Time units Whenever durations need to be specified, e.g. for a `timeout` parameter, the duration must specify -the unit, like `2d` for 2 days. The supported units are: +the unit, like `2d` for 2 days. The supported units are: [horizontal] `d`:: Days diff --git a/docs/reference/cat.asciidoc b/docs/reference/cat.asciidoc index 19eb152d9445..1ad7d7d35856 100644 --- a/docs/reference/cat.asciidoc +++ b/docs/reference/cat.asciidoc @@ -103,14 +103,14 @@ with `queue`. ==== Numeric formats Many commands provide a few types of numeric output, either a byte, size -or a time value. By default, these types are human-formatted, -for example, `3.5mb` instead of `3763212`. The human values are not +or a time value. By default, these types are human-formatted, +for example, `3.5mb` instead of `3763212`. The human values are not sortable numerically, so in order to operate on these values where order is important, you can change it. Say you want to find the largest index in your cluster (storage used -by all the shards, not number of documents). The `/_cat/indices` API -is ideal. You only need to add three things to the API request: +by all the shards, not number of documents). The `/_cat/indices` API +is ideal. You only need to add three things to the API request: . The `bytes` query string parameter with a value of `b` to get byte-level resolution. . The `s` (sort) parameter with a value of `store.size:desc` and a comma with `index:asc` to sort the output diff --git a/docs/reference/cat/nodes.asciidoc b/docs/reference/cat/nodes.asciidoc index b5cd9090d7ef..d59f3c18e7b3 100644 --- a/docs/reference/cat/nodes.asciidoc +++ b/docs/reference/cat/nodes.asciidoc @@ -25,7 +25,7 @@ include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=bytes] include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=http-format] `full_id`:: -(Optional, Boolean) If `true`, return the full node ID. If `false`, return the +(Optional, Boolean) If `true`, return the full node ID. If `false`, return the shortened node ID. Defaults to `false`. include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=cat-h] diff --git a/docs/reference/cluster/allocation-explain.asciidoc b/docs/reference/cluster/allocation-explain.asciidoc index 2bbe7e01b9b2..e32417144706 100644 --- a/docs/reference/cluster/allocation-explain.asciidoc +++ b/docs/reference/cluster/allocation-explain.asciidoc @@ -22,7 +22,7 @@ Provides explanations for shard allocations in the cluster. ==== {api-description-title} The purpose of the cluster allocation explain API is to provide -explanations for shard allocations in the cluster. For unassigned shards, +explanations for shard allocations in the cluster. For unassigned shards, the explain API provides an explanation for why the shard is unassigned. For assigned shards, the explain API provides an explanation for why the shard is remaining on its current node and has not moved or rebalanced to diff --git a/docs/reference/cluster/nodes-hot-threads.asciidoc b/docs/reference/cluster/nodes-hot-threads.asciidoc index 5e1fa9a36a27..e8bdef825999 100644 --- a/docs/reference/cluster/nodes-hot-threads.asciidoc +++ b/docs/reference/cluster/nodes-hot-threads.asciidoc @@ -40,7 +40,7 @@ include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=node-id] `ignore_idle_threads`:: (Optional, Boolean) If true, known idle threads (e.g. waiting in a socket - select, or to get a task from an empty queue) are filtered out. 
Defaults to + select, or to get a task from an empty queue) are filtered out. Defaults to true. `interval`:: diff --git a/docs/reference/cluster/nodes-info.asciidoc b/docs/reference/cluster/nodes-info.asciidoc index a88b4a7b5e92..45effa530b12 100644 --- a/docs/reference/cluster/nodes-info.asciidoc +++ b/docs/reference/cluster/nodes-info.asciidoc @@ -108,7 +108,7 @@ include::{es-repo-dir}/rest-api/common-parms.asciidoc[tag=node-id] `total_indexing_buffer`:: Total heap allowed to be used to hold recently indexed - documents before they must be written to disk. This size is + documents before they must be written to disk. This size is a shared pool across all shards on this node, and is controlled by <>. diff --git a/docs/reference/cluster/nodes-stats.asciidoc b/docs/reference/cluster/nodes-stats.asciidoc index 87f596ced9b9..b72064550557 100644 --- a/docs/reference/cluster/nodes-stats.asciidoc +++ b/docs/reference/cluster/nodes-stats.asciidoc @@ -1199,7 +1199,7 @@ since the {wikipedia}/Unix_time[Unix Epoch]. `open_file_descriptors`:: (integer) -Number of opened file descriptors associated with the current or +Number of opened file descriptors associated with the current or `-1` if not supported. `max_file_descriptors`:: diff --git a/docs/reference/cluster/tasks.asciidoc b/docs/reference/cluster/tasks.asciidoc index 596986172554..94fa143f6258 100644 --- a/docs/reference/cluster/tasks.asciidoc +++ b/docs/reference/cluster/tasks.asciidoc @@ -75,7 +75,7 @@ GET _tasks?nodes=nodeId1,nodeId2&actions=cluster:* <3> // TEST[skip:No tasks to retrieve] <1> Retrieves all tasks currently running on all nodes in the cluster. -<2> Retrieves all tasks running on nodes `nodeId1` and `nodeId2`. See <> for more info about how to select individual nodes. +<2> Retrieves all tasks running on nodes `nodeId1` and `nodeId2`. See <> for more info about how to select individual nodes. <3> Retrieves all cluster-related tasks running on nodes `nodeId1` and `nodeId2`. The API returns the following result: diff --git a/docs/reference/cluster/voting-exclusions.asciidoc b/docs/reference/cluster/voting-exclusions.asciidoc index f08754a9aaf9..8ee35bf3e24e 100644 --- a/docs/reference/cluster/voting-exclusions.asciidoc +++ b/docs/reference/cluster/voting-exclusions.asciidoc @@ -41,7 +41,7 @@ manually. It adds an entry for that node in the voting configuration exclusions list. The cluster then tries to reconfigure the voting configuration to remove that node and to prevent it from returning. -If the API fails, you can safely retry it. Only a successful response +If the API fails, you can safely retry it. Only a successful response guarantees that the node has been removed from the voting configuration and will not be reinstated. diff --git a/docs/reference/commands/node-tool.asciidoc b/docs/reference/commands/node-tool.asciidoc index 17a4e949455d..4582d9b2b7ca 100644 --- a/docs/reference/commands/node-tool.asciidoc +++ b/docs/reference/commands/node-tool.asciidoc @@ -36,11 +36,11 @@ This tool has a number of modes: prevents the cluster state from being loaded. * `elasticsearch-node unsafe-bootstrap` can be used to perform _unsafe cluster - bootstrapping_. It forces one of the nodes to form a brand-new cluster on + bootstrapping_. It forces one of the nodes to form a brand-new cluster on its own, using its local copy of the cluster metadata. * `elasticsearch-node detach-cluster` enables you to move nodes from one - cluster to another. This can be used to move nodes into a new cluster + cluster to another. 
This can be used to move nodes into a new cluster created with the `elasticsearch-node unsafe-bootstrap` command. If unsafe cluster bootstrapping was not possible, it also enables you to move nodes into a brand-new cluster. @@ -218,7 +218,7 @@ node with the same term, pick the one with the largest version. This information identifies the node with the freshest cluster state, which minimizes the quantity of data that might be lost. For example, if the first node reports `(4, 12)` and a second node reports `(5, 3)`, then the second node is preferred -since its term is larger. However if the second node reports `(3, 17)` then +since its term is larger. However if the second node reports `(3, 17)` then the first node is preferred since its term is larger. If the second node reports `(4, 10)` then it has the same term as the first node, but has a smaller version, so the first node is preferred. diff --git a/docs/reference/commands/shard-tool.asciidoc b/docs/reference/commands/shard-tool.asciidoc index 32e73c757b72..10eb18ea7014 100644 --- a/docs/reference/commands/shard-tool.asciidoc +++ b/docs/reference/commands/shard-tool.asciidoc @@ -7,7 +7,7 @@ shard if a good copy of the shard cannot be recovered automatically or restored from backup. [WARNING] -You will lose the corrupted data when you run `elasticsearch-shard`. This tool +You will lose the corrupted data when you run `elasticsearch-shard`. This tool should only be used as a last resort if there is no way to recover from another copy of the shard or restore a snapshot. diff --git a/docs/reference/data-streams/data-streams.asciidoc b/docs/reference/data-streams/data-streams.asciidoc index faef8d50e7c8..26874290fae5 100644 --- a/docs/reference/data-streams/data-streams.asciidoc +++ b/docs/reference/data-streams/data-streams.asciidoc @@ -35,7 +35,7 @@ backing indices. Every document indexed to a data stream must contain a `@timestamp` field, mapped as a <> or <> field type. If the index template doesn't specify a mapping for the `@timestamp` field, {es} maps -`@timestamp` as a `date` field with default options. +`@timestamp` as a `date` field with default options. // end::timestamp-reqs[] The same index template can be used for multiple data streams. You cannot diff --git a/docs/reference/data-streams/promote-data-stream-api.asciidoc b/docs/reference/data-streams/promote-data-stream-api.asciidoc index 83b732a8dce4..281e9b549abc 100644 --- a/docs/reference/data-streams/promote-data-stream-api.asciidoc +++ b/docs/reference/data-streams/promote-data-stream-api.asciidoc @@ -10,7 +10,7 @@ a data stream that is replicated by CCR into a regular data stream. Via CCR Auto Following, a data stream from a remote cluster -can be replicated to the local cluster. These data streams +can be replicated to the local cluster. These data streams can't be rolled over in the local cluster. Only if the upstream data stream rolls over then these replicated data streams roll over as well. In the event that the remote cluster is no longer diff --git a/docs/reference/docs/data-replication.asciidoc b/docs/reference/docs/data-replication.asciidoc index 64a7ed5220a8..d9cc3c6ab022 100644 --- a/docs/reference/docs/data-replication.asciidoc +++ b/docs/reference/docs/data-replication.asciidoc @@ -89,7 +89,7 @@ This is a valid scenario that can happen due to index configuration or simply because all the replicas have failed. In that case the primary is processing operations without any external validation, which may seem problematic. 
On the other hand, the primary cannot fail other shards on its own but request the master to do so on its behalf. This means that the master knows that the primary is the only single good copy. We are therefore guaranteed -that the master will not promote any other (out-of-date) shard copy to be a new primary and that any operation indexed +that the master will not promote any other (out-of-date) shard copy to be a new primary and that any operation indexed into the primary will not be lost. Of course, since at that point we are running with only single copy of the data, physical hardware issues can cause data loss. See <> for some mitigation options. ************ diff --git a/docs/reference/docs/delete-by-query.asciidoc b/docs/reference/docs/delete-by-query.asciidoc index 6b6f32225d42..32b362133810 100644 --- a/docs/reference/docs/delete-by-query.asciidoc +++ b/docs/reference/docs/delete-by-query.asciidoc @@ -125,7 +125,7 @@ to disable throttling. Throttling uses a wait time between batches so that the internal scroll requests can be given a timeout that takes the request padding into account. The padding time is the difference between the batch size divided by the -`requests_per_second` and the time spent writing. By default the batch size is +`requests_per_second` and the time spent writing. By default the batch size is `1000`, so if `requests_per_second` is set to `500`: [source,txt] diff --git a/docs/reference/docs/get.asciidoc b/docs/reference/docs/get.asciidoc index a7925ce614bd..e0aea7cfd689 100644 --- a/docs/reference/docs/get.asciidoc +++ b/docs/reference/docs/get.asciidoc @@ -378,7 +378,7 @@ The API returns the following result: // TESTRESPONSE[s/"_seq_no" : \d+/"_seq_no" : $body._seq_no/ s/"_primary_term" : 1/"_primary_term" : $body._primary_term/] Field values fetched from the document itself are always returned as an array. -Since the `counter` field is not stored, the get request ignores it. +Since the `counter` field is not stored, the get request ignores it. You can also retrieve metadata fields like the `_routing` field: diff --git a/docs/reference/docs/index_.asciidoc b/docs/reference/docs/index_.asciidoc index 2198fa848c26..5172af5d9ad8 100644 --- a/docs/reference/docs/index_.asciidoc +++ b/docs/reference/docs/index_.asciidoc @@ -216,7 +216,7 @@ automatically. You can modify this setting to explicitly allow or block automatic creation of indices that match specified patterns, or set it to `false` to disable automatic index creation entirely. Specify a comma-separated list of patterns you want to allow, or prefix each pattern with -`+` or `-` to indicate whether it should be allowed or blocked. When a list is +`+` or `-` to indicate whether it should be allowed or blocked. When a list is specified, the default behaviour is to disallow. IMPORTANT: The `action.auto_create_index` setting only affects the automatic diff --git a/docs/reference/docs/termvectors.asciidoc b/docs/reference/docs/termvectors.asciidoc index 649b91a9c86f..8fa6392e08d5 100644 --- a/docs/reference/docs/termvectors.asciidoc +++ b/docs/reference/docs/termvectors.asciidoc @@ -323,7 +323,7 @@ GET /my-index-000001/_termvectors/1 ===== Artificial documents Term vectors can also be generated for artificial documents, -that is for documents not present in the index. For example, the following request would +that is for documents not present in the index. For example, the following request would return the same results as in example 1. The mapping used is determined by the `index`. 
*If dynamic mapping is turned on (default), the document fields not in the original diff --git a/docs/reference/docs/update-by-query.asciidoc b/docs/reference/docs/update-by-query.asciidoc index 72e52071b9aa..3238f48c95a7 100644 --- a/docs/reference/docs/update-by-query.asciidoc +++ b/docs/reference/docs/update-by-query.asciidoc @@ -119,7 +119,7 @@ to disable throttling. Throttling uses a wait time between batches so that the internal scroll requests can be given a timeout that takes the request padding into account. The padding time is the difference between the batch size divided by the -`requests_per_second` and the time spent writing. By default the batch size is +`requests_per_second` and the time spent writing. By default the batch size is `1000`, so if `requests_per_second` is set to `500`: [source,txt] diff --git a/docs/reference/docs/update.asciidoc b/docs/reference/docs/update.asciidoc index a30fd0e32e68..cffbe2037c9e 100644 --- a/docs/reference/docs/update.asciidoc +++ b/docs/reference/docs/update.asciidoc @@ -191,7 +191,7 @@ POST test/_update/1 // TEST[continued] Instead of updating the document, you can also change the operation that is -executed from within the script. For example, this request deletes the doc if +executed from within the script. For example, this request deletes the doc if the `tags` field contains `green`, otherwise it does nothing (`noop`): [source,console] @@ -285,7 +285,7 @@ POST test/_update/1 ===== Upsert If the document does not already exist, the contents of the `upsert` element -are inserted as a new document. If the document exists, the +are inserted as a new document. If the document exists, the `script` is executed: [source,console] diff --git a/docs/reference/eql/functions.asciidoc b/docs/reference/eql/functions.asciidoc index 187bc37d5f86..67429ca53645 100644 --- a/docs/reference/eql/functions.asciidoc +++ b/docs/reference/eql/functions.asciidoc @@ -654,7 +654,7 @@ multiply() + -- (Required, integer or float or `null`) -Factor to multiply. If `null`, the function returns `null`. +Factor to multiply. If `null`, the function returns `null`. Two factors are required. No more than two factors can be provided. diff --git a/docs/reference/ilm/ilm-skip-rollover.asciidoc b/docs/reference/ilm/ilm-skip-rollover.asciidoc index feaea73fec8c..1e28a2de4378 100644 --- a/docs/reference/ilm/ilm-skip-rollover.asciidoc +++ b/docs/reference/ilm/ilm-skip-rollover.asciidoc @@ -9,7 +9,7 @@ It's set automatically by {ilm-init} when the rollover action completes successf You can set it manually to skip rollover if you need to make an exception to your normal lifecycle policy and update the alias to force a roll over, but want {ilm-init} to continue to manage the index. -If you use the rollover API. It is not necessary to configure this setting manually. +If you use the rollover API. It is not necessary to configure this setting manually. If an index's lifecycle policy is removed, this setting is also removed. diff --git a/docs/reference/index-modules.asciidoc b/docs/reference/index-modules.asciidoc index 4358ffd74026..d17cfcf8643e 100644 --- a/docs/reference/index-modules.asciidoc +++ b/docs/reference/index-modules.asciidoc @@ -11,7 +11,7 @@ an index. [[index-modules-settings-description]] // tag::index-modules-settings-description-tag[] -Index level settings can be set per-index. Settings may be: +Index level settings can be set per-index. Settings may be: _static_:: @@ -50,7 +50,7 @@ NOTE: The number of shards are limited to `1024` per index. 
This limitation is a Number of routing shards used to <> an index. For example, a 5 shard index with `number_of_routing_shards` set to `30` (`5 x -2 x 3`) could be split by a factor of `2` or `3`. In other words, it could be +2 x 3`) could be split by a factor of `2` or `3`. In other words, it could be split as follows: * `5` -> `10` -> `30` (split by 2, then by 3) @@ -140,7 +140,7 @@ specific index module: [[dynamic-index-number-of-replicas]] `index.number_of_replicas`:: - The number of replicas each primary shard has. Defaults to 1. + The number of replicas each primary shard has. Defaults to 1. `index.auto_expand_replicas`:: Auto-expand the number of replicas based on the number of data nodes in the @@ -165,7 +165,7 @@ are ignored for this index. `index.refresh_interval`:: How often to perform a refresh operation, which makes recent changes to the - index visible to search. Defaults to `1s`. Can be set to `-1` to disable + index visible to search. Defaults to `1s`. Can be set to `-1` to disable refresh. If this setting is not explicitly set, shards that haven't seen search traffic for at least `index.search.idle.after` seconds will not receive background refreshes until they receive a search request. Searches that hit an diff --git a/docs/reference/index-modules/allocation/delayed.asciidoc b/docs/reference/index-modules/allocation/delayed.asciidoc index 5dee9444668c..fd199a88376d 100644 --- a/docs/reference/index-modules/allocation/delayed.asciidoc +++ b/docs/reference/index-modules/allocation/delayed.asciidoc @@ -26,7 +26,7 @@ this scenario: * The master rebalances the cluster by allocating shards to Node 5. If the master had just waited for a few minutes, then the missing shards could -have been re-allocated to Node 5 with the minimum of network traffic. This +have been re-allocated to Node 5 with the minimum of network traffic. This process would be even quicker for idle shards (shards not receiving indexing requests) which have been automatically <>. @@ -65,7 +65,7 @@ Also, in case of a master failover situation, elapsed delay time is forgotten ==== Cancellation of shard relocation If delayed allocation times out, the master assigns the missing shards to -another node which will start recovery. If the missing node rejoins the +another node which will start recovery. If the missing node rejoins the cluster, and its shards still have the same sync-id as the primary, shard relocation will be cancelled and the synced shard will be used for recovery instead. diff --git a/docs/reference/index-modules/allocation/prioritization.asciidoc b/docs/reference/index-modules/allocation/prioritization.asciidoc index 5a864b657bad..2de47868b9c2 100644 --- a/docs/reference/index-modules/allocation/prioritization.asciidoc +++ b/docs/reference/index-modules/allocation/prioritization.asciidoc @@ -11,7 +11,7 @@ Indices are sorted into priority order as follows: This means that, by default, newer indices will be recovered before older indices. Use the per-index dynamically updatable `index.priority` setting to customise -the index prioritization order. For instance: +the index prioritization order. 
For instance: [source,console] ------------------------------ diff --git a/docs/reference/index-modules/allocation/total_shards.asciidoc b/docs/reference/index-modules/allocation/total_shards.asciidoc index 265ef79564d0..1d8c498a3e85 100644 --- a/docs/reference/index-modules/allocation/total_shards.asciidoc +++ b/docs/reference/index-modules/allocation/total_shards.asciidoc @@ -2,7 +2,7 @@ === Total shards per node The cluster-level shard allocator tries to spread the shards of a single index -across as many nodes as possible. However, depending on how many shards and +across as many nodes as possible. However, depending on how many shards and indices you have, and how big they are, it may not always be possible to spread shards evenly. @@ -13,7 +13,7 @@ number of shards from a single index allowed per node: `index.routing.allocation.total_shards_per_node`:: The maximum number of shards (replicas and primaries) that will be - allocated to a single node. Defaults to unbounded. + allocated to a single node. Defaults to unbounded. You can also limit the amount of shards a node can have regardless of the index: diff --git a/docs/reference/index-modules/merge.asciidoc b/docs/reference/index-modules/merge.asciidoc index 3a262b0678e4..b4c785446ca4 100644 --- a/docs/reference/index-modules/merge.asciidoc +++ b/docs/reference/index-modules/merge.asciidoc @@ -15,7 +15,7 @@ resources between merging and other activities like search. === Merge scheduling The merge scheduler (ConcurrentMergeScheduler) controls the execution of merge -operations when they are needed. Merges run in separate threads, and when the +operations when they are needed. Merges run in separate threads, and when the maximum number of threads is reached, further merges will wait until a merge thread becomes available. @@ -26,6 +26,6 @@ The merge scheduler supports the following _dynamic_ setting: The maximum number of threads on a single shard that may be merging at once. Defaults to `Math.max(1, Math.min(4, <> / 2))` which - works well for a good solid-state-disk (SSD). If your index is on spinning + works well for a good solid-state-disk (SSD). If your index is on spinning platter drives instead, decrease this to 1. diff --git a/docs/reference/indices/clone-index.asciidoc b/docs/reference/indices/clone-index.asciidoc index 414c3929d265..5724ee0bafff 100644 --- a/docs/reference/indices/clone-index.asciidoc +++ b/docs/reference/indices/clone-index.asciidoc @@ -142,7 +142,7 @@ can be allocated on that node. Once the primary shard is allocated, it moves to state `initializing`, and the clone process begins. When the clone operation completes, the shard will -become `active`. At that point, {es} will try to allocate any +become `active`. At that point, {es} will try to allocate any replicas and may decide to relocate the primary shard to another node. [[clone-wait-active-shards]] diff --git a/docs/reference/indices/rollover-index.asciidoc b/docs/reference/indices/rollover-index.asciidoc index 0489d034912f..fd4f54826ece 100644 --- a/docs/reference/indices/rollover-index.asciidoc +++ b/docs/reference/indices/rollover-index.asciidoc @@ -388,7 +388,7 @@ POST /my_alias/_rollover/my_new_index_name If the rollover target is an index alias, it can be useful to use <> to name the rollover index according to the date that the index rolled over, e.g. -`logstash-2016.02.03`. The rollover API supports date math, but requires the +`logstash-2016.02.03`. 
The rollover API supports date math, but requires the index name to end with a dash followed by a number, e.g. `logstash-2016.02.03-1` which is incremented every time the index is rolled over. For instance: @@ -448,7 +448,7 @@ GET _alias ////////////////////////// These indices can then be referenced as described in the -<>. For example, to search +<>. For example, to search over indices created in the last three days, you could do the following: [source,console] diff --git a/docs/reference/indices/shrink-index.asciidoc b/docs/reference/indices/shrink-index.asciidoc index 7a87317a890d..7c343ec74099 100644 --- a/docs/reference/indices/shrink-index.asciidoc +++ b/docs/reference/indices/shrink-index.asciidoc @@ -62,7 +62,7 @@ PUT /my_source_index/_settings the index, are still allowed. -It can take a while to relocate the source index. Progress can be tracked +It can take a while to relocate the source index. Progress can be tracked with the <>, or the <> can be used to wait until all shards have relocated with the `wait_for_no_relocating_shards` parameter. @@ -195,7 +195,7 @@ can't be allocated on the shrink node, its primary shard will remain Once the primary shard is allocated, it moves to state `initializing`, and the shrink process begins. When the shrink operation completes, the shard will -become `active`. At that point, Elasticsearch will try to allocate any +become `active`. At that point, Elasticsearch will try to allocate any replicas and may decide to relocate the primary shard to another node. diff --git a/docs/reference/indices/split-index.asciidoc b/docs/reference/indices/split-index.asciidoc index c59aca57efff..7baffb46d28e 100644 --- a/docs/reference/indices/split-index.asciidoc +++ b/docs/reference/indices/split-index.asciidoc @@ -72,7 +72,7 @@ original shard can be split into) is determined by the specifies the hashing space that is used internally to distribute documents across shards with consistent hashing. For instance, a 5 shard index with `number_of_routing_shards` set to `30` (`5 x 2 x 3`) could be split by a -factor of `2` or `3`. In other words, it could be split as follows: +factor of `2` or `3`. In other words, it could be split as follows: * `5` -> `10` -> `30` (split by 2, then by 3) * `5` -> `15` -> `30` (split by 3, then by 2) @@ -112,7 +112,7 @@ maximum of 640 shards (with a single split action or multiple split actions). If the original index contains one primary shard (or a multi-shard index has been <> down to a single primary shard), then the -index may by split into an arbitrary number of shards greater than 1. The +index may by split into an arbitrary number of shards greater than 1. The properties of the default number of routing shards will then apply to the newly split index. @@ -244,7 +244,7 @@ can be allocated on that node. Once the primary shard is allocated, it moves to state `initializing`, and the split process begins. When the split operation completes, the shard will -become `active`. At that point, Elasticsearch will try to allocate any +become `active`. At that point, Elasticsearch will try to allocate any replicas and may decide to relocate the primary shard to another node. diff --git a/docs/reference/ingest/enrich.asciidoc b/docs/reference/ingest/enrich.asciidoc index 2899c54f3989..e7ba886261e9 100644 --- a/docs/reference/ingest/enrich.asciidoc +++ b/docs/reference/ingest/enrich.asciidoc @@ -208,13 +208,13 @@ using your ingest pipeline. Once created, you can't update or change an enrich policy. Instead, you can: -. 
Create and <> a new enrich policy. +. Create and <> a new enrich policy. -. Replace the previous enrich policy +. Replace the previous enrich policy with the new enrich policy in any in-use enrich processors. -. Use the <> API +. Use the <> API to delete the previous enrich policy. // end::update-enrich-policy[] diff --git a/docs/reference/ingest/processors/dissect.asciidoc b/docs/reference/ingest/processors/dissect.asciidoc index b7c5fbaf952c..6dff72af1848 100644 --- a/docs/reference/ingest/processors/dissect.asciidoc +++ b/docs/reference/ingest/processors/dissect.asciidoc @@ -6,7 +6,7 @@ Similar to the <>, dissect also extracts structured fields out of a single text field -within a document. However unlike the <>, dissect does not use +within a document. However unlike the <>, dissect does not use {wikipedia}/Regular_expression[Regular Expressions]. This allows dissect's syntax to be simple and for some cases faster than the <>. diff --git a/docs/reference/ingest/processors/sort.asciidoc b/docs/reference/ingest/processors/sort.asciidoc index 81999d2d903b..969a93ebd18f 100644 --- a/docs/reference/ingest/processors/sort.asciidoc +++ b/docs/reference/ingest/processors/sort.asciidoc @@ -4,7 +4,7 @@ Sort ++++ -Sorts the elements of an array ascending or descending. Homogeneous arrays of numbers will be sorted +Sorts the elements of an array ascending or descending. Homogeneous arrays of numbers will be sorted numerically, while arrays of strings or heterogeneous arrays of strings + numbers will be sorted lexicographically. Throws an error when the field is not an array. diff --git a/docs/reference/intro.asciidoc b/docs/reference/intro.asciidoc index 00f3cb9c695b..a86ef841756e 100644 --- a/docs/reference/intro.asciidoc +++ b/docs/reference/intro.asciidoc @@ -95,7 +95,7 @@ metadata, the real power comes from being able to easily access the full suite of search capabilities built on the Apache Lucene search engine library. {es} provides a simple, coherent REST API for managing your cluster and indexing -and searching your data. For testing purposes, you can easily submit requests +and searching your data. For testing purposes, you can easily submit requests directly from the command line or through the Developer Console in {kib}. From your applications, you can use the https://www.elastic.co/guide/en/elasticsearch/client/index.html[{es} client] diff --git a/docs/reference/licensing/delete-license.asciidoc b/docs/reference/licensing/delete-license.asciidoc index 97b12ece9ae5..8411230c66a0 100644 --- a/docs/reference/licensing/delete-license.asciidoc +++ b/docs/reference/licensing/delete-license.asciidoc @@ -16,7 +16,7 @@ This API enables you to delete licensing information. [discrete] ==== Description -When your license expires, {xpack} operates in a degraded mode. For more +When your license expires, {xpack} operates in a degraded mode. For more information, see {kibana-ref}/managing-licenses.html#license-expiration[License expiration]. diff --git a/docs/reference/licensing/start-basic.asciidoc b/docs/reference/licensing/start-basic.asciidoc index 199e917a2921..e921db354987 100644 --- a/docs/reference/licensing/start-basic.asciidoc +++ b/docs/reference/licensing/start-basic.asciidoc @@ -19,7 +19,7 @@ This API starts an indefinite basic license. The `start basic` API enables you to initiate an indefinite basic license, which gives access to all the basic features. 
If the basic license does not support all of the features that are available with your current license, however, you are -notified in the response. You must then re-submit the API request with the +notified in the response. You must then re-submit the API request with the `acknowledge` parameter set to `true`. To check the status of your basic license, use the following API: diff --git a/docs/reference/mapping/dynamic/field-mapping.asciidoc b/docs/reference/mapping/dynamic/field-mapping.asciidoc index 309a231a60af..c3687c9d7e2c 100644 --- a/docs/reference/mapping/dynamic/field-mapping.asciidoc +++ b/docs/reference/mapping/dynamic/field-mapping.asciidoc @@ -49,7 +49,7 @@ fields, use <>. If `date_detection` is enabled (default), then new string fields are checked to see whether their contents match any of the date patterns specified in -`dynamic_date_formats`. If a match is found, a new <> field is +`dynamic_date_formats`. If a match is found, a new <> field is added with the corresponding format. The default value for `dynamic_date_formats` is: diff --git a/docs/reference/mapping/dynamic/templates.asciidoc b/docs/reference/mapping/dynamic/templates.asciidoc index 80760e2a85cb..48c75ef6ae4f 100644 --- a/docs/reference/mapping/dynamic/templates.asciidoc +++ b/docs/reference/mapping/dynamic/templates.asciidoc @@ -30,7 +30,7 @@ Dynamic templates are specified as an array of named objects: "dynamic_templates": [ { "my_template_name": { <1> - ... match conditions ... <2> + ... match conditions ... <2> "mapping": { ... } <3> } }, @@ -287,7 +287,7 @@ PUT my-index-000001/_doc/2 ==== Template variables The `{name}` and `{dynamic_type}` placeholders are replaced in the `mapping` -with the field name and detected dynamic type. The following example sets all +with the field name and detected dynamic type. The following example sets all string fields to use an <> with the same name as the field, and disables <> for all non-string fields: diff --git a/docs/reference/mapping/fields.asciidoc b/docs/reference/mapping/fields.asciidoc index d2625c506f56..8f602f0e2ca7 100644 --- a/docs/reference/mapping/fields.asciidoc +++ b/docs/reference/mapping/fields.asciidoc @@ -2,7 +2,7 @@ == Metadata fields Each document has metadata associated with it, such as the `_index` -and `_id` metadata fields. The behavior of some of these metadata +and `_id` metadata fields. The behavior of some of these metadata fields can be customized when a mapping is created. [discrete] diff --git a/docs/reference/mapping/fields/field-names-field.asciidoc b/docs/reference/mapping/fields/field-names-field.asciidoc index 999d3f7049df..fe3fd55c279b 100644 --- a/docs/reference/mapping/fields/field-names-field.asciidoc +++ b/docs/reference/mapping/fields/field-names-field.asciidoc @@ -2,7 +2,7 @@ === `_field_names` field The `_field_names` field used to index the names of every field in a document that -contains any value other than `null`. This field was used by the +contains any value other than `null`. This field was used by the <> query to find documents that either have or don't have any non-+null+ value for a particular field. 
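For reference, a minimal sketch of the `exists` lookup mentioned in the `_field_names` hunk above; the index name `my-index-000001` and the field `user.id` are assumed placeholders:

[source,console]
----
GET my-index-000001/_search
{
  "query": {
    "exists": {
      "field": "user.id" <1>
    }
  }
}
----
<1> Matches only documents that contain at least one non-`null` value for `user.id`.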
diff --git a/docs/reference/mapping/fields/index-field.asciidoc b/docs/reference/mapping/fields/index-field.asciidoc index 87e55e992c29..89f004915a19 100644 --- a/docs/reference/mapping/fields/index-field.asciidoc +++ b/docs/reference/mapping/fields/index-field.asciidoc @@ -58,7 +58,7 @@ GET index_1,index_2/_search <4> Accessing the `_index` field in scripts The `_index` field is exposed virtually -- it is not added to the Lucene index -as a real field. This means that you can use the `_index` field in a `term` or +as a real field. This means that you can use the `_index` field in a `term` or `terms` query (or any query that is rewritten to a `term` query, such as the `match`, `query_string` or `simple_query_string` query), as well as `prefix` and `wildcard` queries. However, it does not support `regexp` and `fuzzy` diff --git a/docs/reference/mapping/fields/routing-field.asciidoc b/docs/reference/mapping/fields/routing-field.asciidoc index 0da9f2469e70..a53d460e4f36 100644 --- a/docs/reference/mapping/fields/routing-field.asciidoc +++ b/docs/reference/mapping/fields/routing-field.asciidoc @@ -9,7 +9,7 @@ formula: The default value used for `_routing` is the document's <>. Custom routing patterns can be implemented by specifying a custom `routing` -value per document. For instance: +value per document. For instance: [source,console] ------------------------------ @@ -48,7 +48,7 @@ appropriate backing index for the stream. ==== Searching with custom routing -Custom routing can reduce the impact of searches. Instead of having to fan +Custom routing can reduce the impact of searches. Instead of having to fan out a search request to all the shards in an index, the request can be sent to just the shard that matches the specific routing value (or values): @@ -74,7 +74,7 @@ whenever <>, <>, <>, or <> a document. Forgetting the routing value can lead to a document being indexed on more than -one shard. As a safeguard, the `_routing` field can be configured to make a +one shard. As a safeguard, the `_routing` field can be configured to make a custom `routing` value required for all CRUD operations: [source,console] diff --git a/docs/reference/mapping/fields/source-field.asciidoc b/docs/reference/mapping/fields/source-field.asciidoc index 43b1fc3b1e47..0720a7758b04 100644 --- a/docs/reference/mapping/fields/source-field.asciidoc +++ b/docs/reference/mapping/fields/source-field.asciidoc @@ -2,7 +2,7 @@ === `_source` field The `_source` field contains the original JSON document body that was passed -at index time. The `_source` field itself is not indexed (and thus is not +at index time. The `_source` field itself is not indexed (and thus is not searchable), but it is stored so that it can be returned when executing _fetch_ requests, like <> or <>. @@ -29,7 +29,7 @@ PUT my-index-000001 ================================================== Users often disable the `_source` field without thinking about the -consequences, and then live to regret it. If the `_source` field isn't +consequences, and then live to regret it. If the `_source` field isn't available then a number of features are not supported: * The <>, <>, diff --git a/docs/reference/mapping/params/coerce.asciidoc b/docs/reference/mapping/params/coerce.asciidoc index bd048a288018..fde4163238ee 100644 --- a/docs/reference/mapping/params/coerce.asciidoc +++ b/docs/reference/mapping/params/coerce.asciidoc @@ -1,9 +1,9 @@ [[coerce]] === `coerce` -Data is not always clean. Depending on how it is produced a number might be +Data is not always clean. 
Depending on how it is produced a number might be rendered in the JSON body as a true JSON number, e.g. `5`, but it might also -be rendered as a string, e.g. `"5"`. Alternatively, a number that should be +be rendered as a string, e.g. `"5"`. Alternatively, a number that should be an integer might instead be rendered as a floating point, e.g. `5.0`, or even `"5.0"`. diff --git a/docs/reference/mapping/params/doc-values.asciidoc b/docs/reference/mapping/params/doc-values.asciidoc index e065537daa47..000a97a73ef5 100644 --- a/docs/reference/mapping/params/doc-values.asciidoc +++ b/docs/reference/mapping/params/doc-values.asciidoc @@ -7,7 +7,7 @@ unique sorted list of terms, and from that immediately have access to the list of documents that contain the term. Sorting, aggregations, and access to field values in scripts requires a -different data access pattern. Instead of looking up the term and finding +different data access pattern. Instead of looking up the term and finding documents, we need to be able to look up the document and find the terms that it has in a field. diff --git a/docs/reference/mapping/params/dynamic.asciidoc b/docs/reference/mapping/params/dynamic.asciidoc index 1bb845885c43..7e7d055d72d2 100644 --- a/docs/reference/mapping/params/dynamic.asciidoc +++ b/docs/reference/mapping/params/dynamic.asciidoc @@ -89,4 +89,4 @@ accepts the following parameters: or searchable, but will still appear in the `_source` field of returned hits. These fields will not be added to the mapping, and new fields must be added explicitly. `strict`:: If new fields are detected, an exception is thrown and the document - is rejected. New fields must be explicitly added to the mapping. + is rejected. New fields must be explicitly added to the mapping. diff --git a/docs/reference/mapping/params/enabled.asciidoc b/docs/reference/mapping/params/enabled.asciidoc index cd8555d952a0..3c4407b64e12 100644 --- a/docs/reference/mapping/params/enabled.asciidoc +++ b/docs/reference/mapping/params/enabled.asciidoc @@ -2,14 +2,14 @@ === `enabled` Elasticsearch tries to index all of the fields you give it, but sometimes you -want to just store the field without indexing it. For instance, imagine that -you are using Elasticsearch as a web session store. You may want to index the +want to just store the field without indexing it. For instance, imagine that +you are using Elasticsearch as a web session store. You may want to index the session ID and last update time, but you don't need to query or run aggregations on the session data itself. The `enabled` setting, which can be applied only to the top-level mapping definition and to <> fields, causes Elasticsearch to skip -parsing of the contents of the field entirely. The JSON can still be retrieved +parsing of the contents of the field entirely. The JSON can still be retrieved from the <> field, but it is not searchable or stored in any other way: diff --git a/docs/reference/mapping/params/format.asciidoc b/docs/reference/mapping/params/format.asciidoc index df66a3b64f9c..9d3468e38a5a 100644 --- a/docs/reference/mapping/params/format.asciidoc +++ b/docs/reference/mapping/params/format.asciidoc @@ -31,7 +31,7 @@ down to the nearest day. [[custom-date-formats]] ==== Custom date formats -Completely customizable date formats are supported. The syntax for these is explained +Completely customizable date formats are supported. The syntax for these is explained https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html[DateTimeFormatter docs]. 
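As a hedged illustration of the custom format syntax above, the sketch below maps a `date` field with a custom `DateTimeFormatter` pattern chained to a built-in format; the index name is a placeholder:

[source,console]
----
PUT my-index-000001
{
  "mappings": {
    "properties": {
      "date": {
        "type":   "date",
        "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis" <1>
      }
    }
  }
}
----
<1> Each format is tried in turn; the first one is used to render values back as strings.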
[[built-in-date-formats]] diff --git a/docs/reference/mapping/params/ignore-malformed.asciidoc b/docs/reference/mapping/params/ignore-malformed.asciidoc index 1db2a8cde897..e103be1c98db 100644 --- a/docs/reference/mapping/params/ignore-malformed.asciidoc +++ b/docs/reference/mapping/params/ignore-malformed.asciidoc @@ -1,12 +1,12 @@ [[ignore-malformed]] === `ignore_malformed` -Sometimes you don't have much control over the data that you receive. One +Sometimes you don't have much control over the data that you receive. One user may send a `login` field that is a <>, and another sends a `login` field that is an email address. Trying to index the wrong data type into a field throws an exception by -default, and rejects the whole document. The `ignore_malformed` parameter, if +default, and rejects the whole document. The `ignore_malformed` parameter, if set to `true`, allows the exception to be ignored. The malformed field is not indexed, but other fields in the document are processed normally. diff --git a/docs/reference/mapping/params/index-phrases.asciidoc b/docs/reference/mapping/params/index-phrases.asciidoc index 1b169a33dcc2..5bf6b4212812 100644 --- a/docs/reference/mapping/params/index-phrases.asciidoc +++ b/docs/reference/mapping/params/index-phrases.asciidoc @@ -2,7 +2,7 @@ === `index_phrases` If enabled, two-term word combinations ('shingles') are indexed into a separate -field. This allows exact phrase queries (no slop) to run more efficiently, at the expense -of a larger index. Note that this works best when stopwords are not removed, +field. This allows exact phrase queries (no slop) to run more efficiently, at the expense +of a larger index. Note that this works best when stopwords are not removed, as phrases containing stopwords will not use the subsidiary field and will fall -back to a standard phrase query. Accepts `true` or `false` (default). \ No newline at end of file +back to a standard phrase query. Accepts `true` or `false` (default). \ No newline at end of file diff --git a/docs/reference/mapping/params/index-prefixes.asciidoc b/docs/reference/mapping/params/index-prefixes.asciidoc index 1184245ca154..a143c5531c81 100644 --- a/docs/reference/mapping/params/index-prefixes.asciidoc +++ b/docs/reference/mapping/params/index-prefixes.asciidoc @@ -2,17 +2,17 @@ === `index_prefixes` The `index_prefixes` parameter enables the indexing of term prefixes to speed -up prefix searches. It accepts the following optional settings: +up prefix searches. It accepts the following optional settings: [horizontal] `min_chars`:: - The minimum prefix length to index. Must be greater than 0, and defaults - to 2. The value is inclusive. + The minimum prefix length to index. Must be greater than 0, and defaults + to 2. The value is inclusive. `max_chars`:: - The maximum prefix length to index. Must be less than 20, and defaults to 5. + The maximum prefix length to index. Must be less than 20, and defaults to 5. The value is inclusive. This example creates a text field using the default prefix length settings: diff --git a/docs/reference/mapping/params/meta.asciidoc b/docs/reference/mapping/params/meta.asciidoc index 16f4394ba57f..b78a61f93e51 100644 --- a/docs/reference/mapping/params/meta.asciidoc +++ b/docs/reference/mapping/params/meta.asciidoc @@ -43,7 +43,7 @@ unit:: metric_type:: - The metric type of a numeric field: `"gauge"` or `"counter"`. A gauge is a + The metric type of a numeric field: `"gauge"` or `"counter"`. 
A gauge is a single-value measurement that can go up or down over time, such as a temperature. A counter is a single-value cumulative counter that only goes up, such as the number of requests processed by a web server. By default, diff --git a/docs/reference/mapping/params/multi-fields.asciidoc b/docs/reference/mapping/params/multi-fields.asciidoc index 3045103d82d5..e18ee3b1823c 100644 --- a/docs/reference/mapping/params/multi-fields.asciidoc +++ b/docs/reference/mapping/params/multi-fields.asciidoc @@ -2,7 +2,7 @@ === `fields` It is often useful to index the same field in different ways for different -purposes. This is the purpose of _multi-fields_. For instance, a `string` +purposes. This is the purpose of _multi-fields_. For instance, a `string` field could be mapped as a `text` field for full-text search, and as a `keyword` field for sorting or aggregations: @@ -117,12 +117,12 @@ GET my-index-000001/_search <4> Query both the `text` and `text.english` fields and combine the scores. The `text` field contains the term `fox` in the first document and `foxes` in -the second document. The `text.english` field contains `fox` for both +the second document. The `text.english` field contains `fox` for both documents, because `foxes` is stemmed to `fox`. The query string is also analyzed by the `standard` analyzer for the `text` -field, and by the `english` analyzer for the `text.english` field. The +field, and by the `english` analyzer for the `text.english` field. The stemmed field allows a query for `foxes` to also match the document containing -just `fox`. This allows us to match as many documents as possible. By also +just `fox`. This allows us to match as many documents as possible. By also querying the unstemmed `text` field, we improve the relevance score of the document which matches `foxes` exactly. diff --git a/docs/reference/mapping/params/norms.asciidoc b/docs/reference/mapping/params/norms.asciidoc index ce1097e29c9d..be60daebfbe8 100644 --- a/docs/reference/mapping/params/norms.asciidoc +++ b/docs/reference/mapping/params/norms.asciidoc @@ -8,7 +8,7 @@ Although useful for scoring, norms also require quite a lot of disk (typically in the order of one byte per document per field in your index, even for documents that don't have this specific field). As a consequence, if you don't need scoring on a specific field, you should disable norms on that -field. In particular, this is the case for fields that are used solely for +field. In particular, this is the case for fields that are used solely for filtering or aggregations. TIP: Norms can be disabled on existing fields using diff --git a/docs/reference/mapping/params/null-value.asciidoc b/docs/reference/mapping/params/null-value.asciidoc index f1737ed5ace2..d1ae7afc4ab6 100644 --- a/docs/reference/mapping/params/null-value.asciidoc +++ b/docs/reference/mapping/params/null-value.asciidoc @@ -1,12 +1,12 @@ [[null-value]] === `null_value` -A `null` value cannot be indexed or searched. When a field is set to `null`, +A `null` value cannot be indexed or searched. When a field is set to `null`, (or an empty array or an array of `null` values) it is treated as though that field has no values. The `null_value` parameter allows you to replace explicit `null` values with -the specified value so that it can be indexed and searched. For instance: +the specified value so that it can be indexed and searched. 
For instance: [source,console] -------------------------------------------------- @@ -46,7 +46,7 @@ GET my-index-000001/_search <2> An empty array does not contain an explicit `null`, and so won't be replaced with the `null_value`. <3> A query for `NULL` returns document 1, but not document 2. -IMPORTANT: The `null_value` needs to be the same data type as the field. For +IMPORTANT: The `null_value` needs to be the same data type as the field. For instance, a `long` field cannot have a string `null_value`. NOTE: The `null_value` only influences how data is indexed, it doesn't modify diff --git a/docs/reference/mapping/params/position-increment-gap.asciidoc b/docs/reference/mapping/params/position-increment-gap.asciidoc index 39b7b87cb3b3..79b7616a7c6b 100644 --- a/docs/reference/mapping/params/position-increment-gap.asciidoc +++ b/docs/reference/mapping/params/position-increment-gap.asciidoc @@ -47,7 +47,7 @@ GET my-index-000001/_search are in separate strings, because `slop` > `position_increment_gap`. -The `position_increment_gap` can be specified in the mapping. For instance: +The `position_increment_gap` can be specified in the mapping. For instance: [source,console] -------------------------------------------------- diff --git a/docs/reference/mapping/params/properties.asciidoc b/docs/reference/mapping/params/properties.asciidoc index 399e79d81306..106c3bb9c977 100644 --- a/docs/reference/mapping/params/properties.asciidoc +++ b/docs/reference/mapping/params/properties.asciidoc @@ -3,7 +3,7 @@ Type mappings, <> and <> contain sub-fields, called `properties`. These properties may be of any -<>, including `object` and `nested`. Properties can +<>, including `object` and `nested`. Properties can be added: * explicitly by defining them when <>. @@ -62,7 +62,7 @@ PUT my-index-000001/_doc/1 <4> <4> An example document which corresponds to the above mapping. TIP: The `properties` setting is allowed to have different settings for fields -of the same name in the same index. New properties can be added to existing +of the same name in the same index. New properties can be added to existing fields using the <>. ==== Dot notation diff --git a/docs/reference/mapping/params/store.asciidoc b/docs/reference/mapping/params/store.asciidoc index c66c19812d6b..a8e274a25e6b 100644 --- a/docs/reference/mapping/params/store.asciidoc +++ b/docs/reference/mapping/params/store.asciidoc @@ -2,16 +2,16 @@ === `store` By default, field values are <> to make them searchable, -but they are not _stored_. This means that the field can be queried, but the +but they are not _stored_. This means that the field can be queried, but the original field value cannot be retrieved. -Usually this doesn't matter. The field value is already part of the +Usually this doesn't matter. The field value is already part of the <>, which is stored by default. If you only want to retrieve the value of a single field or of a few fields, instead of the whole `_source`, then this can be achieved with <>. -In certain situations it can make sense to `store` a field. For instance, if +In certain situations it can make sense to `store` a field. 
For instance, if you have a document with a `title`, a `date`, and a very large `content` field, you may want to retrieve just the `title` and the `date` without having to extract those fields from a large `_source` field: diff --git a/docs/reference/mapping/removal_of_types.asciidoc b/docs/reference/mapping/removal_of_types.asciidoc index acb521bd7990..a931f8d0c1f7 100644 --- a/docs/reference/mapping/removal_of_types.asciidoc +++ b/docs/reference/mapping/removal_of_types.asciidoc @@ -1,6 +1,6 @@ [[removal-of-types]] == Removal of mapping types -Elasticsearch 8.0.0 no longer supports mapping types. For details on how to +Elasticsearch 8.0.0 no longer supports mapping types. For details on how to migrate your clusters away from mapping types, see the {ref-7x}/removal-of-types.html[removal of types] documentation for the 7.x release. diff --git a/docs/reference/mapping/types.asciidoc b/docs/reference/mapping/types.asciidoc index a12b932776e7..106315963b6b 100644 --- a/docs/reference/mapping/types.asciidoc +++ b/docs/reference/mapping/types.asciidoc @@ -119,7 +119,7 @@ same field type. See <>. It is often useful to index the same field in different ways for different purposes. For instance, a `string` field could be mapped as a `text` field for full-text search, and as a `keyword` field for -sorting or aggregations. Alternatively, you could index a text field with +sorting or aggregations. Alternatively, you could index a text field with the <>, the <> analyzer, and the <>. diff --git a/docs/reference/mapping/types/alias.asciidoc b/docs/reference/mapping/types/alias.asciidoc index 78d9b7497213..203d2929964d 100644 --- a/docs/reference/mapping/types/alias.asciidoc +++ b/docs/reference/mapping/types/alias.asciidoc @@ -4,7 +4,7 @@ Alias ++++ -An `alias` mapping defines an alternate name for a field in the index. +An `alias` mapping defines an alternate name for a field in the index. The alias can be used in place of the target field in <> requests, and selected other APIs like <>. diff --git a/docs/reference/mapping/types/array.asciidoc b/docs/reference/mapping/types/array.asciidoc index 38eabb14c195..d1eb867f41f9 100644 --- a/docs/reference/mapping/types/array.asciidoc +++ b/docs/reference/mapping/types/array.asciidoc @@ -1,7 +1,7 @@ [[array]] === Arrays -In Elasticsearch, there is no dedicated `array` data type. Any field can contain +In Elasticsearch, there is no dedicated `array` data type. Any field can contain zero or more values by default, however, all values in the array must be of the same data type. For instance: @@ -15,7 +15,7 @@ same data type. For instance: ==================================================== Arrays of objects do not work as you would expect: you cannot query each -object independently of the other objects in the array. If you need to be +object independently of the other objects in the array. If you need to be able to do this then you should use the <> data type instead of the <> data type. @@ -24,14 +24,14 @@ This is explained in more detail in <>. When adding a field dynamically, the first value in the array determines the -field `type`. All subsequent values must be of the same data type or it must +field `type`. All subsequent values must be of the same data type or it must at least be possible to <> subsequent values to the same data type. Arrays with a mixture of data types are _not_ supported: [ `10`, `"some string"` ] An array may contain `null` values, which are either replaced by the -configured <> or skipped entirely. 
An empty array +configured <> or skipped entirely. An empty array `[]` is treated as a missing field -- a field with no values. Nothing needs to be pre-configured in order to use arrays in documents, they @@ -86,13 +86,13 @@ GET my-index-000001/_search **************************************************** The fact that all field types support multi-value fields out of the box is a -consequence of the origins of Lucene. Lucene was designed to be a full text -search engine. In order to be able to search for individual words within a +consequence of the origins of Lucene. Lucene was designed to be a full text +search engine. In order to be able to search for individual words within a big block of text, Lucene tokenizes the text into individual terms, and adds each term to the inverted index separately. This means that even a simple text field must be able to support multiple -values by default. When other data types were added, such as numbers and +values by default. When other data types were added, such as numbers and dates, they used the same data structure as strings, and so got multi-values for free. diff --git a/docs/reference/mapping/types/boolean.asciidoc b/docs/reference/mapping/types/boolean.asciidoc index 1e9e0608ed53..7d3ab30c012e 100644 --- a/docs/reference/mapping/types/boolean.asciidoc +++ b/docs/reference/mapping/types/boolean.asciidoc @@ -51,7 +51,7 @@ GET my-index-000001/_search Aggregations like the <> use `1` and `0` for the `key`, and the strings `"true"` and -`"false"` for the `key_as_string`. Boolean fields when used in scripts, +`"false"` for the `key_as_string`. Boolean fields when used in scripts, return `1` and `0`: [source,console] @@ -106,7 +106,7 @@ The following parameters are accepted by `boolean` fields: <>:: Accepts any of the true or false values listed above. The value is - substituted for any explicit `null` values. Defaults to `null`, which + substituted for any explicit `null` values. Defaults to `null`, which means the field is treated as missing. <>:: diff --git a/docs/reference/mapping/types/date.asciidoc b/docs/reference/mapping/types/date.asciidoc index 4c134326934a..a52d66965d1a 100644 --- a/docs/reference/mapping/types/date.asciidoc +++ b/docs/reference/mapping/types/date.asciidoc @@ -72,7 +72,7 @@ GET my-index-000001/_search ==== Multiple date formats Multiple formats can be specified by separating them with `||` as a separator. -Each format will be tried in turn until a matching format is found. The first +Each format will be tried in turn until a matching format is found. The first format will be used to convert the _milliseconds-since-the-epoch_ value back into a string. @@ -106,7 +106,7 @@ The following parameters are accepted by `date` fields: <>:: - The date format(s) that can be parsed. Defaults to + The date format(s) that can be parsed. Defaults to `strict_date_optional_time||epoch_millis`. `locale`:: @@ -127,7 +127,7 @@ The following parameters are accepted by `date` fields: <>:: Accepts a date value in one of the configured +format+'s as the field - which is substituted for any explicit `null` values. Defaults to `null`, + which is substituted for any explicit `null` values. Defaults to `null`, which means the field is treated as missing. 
<>:: diff --git a/docs/reference/mapping/types/geo-shape.asciidoc b/docs/reference/mapping/types/geo-shape.asciidoc index d90ae2a2a736..6f829b802f16 100644 --- a/docs/reference/mapping/types/geo-shape.asciidoc +++ b/docs/reference/mapping/types/geo-shape.asciidoc @@ -189,8 +189,8 @@ number of levels for the quad trees in Elasticsearch is 29; the default is 21. [discrete] ===== Spatial strategies deprecated[6.6, PrefixTrees no longer used] The indexing implementation -selected relies on a SpatialStrategy for choosing how to decompose the shapes -(either as grid squares or a tessellated triangular mesh). Each strategy +selected relies on a SpatialStrategy for choosing how to decompose the shapes +(either as grid squares or a tessellated triangular mesh). Each strategy answers the following: * What type of Shapes can be indexed? @@ -342,7 +342,7 @@ POST /example/_doc A `linestring` defined by an array of two or more positions. By specifying only two points, the `linestring` will represent a straight -line. Specifying more than two points creates an arbitrary path. The +line. Specifying more than two points creates an arbitrary path. The following is an example of a LineString in GeoJSON. [source,console] @@ -443,7 +443,7 @@ but for polygons that do cross the dateline (or for other polygons wider than GeoJSON specifications. Otherwise, an unintended polygon may be created and unexpected query/filter results will be returned. -The following provides an example of an ambiguous polygon. Elasticsearch will +The following provides an example of an ambiguous polygon. Elasticsearch will apply the GeoJSON standard to eliminate ambiguity resulting in a polygon that crosses the dateline. @@ -463,7 +463,7 @@ POST /example/_doc // TEST[catch:/mapper_parsing_exception/] An `orientation` parameter can be defined when setting the geo_shape mapping (see <>). This will define vertex -order for the coordinate list on the mapped geo_shape field. It can also be overridden on each document. The following is an example for +order for the coordinate list on the mapped geo_shape field. It can also be overridden on each document. The following is an example for overriding the orientation on a document: [source,console] diff --git a/docs/reference/mapping/types/histogram.asciidoc b/docs/reference/mapping/types/histogram.asciidoc index e47ea875e1ac..3fd0f604bbdb 100644 --- a/docs/reference/mapping/types/histogram.asciidoc +++ b/docs/reference/mapping/types/histogram.asciidoc @@ -6,7 +6,7 @@ Histogram ++++ -A field to store pre-aggregated numerical data representing a histogram. +A field to store pre-aggregated numerical data representing a histogram. This data is defined using two paired arrays: * A `values` array of <> numbers, representing the buckets for diff --git a/docs/reference/mapping/types/keyword.asciidoc b/docs/reference/mapping/types/keyword.asciidoc index 53de733d88c4..d34b60c56a94 100644 --- a/docs/reference/mapping/types/keyword.asciidoc +++ b/docs/reference/mapping/types/keyword.asciidoc @@ -76,7 +76,7 @@ The following parameters are accepted by `keyword` fields: <>:: - Do not index any string longer than this value. Defaults to `2147483647` + Do not index any string longer than this value. Defaults to `2147483647` so that all values would be accepted. Please however note that default dynamic mapping rules create a sub `keyword` field that overrides this default by setting `ignore_above: 256`. 
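A minimal sketch of the `ignore_above` behaviour described in the `keyword` hunk above; the index and field names are assumed for illustration:

[source,console]
----
PUT my-index-000001
{
  "mappings": {
    "properties": {
      "message": {
        "type": "keyword",
        "ignore_above": 20 <1>
      }
    }
  }
}
----
<1> Strings longer than 20 characters remain in `_source` but are not indexed for this field.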
@@ -99,7 +99,7 @@ The following parameters are accepted by `keyword` fields: <>:: Accepts a string value which is substituted for any explicit `null` - values. Defaults to `null`, which means the field is treated as missing. + values. Defaults to `null`, which means the field is treated as missing. <>:: diff --git a/docs/reference/mapping/types/nested.asciidoc b/docs/reference/mapping/types/nested.asciidoc index 0aec51d2b6d3..8611205cb749 100644 --- a/docs/reference/mapping/types/nested.asciidoc +++ b/docs/reference/mapping/types/nested.asciidoc @@ -51,7 +51,7 @@ The previous document would be transformed internally into a document that looks // NOTCONSOLE The `user.first` and `user.last` fields are flattened into multi-value fields, -and the association between `alice` and `white` is lost. This document would +and the association between `alice` and `white` is lost. This document would incorrectly match a query for `alice AND smith`: [source,console] @@ -172,13 +172,13 @@ Nested documents can be: ============================================= Because nested documents are indexed as separate documents, they can only be -accessed within the scope of the `nested` query, the +accessed within the scope of the `nested` query, the `nested`/`reverse_nested` aggregations, or <>. For instance, if a string field within a nested document has <> set to `offsets` to allow use of the postings during the highlighting, these offsets will not be available during the main highlighting -phase. Instead, highlighting needs to be performed via +phase. Instead, highlighting needs to be performed via <>. The same consideration applies when loading fields during a search through <> or <>. @@ -193,7 +193,7 @@ The following parameters are accepted by `nested` fields: <>:: (Optional, string) Whether or not new `properties` should be added dynamically to an existing -nested object. Accepts `true` (default), `false` and `strict`. +nested object. Accepts `true` (default), `false` and `strict`. <>:: (Optional, object) diff --git a/docs/reference/mapping/types/numeric.asciidoc b/docs/reference/mapping/types/numeric.asciidoc index b0377eeec9a1..8bb0ed66e505 100644 --- a/docs/reference/mapping/types/numeric.asciidoc +++ b/docs/reference/mapping/types/numeric.asciidoc @@ -138,25 +138,25 @@ The following parameters are accepted by numeric types: <>:: Accepts a numeric value of the same `type` as the field which is - substituted for any explicit `null` values. Defaults to `null`, which - means the field is treated as missing. Note that this cannot be set + substituted for any explicit `null` values. Defaults to `null`, which + means the field is treated as missing. Note that this cannot be set if the `script` parameter is used. `on_script_error`:: Defines what to do if the script defined by the `script` parameter - throws an error at indexing time. Accepts `reject` (default), which + throws an error at indexing time. Accepts `reject` (default), which will cause the entire document to be rejected, and `ignore`, which will register the field in the document's <> metadata field and continue - indexing. This parameter can only be set if the `script` field is + indexing. This parameter can only be set if the `script` field is also set. `script`:: If this parameter is set, then the field will index values generated by this script, rather than reading the values directly from the - source. Scripts are in the same format as their + source. Scripts are in the same format as their <>. 
Scripts can only be configured on `long` and `double` field types. diff --git a/docs/reference/mapping/types/object.asciidoc b/docs/reference/mapping/types/object.asciidoc index 0a68bb3b2e7f..7aa276393af8 100644 --- a/docs/reference/mapping/types/object.asciidoc +++ b/docs/reference/mapping/types/object.asciidoc @@ -82,7 +82,7 @@ The following parameters are accepted by `object` fields: <>:: Whether or not new `properties` should be added dynamically - to an existing object. Accepts `true` (default), `false` + to an existing object. Accepts `true` (default), `false` and `strict`. <>:: diff --git a/docs/reference/mapping/types/percolator.asciidoc b/docs/reference/mapping/types/percolator.asciidoc index ce3d3d93ff5d..f3a6e5bd4f86 100644 --- a/docs/reference/mapping/types/percolator.asciidoc +++ b/docs/reference/mapping/types/percolator.asciidoc @@ -414,7 +414,7 @@ This results in a response like this: [discrete] ==== Optimizing wildcard queries. -Wildcard queries are more expensive than other queries for the percolator, +Wildcard queries are more expensive than other queries for the percolator, especially if the wildcard expressions are large. In the case of `wildcard` queries with prefix wildcard expressions or just the `prefix` query, diff --git a/docs/reference/mapping/types/shape.asciidoc b/docs/reference/mapping/types/shape.asciidoc index 9599da3325a5..a2236ffc8b38 100644 --- a/docs/reference/mapping/types/shape.asciidoc +++ b/docs/reference/mapping/types/shape.asciidoc @@ -28,7 +28,7 @@ fields to the shape type. |Option |Description| Default |`orientation` |Optionally define how to interpret vertex order for -polygons / multipolygons. This parameter defines one of two coordinate +polygons / multipolygons. This parameter defines one of two coordinate system rules (Right-hand or Left-hand) each of which can be specified in three different ways. 1. Right-hand rule: `right`, `ccw`, `counterclockwise`, 2. Left-hand rule: `left`, `cw`, `clockwise`. The default orientation @@ -164,7 +164,7 @@ POST /example/_doc A `linestring` defined by an array of two or more positions. By specifying only two points, the `linestring` will represent a straight -line. Specifying more than two points creates an arbitrary path. The +line. Specifying more than two points creates an arbitrary path. The following is an example of a LineString in GeoJSON. [source,console] diff --git a/docs/reference/mapping/types/text.asciidoc b/docs/reference/mapping/types/text.asciidoc index 9276468f0887..499752a8a6e9 100644 --- a/docs/reference/mapping/types/text.asciidoc +++ b/docs/reference/mapping/types/text.asciidoc @@ -8,7 +8,7 @@ A field to index full-text values, such as the body of an email or the description of a product. These fields are `analyzed`, that is they are passed through an <> to convert the string into a list of individual terms before being indexed. The analysis process allows Elasticsearch to search for -individual words _within_ each full text field. Text fields are not +individual words _within_ each full text field. Text fields are not used for sorting and seldom used for aggregations (although the <> is a notable exception). @@ -93,16 +93,16 @@ The following parameters are accepted by `text` fields: <>:: If enabled, term prefixes of between 2 and 5 characters are indexed into a - separate field. This allows prefix searches to run more efficiently, at + separate field. This allows prefix searches to run more efficiently, at the expense of a larger index. 
<>:: If enabled, two-term word combinations ('shingles') are indexed into a separate - field. This allows exact phrase queries (no slop) to run more efficiently, at the expense - of a larger index. Note that this works best when stopwords are not removed, + field. This allows exact phrase queries (no slop) to run more efficiently, at the expense + of a larger index. Note that this works best when stopwords are not removed, as phrases containing stopwords will not use the subsidiary field and will fall - back to a standard phrase query. Accepts `true` or `false` (default). + back to a standard phrase query. Accepts `true` or `false` (default). <>:: @@ -153,7 +153,7 @@ The following parameters are accepted by `text` fields: aggregations, sorting, or scripting. If you try to sort, aggregate, or access values from a script on a `text` field, you will see this exception: -Fielddata is disabled on text fields by default. Set `fielddata=true` on +Fielddata is disabled on text fields by default. Set `fielddata=true` on `your_field_name` in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory. diff --git a/docs/reference/mapping/types/token-count.asciidoc b/docs/reference/mapping/types/token-count.asciidoc index 65aa19b5525b..23bbc775243a 100644 --- a/docs/reference/mapping/types/token-count.asciidoc +++ b/docs/reference/mapping/types/token-count.asciidoc @@ -83,7 +83,7 @@ Defaults to `true`. <>:: Accepts a numeric value of the same `type` as the field which is - substituted for any explicit `null` values. Defaults to `null`, which + substituted for any explicit `null` values. Defaults to `null`, which means the field is treated as missing. <>:: diff --git a/docs/reference/mapping/types/wildcard.asciidoc b/docs/reference/mapping/types/wildcard.asciidoc index ecd1ff851df2..a0bb12b10fd7 100644 --- a/docs/reference/mapping/types/wildcard.asciidoc +++ b/docs/reference/mapping/types/wildcard.asciidoc @@ -119,11 +119,11 @@ The following parameters are accepted by `wildcard` fields: <>:: Accepts a string value which is substituted for any explicit `null` - values. Defaults to `null`, which means the field is treated as missing. + values. Defaults to `null`, which means the field is treated as missing. <>:: - Do not index any string longer than this value. Defaults to `2147483647` + Do not index any string longer than this value. Defaults to `2147483647` so that all values would be accepted. [discrete] diff --git a/docs/reference/migration/migrate_8_0.asciidoc b/docs/reference/migration/migrate_8_0.asciidoc index a00dc9cb37fa..6b83f3fa3473 100644 --- a/docs/reference/migration/migrate_8_0.asciidoc +++ b/docs/reference/migration/migrate_8_0.asciidoc @@ -60,7 +60,7 @@ enable <>. [%collapsible] ==== *Details* + -Elasticsearch 8.0 can read indices created in version 7.0 or above. An +Elasticsearch 8.0 can read indices created in version 7.0 or above. An Elasticsearch 8.0 node will not start in the presence of indices created in a version of Elasticsearch before 7.0. 
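Pulling together the `text` field options touched earlier in this patch, here is a hedged sketch of a mapping that enables both `index_prefixes` and `index_phrases`; the index and field names are invented for illustration:

[source,console]
----
PUT my-index-000001
{
  "mappings": {
    "properties": {
      "body_text": {
        "type": "text",
        "index_prefixes": {
          "min_chars": 2, <1>
          "max_chars": 5
        },
        "index_phrases": true <2>
      }
    }
  }
}
----
<1> Prefixes of 2 to 5 characters are indexed into a separate field to speed up prefix queries.
<2> Two-term shingles are indexed to speed up exact (no-slop) phrase queries, at the cost of a larger index.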
diff --git a/docs/reference/migration/migrate_8_0/mappings.asciidoc b/docs/reference/migration/migrate_8_0/mappings.asciidoc index 4195b1888aeb..adb239a1b38f 100644 --- a/docs/reference/migration/migrate_8_0/mappings.asciidoc +++ b/docs/reference/migration/migrate_8_0/mappings.asciidoc @@ -69,13 +69,13 @@ Disabling _field_names is not necessary because it no longer carries a large ind ==== *Details* + Index-time boosts have been deprecated since the 5x line, but it was still possible -to declare field-specific boosts in the mappings. This is now removed completely. +to declare field-specific boosts in the mappings. This is now removed completely. Indexes built in 7x that contain mapping boosts will emit warnings, and the boosts -will have no effect in 8.0. New indexes will not permit boosts to be set in their +will have no effect in 8.0. New indexes will not permit boosts to be set in their mappings at all. *Impact* + -The `boost` setting should be removed from templates and mappings. Use boosts +The `boost` setting should be removed from templates and mappings. Use boosts directly on queries instead. ==== @@ -103,13 +103,13 @@ time migration guide]. *Details* + The only permissible values for the `strategy` parameter on `geo_shape` mappings are `term` and `recursive`. In 7.x if a non-permissible value was used as a -parameter here, the mapping would silently fall back to using `recursive`. The +parameter here, the mapping would silently fall back to using `recursive`. The mapping will now be rejected instead. *Impact* + This will have no impact on existing mappings created with non-permissible strategy values, as they will already be serializing themselves as if they -had been configured as `recursive`. New indexes will need to use one of the +had been configured as `recursive`. New indexes will need to use one of the permissible strategies, or preferably not define a strategy at all and use the far more efficient BKD index. ==== diff --git a/docs/reference/ml/anomaly-detection/apis/close-job.asciidoc b/docs/reference/ml/anomaly-detection/apis/close-job.asciidoc index eb66321c4341..a4cb2b8cb867 100644 --- a/docs/reference/ml/anomaly-detection/apis/close-job.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/close-job.asciidoc @@ -52,7 +52,7 @@ the job. NOTE: If you use the `force` query parameter, the request returns without performing the associated actions such as flushing buffers and persisting the model snapshots. Therefore, do not use this parameter if you want the job to be in a consistent state -after the close job API returns. The `force` query parameter should only be used in +after the close job API returns. The `force` query parameter should only be used in situations where the job has already failed, or where you are not interested in results the job might have recently produced or might produce in the future. diff --git a/docs/reference/ml/anomaly-detection/apis/flush-job.asciidoc b/docs/reference/ml/anomaly-detection/apis/flush-job.asciidoc index cee3710c872b..6e01e4dd857b 100644 --- a/docs/reference/ml/anomaly-detection/apis/flush-job.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/flush-job.asciidoc @@ -91,7 +91,7 @@ The `last_finalized_bucket_end` provides the timestamp (in milliseconds-since-the-epoch) of the end of the last bucket that was processed. If you want to flush the job to a specific timestamp, you can use the -`advance_time` or `skip_time` parameters. For example, to advance to 11 AM GMT +`advance_time` or `skip_time` parameters. 
For example, to advance to 11 AM GMT on January 1, 2018: [source,console] diff --git a/docs/reference/ml/anomaly-detection/apis/get-ml-info.asciidoc b/docs/reference/ml/anomaly-detection/apis/get-ml-info.asciidoc index eac0a3c83d64..f00b4e9acc41 100644 --- a/docs/reference/ml/anomaly-detection/apis/get-ml-info.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/get-ml-info.asciidoc @@ -29,8 +29,8 @@ privileges. See <>, <> and This endpoint is designed to be used by a user interface that needs to fully understand machine learning configurations where some options are not specified, -meaning that the defaults should be used. This endpoint may be used to find out -what those defaults are. It also provides information about the maximum size +meaning that the defaults should be used. This endpoint may be used to find out +what those defaults are. It also provides information about the maximum size of {ml} jobs that could run in the current cluster configuration. [[get-ml-info-example]] diff --git a/docs/reference/ml/anomaly-detection/apis/get-overall-buckets.asciidoc b/docs/reference/ml/anomaly-detection/apis/get-overall-buckets.asciidoc index 2f883228f412..ccbc9b48a829 100644 --- a/docs/reference/ml/anomaly-detection/apis/get-overall-buckets.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/get-overall-buckets.asciidoc @@ -47,7 +47,7 @@ fine-tune the `overall_score` so that it is more or less sensitive to the number of jobs that detect an anomaly at the same time. For example, if you set `top_n` to `1`, the `overall_score` is the maximum bucket score in the overall bucket. Alternatively, if you set `top_n` to the number of jobs, the `overall_score` is -high only when all jobs detect anomalies in that overall bucket. If you set +high only when all jobs detect anomalies in that overall bucket. If you set the `bucket_span` parameter (to a value greater than its default), the `overall_score` is the maximum `overall_score` of the overall buckets that have a span equal to the jobs' largest bucket span. diff --git a/docs/reference/ml/anomaly-detection/apis/start-datafeed.asciidoc b/docs/reference/ml/anomaly-detection/apis/start-datafeed.asciidoc index 451634fe0b08..d2af0c70278d 100644 --- a/docs/reference/ml/anomaly-detection/apis/start-datafeed.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/start-datafeed.asciidoc @@ -30,7 +30,7 @@ cluster privileges to use this API. See A {dfeed} must be started in order to retrieve data from {es}. A {dfeed} can be started and stopped multiple times throughout its lifecycle. -When you start a {dfeed}, you can specify a start time. This enables you to +When you start a {dfeed}, you can specify a start time. This enables you to include a training period, providing you have this data available in {es}. If you want to analyze from the beginning of a dataset, you can specify any date earlier than that beginning date. @@ -41,7 +41,7 @@ available. When you start a {dfeed}, you can also specify an end time. If you do so, the job analyzes data from the start time until the end time, at which point the -analysis stops. This scenario is useful for a one-off batch analysis. If you +analysis stops. This scenario is useful for a one-off batch analysis. If you do not specify an end time, the {dfeed} runs continuously. 
The `start` and `end` times can be specified by using one of the diff --git a/docs/reference/ml/df-analytics/apis/put-dfanalytics.asciidoc b/docs/reference/ml/df-analytics/apis/put-dfanalytics.asciidoc index d9c08b51fad5..e84617c35f67 100644 --- a/docs/reference/ml/df-analytics/apis/put-dfanalytics.asciidoc +++ b/docs/reference/ml/df-analytics/apis/put-dfanalytics.asciidoc @@ -461,8 +461,8 @@ may contain documents that don't have an {olscore}. * {regression-cap} supports fields that are numeric, `boolean`, `text`, `keyword`, and `ip`. It is also tolerant of missing values. Fields that are supported are included in the analysis, other fields are ignored. Documents -where included fields contain an array with two or more values are also -ignored. Documents in the `dest` index that don’t contain a results field are +where included fields contain an array with two or more values are also +ignored. Documents in the `dest` index that don’t contain a results field are not included in the {reganalysis}. * {classification-cap} supports fields that are numeric, `boolean`, `text`, `keyword`, and `ip`. It is also tolerant of missing values. Fields that are @@ -470,7 +470,7 @@ supported are included in the analysis, other fields are ignored. Documents where included fields contain an array with two or more values are also ignored. Documents in the `dest` index that don’t contain a results field are not included in the {classanalysis}. {classanalysis-cap} can be improved by mapping -ordinal variable values to a single number. For example, in case of age ranges, +ordinal variable values to a single number. For example, in case of age ranges, you can model the values as "0-14" = 0, "15-24" = 1, "25-34" = 2, and so on. If `analyzed_fields` is not set, only the relevant fields will be included. For diff --git a/docs/reference/ml/ml-shared.asciidoc b/docs/reference/ml/ml-shared.asciidoc index 2e1285d4989e..b31fff8eaa12 100644 --- a/docs/reference/ml/ml-shared.asciidoc +++ b/docs/reference/ml/ml-shared.asciidoc @@ -229,7 +229,7 @@ end::categorization-analyzer[] tag::categorization-examples-limit[] The maximum number of examples stored per category in memory and in the results -data store. The default value is 4. If you increase this value, more examples +data store. The default value is 4. If you increase this value, more examples are available, however it requires that you have more storage available. If you set this value to `0`, no examples are stored. + diff --git a/docs/reference/modules/cluster/disk_allocator.asciidoc b/docs/reference/modules/cluster/disk_allocator.asciidoc index fc6d9f15eb09..3405167966b2 100644 --- a/docs/reference/modules/cluster/disk_allocator.asciidoc +++ b/docs/reference/modules/cluster/disk_allocator.asciidoc @@ -36,7 +36,7 @@ If a node is filling up its disk faster than {es} can move shards elsewhere then there is a risk that the disk will completely fill up. To prevent this, as a last resort, once the disk usage reaches the _flood-stage_ watermark {es} will block writes to indices with a shard on the affected node. It will also -continue to move shards onto the other nodes in the cluster. When disk usage +continue to move shards onto the other nodes in the cluster. When disk usage on the affected node drops below the high watermark, {es} automatically removes the write block. 
@@ -65,7 +65,7 @@ You can use the following settings to control disk-based allocation: // tag::cluster-routing-disk-threshold-tag[] `cluster.routing.allocation.disk.threshold_enabled` {ess-icon}:: (<>) -Defaults to `true`. Set to `false` to disable the disk allocation decider. +Defaults to `true`. Set to `false` to disable the disk allocation decider. // end::cluster-routing-disk-threshold-tag[] [[cluster-routing-watermark-low]] diff --git a/docs/reference/modules/cluster/misc.asciidoc b/docs/reference/modules/cluster/misc.asciidoc index a12e8ca390a6..65afd0e65cd0 100644 --- a/docs/reference/modules/cluster/misc.asciidoc +++ b/docs/reference/modules/cluster/misc.asciidoc @@ -83,7 +83,7 @@ setting. User-defined metadata can be stored and retrieved using the Cluster Settings API. This can be used to store arbitrary, infrequently-changing data about the cluster without the need to create an index to store it. This data may be stored using -any key prefixed with `cluster.metadata.`. For example, to store the email +any key prefixed with `cluster.metadata.`. For example, to store the email address of the administrator of a cluster under the key `cluster.metadata.administrator`, issue this request: @@ -107,7 +107,7 @@ metadata will be viewable by anyone with access to the ===== Index tombstones The cluster state maintains index tombstones to explicitly denote indices that -have been deleted. The number of tombstones maintained in the cluster state is +have been deleted. The number of tombstones maintained in the cluster state is controlled by the following setting: `cluster.indices.tombstones.size`:: @@ -128,7 +128,7 @@ this situation. ===== Logger The settings which control logging can be updated <> with the -`logger.` prefix. For instance, to increase the logging level of the +`logger.` prefix. For instance, to increase the logging level of the `indices.recovery` module to `DEBUG`, issue this request: [source,console] diff --git a/docs/reference/modules/cluster/shards_allocation.asciidoc b/docs/reference/modules/cluster/shards_allocation.asciidoc index 6b83cd670ebd..91833fc364aa 100644 --- a/docs/reference/modules/cluster/shards_allocation.asciidoc +++ b/docs/reference/modules/cluster/shards_allocation.asciidoc @@ -16,7 +16,7 @@ Enable or disable allocation for specific kinds of shards: * `none` - No shard allocations of any kind are allowed for any indices. This setting does not affect the recovery of local primary shards when -restarting a node. A restarted node that has a copy of an unassigned primary +restarting a node. A restarted node that has a copy of an unassigned primary shard will recover that primary immediately, assuming that its allocation id matches one of the active allocation ids in the cluster state. @@ -43,7 +43,7 @@ one of the active allocation ids in the cluster state. While the recovery of replicas happens over the network, the recovery of an unassigned primary after node restart uses data from the local disk. These should be fast so more initial primary recoveries can happen in - parallel on the same node. Defaults to `4`. + parallel on the same node. Defaults to `4`. [[cluster-routing-allocation-same-shard-host]] `cluster.routing.allocation.same_shard.host`:: @@ -119,20 +119,20 @@ calculations. `cluster.routing.allocation.balance.shard`:: (<>) Defines the weight factor for the total number of shards allocated on a node - (float). Defaults to `0.45f`. Raising this raises the tendency to + (float). Defaults to `0.45f`. 
Raising this raises the tendency to equalize the number of shards across all nodes in the cluster. `cluster.routing.allocation.balance.index`:: (<>) Defines the weight factor for the number of shards per index allocated - on a specific node (float). Defaults to `0.55f`. Raising this raises the + on a specific node (float). Defaults to `0.55f`. Raising this raises the tendency to equalize the number of shards per index across all nodes in the cluster. `cluster.routing.allocation.balance.threshold`:: (<>) Minimal optimization value of operations that should be performed (non - negative float). Defaults to `1.0f`. Raising this will cause the cluster + negative float). Defaults to `1.0f`. Raising this will cause the cluster to be less aggressive about optimizing the shard balance. diff --git a/docs/reference/modules/discovery/bootstrapping.asciidoc b/docs/reference/modules/discovery/bootstrapping.asciidoc index 510dd92c9898..c6e21fe70b98 100644 --- a/docs/reference/modules/discovery/bootstrapping.asciidoc +++ b/docs/reference/modules/discovery/bootstrapping.asciidoc @@ -113,7 +113,7 @@ automatically bootstrap a cluster based on the nodes that could be discovered to be running on the same host within a short time after startup. This means that by default it is possible to start up several nodes on a single machine and have them automatically form a cluster which is very useful for development -environments and experimentation. However, since nodes may not always +environments and experimentation. However, since nodes may not always successfully discover each other quickly enough this automatic bootstrapping cannot be relied upon and cannot be used in production deployments. diff --git a/docs/reference/modules/discovery/discovery-settings.asciidoc b/docs/reference/modules/discovery/discovery-settings.asciidoc index 6c3efa94de4c..f0ecd22f27f9 100644 --- a/docs/reference/modules/discovery/discovery-settings.asciidoc +++ b/docs/reference/modules/discovery/discovery-settings.asciidoc @@ -113,7 +113,7 @@ to elect a master node. `cluster.election.duration`:: (<>) Sets how long each election is allowed to take before a node considers it to -have failed and schedules a retry. This defaults to `500ms`. Changing this +have failed and schedules a retry. This defaults to `500ms`. Changing this setting from the default may cause your cluster to fail to elect a master node. `cluster.election.initial_timeout`:: diff --git a/docs/reference/modules/discovery/discovery.asciidoc b/docs/reference/modules/discovery/discovery.asciidoc index 0c5057486def..f55f42a6aead 100644 --- a/docs/reference/modules/discovery/discovery.asciidoc +++ b/docs/reference/modules/discovery/discovery.asciidoc @@ -32,7 +32,7 @@ these occur quickly enough then the node will retry after By default the cluster formation module offers two seed hosts providers to configure the list of seed nodes: a _settings_-based and a _file_-based seed -hosts provider. It can be extended to support cloud environments and other +hosts provider. It can be extended to support cloud environments and other forms of seed hosts providers via {plugins}/discovery.html[discovery plugins]. Seed hosts providers are configured using the `discovery.seed_providers` setting, which defaults to the _settings_-based hosts provider. This setting @@ -107,7 +107,7 @@ supplied in `unicast_hosts.txt`. The `unicast_hosts.txt` file contains one node entry per line. Each node entry consists of the host (host name or IP address) and an optional transport port number. 
If the port number is specified, is must come immediately after the -host (on the same line) separated by a `:`. If the port number is not +host (on the same line) separated by a `:`. If the port number is not specified, {es} will implicitly use the first port in the port range given by `transport.profiles.default.port`, or by `transport.port` if `transport.profiles.default.port` is not set. diff --git a/docs/reference/modules/discovery/publishing.asciidoc b/docs/reference/modules/discovery/publishing.asciidoc index 8452f0cd04f8..208386946d3f 100644 --- a/docs/reference/modules/discovery/publishing.asciidoc +++ b/docs/reference/modules/discovery/publishing.asciidoc @@ -5,7 +5,7 @@ The master node is the only node in a cluster that can make changes to the cluster state. The master node processes one batch of cluster state updates at a time, computing the required changes and publishing the updated cluster state to all the other nodes in the cluster. Each publication starts with the master -broadcasting the updated cluster state to all nodes in the cluster. Each node +broadcasting the updated cluster state to all nodes in the cluster. Each node responds with an acknowledgement but does not yet apply the newly-received state. Once the master has collected acknowledgements from enough master-eligible nodes, the new cluster state is said to be _committed_ and the diff --git a/docs/reference/modules/discovery/quorums.asciidoc b/docs/reference/modules/discovery/quorums.asciidoc index 5cf9438544c6..11aa90645857 100644 --- a/docs/reference/modules/discovery/quorums.asciidoc +++ b/docs/reference/modules/discovery/quorums.asciidoc @@ -22,7 +22,7 @@ tolerance by updating the cluster's <>, which is the set of master-eligible nodes whose responses are counted when making decisions such as electing a new master or committing a new cluster state. A decision is made only after more than half of the nodes in the -voting configuration have responded. Usually the voting configuration is the +voting configuration have responded. Usually the voting configuration is the same as the set of all the master-eligible nodes that are currently in the cluster. However, there are some situations in which they may be different. diff --git a/docs/reference/modules/indices/indexing_buffer.asciidoc b/docs/reference/modules/indices/indexing_buffer.asciidoc index 0269221f8a38..ab5e511267c0 100644 --- a/docs/reference/modules/indices/indexing_buffer.asciidoc +++ b/docs/reference/modules/indices/indexing_buffer.asciidoc @@ -1,7 +1,7 @@ [[indexing-buffer]] === Indexing buffer settings -The indexing buffer is used to store newly indexed documents. When it fills +The indexing buffer is used to store newly indexed documents. When it fills up, the documents in the buffer are written to a segment on disk. It is divided between all shards on the node. @@ -17,9 +17,9 @@ indexing buffer size shared across all shards. `indices.memory.min_index_buffer_size`:: (<>) If the `index_buffer_size` is specified as a percentage, then this -setting can be used to specify an absolute minimum. Defaults to `48mb`. +setting can be used to specify an absolute minimum. Defaults to `48mb`. `indices.memory.max_index_buffer_size`:: (<>) If the `index_buffer_size` is specified as a percentage, then this -setting can be used to specify an absolute maximum. Defaults to unbounded. +setting can be used to specify an absolute maximum. Defaults to unbounded. 
diff --git a/docs/reference/modules/indices/request_cache.asciidoc b/docs/reference/modules/indices/request_cache.asciidoc index 4504f1f4f368..920d071eaf36 100644 --- a/docs/reference/modules/indices/request_cache.asciidoc +++ b/docs/reference/modules/indices/request_cache.asciidoc @@ -80,7 +80,7 @@ PUT /my-index-000001/_settings ==== Enabling and disabling caching per request The `request_cache` query-string parameter can be used to enable or disable -caching on a *per-request* basis. If set, it overrides the index-level setting: +caching on a *per-request* basis. If set, it overrides the index-level setting: [source,console] ----------------------------- @@ -105,7 +105,7 @@ query-string parameter detailed here. [discrete] ==== Cache key -The whole JSON body is used as the cache key. This means that if the JSON +The whole JSON body is used as the cache key. This means that if the JSON changes -- for instance if keys are output in a different order -- then the cache key will not be recognised. @@ -117,7 +117,7 @@ the application to ensure that a request is always serialized in the same way. ==== Cache settings The cache is managed at the node level, and has a default maximum size of `1%` -of the heap. This can be changed in the `config/elasticsearch.yml` file with: +of the heap. This can be changed in the `config/elasticsearch.yml` file with: [source,yaml] -------------------------------- @@ -125,7 +125,7 @@ indices.requests.cache.size: 2% -------------------------------- Also, you can use the +indices.requests.cache.expire+ setting to specify a TTL -for cached results, but there should be no reason to do so. Remember that +for cached results, but there should be no reason to do so. Remember that stale results are automatically invalidated when the index is refreshed. This setting is provided for completeness' sake only. diff --git a/docs/reference/modules/node.asciidoc b/docs/reference/modules/node.asciidoc index bb3ee34940aa..c9dce3b88554 100644 --- a/docs/reference/modules/node.asciidoc +++ b/docs/reference/modules/node.asciidoc @@ -97,8 +97,8 @@ phases which are coordinated by the node which receives the client request -- the _coordinating node_. In the _scatter_ phase, the coordinating node forwards the request to the data -nodes which hold the data. Each data node executes the request locally and -returns its results to the coordinating node. In the _gather_ phase, the +nodes which hold the data. Each data node executes the request locally and +returns its results to the coordinating node. In the _gather_ phase, the coordinating node reduces each data node's results into a single global result set. @@ -308,7 +308,7 @@ can only route requests, handle the search reduce phase, and distribute bulk indexing. Essentially, coordinating only nodes behave as smart load balancers. Coordinating only nodes can benefit large clusters by offloading the -coordinating node role from data and master-eligible nodes. They join the +coordinating node role from data and master-eligible nodes. They join the cluster and receive the full <>, like every other node, and they use the cluster state to route requests directly to the appropriate place(s). 
diff --git a/docs/reference/modules/threadpool.asciidoc b/docs/reference/modules/threadpool.asciidoc index 54e6fb5d133f..43c4b3ecf01f 100644 --- a/docs/reference/modules/threadpool.asciidoc +++ b/docs/reference/modules/threadpool.asciidoc @@ -168,7 +168,7 @@ setting: {es} to size its thread pools as if it only has a fraction of the CPU, you should override the `node.processors` setting to the desired fraction, for example, if you're running two instances of {es} on a 16-core machine, set -`node.processors` to 8. Note that this is an expert-level use case and there's +`node.processors` to 8. Note that this is an expert-level use case and there's a lot more involved than just setting the `node.processors` setting as there are other considerations like changing the number of garbage collector threads, pinning processes to cores, and so on. diff --git a/docs/reference/monitoring/configuring-metricbeat.asciidoc b/docs/reference/monitoring/configuring-metricbeat.asciidoc index cd51a8677483..f4d19669eaf7 100644 --- a/docs/reference/monitoring/configuring-metricbeat.asciidoc +++ b/docs/reference/monitoring/configuring-metricbeat.asciidoc @@ -43,7 +43,7 @@ For more information, see <> and < -- . {metricbeat-ref}/metricbeat-installation-configuration.html[Install {metricbeat}] on each -{es} node in the production cluster. Failure to install on each node may result in incomplete or missing results. +{es} node in the production cluster. Failure to install on each node may result in incomplete or missing results. . Enable the {es} module in {metricbeat} on each {es} node. + @@ -190,7 +190,7 @@ PUT _cluster/settings ---------------------------------- If {es} {security-features} are enabled, you must have `monitor` cluster -privileges to view the cluster settings and `manage` cluster privileges +privileges to view the cluster settings and `manage` cluster privileges to change them. -- diff --git a/docs/reference/query-dsl.asciidoc b/docs/reference/query-dsl.asciidoc index 6e7d3527f91a..8c261d65d158 100644 --- a/docs/reference/query-dsl.asciidoc +++ b/docs/reference/query-dsl.asciidoc @@ -12,7 +12,7 @@ Leaf query clauses:: Leaf query clauses look for a particular value in a particular field, such as the <>, <> or -<> queries. These queries can be used +<> queries. These queries can be used by themselves. Compound query clauses:: diff --git a/docs/reference/query-dsl/bool-query.asciidoc b/docs/reference/query-dsl/bool-query.asciidoc index 1a78e131e01a..3eaee4335f02 100644 --- a/docs/reference/query-dsl/bool-query.asciidoc +++ b/docs/reference/query-dsl/bool-query.asciidoc @@ -23,7 +23,7 @@ and clauses are considered for caching. |`should` |The clause (query) should appear in the matching document. |`must_not` |The clause (query) must not appear in the matching -documents. Clauses are executed in <> meaning +documents. Clauses are executed in <> meaning that scoring is ignored and clauses are considered for caching. Because scoring is ignored, a score of `0` for all documents is returned. |======================================================================= @@ -77,8 +77,8 @@ For other valid values, see the ==== Scoring with `bool.filter` Queries specified under the `filter` element have no effect on scoring -- -scores are returned as `0`. Scores are only affected by the query that has -been specified. For instance, all three of the following queries return +scores are returned as `0`. Scores are only affected by the query that has +been specified. 
For instance, all three of the following queries return all documents where the `status` field contains the term `active`. This first query assigns a score of `0` to all documents, as no scoring diff --git a/docs/reference/query-dsl/compound-queries.asciidoc b/docs/reference/query-dsl/compound-queries.asciidoc index d156950e3557..4bc8276ddb3e 100644 --- a/docs/reference/query-dsl/compound-queries.asciidoc +++ b/docs/reference/query-dsl/compound-queries.asciidoc @@ -9,7 +9,7 @@ The queries in this group are: <>:: The default query for combining multiple leaf or compound query clauses, as -`must`, `should`, `must_not`, or `filter` clauses. The `must` and `should` +`must`, `should`, `must_not`, or `filter` clauses. The `must` and `should` clauses have their scores combined -- the more matching clauses, the better -- while the `must_not` and `filter` clauses are executed in filter context. @@ -18,12 +18,12 @@ Return documents which match a `positive` query, but reduce the score of documents which also match a `negative` query. <>:: -A query which wraps another query, but executes it in filter context. All +A query which wraps another query, but executes it in filter context. All matching documents are given the same ``constant'' `_score`. <>:: A query which accepts multiple queries, and returns any documents which match -any of the query clauses. While the `bool` query combines the scores from all +any of the query clauses. While the `bool` query combines the scores from all matching queries, the `dis_max` query uses the score of the single best- matching query clause. diff --git a/docs/reference/query-dsl/function-score-query.asciidoc b/docs/reference/query-dsl/function-score-query.asciidoc index 9e742c90a552..e377bb74b014 100644 --- a/docs/reference/query-dsl/function-score-query.asciidoc +++ b/docs/reference/query-dsl/function-score-query.asciidoc @@ -536,7 +536,7 @@ In this case your *origin* for the location field is the town center and the *scale* is ~2km. If your budget is low, you would probably prefer something cheap above -something expensive. For the price field, the *origin* would be 0 Euros +something expensive. For the price field, the *origin* would be 0 Euros and the *scale* depends on how much you are willing to pay, for example 20 Euros. In this example, the fields might be called "price" for the price of the diff --git a/docs/reference/query-dsl/intervals-query.asciidoc b/docs/reference/query-dsl/intervals-query.asciidoc index 724e329090e0..63ba4046a395 100644 --- a/docs/reference/query-dsl/intervals-query.asciidoc +++ b/docs/reference/query-dsl/intervals-query.asciidoc @@ -182,7 +182,7 @@ The `pattern` is normalized using the search analyzer from this field, unless ==== `fuzzy` rule parameters The `fuzzy` rule matches terms that are similar to the provided term, within an -edit distance defined by <>. If the fuzzy expansion matches more than +edit distance defined by <>. If the fuzzy expansion matches more than 128 terms, {es} returns an error. `term`:: @@ -198,7 +198,7 @@ adjacent characters (ab → ba). Defaults to `true`. `fuzziness`:: (Optional, string) Maximum edit distance allowed for matching. See <> -for valid values and more information. Defaults to `auto`. +for valid values and more information. Defaults to `auto`. `analyzer`:: (Optional, string) <> used to normalize the `term`. 
diff --git a/docs/reference/query-dsl/joining-queries.asciidoc b/docs/reference/query-dsl/joining-queries.asciidoc index f10c2bda6037..c5d043c3b80c 100644 --- a/docs/reference/query-dsl/joining-queries.asciidoc +++ b/docs/reference/query-dsl/joining-queries.asciidoc @@ -2,7 +2,7 @@ == Joining queries Performing full SQL-style joins in a distributed system like Elasticsearch is -prohibitively expensive. Instead, Elasticsearch offers two forms of join +prohibitively expensive. Instead, Elasticsearch offers two forms of join which are designed to scale horizontally. <>:: diff --git a/docs/reference/query-dsl/multi-match-query.asciidoc b/docs/reference/query-dsl/multi-match-query.asciidoc index 6375d6acc145..f11d3f2140f7 100644 --- a/docs/reference/query-dsl/multi-match-query.asciidoc +++ b/docs/reference/query-dsl/multi-match-query.asciidoc @@ -80,20 +80,20 @@ parameter, which can be set to: [horizontal] `best_fields`:: (*default*) Finds documents which match any field, but - uses the `_score` from the best field. See <>. + uses the `_score` from the best field. See <>. `most_fields`:: Finds documents which match any field and combines - the `_score` from each field. See <>. + the `_score` from each field. See <>. `cross_fields`:: Treats fields with the same `analyzer` as though they were one big field. Looks for each word in *any* field. See <>. `phrase`:: Runs a `match_phrase` query on each field and uses the `_score` - from the best field. See <>. + from the best field. See <>. `phrase_prefix`:: Runs a `match_phrase_prefix` query on each field and uses - the `_score` from the best field. See <>. + the `_score` from the best field. See <>. `bool_prefix`:: Creates a `match_bool_prefix` query on each field and combines the `_score` from each field. See @@ -108,7 +108,7 @@ field is more meaningful than ``brown'' in one field and ``fox'' in the other. The `best_fields` type generates a <> for each field and wraps them in a <> query, to -find the single best matching field. For instance, this query: +find the single best matching field. For instance, this query: [source,console] -------------------------------------------------- @@ -161,7 +161,7 @@ as explained in <>. =================================================== The `best_fields` and `most_fields` types are _field-centric_ -- they generate -a `match` query *per field*. This means that the `operator` and +a `match` query *per field*. This means that the `operator` and `minimum_should_match` parameters are applied to each field individually, which is probably not what you want. @@ -200,7 +200,7 @@ See <> for a better solution. ==== `most_fields` The `most_fields` type is most useful when querying multiple fields that -contain the same text analyzed in different ways. For instance, the main +contain the same text analyzed in different ways. For instance, the main field may contain synonyms, stemming and terms without diacritics. A second field may contain the original terms, and a third field might contain shingles. By combining scores from all three fields we can match as many @@ -302,7 +302,7 @@ The `fuzziness` parameter cannot be used with the `phrase` or `phrase_prefix` ty ==== `cross_fields` The `cross_fields` type is particularly useful with structured documents where -multiple fields *should* match. For instance, when querying the `first_name` +multiple fields *should* match. 
For instance, when querying the `first_name` and `last_name` fields for ``Will Smith'', the best match is likely to have ``Will'' in one field and ``Smith'' in the other. @@ -314,7 +314,7 @@ with that approach. The first problem is that `operator` and <>). The second problem is to do with relevance: the different term frequencies in -the `first_name` and `last_name` fields can produce unexpected results. +the `first_name` and `last_name` fields can produce unexpected results. For instance, imagine we have two people: ``Will Smith'' and ``Smith Jones''. ``Smith'' as a last name is very common (and so is of low importance) but @@ -328,11 +328,11 @@ probably appear above the better matching ``Will Smith'' because the score of **** One way of dealing with these types of queries is simply to index the -`first_name` and `last_name` fields into a single `full_name` field. Of +`first_name` and `last_name` fields into a single `full_name` field. Of course, this can only be done at index time. The `cross_field` type tries to solve these problems at query time by taking a -_term-centric_ approach. It first analyzes the query string into individual +_term-centric_ approach. It first analyzes the query string into individual terms, then looks for each term in any of the fields, as though they were one big field. @@ -355,7 +355,7 @@ GET /_search is executed as: - +(first_name:will last_name:will) + +(first_name:will last_name:will) +(first_name:smith last_name:smith) In other words, *all terms* must be present *in at least one field* for a @@ -390,7 +390,7 @@ Also, accepts `analyzer`, `boost`, `operator`, `minimum_should_match`, The `cross_field` type can only work in term-centric mode on fields that have the same analyzer. Fields with the same analyzer are grouped together as in -the example above. If there are multiple groups, the query will use the best +the example above. If there are multiple groups, the query will use the best score from any group. For instance, if we have a `first` and `last` field which have diff --git a/docs/reference/query-dsl/percolate-query.asciidoc b/docs/reference/query-dsl/percolate-query.asciidoc index 8e543f63232b..684b0b571f14 100644 --- a/docs/reference/query-dsl/percolate-query.asciidoc +++ b/docs/reference/query-dsl/percolate-query.asciidoc @@ -660,7 +660,7 @@ evaluate. The reason the `percolate` query can do this is because during indexin terms are being extracted and indexed with the percolator query. Unfortunately the percolator cannot extract terms from all queries (for example the `wildcard` or `geo_shape` query) and as a result of that in certain cases the percolator can't do the selecting optimization (for example if an unsupported query is defined in a required clause of a boolean query -or the unsupported query is the only query in the percolator document). These queries are marked by the percolator and +or the unsupported query is the only query in the percolator document). These queries are marked by the percolator and can be found by running the following search: diff --git a/docs/reference/query-dsl/query-string-syntax.asciidoc b/docs/reference/query-dsl/query-string-syntax.asciidoc index 52c9030acb27..17d53365e31e 100644 --- a/docs/reference/query-dsl/query-string-syntax.asciidoc +++ b/docs/reference/query-dsl/query-string-syntax.asciidoc @@ -79,7 +79,7 @@ value like the following: ======= Allowing a wildcard at the beginning of a word (eg `"*ing"`) is particularly heavy, because all terms in the index need to be examined, just in case -they match. 
Leading wildcards can be disabled by setting +they match. Leading wildcards can be disabled by setting `allow_leading_wildcard` to `false`. ======= @@ -105,7 +105,7 @@ The supported regular expression syntax is explained in <>. [WARNING] ======= The `allow_leading_wildcard` parameter does not have any control over -regular expressions. A query string such as the following would force +regular expressions. A query string such as the following would force Elasticsearch to visit every term in the index: /.*n/ @@ -148,7 +148,7 @@ you can search for `app~1` (fuzzy) or `app*` (wildcard), but searches for While a phrase query (eg `"john smith"`) expects all of the terms in exactly the same order, a proximity query allows the specified words to be further -apart or in a different order. In the same way that fuzzy queries can +apart or in a different order. In the same way that fuzzy queries can specify a maximum edit distance for characters in a word, a proximity search allows us to specify a maximum edit distance of words in a phrase: @@ -230,9 +230,9 @@ Boosts can also be applied to phrases or to groups: ====== Boolean operators -By default, all terms are optional, as long as one term matches. A search +By default, all terms are optional, as long as one term matches. A search for `foo bar baz` will find any document that contains one or more of -`foo` or `bar` or `baz`. We have already discussed the `default_operator` +`foo` or `bar` or `baz`. We have already discussed the `default_operator` above which allows you to force all terms to be required, but there are also _boolean operators_ which can be used in the query string itself to provide more control. diff --git a/docs/reference/query-dsl/query_filter_context.asciidoc b/docs/reference/query-dsl/query_filter_context.asciidoc index 0aa0eb994cb7..5cbd77423249 100644 --- a/docs/reference/query-dsl/query_filter_context.asciidoc +++ b/docs/reference/query-dsl/query_filter_context.asciidoc @@ -31,7 +31,7 @@ parameter, such as the `query` parameter in the === Filter context In a filter context, a query clause answers the question ``__Does this document match this query clause?__'' The answer is a simple Yes or No -- no -scores are calculated. Filter context is mostly used for filtering structured +scores are calculated. Filter context is mostly used for filtering structured data, e.g. * __Does this +timestamp+ fall into the range 2015 to 2016?__ @@ -50,7 +50,7 @@ parameter, such as the `filter` or `must_not` parameters in the [[query-filter-context-ex]] === Example of query and filter contexts Below is an example of query clauses being used in query and filter context -in the `search` API. This query will match documents where all of the following +in the `search` API. This query will match documents where all of the following conditions are met: * The `title` field contains the word `search`. diff --git a/docs/reference/query-dsl/special-queries.asciidoc b/docs/reference/query-dsl/special-queries.asciidoc index 06f7cc98a734..cad9b28cbfdb 100644 --- a/docs/reference/query-dsl/special-queries.asciidoc +++ b/docs/reference/query-dsl/special-queries.asciidoc @@ -22,7 +22,7 @@ A query that computes scores based on the values of numeric features and is able to efficiently skip non-competitive hits. <>:: -This query allows a script to act as a filter. Also see the +This query allows a script to act as a filter. Also see the <>. 
<>:: diff --git a/docs/reference/redirects.asciidoc b/docs/reference/redirects.asciidoc index 455d538086a7..eee33c8a7478 100644 --- a/docs/reference/redirects.asciidoc +++ b/docs/reference/redirects.asciidoc @@ -147,7 +147,7 @@ See <>. [role="exclude",id="indices-upgrade"] === Upgrade API -The `_upgrade` API is no longer useful and will be removed. Instead, see +The `_upgrade` API is no longer useful and will be removed. Instead, see <>. [role="exclude",id="mapping-parent-field"] @@ -1124,7 +1124,7 @@ See <>. [role="exclude",id="indices-status"] === Index status API -The index `_status` API has been replaced with the <> and <> APIs. [role="exclude",id="search-facets"] diff --git a/docs/reference/rest-api/common-parms.asciidoc b/docs/reference/rest-api/common-parms.asciidoc index 8abbc8af7a6d..b39d3085477d 100644 --- a/docs/reference/rest-api/common-parms.asciidoc +++ b/docs/reference/rest-api/common-parms.asciidoc @@ -427,7 +427,7 @@ tag::include-segment-file-sizes[] `include_segment_file_sizes`:: (Optional, Boolean) If `true`, the call reports the aggregated disk usage of -each one of the Lucene index files (only applies if segment stats are +each one of the Lucene index files (only applies if segment stats are requested). Defaults to `false`. end::include-segment-file-sizes[] diff --git a/docs/reference/rollup/apis/get-job.asciidoc b/docs/reference/rollup/apis/get-job.asciidoc index f29938d054f7..8a0eba4fa9ad 100644 --- a/docs/reference/rollup/apis/get-job.asciidoc +++ b/docs/reference/rollup/apis/get-job.asciidoc @@ -75,7 +75,7 @@ rollup documents. When in this state, any subsequent cron interval triggers will be ignored because the job is already active with the prior trigger. - `abort` is a transient state, which is usually not witnessed by the user. It is used if the task needs to be shut down for some reason (job has been deleted, -an unrecoverable error has been encountered, etc). Shortly after the `abort` +an unrecoverable error has been encountered, etc). Shortly after the `abort` state is set, the job will remove itself from the cluster. ==== diff --git a/docs/reference/rollup/apis/rollup-caps.asciidoc b/docs/reference/rollup/apis/rollup-caps.asciidoc index 1d0e620a94f5..740b0a1d84bb 100644 --- a/docs/reference/rollup/apis/rollup-caps.asciidoc +++ b/docs/reference/rollup/apis/rollup-caps.asciidoc @@ -48,8 +48,8 @@ can be performed, and where does the data live? [[rollup-get-rollup-example]] ==== {api-examples-title} -Imagine we have an index named `sensor-1` full of raw data. We know that the -data will grow over time, so there will be a `sensor-2`, `sensor-3`, etc. Let's +Imagine we have an index named `sensor-1` full of raw data. We know that the +data will grow over time, so there will be a `sensor-2`, `sensor-3`, etc. Let's create a {rollup-job} that targets the index pattern `sensor-*` to accommodate this future scaling: @@ -143,7 +143,7 @@ Which will yield the following response: ---- The response that is returned contains information that is similar to the -original rollup configuration, but formatted differently. First, there are some +original rollup configuration, but formatted differently. First, there are some house-keeping details: the {rollup-job} ID, the index that holds the rolled data, and the index pattern that the job was targeting. @@ -183,7 +183,7 @@ GET _rollup/data/sensor-1 ---- Why is this? The original {rollup-job} was configured against a specific index -pattern (`sensor-*`) not a concrete index (`sensor-1`). 
So while the index +pattern (`sensor-*`) not a concrete index (`sensor-1`). So while the index belongs to the pattern, the {rollup-job} is only valid across the entirety of the pattern not just one of it's containing indices. So for that reason, the get rollup capabilities API only returns information based on the originally diff --git a/docs/reference/rollup/apis/rollup-index-caps.asciidoc b/docs/reference/rollup/apis/rollup-index-caps.asciidoc index b8cce32db5d1..2027c971928d 100644 --- a/docs/reference/rollup/apis/rollup-index-caps.asciidoc +++ b/docs/reference/rollup/apis/rollup-index-caps.asciidoc @@ -45,7 +45,7 @@ Wildcard (`*`) expressions are supported. [[rollup-get-rollup-index-caps-example]] ==== {api-examples-title} -Imagine we have an index named `sensor-1` full of raw data. We know that the +Imagine we have an index named `sensor-1` full of raw data. We know that the data will grow over time, so there will be a `sensor-2`, `sensor-3`, etc. Let's create a {rollup-job} that stores its data in `sensor_rollup`: @@ -91,7 +91,7 @@ GET /sensor_rollup/_rollup/data // TEST[continued] Note how we are requesting the concrete rollup index name (`sensor_rollup`) as -the first part of the URL. This will yield the following response: +the first part of the URL. This will yield the following response: [source,console-result] ---- diff --git a/docs/reference/rollup/index.asciidoc b/docs/reference/rollup/index.asciidoc index a3ec2001f136..a9e6735309ab 100644 --- a/docs/reference/rollup/index.asciidoc +++ b/docs/reference/rollup/index.asciidoc @@ -6,7 +6,7 @@ experimental[] Keeping historical data around for analysis is extremely useful but often avoided due to the financial cost of -archiving massive amounts of data. Retention periods are thus driven by financial realities rather than by the +archiving massive amounts of data. Retention periods are thus driven by financial realities rather than by the usefulness of extensive historical data. // tag::rollup-intro[] diff --git a/docs/reference/rollup/overview.asciidoc b/docs/reference/rollup/overview.asciidoc index 20fe0fd59f5b..35db3cd372bf 100644 --- a/docs/reference/rollup/overview.asciidoc +++ b/docs/reference/rollup/overview.asciidoc @@ -9,34 +9,34 @@ experimental[] Time-based data (documents that are predominantly identified by their timestamp) often have associated retention policies -to manage data growth. For example, your system may be generating 500 documents every second. That will generate +to manage data growth. For example, your system may be generating 500 documents every second. That will generate 43 million documents per day, and nearly 16 billion documents a year. While your analysts and data scientists may wish you stored that data indefinitely for analysis, time is never-ending and -so your storage requirements will continue to grow without bound. Retention policies are therefore often dictated +so your storage requirements will continue to grow without bound. Retention policies are therefore often dictated by the simple calculation of storage costs over time, and what the organization is willing to pay to retain historical data. Often these policies start deleting data after a few months or years. -Storage cost is a fixed quantity. It takes X money to store Y data. But the utility of a piece of data often changes -with time. Sensor data gathered at millisecond granularity is extremely useful right now, reasonably useful if from a +Storage cost is a fixed quantity. It takes X money to store Y data. 
But the utility of a piece of data often changes +with time. Sensor data gathered at millisecond granularity is extremely useful right now, reasonably useful if from a few weeks ago, and only marginally useful if older than a few months. So while the cost of storing a millisecond of sensor data from ten years ago is fixed, the value of that individual sensor -reading often diminishes with time. It's not useless -- it could easily contribute to a useful analysis -- but it's reduced +reading often diminishes with time. It's not useless -- it could easily contribute to a useful analysis -- but it's reduced value often leads to deletion rather than paying the fixed storage cost. [discrete] ==== Rollup stores historical data at reduced granularity -That's where Rollup comes into play. The Rollup functionality summarizes old, high-granularity data into a reduced -granularity format for long-term storage. By "rolling" the data up into a single summary document, historical data +That's where Rollup comes into play. The Rollup functionality summarizes old, high-granularity data into a reduced +granularity format for long-term storage. By "rolling" the data up into a single summary document, historical data can be compressed greatly compared to the raw data. -For example, consider the system that's generating 43 million documents every day. The second-by-second data is useful +For example, consider the system that's generating 43 million documents every day. The second-by-second data is useful for real-time analysis, but historical analysis looking over ten years of data are likely to be working at a larger interval such as hourly or daily trends. -If we compress the 43 million documents into hourly summaries, we can save vast amounts of space. The Rollup feature +If we compress the 43 million documents into hourly summaries, we can save vast amounts of space. The Rollup feature automates this process of summarizing historical data. Details about setting up and configuring Rollup are covered in <>. @@ -45,11 +45,11 @@ Details about setting up and configuring Rollup are covered in <>. But if your queries, aggregations and dashboards only use the available functionality, redirecting them to historical @@ -61,24 +61,24 @@ data is trivial. A useful feature of Rollup is the ability to query both "live", realtime data in addition to historical "rolled" data in a single query. -For example, your system may keep a month of raw data. After a month, it is rolled up into historical summaries using +For example, your system may keep a month of raw data. After a month, it is rolled up into historical summaries using Rollup and the raw data is deleted. -If you were to query the raw data, you'd only see the most recent month. And if you were to query the rolled up data, you -would only see data older than a month. The RollupSearch endpoint, however, supports querying both at the same time. -It will take the results from both data sources and merge them together. If there is overlap between the "live" and +If you were to query the raw data, you'd only see the most recent month. And if you were to query the rolled up data, you +would only see data older than a month. The RollupSearch endpoint, however, supports querying both at the same time. +It will take the results from both data sources and merge them together. If there is overlap between the "live" and "rolled" data, live data is preferred to increase accuracy. 
[discrete] ==== Rollup is multi-interval aware -Finally, Rollup is capable of intelligently utilizing the best interval available. If you've worked with summarizing -features of other products, you'll find that they can be limiting. If you configure rollups at daily intervals... your -queries and charts can only work with daily intervals. If you need a monthly interval, you have to create another rollup +Finally, Rollup is capable of intelligently utilizing the best interval available. If you've worked with summarizing +features of other products, you'll find that they can be limiting. If you configure rollups at daily intervals... your +queries and charts can only work with daily intervals. If you need a monthly interval, you have to create another rollup that explicitly stores monthly averages, etc. The Rollup feature stores data in such a way that queries can identify the smallest available interval and use that -for their processing. If you store rollups at a daily interval, queries can be executed on daily or longer intervals -(weekly, monthly, etc) without the need to explicitly configure a new rollup job. This helps alleviate one of the major +for their processing. If you store rollups at a daily interval, queries can be executed on daily or longer intervals +(weekly, monthly, etc) without the need to explicitly configure a new rollup job. This helps alleviate one of the major disadvantages of a rollup system; reduced flexibility relative to raw data. diff --git a/docs/reference/rollup/rollup-agg-limitations.asciidoc b/docs/reference/rollup/rollup-agg-limitations.asciidoc index 8390c5b80a5a..169fb8a7f0d9 100644 --- a/docs/reference/rollup/rollup-agg-limitations.asciidoc +++ b/docs/reference/rollup/rollup-agg-limitations.asciidoc @@ -5,7 +5,7 @@ experimental[] -There are some limitations to how fields can be rolled up / aggregated. This page highlights the major limitations so that +There are some limitations to how fields can be rolled up / aggregated. This page highlights the major limitations so that you are aware of them. [discrete] diff --git a/docs/reference/rollup/rollup-getting-started.asciidoc b/docs/reference/rollup/rollup-getting-started.asciidoc index c52ee5c55e1a..f67b70101c36 100644 --- a/docs/reference/rollup/rollup-getting-started.asciidoc +++ b/docs/reference/rollup/rollup-getting-started.asciidoc @@ -8,10 +8,10 @@ experimental[] -To use the Rollup feature, you need to create one or more "Rollup Jobs". These jobs run continuously in the background +To use the Rollup feature, you need to create one or more "Rollup Jobs". These jobs run continuously in the background and rollup the index or indices that you specify, placing the rolled documents in a secondary index (also of your choosing). -Imagine you have a series of daily indices that hold sensor data (`sensor-2017-01-01`, `sensor-2017-01-02`, etc). A sample document might +Imagine you have a series of daily indices that hold sensor data (`sensor-2017-01-01`, `sensor-2017-01-02`, etc). A sample document might look like this: [source,js] @@ -29,7 +29,7 @@ look like this: ==== Creating a rollup job We'd like to rollup these documents into hourly summaries, which will allow us to generate reports and dashboards with any time interval -one hour or greater. A rollup job might look like this: +one hour or greater. 
A rollup job might look like this: [source,console] -------------------------------------------------- @@ -65,11 +65,11 @@ PUT _rollup/job/sensor We give the job the ID of "sensor" (in the url: `PUT _rollup/job/sensor`), and tell it to rollup the index pattern `"sensor-*"`. This job will find and rollup any index that matches that pattern. Rollup summaries are then stored in the `"sensor_rollup"` index. -The `cron` parameter controls when and how often the job activates. When a rollup job's cron schedule triggers, it will begin rolling up -from where it left off after the last activation. So if you configure the cron to run every 30 seconds, the job will process the last 30 +The `cron` parameter controls when and how often the job activates. When a rollup job's cron schedule triggers, it will begin rolling up +from where it left off after the last activation. So if you configure the cron to run every 30 seconds, the job will process the last 30 seconds worth of data that was indexed into the `sensor-*` indices. -If instead the cron was configured to run once a day at midnight, the job would process the last 24 hours worth of data. The choice is largely +If instead the cron was configured to run once a day at midnight, the job would process the last 24 hours worth of data. The choice is largely preference, based on how "realtime" you want the rollups, and if you wish to process continuously or move it to off-peak hours. Next, we define a set of `groups`. Essentially, we are defining the dimensions @@ -81,14 +81,14 @@ the `node` field. .Date histogram interval vs cron schedule ********************************** You'll note that the job's cron is configured to run every 30 seconds, but the date_histogram is configured to -rollup at 60 minute intervals. How do these relate? +rollup at 60 minute intervals. How do these relate? -The date_histogram controls the granularity of the saved data. Data will be rolled up into hourly intervals, and you will be unable -to query with finer granularity. The cron simply controls when the process looks for new data to rollup. Every 30 seconds it will see -if there is a new hour's worth of data and roll it up. If not, the job goes back to sleep. +The date_histogram controls the granularity of the saved data. Data will be rolled up into hourly intervals, and you will be unable +to query with finer granularity. The cron simply controls when the process looks for new data to rollup. Every 30 seconds it will see +if there is a new hour's worth of data and roll it up. If not, the job goes back to sleep. Often, it doesn't make sense to define such a small cron (30s) on a large interval (1h), because the majority of the activations will -simply go back to sleep. But there's nothing wrong with it either, the job will do the right thing. +simply go back to sleep. But there's nothing wrong with it either, the job will do the right thing. ********************************** @@ -130,7 +130,7 @@ After you execute the above command and create the job, you'll receive the follo [discrete] ==== Starting the job -After the job is created, it will be sitting in an inactive state. Jobs need to be started before they begin processing data (this allows +After the job is created, it will be sitting in an inactive state. Jobs need to be started before they begin processing data (this allows you to stop them later as a way to temporarily pause, without deleting the configuration). 
To start the job, execute this command: @@ -144,7 +144,7 @@ POST _rollup/job/sensor/_start [discrete] ==== Searching the rolled results -After the job has run and processed some data, we can use the <> endpoint to do some searching. The Rollup feature is designed +After the job has run and processed some data, we can use the <> endpoint to do some searching. The Rollup feature is designed so that you can use the same Query DSL syntax that you are accustomed to... it just happens to run on the rolled up data instead. For example, take this query: @@ -165,8 +165,8 @@ GET /sensor_rollup/_rollup_search -------------------------------------------------- // TEST[setup:sensor_prefab_data] -It's a simple aggregation that calculates the maximum of the `temperature` field. But you'll notice that it is being sent to the `sensor_rollup` -index instead of the raw `sensor-*` indices. And you'll also notice that it is using the `_rollup_search` endpoint. Otherwise the syntax +It's a simple aggregation that calculates the maximum of the `temperature` field. But you'll notice that it is being sent to the `sensor_rollup` +index instead of the raw `sensor-*` indices. And you'll also notice that it is using the `_rollup_search` endpoint. Otherwise the syntax is exactly as you'd expect. If you were to execute that query, you'd receive a result that looks like a normal aggregation response: @@ -197,11 +197,11 @@ If you were to execute that query, you'd receive a result that looks like a norm // TESTRESPONSE[s/"_shards" : \.\.\. /"_shards" : $body.$_path/] The only notable difference is that Rollup search results have zero `hits`, because we aren't really searching the original, live data any -more. Otherwise it's identical syntax. +more. Otherwise it's identical syntax. -There are a few interesting takeaways here. Firstly, even though the data was rolled up with hourly intervals and partitioned by -node name, the query we ran is just calculating the max temperature across all documents. The `groups` that were configured in the job -are not mandatory elements of a query, they are just extra dimensions you can partition on. Second, the request and response syntax +There are a few interesting takeaways here. Firstly, even though the data was rolled up with hourly intervals and partitioned by +node name, the query we ran is just calculating the max temperature across all documents. The `groups` that were configured in the job +are not mandatory elements of a query, they are just extra dimensions you can partition on. Second, the request and response syntax is nearly identical to normal DSL, making it easy to integrate into dashboards and applications. Finally, we can use those grouping fields we defined to construct a more complicated query: @@ -319,6 +319,6 @@ the date_histogram uses a `7d` interval instead of `60m`. [discrete] ==== Conclusion -This quickstart should have provided a concise overview of the core functionality that Rollup exposes. There are more tips and things -to consider when setting up Rollups, which you can find throughout the rest of this section. You may also explore the <> +This quickstart should have provided a concise overview of the core functionality that Rollup exposes. There are more tips and things +to consider when setting up Rollups, which you can find throughout the rest of this section. You may also explore the <> for an overview of what is available. 
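As a final recap of the pieces shown above, here is a hedged sketch of the kind of grouped rollup query this quickstart builds toward. It assumes the `sensor` job configuration used throughout (a `date_histogram` group on `timestamp`, a `terms` group on `node`, and a `max` metric on `temperature`) and is illustrative rather than a tested snippet:

[source,js]
--------------------------------------------------
GET /sensor_rollup/_rollup_search
{
  "size": 0,
  "aggregations": {
    "timeline": {
      "date_histogram": {
        "field": "timestamp",
        "fixed_interval": "7d"
      },
      "aggs": {
        "nodes": {
          "terms": {
            "field": "node"
          },
          "aggs": {
            "max_temperature": {
              "max": {
                "field": "temperature"
              }
            }
          }
        }
      }
    }
  }
}
--------------------------------------------------
// NOTCONSOLE

Because rollup queries may use any interval that is a multiple of the configured one, the `7d` interval here works against data that was rolled up hourly.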
diff --git a/docs/reference/rollup/rollup-search-limitations.asciidoc b/docs/reference/rollup/rollup-search-limitations.asciidoc index adc597d02e9c..5e03942c22b9 100644 --- a/docs/reference/rollup/rollup-search-limitations.asciidoc +++ b/docs/reference/rollup/rollup-search-limitations.asciidoc @@ -5,7 +5,7 @@ experimental[] -While we feel the Rollup function is extremely flexible, the nature of summarizing data means there will be some limitations. Once +While we feel the Rollup function is extremely flexible, the nature of summarizing data means there will be some limitations. Once live data is thrown away, you will always lose some flexibility. This page highlights the major limitations so that you are aware of them. @@ -13,32 +13,32 @@ This page highlights the major limitations so that you are aware of them. [discrete] ==== Only one {rollup} index per search -When using the <> endpoint, the `index` parameter accepts one or more indices. These can be a mix of regular, non-rollup -indices and rollup indices. However, only one rollup index can be specified. The exact list of rules for the `index` parameter are as +When using the <> endpoint, the `index` parameter accepts one or more indices. These can be a mix of regular, non-rollup +indices and rollup indices. However, only one rollup index can be specified. The exact list of rules for the `index` parameter are as follows: -- At least one index/index-pattern must be specified. This can be either a rollup or non-rollup index. Omitting the index parameter, +- At least one index/index-pattern must be specified. This can be either a rollup or non-rollup index. Omitting the index parameter, or using `_all`, is not permitted - Multiple non-rollup indices may be specified -- Only one rollup index may be specified. If more than one are supplied an exception will be thrown +- Only one rollup index may be specified. If more than one are supplied an exception will be thrown - Index patterns may be used, but if they match more than one rollup index an exception will be thrown. -This limitation is driven by the logic that decides which jobs are the "best" for any given query. If you have ten jobs stored in a single +This limitation is driven by the logic that decides which jobs are the "best" for any given query. If you have ten jobs stored in a single index, which cover the source data with varying degrees of completeness and different intervals, the query needs to determine which set -of jobs to actually search. Incorrect decisions can lead to inaccurate aggregation results (e.g. over-counting doc counts, or bad metrics). +of jobs to actually search. Incorrect decisions can lead to inaccurate aggregation results (e.g. over-counting doc counts, or bad metrics). Needless to say, this is a technically challenging piece of code. -To help simplify the problem, we have limited search to just one rollup index at a time (which may contain multiple jobs). In the future we +To help simplify the problem, we have limited search to just one rollup index at a time (which may contain multiple jobs). In the future we may be able to open this up to multiple rollup jobs. [discrete] [[aggregate-stored-only]] ==== Can only aggregate what's been stored -A perhaps obvious limitation, but rollups can only aggregate on data that has been stored in the rollups. If you don't configure the +A perhaps obvious limitation, but rollups can only aggregate on data that has been stored in the rollups. 
If you don't configure the rollup job to store metrics about the `price` field, you won't be able to use the `price` field in any query or aggregation. -For example, the `temperature` field in the following query has been stored in a rollup job... but not with an `avg` metric. Which means +For example, the `temperature` field in the following query has been stored in a rollup job... but not with an `avg` metric. Which means the usage of `avg` here is not allowed: [source,console] @@ -83,18 +83,18 @@ The response will tell you that the field and aggregation were not possible, bec [discrete] ==== Interval granularity -Rollups are stored at a certain granularity, as defined by the `date_histogram` group in the configuration. This means you +Rollups are stored at a certain granularity, as defined by the `date_histogram` group in the configuration. This means you can only search/aggregate the rollup data with an interval that is greater-than or equal to the configured rollup interval. For example, if data is rolled up at hourly intervals, the <> API can aggregate on any time interval -hourly or greater. Intervals that are less than an hour will throw an exception, since the data simply doesn't +hourly or greater. Intervals that are less than an hour will throw an exception, since the data simply doesn't exist for finer granularities. [[rollup-search-limitations-intervals]] .Requests must be multiples of the config ********************************** Perhaps not immediately apparent, but the interval specified in an aggregation request must be a whole -multiple of the configured interval. If the job was configured to rollup on `3d` intervals, you can only +multiple of the configured interval. If the job was configured to rollup on `3d` intervals, you can only query and aggregate on multiples of three (`3d`, `6d`, `9d`, etc). A non-multiple wouldn't work, since the rolled up data wouldn't cleanly "overlap" with the buckets generated @@ -113,7 +113,7 @@ with the largest interval to satisfy the search request. [discrete] ==== Limited querying components -The Rollup functionality allows `query`'s in the search request, but with a limited subset of components. The queries currently allowed are: +The Rollup functionality allows `query`'s in the search request, but with a limited subset of components. The queries currently allowed are: - Term Query - Terms Query @@ -125,11 +125,11 @@ Furthermore, these queries can only use fields that were also saved in the rollu If you wish to filter on a keyword `hostname` field, that field must have been configured in the rollup job under a `terms` grouping. If you attempt to use an unsupported query, or the query references a field that wasn't configured in the rollup job, an exception will be -thrown. We expect the list of support queries to grow over time as more are implemented. +thrown. We expect the list of support queries to grow over time as more are implemented. [discrete] ==== Timezones -Rollup documents are stored in the timezone of the `date_histogram` group configuration in the job. If no timezone is specified, the default +Rollup documents are stored in the timezone of the `date_histogram` group configuration in the job. If no timezone is specified, the default is to rollup timestamps in `UTC`. 
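Tying the querying limitations above together: a request that stays inside the allowed components combines one of the permitted query types with fields and metrics the job actually stored. A minimal sketch, assuming the `sensor` job from the earlier quickstart (a `date_histogram` group on `timestamp` using the default `UTC` timezone and a `max` metric on `temperature`); illustrative only:

[source,js]
--------------------------------------------------
GET /sensor_rollup/_rollup_search
{
  "size": 0,
  "query": {
    "range": {
      "timestamp": {
        "gte": "now-7d/d",
        "time_zone": "UTC"
      }
    }
  },
  "aggregations": {
    "max_temperature": {
      "max": {
        "field": "temperature"
      }
    }
  }
}
--------------------------------------------------
// NOTCONSOLE

A `range` query is on the allowed list, `timestamp` was stored via the `date_histogram` group, and the `time_zone` matches how the documents were rolled up, so none of the limitations above are tripped.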
diff --git a/docs/reference/rollup/understanding-groups.asciidoc b/docs/reference/rollup/understanding-groups.asciidoc index face43cf9667..d740d59ba064 100644 --- a/docs/reference/rollup/understanding-groups.asciidoc +++ b/docs/reference/rollup/understanding-groups.asciidoc @@ -5,17 +5,17 @@ experimental[] -To preserve flexibility, Rollup Jobs are defined based on how future queries may need to use the data. Traditionally, systems force -the admin to make decisions about what metrics to rollup and on what interval. E.g. The average of `cpu_time` on an hourly basis. This +To preserve flexibility, Rollup Jobs are defined based on how future queries may need to use the data. Traditionally, systems force +the admin to make decisions about what metrics to rollup and on what interval. E.g. The average of `cpu_time` on an hourly basis. This is limiting; if, in the future, the admin wishes to see the average of `cpu_time` on an hourly basis _and_ partitioned by `host_name`, they are out of luck. Of course, the admin can decide to rollup the `[hour, host]` tuple on an hourly basis, but as the number of grouping keys grows, so do the -number of tuples the admin needs to configure. Furthermore, these `[hours, host]` tuples are only useful for hourly rollups... daily, weekly, +number of tuples the admin needs to configure. Furthermore, these `[hours, host]` tuples are only useful for hourly rollups... daily, weekly, or monthly rollups all require new configurations. Rather than force the admin to decide ahead of time which individual tuples should be rolled up, Elasticsearch's Rollup jobs are configured -based on which groups are potentially useful to future queries. For example, this configuration: +based on which groups are potentially useful to future queries. For example, this configuration: [source,js] -------------------------------------------------- @@ -39,7 +39,7 @@ based on which groups are potentially useful to future queries. For example, th Allows `date_histogram` to be used on the `"timestamp"` field, `terms` aggregations to be used on the `"hostname"` and `"datacenter"` fields, and `histograms` to be used on any of `"load"`, `"net_in"`, `"net_out"` fields. -Importantly, these aggs/fields can be used in any combination. This aggregation: +Importantly, these aggs/fields can be used in any combination. This aggregation: [source,js] -------------------------------------------------- @@ -100,8 +100,8 @@ is just as valid as this aggregation: You'll notice that the second aggregation is not only substantially larger, it also swapped the position of the terms aggregation on -`"hostname"`, illustrating how the order of aggregations does not matter to rollups. Similarly, while the `date_histogram` is required -for rolling up data, it isn't required while querying (although often used). For example, this is a valid aggregation for +`"hostname"`, illustrating how the order of aggregations does not matter to rollups. Similarly, while the `date_histogram` is required +for rolling up data, it isn't required while querying (although often used). For example, this is a valid aggregation for Rollup Search to execute: @@ -118,7 +118,7 @@ Rollup Search to execute: // NOTCONSOLE Ultimately, when configuring `groups` for a job, think in terms of how you might wish to partition data in a query at a future date... -then include those in the config. Because Rollup Search allows any order or combination of the grouped fields, you just need to decide +then include those in the config. 
Because Rollup Search allows any order or combination of the grouped fields, you just need to decide if a field is useful for aggregating later, and how you might wish to use it (terms, histogram, etc). [[rollup-understanding-group-intervals]] @@ -171,13 +171,13 @@ time in the future. ==== Grouping limitations with heterogeneous indices There was previously a limitation in how Rollup could handle indices that had heterogeneous mappings (multiple, unrelated/non-overlapping -mappings). The recommendation at the time was to configure a separate job per data "type". For example, you might configure a separate +mappings). The recommendation at the time was to configure a separate job per data "type". For example, you might configure a separate job for each Beats module that you had enabled (one for `process`, another for `filesystem`, etc). This recommendation was driven by internal implementation details that caused document counts to be potentially incorrect if a single "merged" job was used. -This limitation has since been alleviated. As of 6.4.0, it is now considered best practice to combine all rollup configurations +This limitation has since been alleviated. As of 6.4.0, it is now considered best practice to combine all rollup configurations into a single job. As an example, if your index has two types of documents: @@ -242,7 +242,7 @@ PUT _rollup/job/combined ==== Doc counts and overlapping jobs There was previously an issue with document counts on "overlapping" job configurations, driven by the same internal implementation detail. -If there were two Rollup jobs saving to the same index, where one job is a "subset" of another job, it was possible that document counts +If there were two Rollup jobs saving to the same index, where one job is a "subset" of another job, it was possible that document counts could be incorrect for certain aggregation arrangements. This issue has also since been eliminated in 6.4.0. diff --git a/docs/reference/scripting/fields.asciidoc b/docs/reference/scripting/fields.asciidoc index e83b54c93288..b8b62eabe72d 100644 --- a/docs/reference/scripting/fields.asciidoc +++ b/docs/reference/scripting/fields.asciidoc @@ -121,7 +121,7 @@ It cannot return JSON objects. The `doc['field']` will throw an error if `field` is missing from the mappings. In `painless`, a check can first be done with `doc.containsKey('field')` to guard -accessing the `doc` map. Unfortunately, there is no way to check for the +accessing the `doc` map. Unfortunately, there is no way to check for the existence of the field in mappings in an `expression` script. =================================================== @@ -133,7 +133,7 @@ existence of the field in mappings in an `expression` script. The `doc['field']` syntax can also be used for <> if <> is enabled, but *BEWARE*: enabling fielddata on a `text` field requires loading all of the terms into the JVM heap, which can be -very expensive both in terms of memory and CPU. It seldom makes sense to +very expensive both in terms of memory and CPU. It seldom makes sense to access `text` fields from scripts. =================================================== @@ -250,7 +250,7 @@ GET my-index-000001/_search ======================================================= The `_source` field is just a special stored field, so the performance is -similar to that of other stored fields. The `_source` provides access to the +similar to that of other stored fields. 
The `_source` provides access to the original document body that was indexed (including the ability to distinguish `null` values from empty fields, single-value arrays from plain scalars, etc). diff --git a/docs/reference/scripting/security.asciidoc b/docs/reference/scripting/security.asciidoc index a544812daa5c..db81f57a7d75 100644 --- a/docs/reference/scripting/security.asciidoc +++ b/docs/reference/scripting/security.asciidoc @@ -101,9 +101,9 @@ script.allowed_types: inline <1> [discrete] === Allowed script contexts setting -By default all script contexts are allowed to be executed. This can be modified using the -setting `script.allowed_contexts`. Only the contexts specified as part of the setting will -be allowed to be executed. To specify no contexts are allowed, set `script.allowed_contexts` +By default all script contexts are allowed to be executed. This can be modified using the +setting `script.allowed_contexts`. Only the contexts specified as part of the setting will +be allowed to be executed. To specify no contexts are allowed, set `script.allowed_contexts` to be `none`. [source,yaml] diff --git a/docs/reference/search/multi-search.asciidoc b/docs/reference/search/multi-search.asciidoc index 62d8c5c1fce5..0a37d185b04b 100644 --- a/docs/reference/search/multi-search.asciidoc +++ b/docs/reference/search/multi-search.asciidoc @@ -101,14 +101,14 @@ to +max(1, (# of <> * min(< Search results are returned, but were omitted here for brevity. -Even for a simple query, the response is relatively complicated. Let's break it +Even for a simple query, the response is relatively complicated. Let's break it down piece-by-piece before moving to more complex examples. @@ -221,18 +221,18 @@ aggregation execution. Because a search request may be executed against one or more shards in an index, and a search may cover one or more indices, the top level element in the profile response is an array of `shard` objects. Each shard object lists its `id` which -uniquely identifies the shard. The ID's format is +uniquely identifies the shard. The ID's format is `[nodeID][indexName][shardID]`. The profile itself may consist of one or more "searches", where a search is a -query executed against the underlying Lucene index. Most search requests +query executed against the underlying Lucene index. Most search requests submitted by the user will only execute a single `search` against the Lucene index. But occasionally multiple searches will be executed, such as including a global aggregation (which needs to execute a secondary "match_all" query for the global context). Inside each `search` object there will be two arrays of profiled information: -a `query` array and a `collector` array. Alongside the `search` object is an +a `query` array and a `collector` array. Alongside the `search` object is an `aggregations` object that contains the profile information for the aggregations. In the future, more sections may be added, such as `suggest`, `highlight`, etc. @@ -250,12 +250,12 @@ human readable timing information (e.g. `"time": "391,9ms"`, `"time": "123.3micr [NOTE] ======================================= The details provided by the Profile API directly expose Lucene class names and concepts, which means -that complete interpretation of the results require fairly advanced knowledge of Lucene. This +that complete interpretation of the results require fairly advanced knowledge of Lucene. 
This page attempts to give a crash-course in how Lucene executes queries so that you can use the Profile API to successfully -diagnose and debug queries, but it is only an overview. For complete understanding, please refer +diagnose and debug queries, but it is only an overview. For complete understanding, please refer to Lucene's documentation and, in places, the code. -With that said, a complete understanding is often not required to fix a slow query. It is usually +With that said, a complete understanding is often not required to fix a slow query. It is usually sufficient to see that a particular component of a query is slow, and not necessarily understand why the `advance` phase of that query is the cause, for example. ======================================= @@ -266,7 +266,7 @@ the `advance` phase of that query is the cause, for example. The `query` section contains detailed timing of the query tree executed by Lucene on a particular shard. The overall structure of this query tree will resemble your original Elasticsearch query, but may be slightly (or sometimes -very) different. It will also use similar but not always identical naming. +very) different. It will also use similar but not always identical naming. Using our previous `match` query example, let's analyze the `query` section: [source,console-result] @@ -301,21 +301,21 @@ Using our previous `match` query example, let's analyze the `query` section: <1> The breakdown timings are omitted for simplicity. Based on the profile structure, we can see that our `match` query was rewritten -by Lucene into a BooleanQuery with two clauses (both holding a TermQuery). The +by Lucene into a BooleanQuery with two clauses (both holding a TermQuery). The `type` field displays the Lucene class name, and often aligns with the -equivalent name in Elasticsearch. The `description` field displays the Lucene +equivalent name in Elasticsearch. The `description` field displays the Lucene explanation text for the query, and is made available to help differentiating between parts of your query (e.g. both `message:get` and `message:search` are TermQuery's and would appear identical otherwise. The `time_in_nanos` field shows that this query took ~11.9ms for the entire -BooleanQuery to execute. The recorded time is inclusive of all children. +BooleanQuery to execute. The recorded time is inclusive of all children. The `breakdown` field will give detailed stats about how the time was spent, -we'll look at that in a moment. Finally, the `children` array lists any -sub-queries that may be present. Because we searched for two values ("get -search"), our BooleanQuery holds two children TermQueries. They have identical -information (type, time, breakdown, etc). Children are allowed to have their +we'll look at that in a moment. Finally, the `children` array lists any +sub-queries that may be present. Because we searched for two values ("get +search"), our BooleanQuery holds two children TermQueries. They have identical +information (type, time, breakdown, etc). Children are allowed to have their own children. ===== Timing Breakdown @@ -351,7 +351,7 @@ Lucene execution: // TESTRESPONSE[s/(?<=[" ])\d+(\.\d+)?/$body.$_path/] Timings are listed in wall-clock nanoseconds and are not normalized at all. All -caveats about the overall `time_in_nanos` apply here. The intention of the +caveats about the overall `time_in_nanos` apply here. 
The intention of the breakdown is to give you a feel for A) what machinery in Lucene is actually eating time, and B) the magnitude of differences in times between the various components. Like the overall time, the breakdown is inclusive of all children @@ -366,20 +366,20 @@ The meaning of the stats are as follows: `create_weight`:: A Query in Lucene must be capable of reuse across multiple IndexSearchers (think of it as the engine that - executes a search against a specific Lucene Index). This puts Lucene in a tricky spot, since many queries + executes a search against a specific Lucene Index). This puts Lucene in a tricky spot, since many queries need to accumulate temporary state/statistics associated with the index it is being used against, but the Query contract mandates that it must be immutable. {empty} + {empty} + To get around this, Lucene asks each query to generate a Weight object which acts as a temporary context - object to hold state associated with this particular (IndexSearcher, Query) tuple. The `weight` metric + object to hold state associated with this particular (IndexSearcher, Query) tuple. The `weight` metric shows how long this process takes `build_scorer`:: - This parameter shows how long it takes to build a Scorer for the query. A Scorer is the mechanism that + This parameter shows how long it takes to build a Scorer for the query. A Scorer is the mechanism that iterates over matching documents and generates a score per-document (e.g. how well does "foo" match the document?). - Note, this records the time required to generate the Scorer object, not actually score the documents. Some + Note, this records the time required to generate the Scorer object, not actually score the documents. Some queries have faster or slower initialization of the Scorer, depending on optimizations, complexity, etc. {empty} + {empty} + @@ -387,10 +387,10 @@ The meaning of the stats are as follows: `next_doc`:: - The Lucene method `next_doc` returns Doc ID of the next document matching the query. This statistic shows + The Lucene method `next_doc` returns Doc ID of the next document matching the query. This statistic shows the time it takes to determine which document is the next match, a process that varies considerably depending - on the nature of the query. Next_doc is a specialized form of advance() which is more convenient for many - queries in Lucene. It is equivalent to advance(docId() + 1) + on the nature of the query. Next_doc is a specialized form of advance() which is more convenient for many + queries in Lucene. It is equivalent to advance(docId() + 1) `advance`:: @@ -403,13 +403,13 @@ The meaning of the stats are as follows: `match`:: - Some queries, such as phrase queries, match documents using a "two-phase" process. First, the document is + Some queries, such as phrase queries, match documents using a "two-phase" process. First, the document is "approximately" matched, and if it matches approximately, it is checked a second time with a more rigorous - (and expensive) process. The second phase verification is what the `match` statistic measures. + (and expensive) process. The second phase verification is what the `match` statistic measures. {empty} + {empty} + For example, a phrase query first checks a document approximately by ensuring all terms in the phrase are - present in the doc. If all the terms are present, it then executes the second phase verification to ensure + present in the doc. 
If all the terms are present, it then executes the second phase verification to ensure the terms are in-order to form the phrase, which is relatively more expensive than just checking for presence of the terms. {empty} + @@ -421,8 +421,8 @@ The meaning of the stats are as follows: This records the time taken to score a particular document via its Scorer `*_count`:: - Records the number of invocations of the particular method. For example, `"next_doc_count": 2,` - means the `nextDoc()` method was called on two different documents. This can be used to help judge + Records the number of invocations of the particular method. For example, `"next_doc_count": 2,` + means the `nextDoc()` method was called on two different documents. This can be used to help judge how selective queries are, by comparing counts between different query components. @@ -431,7 +431,7 @@ The meaning of the stats are as follows: The Collectors portion of the response shows high-level execution details. Lucene works by defining a "Collector" which is responsible for coordinating the -traversal, scoring, and collection of matching documents. Collectors are also +traversal, scoring, and collection of matching documents. Collectors are also how a single query can record aggregation results, execute unscoped "global" queries, execute post-query filters, etc. @@ -455,14 +455,14 @@ Looking at the previous example: We see a single collector named `SimpleTopScoreDocCollector` wrapped into `CancellableCollector`. `SimpleTopScoreDocCollector` is the default "scoring and sorting" `Collector` used by {es}. The `reason` field attempts to give a plain -English description of the class name. The `time_in_nanos` is similar to the +English description of the class name. The `time_in_nanos` is similar to the time in the Query tree: a wall-clock time inclusive of all children. Similarly, `children` lists all sub-collectors. The `CancellableCollector` that wraps `SimpleTopScoreDocCollector` is used by {es} to detect if the current search was cancelled and stop collecting documents as soon as it occurs. It should be noted that Collector times are **independent** from the Query -times. They are calculated, combined, and normalized independently! Due to the +times. They are calculated, combined, and normalized independently! Due to the nature of Lucene's execution, it is impossible to "merge" the times from the Collectors into the Query section, so they are displayed in separate portions. @@ -471,7 +471,7 @@ For reference, the various collector reasons are: [horizontal] `search_sorted`:: - A collector that scores and sorts documents. This is the most common collector and will be seen in most + A collector that scores and sorts documents. This is the most common collector and will be seen in most simple searches `search_count`:: @@ -481,27 +481,27 @@ For reference, the various collector reasons are: `search_terminate_after_count`:: - A collector that terminates search execution after `n` matching documents have been found. This is seen + A collector that terminates search execution after `n` matching documents have been found. This is seen when the `terminate_after_count` query parameter has been specified `search_min_score`:: - A collector that only returns matching documents that have a score greater than `n`. This is seen when + A collector that only returns matching documents that have a score greater than `n`. This is seen when the top-level parameter `min_score` has been specified. 
`search_multi`:: - A collector that wraps several other collectors. This is seen when combinations of search, aggregations, + A collector that wraps several other collectors. This is seen when combinations of search, aggregations, global aggs, and post_filters are combined in a single search. `search_timeout`:: - A collector that halts execution after a specified period of time. This is seen when a `timeout` top-level + A collector that halts execution after a specified period of time. This is seen when a `timeout` top-level parameter has been specified. `aggregation`:: - A collector that Elasticsearch uses to run aggregations against the query scope. A single `aggregation` + A collector that Elasticsearch uses to run aggregations against the query scope. A single `aggregation` collector is used to collect documents for *all* aggregations, so you will see a list of aggregations in the name rather. @@ -515,9 +515,9 @@ For reference, the various collector reasons are: [[rewrite-section]] ===== `rewrite` Section -All queries in Lucene undergo a "rewriting" process. A query (and its +All queries in Lucene undergo a "rewriting" process. A query (and its sub-queries) may be rewritten one or more times, and the process continues until -the query stops changing. This process allows Lucene to perform optimizations, +the query stops changing. This process allows Lucene to perform optimizations, such as removing redundant clauses, replacing one query for a more efficient execution path, etc. For example a Boolean -> Boolean -> TermQuery can be rewritten to a TermQuery, because all the Booleans are unnecessary in this case. @@ -686,7 +686,7 @@ The API returns the following result: <1> The `"aggregations"` portion has been omitted because it will be covered in the next section. -As you can see, the output is significantly more verbose than before. All the +As you can see, the output is significantly more verbose than before. All the major portions of the query are represented: 1. The first `TermQuery` (user.id:elkbee) represents the main `term` query. @@ -705,16 +705,16 @@ verbose responses, and are not overly structured. Essentially, these queries rewrite themselves on a per-segment basis. If you imagine the wildcard query `b*`, it technically can match any token that begins -with the letter "b". It would be impossible to enumerate all possible +with the letter "b". It would be impossible to enumerate all possible combinations, so Lucene rewrites the query in context of the segment being evaluated, e.g., one segment may contain the tokens `[bar, baz]`, so the query -rewrites to a BooleanQuery combination of "bar" and "baz". Another segment may +rewrites to a BooleanQuery combination of "bar" and "baz". Another segment may only have the token `[bakery]`, so the query rewrites to a single TermQuery for "bakery". Due to this dynamic, per-segment rewriting, the clean tree structure becomes distorted and no longer follows a clean "lineage" showing how one query rewrites -into the next. At present time, all we can do is apologize, and suggest you +into the next. At present time, all we can do is apologize, and suggest you collapse the details for that query's children if it is too confusing. Luckily, all the timing statistics are correct, just not the physical layout in the response, so it is sufficient to just analyze the top-level MultiTermQuery and @@ -869,7 +869,7 @@ which comes from `my_global_agg`. 
That aggregation then has a child `NumericTermsAggregator` which comes from the second term's aggregation on `http.response.status_code`. The `time_in_nanos` field shows the time executed by each aggregation, and is -inclusive of all children. While the overall time is useful, the `breakdown` +inclusive of all children. While the overall time is useful, the `breakdown` field will give detailed stats about how the time was spent. Some aggregations may return expert `debug` information that describe features @@ -908,7 +908,7 @@ method. For example, `"collect_count": 2` means the aggregation called the future use and always returns `0`. Timings are listed in wall-clock nanoseconds and are not normalized at all. All -caveats about the overall `time` apply here. The intention of the breakdown is +caveats about the overall `time` apply here. The intention of the breakdown is to give you a feel for A) what machinery in {es} is actually eating time, and B) the magnitude of differences in times between the various components. Like the overall time, the breakdown is inclusive of all children times. @@ -919,7 +919,7 @@ overall time, the breakdown is inclusive of all children times. Like any profiler, the Profile API introduces a non-negligible overhead to search execution. The act of instrumenting low-level method calls such as `collect`, `advance`, and `next_doc` can be fairly expensive, since these -methods are called in tight loops. Therefore, profiling should not be enabled +methods are called in tight loops. Therefore, profiling should not be enabled in production settings by default, and should not be compared against non-profiled query times. Profiling is just a diagnostic tool. diff --git a/docs/reference/search/search-template.asciidoc b/docs/reference/search/search-template.asciidoc index cec025de6eb5..02f2bf6d5c69 100644 --- a/docs/reference/search/search-template.asciidoc +++ b/docs/reference/search/search-template.asciidoc @@ -535,7 +535,7 @@ for `end`: ===== Conditional clauses Conditional clauses cannot be expressed using the JSON form of the template. -Instead, the template *must* be passed as a string. For instance, let's say +Instead, the template *must* be passed as a string. For instance, let's say we wanted to run a `match` query on the `line` field, and optionally wanted to filter by line numbers, where `start` and `end` are optional. diff --git a/docs/reference/search/search-your-data/collapse-search-results.asciidoc b/docs/reference/search/search-your-data/collapse-search-results.asciidoc index e4f8c8b100e0..013064b5a91e 100644 --- a/docs/reference/search/search-your-data/collapse-search-results.asciidoc +++ b/docs/reference/search/search-your-data/collapse-search-results.asciidoc @@ -74,7 +74,7 @@ GET /my-index-000001/_search See <> for the complete list of supported options and the format of the response. -It is also possible to request multiple `inner_hits` for each collapsed hit. This can be useful when you want to get +It is also possible to request multiple `inner_hits` for each collapsed hit. This can be useful when you want to get multiple representations of the collapsed hits. [source,console] @@ -111,7 +111,7 @@ GET /my-index-000001/_search <3> return the three most recent HTTP responses for the user The expansion of the group is done by sending an additional query for each -`inner_hit` request for each collapsed hit returned in the response. This can significantly slow things down +`inner_hit` request for each collapsed hit returned in the response. 
This can significantly slow things down if you have too many groups and/or `inner_hit` requests. The `max_concurrent_group_searches` request parameter can be used to control diff --git a/docs/reference/search/search-your-data/filter-search-results.asciidoc b/docs/reference/search/search-your-data/filter-search-results.asciidoc index 2704f1d11412..abc2749cb9b1 100644 --- a/docs/reference/search/search-your-data/filter-search-results.asciidoc +++ b/docs/reference/search/search-your-data/filter-search-results.asciidoc @@ -50,8 +50,8 @@ PUT /shirts/_doc/1?refresh Imagine a user has specified two filters: -`color:red` and `brand:gucci`. You only want to show them red shirts made by -Gucci in the search results. Normally you would do this with a +`color:red` and `brand:gucci`. You only want to show them red shirts made by +Gucci in the search results. Normally you would do this with a <>: [source,console] @@ -70,7 +70,7 @@ GET /shirts/_search -------------------------------------------------- However, you would also like to use _faceted navigation_ to display a list of -other options that the user could click on. Perhaps you have a `model` field +other options that the user could click on. Perhaps you have a `model` field that would allow the user to limit their search results to red Gucci `t-shirts` or `dress-shirts`. @@ -105,7 +105,7 @@ available in *other colors*. If you just add a `terms` aggregation on the returns only red shirts by Gucci. Instead, you want to include shirts of all colors during aggregation, then -apply the `colors` filter only to the search results. This is the purpose of +apply the `colors` filter only to the search results. This is the purpose of the `post_filter`: [source,console] @@ -226,8 +226,8 @@ The way the scores are combined can be controlled with the `score_mode`: [cols="<,<",options="header",] |======================================================================= |Score Mode |Description -|`total` |Add the original score and the rescore query score. The default. -|`multiply` |Multiply the original score by the rescore query score. Useful +|`total` |Add the original score and the rescore query score. The default. +|`multiply` |Multiply the original score by the rescore query score. Useful for <> rescores. |`avg` |Average the original score and the rescore query score. |`max` |Take the max of original score and the rescore query score. @@ -286,6 +286,6 @@ POST /_search // TEST[setup:my_index] The first one gets the results of the query then the second one gets the -results of the first, etc. The second rescore will "see" the sorting done +results of the first, etc. The second rescore will "see" the sorting done by the first rescore so it is possible to use a large window on the first rescore to pull documents into a smaller window for the second rescore. diff --git a/docs/reference/search/search-your-data/highlighting.asciidoc b/docs/reference/search/search-your-data/highlighting.asciidoc index 28691934b553..37de06387a3d 100644 --- a/docs/reference/search/search-your-data/highlighting.asciidoc +++ b/docs/reference/search/search-your-data/highlighting.asciidoc @@ -72,10 +72,10 @@ The `fvh` highlighter uses the Lucene Fast Vector highlighter. This highlighter can be used on fields with `term_vector` set to `with_positions_offsets` in the mapping. The fast vector highlighter: -* Can be customized with a <>. +* Can be customized with a <>. 
* Requires setting `term_vector` to `with_positions_offsets` which increases the size of the index -* Can combine matches from multiple fields into one result. See +* Can combine matches from multiple fields into one result. See `matched_fields` * Can assign different weights to matches at different positions allowing for things like phrase matches being sorted above term matches when @@ -137,7 +137,7 @@ boundary_scanner:: Specifies how to break the highlighted fragments: `chars`, Defaults to `sentence` for the `unified` highlighter. Defaults to `chars` for the `fvh` highlighter. `chars`::: Use the characters specified by `boundary_chars` as highlighting -boundaries. The `boundary_max_scan` setting controls how far to scan for +boundaries. The `boundary_max_scan` setting controls how far to scan for boundary characters. Only valid for the `fvh` highlighter. `sentence`::: Break highlighted fragments at the next sentence boundary, as determined by Java's @@ -200,7 +200,7 @@ include the search query as part of the `highlight_query`. matched_fields:: Combine matches on multiple fields to highlight a single field. This is most intuitive for multifields that analyze the same string in different -ways. All `matched_fields` must have `term_vector` set to +ways. All `matched_fields` must have `term_vector` set to `with_positions_offsets`, but only the field to which the matches are combined is loaded so only that field benefits from having `store` set to `yes`. Only valid for the `fvh` highlighter. @@ -216,7 +216,7 @@ handy when you need to highlight short texts such as a title or address, but fragmentation is not required. If `number_of_fragments` is 0, `fragment_size` is ignored. Defaults to 5. -order:: Sorts highlighted fragments by score when set to `score`. By default, +order:: Sorts highlighted fragments by score when set to `score`. By default, fragments will be output in the order they appear in the field (order: `none`). Setting this option to `score` will output the most relevant fragments first. Each highlighter applies its own logic to compute relevancy scores. See @@ -527,8 +527,8 @@ GET /_search WARNING: This is only supported by the `fvh` highlighter The Fast Vector Highlighter can combine matches on multiple fields to -highlight a single field. This is most intuitive for multifields that -analyze the same string in different ways. All `matched_fields` must have +highlight a single field. This is most intuitive for multifields that +analyze the same string in different ways. All `matched_fields` must have `term_vector` set to `with_positions_offsets` but only the field to which the matches are combined is loaded so only that field would benefit from having `store` set to `yes`. @@ -622,7 +622,7 @@ it is just fine not to list the field to which the matches are combined [NOTE] Technically it is also fine to add fields to `matched_fields` that don't share the same underlying string as the field to which the matches -are combined. The results might not make much sense and if one of the +are combined. The results might not make much sense and if one of the matches is off the end of the text then the whole query will fail. [NOTE] @@ -658,7 +658,7 @@ to [discrete] == Explicitly order highlighted fields Elasticsearch highlights the fields in the order that they are sent, but per the -JSON spec, objects are unordered. If you need to be explicit about the order +JSON spec, objects are unordered. 
If you need to be explicit about the order in which fields are highlighted specify the `fields` as an array: [source,console] diff --git a/docs/reference/search/search-your-data/paginate-search-results.asciidoc b/docs/reference/search/search-your-data/paginate-search-results.asciidoc index c1972d4815c0..6f2d6bd1e1a8 100644 --- a/docs/reference/search/search-your-data/paginate-search-results.asciidoc +++ b/docs/reference/search/search-your-data/paginate-search-results.asciidoc @@ -267,7 +267,7 @@ JavaScript:: ********************************************* NOTE: The results that are returned from a scroll request reflect the state of -the data stream or index at the time that the initial `search` request was made, like a +the data stream or index at the time that the initial `search` request was made, like a snapshot in time. Subsequent changes to documents (index, update or delete) will only affect later search requests. @@ -310,7 +310,7 @@ POST /_search/scroll <3> The `scroll_id` parameter The `size` parameter allows you to configure the maximum number of hits to be -returned with each batch of results. Each call to the `scroll` API returns the +returned with each batch of results. Each call to the `scroll` API returns the next batch of results until there are no more results left to return, ie the `hits` array is empty. @@ -351,7 +351,7 @@ request) tells Elasticsearch how long it should keep the search context alive. Its value (e.g. `1m`, see <>) does not need to be long enough to process all data -- it just needs to be long enough to process the previous batch of results. Each `scroll` request (with the `scroll` parameter) sets a -new expiry time. If a `scroll` request doesn't pass in the `scroll` +new expiry time. If a `scroll` request doesn't pass in the `scroll` parameter, then the search context will be freed as part of _that_ `scroll` request. diff --git a/docs/reference/search/search-your-data/sort-search-results.asciidoc b/docs/reference/search/search-your-data/sort-search-results.asciidoc index 023c4de5ef4b..be0da5ca5130 100644 --- a/docs/reference/search/search-your-data/sort-search-results.asciidoc +++ b/docs/reference/search/search-your-data/sort-search-results.asciidoc @@ -97,7 +97,7 @@ to. The `mode` option can have the following values: number based array fields. `avg`:: Use the average of all values as sort value. Only applicable for number based array fields. -`median`:: Use the median of all values as sort value. Only applicable +`median`:: Use the median of all values as sort value. Only applicable for number based array fields. The default sort mode in the ascending sort order is `min` -- the lowest value diff --git a/docs/reference/search/search.asciidoc b/docs/reference/search/search.asciidoc index 471810c26dde..1e916ba9ce1e 100644 --- a/docs/reference/search/search.asciidoc +++ b/docs/reference/search/search.asciidoc @@ -334,7 +334,7 @@ pattern. (Optional, string) Format in which the doc values are returned. + -For <>, you can specify a date <>, you can specify a date <>. For <> fields, you can specify a https://docs.oracle.com/javase/8/docs/api/java/text/DecimalFormat.html[DecimalFormat pattern]. 
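For instance, a hedged sketch of how a per-field format could be supplied alongside `docvalue_fields` in a search request (the index and field names here are placeholders):

[source,js]
--------------------------------------------------
GET /my-index-000001/_search
{
  "_source": false,
  "docvalue_fields": [
    { "field": "@timestamp", "format": "epoch_millis" },
    { "field": "price",      "format": "#,##0.00" }
  ]
}
--------------------------------------------------
// NOTCONSOLE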
diff --git a/docs/reference/search/suggesters/completion-suggest.asciidoc b/docs/reference/search/suggesters/completion-suggest.asciidoc index 5f590b0fe630..0ba27e7d9074 100644 --- a/docs/reference/search/suggesters/completion-suggest.asciidoc +++ b/docs/reference/search/suggesters/completion-suggest.asciidoc @@ -377,7 +377,7 @@ The following parameters are supported: If `true`, all measurements (like fuzzy edit distance, transpositions, and lengths) are measured in Unicode code points instead of - in bytes. This is slightly slower than raw + in bytes. This is slightly slower than raw bytes, so it is set to `false` by default. NOTE: If you want to stick with the default values, but @@ -418,6 +418,6 @@ The following parameters are supported: Regular expressions are dangerous because it's easy to accidentally create an innocuous looking one that requires an exponential number of internal determinized automaton states (and corresponding RAM and CPU) - for Lucene to execute. Lucene prevents these using the - `max_determinized_states` setting (defaults to 10000). You can raise + for Lucene to execute. Lucene prevents these using the + `max_determinized_states` setting (defaults to 10000). You can raise this limit to allow more complex regular expressions to execute. diff --git a/docs/reference/search/suggesters/phrase-suggest.asciidoc b/docs/reference/search/suggesters/phrase-suggest.asciidoc index dfece161e25b..921510b2ad5b 100644 --- a/docs/reference/search/suggesters/phrase-suggest.asciidoc +++ b/docs/reference/search/suggesters/phrase-suggest.asciidoc @@ -165,7 +165,7 @@ The response contains suggestions scored by the most likely spelling correction accepts a float value in the range `[0..1)` as a fraction of the actual query terms or a number `>=1` as an absolute number of query terms. The default is set to `1.0`, meaning only corrections with - at most one misspelled term are returned. Note that setting this too high + at most one misspelled term are returned. Note that setting this too high can negatively impact performance. Low values like `1` or `2` are recommended; otherwise the time spend in suggest calls might exceed the time spend in query execution. @@ -195,10 +195,10 @@ The response contains suggestions scored by the most likely spelling correction Sets the text / query to provide suggestions for. `highlight`:: - Sets up suggestion highlighting. If not provided then - no `highlighted` field is returned. If provided must + Sets up suggestion highlighting. If not provided then + no `highlighted` field is returned. If provided must contain exactly `pre_tag` and `post_tag`, which are - wrapped around the changed tokens. If multiple tokens + wrapped around the changed tokens. If multiple tokens in a row are changed the entire phrase of changed tokens is wrapped rather than each token. @@ -209,7 +209,7 @@ The response contains suggestions scored by the most likely spelling correction been generated from. The `query` must be specified and it can be templated, see <> for more information. The current suggestion is automatically made available as the `{{suggestion}}` - variable, which should be used in your query. You can still specify + variable, which should be used in your query. You can still specify your own template `params` -- the `suggestion` value will be added to the variables you specify. 
Additionally, you can specify a `prune` to control if all phrase suggestions will be returned; when set to `true` the suggestions diff --git a/docs/reference/settings/monitoring-settings.asciidoc b/docs/reference/settings/monitoring-settings.asciidoc index 33fe03ee0a0a..832e574a10cc 100644 --- a/docs/reference/settings/monitoring-settings.asciidoc +++ b/docs/reference/settings/monitoring-settings.asciidoc @@ -148,7 +148,7 @@ automatically upgrade bulk requests to future-proof them. `cluster_alerts.management.enabled`:: Whether to create cluster alerts for this cluster. The default value is `true`. -To use this feature, {watcher} must be enabled. If you have a basic license, +To use this feature, {watcher} must be enabled. If you have a basic license, cluster alerts are not displayed. `wait_master.timeout`:: diff --git a/docs/reference/settings/notification-settings.asciidoc b/docs/reference/settings/notification-settings.asciidoc index 57429e1ed06c..e4569fa46b04 100644 --- a/docs/reference/settings/notification-settings.asciidoc +++ b/docs/reference/settings/notification-settings.asciidoc @@ -74,7 +74,7 @@ Specifies the maximum size an HTTP response is allowed to have, defaults to (<>) A list of URLs, that the internal HTTP client is allowed to connect to. This client is used in the HTTP input, the webhook, the slack, pagerduty, -and jira actions. This setting can be updated dynamically. It defaults to `*` +and jira actions. This setting can be updated dynamically. It defaults to `*` allowing everything. Note: If you configure this setting and you are using one of the slack/pagerduty actions, you have to ensure that the corresponding endpoints are explicitly allowed as well. diff --git a/docs/reference/settings/security-settings.asciidoc b/docs/reference/settings/security-settings.asciidoc index e5da34c62d57..b0ad720fb6b3 100644 --- a/docs/reference/settings/security-settings.asciidoc +++ b/docs/reference/settings/security-settings.asciidoc @@ -46,7 +46,7 @@ recommend that you explicitly add this setting to avoid confusion. (<>) A comma-separated list of settings that are omitted from the results of the <>. You can use wildcards to include -multiple settings in the list. For example, the following value hides all the +multiple settings in the list. For example, the following value hides all the settings for the ad1 active_directory realm: `xpack.security.authc.realms.active_directory.ad1.*`. The API already omits all `ssl` settings, `bind_dn`, and `bind_password` due to @@ -54,7 +54,7 @@ the sensitive nature of the information. `xpack.security.fips_mode.enabled`:: (<>) -Enables fips mode of operation. Set this to `true` if you run this {es} instance in a FIPS 140-2 enabled JVM. For more information, see <>. Defaults to `false`. +Enables fips mode of operation. Set this to `true` if you run this {es} instance in a FIPS 140-2 enabled JVM. For more information, see <>. Defaults to `false`. [discrete] [[password-hashing-settings]] @@ -70,7 +70,7 @@ See <>. Defaults to `bcrypt`. ==== Anonymous access settings You can configure the following anonymous access settings in -`elasticsearch.yml`. For more information, see <>. +`elasticsearch.yml`. For more information, see <>. `xpack.security.authc.anonymous.username`:: (<>) @@ -470,14 +470,14 @@ only group considered. Defaults to `sub_tree`. (<>) Specifies a filter to use to look up a group. When not set, the realm searches for `group`, `groupOfNames`, `groupOfUniqueNames`, -or `posixGroup` with the attributes `member`, `memberOf`, or `memberUid`. 
Any +or `posixGroup` with the attributes `member`, `memberOf`, or `memberUid`. Any instance of `{0}` in the filter is replaced by the user attribute defined in `group_search.user_attribute`. `group_search.user_attribute`:: (<>) Specifies the user attribute that is fetched and provided as a parameter to -the filter. If not set, the user DN is passed into the filter. Defaults to Empty. +the filter. If not set, the user DN is passed into the filter. Defaults to Empty. `unmapped_groups_as_roles`:: (<>) @@ -726,7 +726,7 @@ only user considered. Defaults to `sub_tree`. `user_search.filter`:: (<>) -Specifies a filter to use to lookup a user given a username. The default +Specifies a filter to use to lookup a user given a username. The default filter looks up `user` objects with either `sAMAccountName` or `userPrincipalName`. If specified, this must be a valid LDAP user search filter. For example `(&(objectClass=user)(sAMAccountName={0}))`. For more information, @@ -788,7 +788,7 @@ Defaults to `60s`. `group_search.base_dn`:: (<>) -The context to search for groups in which the user has membership. Defaults +The context to search for groups in which the user has membership. Defaults to the root of the Active Directory domain. `group_search.scope`:: @@ -813,7 +813,7 @@ Defaults to `5s` (5 seconds ). `timeout.tcp_read`:: (<>) deprecated[7.7] The TCP read timeout period after establishing an LDAP -connection. This is equivalent to and is deprecated in favor of +connection. This is equivalent to and is deprecated in favor of `timeout.response` and they cannot be used simultaneously. An `s` at the end indicates seconds, or `ms` indicates milliseconds. Defaults to the value of `timeout.ldap_search`. @@ -991,7 +991,7 @@ for SSL. This setting cannot be used with `certificate_authorities`. `files.role_mapping`:: (<>) Specifies the <> of the -<>. +<>. Defaults to `ES_PATH_CONF/role_mapping.yml`. `authorization_realms`:: @@ -1167,7 +1167,7 @@ As per `attribute_patterns.principal`, but for the _dn_ property. `nameid_format` {ess-icon}:: (<>) The NameID format that should be requested when asking the IdP to authenticate -the current user. The default is to not include the `nameid_format` attribute. +the current user. The default is to not include the `nameid_format` attribute. // end::saml-nameid-format-tag[] // tag::saml-nameid-allow-create-tag[] diff --git a/docs/reference/setup/bootstrap-checks-xes.asciidoc b/docs/reference/setup/bootstrap-checks-xes.asciidoc index 12339e4475eb..bc187e668d0c 100644 --- a/docs/reference/setup/bootstrap-checks-xes.asciidoc +++ b/docs/reference/setup/bootstrap-checks-xes.asciidoc @@ -57,7 +57,7 @@ If you enable {es} {security-features}, unless you have a trial license, you must configure SSL/TLS for internode-communication. NOTE: Single-node clusters that use a loopback interface do not have this -requirement. For more information, see +requirement. For more information, see <>. To pass this bootstrap check, you must diff --git a/docs/reference/setup/bootstrap-checks.asciidoc b/docs/reference/setup/bootstrap-checks.asciidoc index ff5261f4bde7..8cbab73603f5 100644 --- a/docs/reference/setup/bootstrap-checks.asciidoc +++ b/docs/reference/setup/bootstrap-checks.asciidoc @@ -228,7 +228,7 @@ release build of the JVM. Early versions of the HotSpot JVM that shipped with JDK 8 are known to have issues that can lead to index corruption when the G1GC collector is -enabled. The versions impacted are those earlier than the version of +enabled. 
The versions impacted are those earlier than the version of HotSpot that shipped with JDK 8u40. The G1GC check detects these early versions of the HotSpot JVM. diff --git a/docs/reference/setup/install.asciidoc b/docs/reference/setup/install.asciidoc index 1fbce2987587..787473803283 100644 --- a/docs/reference/setup/install.asciidoc +++ b/docs/reference/setup/install.asciidoc @@ -31,7 +31,7 @@ The `zip` archive is suitable for installation on Windows. `deb`:: The `deb` package is suitable for Debian, Ubuntu, and other Debian-based -systems. Debian packages may be downloaded from the Elasticsearch website or +systems. Debian packages may be downloaded from the Elasticsearch website or from our Debian repository. + <> @@ -39,7 +39,7 @@ from our Debian repository. `rpm`:: The `rpm` package is suitable for installation on Red Hat, Centos, SLES, -OpenSuSE and other RPM-based systems. RPMs may be downloaded from the +OpenSuSE and other RPM-based systems. RPMs may be downloaded from the Elasticsearch website or from our RPM repository. + <> diff --git a/docs/reference/setup/install/deb.asciidoc b/docs/reference/setup/install/deb.asciidoc index 5ead757b9abd..23d51ede8d8a 100644 --- a/docs/reference/setup/install/deb.asciidoc +++ b/docs/reference/setup/install/deb.asciidoc @@ -157,7 +157,7 @@ include::sysconfig-file.asciidoc[] NOTE: Distributions that use `systemd` require that system resource limits be configured via `systemd` rather than via the `/etc/sysconfig/elasticsearch` -file. See <> for more information. +file. See <> for more information. [[deb-layout]] ==== Directory layout of Debian package diff --git a/docs/reference/setup/install/docker.asciidoc b/docs/reference/setup/install/docker.asciidoc index d0282c814a7f..e627c122edbc 100644 --- a/docs/reference/setup/install/docker.asciidoc +++ b/docs/reference/setup/install/docker.asciidoc @@ -89,7 +89,7 @@ potentially ignoring any firewall settings. If you don't want to expose port 920 a reverse proxy, replace `9200:9200` with `127.0.0.1:9200:9200` in the docker-compose.yml file. {es} will then only be accessible from the host machine itself. -The https://docs.docker.com/storage/volumes[Docker named volumes] +The https://docs.docker.com/storage/volumes[Docker named volumes] `data01`, `data02`, and `data03` store the node data directories so the data persists across restarts. If they don't already exist, `docker-compose` creates them when you bring up the cluster. -- @@ -163,7 +163,7 @@ sysctl -w vm.max_map_count=262144 -------------------------------------------- -- -* macOS with https://docs.docker.com/docker-for-mac[Docker for Mac] +* macOS with https://docs.docker.com/docker-for-mac[Docker for Mac] + -- The `vm.max_map_count` setting must be set within the xhyve virtual machine: @@ -308,7 +308,7 @@ example +docker.elastic.co/elasticsearch/elasticsearch:{version}+. You should use a volume bound on `/usr/share/elasticsearch/data` for the following reasons: -. The data of your {es} node won't be lost if the container is killed +. The data of your {es} node won't be lost if the container is killed . 
{es} is I/O sensitive and the Docker storage driver is not ideal for fast I/O diff --git a/docs/reference/setup/install/etc-elasticsearch.asciidoc b/docs/reference/setup/install/etc-elasticsearch.asciidoc index e36a075c127f..5adb6543503a 100644 --- a/docs/reference/setup/install/etc-elasticsearch.asciidoc +++ b/docs/reference/setup/install/etc-elasticsearch.asciidoc @@ -10,5 +10,5 @@ Running commands from this directory or any subdirectories, such as the permissions. Elasticsearch loads its configuration from the -`/etc/elasticsearch/elasticsearch.yml` file by default. The format of this +`/etc/elasticsearch/elasticsearch.yml` file by default. The format of this config file is explained in <>. diff --git a/docs/reference/setup/install/next-steps.asciidoc b/docs/reference/setup/install/next-steps.asciidoc index e52cdfee077e..e8674b0d19ea 100644 --- a/docs/reference/setup/install/next-steps.asciidoc +++ b/docs/reference/setup/install/next-steps.asciidoc @@ -1,7 +1,7 @@ [role="exclude"] ==== Next steps -You now have a test {es} environment set up. Before you start +You now have a test {es} environment set up. Before you start serious development or go into production with {es}, you must do some additional setup: diff --git a/docs/reference/setup/install/rpm.asciidoc b/docs/reference/setup/install/rpm.asciidoc index 761d9c4eef6d..96215f4bd7ee 100644 --- a/docs/reference/setup/install/rpm.asciidoc +++ b/docs/reference/setup/install/rpm.asciidoc @@ -7,7 +7,7 @@ Elasticsearch on any RPM-based system such as OpenSuSE, SLES, Centos, Red Hat, and Oracle Enterprise. NOTE: RPM install is not supported on distributions with old versions of RPM, -such as SLES 11 and CentOS 5. Please see <> instead. +such as SLES 11 and CentOS 5. Please see <> instead. include::license.asciidoc[] @@ -150,7 +150,7 @@ include::sysconfig-file.asciidoc[] NOTE: Distributions that use `systemd` require that system resource limits be configured via `systemd` rather than via the `/etc/sysconfig/elasticsearch` -file. See <> for more information. +file. See <> for more information. [[rpm-layout]] ==== Directory layout of RPM diff --git a/docs/reference/setup/install/targz.asciidoc b/docs/reference/setup/install/targz.asciidoc index 7800200a0fcf..dbd03208c57a 100644 --- a/docs/reference/setup/install/targz.asciidoc +++ b/docs/reference/setup/install/targz.asciidoc @@ -92,7 +92,7 @@ include::targz-daemon.asciidoc[] ==== Configuring Elasticsearch on the command line Elasticsearch loads its configuration from the `$ES_HOME/config/elasticsearch.yml` -file by default. The format of this config file is explained in +file by default. The format of this config file is explained in <>. Any settings that can be specified in the config file can also be specified on @@ -116,7 +116,7 @@ created when unpacking the archive. This is very convenient because you don't have to create any directories to start using Elasticsearch, and uninstalling Elasticsearch is as easy as -removing the `$ES_HOME` directory. However, it is advisable to change the +removing the `$ES_HOME` directory. However, it is advisable to change the default locations of the config directory, the data directory, and the logs directory so that you do not delete important data later on. 
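For illustration only, the advice above about the `-E` syntax and the default directory locations can be combined as in the following sketch; the `/var/lib/elasticsearch` and `/var/log/elasticsearch` paths are placeholder values chosen for this example, not defaults taken from this document.

[source,sh]
--------------------------------------------
# Run from $ES_HOME. The cluster and node names follow the examples in this
# document; the data and log paths are illustrative placeholders.
./bin/elasticsearch \
  -Ecluster.name=my_cluster \
  -Enode.name=node_1 \
  -Epath.data=/var/lib/elasticsearch \
  -Epath.logs=/var/log/elasticsearch
--------------------------------------------

Settings passed with `-E` apply only to that invocation; values that should survive restarts belong in `elasticsearch.yml`.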
diff --git a/docs/reference/setup/install/windows.asciidoc b/docs/reference/setup/install/windows.asciidoc index 31b00bcf5bdc..f34b52addee1 100644 --- a/docs/reference/setup/install/windows.asciidoc +++ b/docs/reference/setup/install/windows.asciidoc @@ -3,7 +3,7 @@ beta[] -Elasticsearch can be installed on Windows using the `.msi` package. This can +Elasticsearch can be installed on Windows using the `.msi` package. This can install Elasticsearch as a Windows service or allow it to be run manually using the included `elasticsearch.exe` executable. @@ -362,7 +362,7 @@ the command line, using the `-E` syntax as follows: .\bin\elasticsearch.exe -E cluster.name=my_cluster -E node.name=node_1 -------------------------------------------- -NOTE: Values that contain spaces must be surrounded with quotes. For instance `-E path.logs="C:\My Logs\logs"`. +NOTE: Values that contain spaces must be surrounded with quotes. For instance `-E path.logs="C:\My Logs\logs"`. TIP: Typically, any cluster-wide settings (like `cluster.name`) should be added to the `elasticsearch.yml` config file, while any node-specific settings diff --git a/docs/reference/setup/install/zip-windows.asciidoc b/docs/reference/setup/install/zip-windows.asciidoc index 98a59814a0b0..f57f2e0a5c09 100644 --- a/docs/reference/setup/install/zip-windows.asciidoc +++ b/docs/reference/setup/install/zip-windows.asciidoc @@ -1,7 +1,7 @@ [[zip-windows]] === Install Elasticsearch with `.zip` on Windows -Elasticsearch can be installed on Windows using the Windows `.zip` archive. This +Elasticsearch can be installed on Windows using the Windows `.zip` archive. This comes with a `elasticsearch-service.bat` command which will setup Elasticsearch to run as a service. @@ -41,7 +41,7 @@ ifeval::["{release-state}"!="unreleased"] Download the `.zip` archive for Elasticsearch v{version} from: https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-{version}-windows-x86_64.zip -Unzip it with your favourite unzip tool. This will create a folder called +Unzip it with your favourite unzip tool. This will create a folder called +elasticsearch-{version}+, which we will refer to as `%ES_HOME%`. In a terminal window, `cd` to the `%ES_HOME%` directory, for instance: @@ -68,7 +68,7 @@ include::zip-windows-start.asciidoc[] ==== Configuring Elasticsearch on the command line Elasticsearch loads its configuration from the `%ES_HOME%\config\elasticsearch.yml` -file by default. The format of this config file is explained in +file by default. The format of this config file is explained in <>. Any settings that can be specified in the config file can also be specified on @@ -79,7 +79,7 @@ the command line, using the `-E` syntax as follows: .\bin\elasticsearch.bat -Ecluster.name=my_cluster -Enode.name=node_1 -------------------------------------------- -NOTE: Values that contain spaces must be surrounded with quotes. For instance `-Epath.logs="C:\My Logs\logs"`. +NOTE: Values that contain spaces must be surrounded with quotes. For instance `-Epath.logs="C:\My Logs\logs"`. TIP: Typically, any cluster-wide settings (like `cluster.name`) should be added to the `elasticsearch.yml` config file, while any node-specific settings @@ -161,11 +161,11 @@ The Elasticsearch service can be configured prior to installation by setting the `SERVICE_DISPLAY_NAME`:: - The name of the service. Defaults to `Elasticsearch %SERVICE_ID%`. + The name of the service. Defaults to `Elasticsearch %SERVICE_ID%`. `SERVICE_DESCRIPTION`:: - The description of the service. 
Defaults to `Elasticsearch Windows Service - https://elastic.co`. + The description of the service. Defaults to `Elasticsearch Windows Service - https://elastic.co`. `ES_JAVA_HOME`:: @@ -190,11 +190,11 @@ The Elasticsearch service can be configured prior to installation by setting the `ES_START_TYPE`:: - Startup mode for the service. Can be either `auto` or `manual` (default). + Startup mode for the service. Can be either `auto` or `manual` (default). `ES_STOP_TIMEOUT` :: - The timeout in seconds that procrun waits for service to exit gracefully. Defaults to `0`. + The timeout in seconds that procrun waits for service to exit gracefully. Defaults to `0`. NOTE: At its core, `elasticsearch-service.bat` relies on https://commons.apache.org/proper/commons-daemon/[Apache Commons Daemon] project to install the service. Environment variables set prior to the service installation are copied and will be used during the service lifecycle. This means any changes made to them after the installation will not be picked up unless the service is reinstalled. @@ -222,7 +222,7 @@ before you execute the service installation. Using the Manager GUI:: -It is also possible to configure the service after it's been installed using the manager GUI (`elasticsearch-service-mgr.exe`), which offers insight into the installed service, including its status, startup type, JVM, start and stop settings amongst other things. Simply invoking `elasticsearch-service.bat manager` from the command-line will open up the manager window: +It is also possible to configure the service after it's been installed using the manager GUI (`elasticsearch-service-mgr.exe`), which offers insight into the installed service, including its status, startup type, JVM, start and stop settings amongst other things. Simply invoking `elasticsearch-service.bat manager` from the command-line will open up the manager window: image::images/service-manager-win.png["Windows Service Manager GUI",align="center"] @@ -237,7 +237,7 @@ unpacking the archive. This is very convenient because you don't have to create any directories to start using Elasticsearch, and uninstalling Elasticsearch is as easy as -removing the `%ES_HOME%` directory. However, it is advisable to change the +removing the `%ES_HOME%` directory. However, it is advisable to change the default locations of the config directory, the data directory, and the logs directory so that you do not delete important data later on. diff --git a/docs/reference/setup/sysconfig.asciidoc b/docs/reference/setup/sysconfig.asciidoc index dc9072d6906d..341b488a905e 100644 --- a/docs/reference/setup/sysconfig.asciidoc +++ b/docs/reference/setup/sysconfig.asciidoc @@ -2,7 +2,7 @@ == Important System Configuration Ideally, Elasticsearch should run alone on a server and use all of the -resources available to it. In order to do so, you need to configure your +resources available to it. In order to do so, you need to configure your operating system to allow the user running Elasticsearch to access more resources than allowed by default. @@ -27,8 +27,8 @@ Elasticsearch node. As soon as you configure a network setting like `network.host`, Elasticsearch assumes that you are moving to production and will upgrade the above warnings -to exceptions. These exceptions will prevent your Elasticsearch node from -starting. This is an important safety measure to ensure that you will not +to exceptions. These exceptions will prevent your Elasticsearch node from +starting. 
This is an important safety measure to ensure that you will not lose data because of a malconfigured server. include::sysconfig/configuring.asciidoc[] diff --git a/docs/reference/setup/sysconfig/configuring.asciidoc b/docs/reference/setup/sysconfig/configuring.asciidoc index 7976efee84fe..61255b88fa09 100644 --- a/docs/reference/setup/sysconfig/configuring.asciidoc +++ b/docs/reference/setup/sysconfig/configuring.asciidoc @@ -19,7 +19,7 @@ require that system limits are specified in a On Linux systems, `ulimit` can be used to change resource limits on a temporary basis. Limits usually need to be set as `root` before switching to -the user that will run Elasticsearch. For example, to set the number of +the user that will run Elasticsearch. For example, to set the number of open file handles (`ulimit -n`) to 65,536, you can do the following: [source,sh] @@ -55,7 +55,7 @@ a new session. [NOTE] .Ubuntu and `limits.conf` =============================== -Ubuntu ignores the `limits.conf` file for processes started by `init.d`. To +Ubuntu ignores the `limits.conf` file for processes started by `init.d`. To enable the `limits.conf` file, edit `/etc/pam.d/su` and uncomment the following line: diff --git a/docs/reference/setup/sysconfig/executable-jna-tmpdir.asciidoc b/docs/reference/setup/sysconfig/executable-jna-tmpdir.asciidoc index 0ede64d57b70..b1dce0824e7a 100644 --- a/docs/reference/setup/sysconfig/executable-jna-tmpdir.asciidoc +++ b/docs/reference/setup/sysconfig/executable-jna-tmpdir.asciidoc @@ -16,7 +16,7 @@ the JVM process from being able to map this code as executable. On some hardened Linux installations this is a default mount option for `/tmp`. One indication that the underlying mount is mounted with `noexec` is that at startup JNA will fail to load with a `java.lang.UnsatisfiedLinkerError` exception with a message -along the lines of `failed to map segment from shared object`. Note that the +along the lines of `failed to map segment from shared object`. Note that the exception message can differ amongst JVM versions. Additionally, the components of Elasticsearch that rely on execution of native code via JNA will fail with messages indicating that it is `because JNA is not available`. If you are seeing diff --git a/docs/reference/setup/sysconfig/file-descriptors.asciidoc b/docs/reference/setup/sysconfig/file-descriptors.asciidoc index 27d330b6a541..905a29d846c3 100644 --- a/docs/reference/setup/sysconfig/file-descriptors.asciidoc +++ b/docs/reference/setup/sysconfig/file-descriptors.asciidoc @@ -7,7 +7,7 @@ Elasticsearch on Windows. On Windows that JVM uses an https://msdn.microsoft.com/en-us/library/windows/desktop/aa363858(v=vs.85).aspx[API] limited only by available resources. -Elasticsearch uses a lot of file descriptors or file handles. Running out of +Elasticsearch uses a lot of file descriptors or file handles. Running out of file descriptors can be disastrous and will most probably lead to data loss. Make sure to increase the limit on the number of open files descriptors for the user running Elasticsearch to 65,536 or higher. diff --git a/docs/reference/setup/sysconfig/swap.asciidoc b/docs/reference/setup/sysconfig/swap.asciidoc index fa4c18807c52..86a9307f6827 100644 --- a/docs/reference/setup/sysconfig/swap.asciidoc +++ b/docs/reference/setup/sysconfig/swap.asciidoc @@ -79,11 +79,11 @@ GET _nodes?filter_path=**.mlockall -------------- If you see that `mlockall` is `false`, then it means that the `mlockall` -request has failed. 
You will also see a line with more information in the logs +request has failed. You will also see a line with more information in the logs with the words `Unable to lock JVM Memory`. The most probable reason, on Linux/Unix systems, is that the user running -Elasticsearch doesn't have permission to lock memory. This can be granted as +Elasticsearch doesn't have permission to lock memory. This can be granted as follows: `.zip` and `.tar.gz`:: diff --git a/docs/reference/setup/sysconfig/virtual-memory.asciidoc b/docs/reference/setup/sysconfig/virtual-memory.asciidoc index 67aff16d24a3..07f37f4717b1 100644 --- a/docs/reference/setup/sysconfig/virtual-memory.asciidoc +++ b/docs/reference/setup/sysconfig/virtual-memory.asciidoc @@ -2,7 +2,7 @@ === Virtual memory Elasticsearch uses a <> directory by -default to store its indices. The default operating system limits on mmap +default to store its indices. The default operating system limits on mmap counts is likely to be too low, which may result in out of memory exceptions. On Linux, you can increase the limits by running the following command as @@ -14,7 +14,7 @@ sysctl -w vm.max_map_count=262144 ------------------------------------- To set this value permanently, update the `vm.max_map_count` setting in -`/etc/sysctl.conf`. To verify after rebooting, run `sysctl vm.max_map_count`. +`/etc/sysctl.conf`. To verify after rebooting, run `sysctl vm.max_map_count`. -The RPM and Debian packages will configure this setting automatically. No +The RPM and Debian packages will configure this setting automatically. No further configuration is required. diff --git a/docs/reference/snapshot-restore/apis/create-snapshot-api.asciidoc b/docs/reference/snapshot-restore/apis/create-snapshot-api.asciidoc index 5b60575c85b6..9f13c4b25549 100644 --- a/docs/reference/snapshot-restore/apis/create-snapshot-api.asciidoc +++ b/docs/reference/snapshot-restore/apis/create-snapshot-api.asciidoc @@ -48,7 +48,7 @@ You can use the create snapshot API to create a <>, w backup taken from a running {es} cluster. By default, a snapshot includes all data streams and open indices in the -cluster, as well as the cluster state. You can change this behavior by +cluster, as well as the cluster state. You can change this behavior by specifying a list of data streams and indices to back up in the body of the snapshot request. diff --git a/docs/reference/snapshot-restore/apis/repo-analysis-api.asciidoc b/docs/reference/snapshot-restore/apis/repo-analysis-api.asciidoc index a48b56d24cc2..36473678333f 100644 --- a/docs/reference/snapshot-restore/apis/repo-analysis-api.asciidoc +++ b/docs/reference/snapshot-restore/apis/repo-analysis-api.asciidoc @@ -192,7 +192,7 @@ Defaults to `10`. `read_node_count`:: (Optional, integer) The number of nodes on which to perform a read operation -after writing each blob. Defaults to `10`. +after writing each blob. Defaults to `10`. `early_read_node_count`:: (Optional, integer) The number of nodes on which to perform an early read diff --git a/docs/reference/snapshot-restore/monitor-snapshot-restore.asciidoc b/docs/reference/snapshot-restore/monitor-snapshot-restore.asciidoc index 2e386c76fd84..54ebdd56bd63 100644 --- a/docs/reference/snapshot-restore/monitor-snapshot-restore.asciidoc +++ b/docs/reference/snapshot-restore/monitor-snapshot-restore.asciidoc @@ -82,7 +82,7 @@ This request fails if some of the snapshots are unavailable. Use the boolean par return all snapshots that are currently available. 
Getting all snapshots in the repository can be costly on cloud-based repositories, -both from a cost and performance perspective. If the only information required is +both from a cost and performance perspective. If the only information required is the snapshot names or UUIDs in the repository and the data streams and indices in each snapshot, then the optional boolean parameter `verbose` can be set to `false` to execute a more performant and cost-effective retrieval of the snapshots in the repository. diff --git a/docs/reference/sql/endpoints/jdbc.asciidoc b/docs/reference/sql/endpoints/jdbc.asciidoc index 8bbc6d0fd2aa..15c6f950deff 100644 --- a/docs/reference/sql/endpoints/jdbc.asciidoc +++ b/docs/reference/sql/endpoints/jdbc.asciidoc @@ -51,7 +51,7 @@ include::version-compat.asciidoc[] === Setup The driver main class is `org.elasticsearch.xpack.sql.jdbc.EsDriver`. -Note the driver implements the JDBC 4.0 +Service Provider+ mechanism meaning it is registered automatically +Note the driver implements the JDBC 4.0 +Service Provider+ mechanism meaning it is registered automatically as long as it is available in the classpath. Once registered, the driver understands the following syntax as an URL: diff --git a/docs/reference/sql/endpoints/odbc/configuration.asciidoc b/docs/reference/sql/endpoints/odbc/configuration.asciidoc index eda7b9ee9b51..3d86304cd6dd 100644 --- a/docs/reference/sql/endpoints/odbc/configuration.asciidoc +++ b/docs/reference/sql/endpoints/odbc/configuration.asciidoc @@ -360,7 +360,7 @@ particular connection, in case the default behavior of the driver is not suitable. For earlier versions of the driver, this needs to be done within the client application, in a manner particular to that application, generally in a free text input box (sometimes named "Connection string", "String extras", or -similar). The format of the string is `Attribute1=Value1`. Multiple attributes +similar). The format of the string is `Attribute1=Value1`. Multiple attributes can be specified, separated by a semicolon `Attribute1=Value1;Attribute2=Value2;`. The attribute names are given below. diff --git a/docs/reference/sql/functions/math.asciidoc b/docs/reference/sql/functions/math.asciidoc index f2ebe488248f..25b233d697e0 100644 --- a/docs/reference/sql/functions/math.asciidoc +++ b/docs/reference/sql/functions/math.asciidoc @@ -373,7 +373,7 @@ TRUNCATE( *Output*: numeric *Description*: Returns `numeric_exp` truncated to `integer_exp` places right of the decimal point. If `integer_exp` is negative, -`numeric_exp` is truncated to |`integer_exp`| places to the left of the decimal point. If `integer_exp` is omitted, +`numeric_exp` is truncated to |`integer_exp`| places to the left of the decimal point. If `integer_exp` is omitted, the function will perform as if `integer_exp` would be 0. The returned numeric data type is the same as the data type of `numeric_exp`. diff --git a/docs/reference/transform/apis/put-transform.asciidoc b/docs/reference/transform/apis/put-transform.asciidoc index 7982299ab988..618ee9ad05df 100644 --- a/docs/reference/transform/apis/put-transform.asciidoc +++ b/docs/reference/transform/apis/put-transform.asciidoc @@ -35,7 +35,7 @@ For more information, see <>, <>, and This API defines a {transform}, which copies data from source indices, transforms it, and persists it into an entity-centric destination index. If you choose to use the pivot method for your {transform}, the entities are defined by -the set of `group_by` fields in the `pivot` object. 
If you choose to use the +the set of `group_by` fields in the `pivot` object. If you choose to use the latest method, the entities are defined by the `unique_key` field values in the `latest` object. diff --git a/docs/reference/transform/transforms-at-scale.asciidoc b/docs/reference/transform/transforms-at-scale.asciidoc index 1de6cd4ff7bc..a224e3a33025 100644 --- a/docs/reference/transform/transforms-at-scale.asciidoc +++ b/docs/reference/transform/transforms-at-scale.asciidoc @@ -47,7 +47,7 @@ where most work is being done. The **Stats** interface of the **{transforms-cap}** page in {kib} contains information that covers three main areas: indexing, searching, and processing time (alternatively, you can use the <>). If, for example, the results -show that the highest proportion of time is spent on search, then prioritize +show that the highest proportion of time is spent on search, then prioritize efforts on optimizing the search query of the {transform}. {transforms-cap} also has https://esrally.readthedocs.io[Rally support] that makes it possible to run performance checks on {transforms} configurations if it is required. If you diff --git a/docs/reference/upgrade/disable-shard-alloc.asciidoc b/docs/reference/upgrade/disable-shard-alloc.asciidoc index 56461fa99972..fa97a2e43a0e 100644 --- a/docs/reference/upgrade/disable-shard-alloc.asciidoc +++ b/docs/reference/upgrade/disable-shard-alloc.asciidoc @@ -2,7 +2,7 @@ When you shut down a node, the allocation process waits for `index.unassigned.node_left.delayed_timeout` (by default, one minute) before starting to replicate the shards on that node to other nodes in the cluster, -which can involve a lot of I/O. Since the node is shortly going to be +which can involve a lot of I/O. Since the node is shortly going to be restarted, this I/O is unnecessary. You can avoid racing the clock by <> of replicas before shutting down the node: diff --git a/docs/reference/upgrade/rolling_upgrade.asciidoc b/docs/reference/upgrade/rolling_upgrade.asciidoc index 0180ef81c8cf..289b5f68f797 100644 --- a/docs/reference/upgrade/rolling_upgrade.asciidoc +++ b/docs/reference/upgrade/rolling_upgrade.asciidoc @@ -170,7 +170,7 @@ status will change to `green`. ==================================================== Shards that were not <> might take longer to -recover. You can monitor the recovery status of individual shards by +recover. You can monitor the recovery status of individual shards by submitting a <> request: [source,console] @@ -186,7 +186,7 @@ recovery completes. + -- -When the node has recovered and the cluster is stable, repeat these steps +When the node has recovered and the cluster is stable, repeat these steps for each node that needs to be updated. You can monitor the health of the cluster with a <> request: diff --git a/docs/reference/upgrade/upgrade-node.asciidoc b/docs/reference/upgrade/upgrade-node.asciidoc index c445c03a38ab..9161dfde4fba 100644 --- a/docs/reference/upgrade/upgrade-node.asciidoc +++ b/docs/reference/upgrade/upgrade-node.asciidoc @@ -1,6 +1,6 @@ To upgrade using a <> or <> package: -* Use `rpm` or `dpkg` to install the new package. All files are +* Use `rpm` or `dpkg` to install the new package. All files are installed in the appropriate location for the operating system and {es} config files are not overwritten. 
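As a minimal sketch of that step, a package upgrade is typically invoked as shown below; the package file names are placeholders for whichever version was actually downloaded, not file names taken from this document.

[source,sh]
--------------------------------------------
# Debian-based systems; <version> is a placeholder.
sudo dpkg -i elasticsearch-<version>-amd64.deb

# RPM-based systems; <version> is a placeholder.
sudo rpm -U elasticsearch-<version>-x86_64.rpm
--------------------------------------------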
diff --git a/docs/resiliency/index.asciidoc b/docs/resiliency/index.asciidoc index 662219d7f2fc..25ac0f3a06a2 100644 --- a/docs/resiliency/index.asciidoc +++ b/docs/resiliency/index.asciidoc @@ -6,9 +6,9 @@ == Overview The team at Elasticsearch is committed to continuously improving both -Elasticsearch and Apache Lucene to protect your data. As with any distributed +Elasticsearch and Apache Lucene to protect your data. As with any distributed system, Elasticsearch is complex and has many moving parts, each of which can -encounter edge cases that require proper handling. Our resiliency project is +encounter edge cases that require proper handling. Our resiliency project is an ongoing effort to find and fix these edge cases. If you want to keep up with all this project on GitHub, see our issues list under the tag https://github.com/elastic/elasticsearch/issues?q=label%3Aresiliency[resiliency]. @@ -67,12 +67,12 @@ all new scenarios and will report issues that we find on this page and in our Gi === Better request retry mechanism when nodes are disconnected (STATUS: ONGOING) If the node holding a primary shard is disconnected for whatever reason, the -coordinating node retries the request on the same or a new primary shard. In +coordinating node retries the request on the same or a new primary shard. In certain rare conditions, where the node disconnects and immediately reconnects, it is possible that the original request has already been successfully applied but has not been reported, resulting in duplicate requests. This is particularly true when retrying bulk requests, where some -actions may have completed and some may not have. +actions may have completed and some may not have. An optimization which disabled the existence check for documents indexed with auto-generated IDs could result in the creation of duplicate documents. This @@ -93,7 +93,7 @@ See {GIT}9967[#9967]. (STATUS: ONGOING) The family of circuit breakers has greatly reduced the occurrence of OOM exceptions, but it is still possible to cause a node to run out of heap -space. The following issues have been identified: +space. The following issues have been identified: * Set a hard limit on `from`/`size` parameters {GIT}9311[#9311]. (STATUS: DONE, v2.1.0) * Prevent combinatorial explosion in aggregations from causing OOM {GIT}8081[#8081]. (STATUS: DONE, v5.0.0) @@ -316,41 +316,41 @@ nodes have sent their joins request (based on the `minimum_master_nodes` setting === Mapping changes should be applied synchronously (STATUS: DONE, v2.0.0) When introducing new fields using dynamic mapping, it is possible that the same -field can be added to different shards with different data types. Each shard +field can be added to different shards with different data types. Each shard will operate with its local data type but, if the shard is relocated, the data type from the cluster state will be applied to the new shard, which -can result in a corrupt shard. To prevent this, new fields should not +can result in a corrupt shard. To prevent this, new fields should not be added to a shard's mapping until confirmed by the master. {GIT}8688[#8688] (STATUS: DONE) [discrete] === Add per-segment and per-commit ID to help replication (STATUS: DONE, v2.0.0) -{JIRA}5895[LUCENE-5895] adds a unique ID for each segment and each commit point. File-based replication (as performed by snapshot/restore) can use this ID to know whether the segment/commit on the source and destination machines are the same. Fixed in Lucene 5.0. 
+{JIRA}5895[LUCENE-5895] adds a unique ID for each segment and each commit point. File-based replication (as performed by snapshot/restore) can use this ID to know whether the segment/commit on the source and destination machines are the same. Fixed in Lucene 5.0. [discrete] === Write index metadata on data nodes where shards allocated (STATUS: DONE, v2.0.0) Today, index metadata is written only on nodes that are master-eligible, not on -data-only nodes. This is not a problem when running with multiple master nodes, +data-only nodes. This is not a problem when running with multiple master nodes, as recommended, as the loss of all but one master node is still recoverable. However, users running with a single master node are at risk of losing -their index metadata if the master fails. Instead, this metadata should +their index metadata if the master fails. Instead, this metadata should also be written on any node where a shard is allocated. {GIT}8823[#8823], {GIT}9952[#9952] [discrete] === Better file distribution with multiple data paths (STATUS: DONE, v2.0.0) Today, a node configured with multiple data paths distributes writes across -all paths by writing one file to each path in turn. This can mean that the -failure of a single disk corrupts many shards at once. Instead, by allocating +all paths by writing one file to each path in turn. This can mean that the +failure of a single disk corrupts many shards at once. Instead, by allocating an entire shard to a single data path, the extent of the damage can be limited to just the shards on that disk. {GIT}9498[#9498] [discrete] === Lucene checksums phase 3 (STATUS: DONE, v2.0.0) -Almost all files in Elasticsearch now have checksums which are validated before use. A few changes remain: +Almost all files in Elasticsearch now have checksums which are validated before use. A few changes remain: * {GIT}7586[#7586] adds checksums for cluster and index state files. (STATUS: DONE, Fixed in v1.5.0) * {GIT}9183[#9183] supports validating the checksums on all files when starting a node. (STATUS: DONE, Fixed in v2.0.0) @@ -387,13 +387,13 @@ It is possible in very extreme cases during a complicated full cluster restart, that the current shard state ID can be reset or even go backwards. Elasticsearch now ensures that the state ID always moves forwards, and throws an exception when a legacy ID is higher than the -current ID. See {GIT}10316[#10316] (STATUS: DONE, v1.5.1) +current ID. See {GIT}10316[#10316] (STATUS: DONE, v1.5.1) [discrete] === Verification of index UUIDs (STATUS: DONE, v1.5.0) When deleting and recreating indices rapidly, it is possible that cluster state -updates can arrive out of sync and old states can be merged incorrectly. Instead, +updates can arrive out of sync and old states can be merged incorrectly. Instead, Elasticsearch now checks the index UUID to ensure that cluster state updates refer to the same index version that is present on the local node. See {GIT}9541[#9541] and {GIT}10200[#10200] (STATUS: DONE, Fixed in v1.5.0) @@ -412,13 +412,13 @@ before v1.3.2 are disabled entirely. See {GIT}9925[#9925] (STATUS: DONE, Fixed i Upgrading the metadata of old 3.x segments on node upgrade can be error prone and can result in corruption when merges are being run concurrently. Instead, Elasticsearch will now upgrade the metadata of 3.x segments before the engine -starts. See {GIT}9899[#9899] (STATUS; DONE, fixed in v1.5.0) +starts. 
See {GIT}9899[#9899] (STATUS; DONE, fixed in v1.5.0) [discrete] === Prevent setting minimum_master_nodes to more than the current node count (STATUS: DONE, v1.5.0) Setting `zen.discovery.minimum_master_nodes` to a value higher than the current node count -effectively leaves the cluster without a master and unable to process requests. The only +effectively leaves the cluster without a master and unable to process requests. The only way to fix this is to add more master-eligible nodes. {GIT}8321[#8321] adds a mechanism to validate settings before applying them, and {GIT}9051[#9051] extends this validation support to settings applied during a cluster restore. (STATUS: DONE, Fixed in v1.5.0) @@ -428,8 +428,8 @@ support to settings applied during a cluster restore. (STATUS: DONE, Fixed in v1 Randomized testing combined with chaotic failures has revealed corner cases where the recovery and allocation of shards in a concurrent manner can result -in shard corruption. There is an ongoing effort to reduce the complexity of -these operations in order to make them more deterministic. These include: +in shard corruption. There is an ongoing effort to reduce the complexity of +these operations in order to make them more deterministic. These include: * Introduce shard level locks to prevent concurrent shard modifications {GIT}8436[#8436]. (STATUS: DONE, Fixed in v1.5.0) * Delete shard contents under a lock {GIT}9083[#9083]. (STATUS: DONE, Fixed in v1.5.0) @@ -463,7 +463,7 @@ Recovery from failure is a complicated process, especially in an asynchronous di [discrete] === Lucene checksums phase 2 (STATUS:DONE, v1.4.0.Beta1) -When Lucene opens a segment for reading, it validates the checksum on the smaller segment files -- those which it reads entirely into memory -- but not the large files like term frequencies and positions, as this would be very expensive. During merges, term vectors and stored fields are validated, as long the segments being merged come from the same version of Lucene. Checksumming for term vectors and stored fields is important because merging consists of performing optimized byte copies. Term frequencies, term positions, payloads, doc values, and norms are currently not checked during merges, although Lucene provides the option to do so. These files are less prone to silent corruption as they are actively decoded during merge, and so are more likely to throw exceptions if there is any corruption. +When Lucene opens a segment for reading, it validates the checksum on the smaller segment files -- those which it reads entirely into memory -- but not the large files like term frequencies and positions, as this would be very expensive. During merges, term vectors and stored fields are validated, as long the segments being merged come from the same version of Lucene. Checksumming for term vectors and stored fields is important because merging consists of performing optimized byte copies. Term frequencies, term positions, payloads, doc values, and norms are currently not checked during merges, although Lucene provides the option to do so. These files are less prone to silent corruption as they are actively decoded during merge, and so are more likely to throw exceptions if there is any corruption. 
The following changes have been made: @@ -474,7 +474,7 @@ The following changes have been made: [discrete] === Don't allow unsupported codecs (STATUS: DONE, v1.4.0.Beta1) -Lucene 4 added a number of alternative codecs for experimentation purposes, and Elasticsearch exposed the ability to change codecs. Since then, Lucene has settled on the best choice of codec and provides backwards compatibility only for the default codec. {GIT}7566[#7566] removes the ability to set alternate codecs. +Lucene 4 added a number of alternative codecs for experimentation purposes, and Elasticsearch exposed the ability to change codecs. Since then, Lucene has settled on the best choice of codec and provides backwards compatibility only for the default codec. {GIT}7566[#7566] removes the ability to set alternate codecs. [discrete] === Use checksums to identify entire segments (STATUS: DONE, v1.4.0.Beta1) @@ -519,7 +519,7 @@ Upgrading indices create with Lucene 3.x (Elasticsearch v0.20 and before) to Luc [discrete] === Improve error handling when deleting files (STATUS: DONE, v1.4.0.Beta1) -Lucene uses reference counting to prevent files that are still in use from being deleted. Lucene testing discovered a bug ({JIRA}5919[LUCENE-5919]) when decrementing the ref count on a batch of files. If deleting some of the files resulted in an exception (e.g. due to interference from a virus scanner), the files that had their ref counts decremented successfully could later have their ref counts deleted again, incorrectly, resulting in files being physically deleted before their time. This is fixed in Lucene 4.10. +Lucene uses reference counting to prevent files that are still in use from being deleted. Lucene testing discovered a bug ({JIRA}5919[LUCENE-5919]) when decrementing the ref count on a batch of files. If deleting some of the files resulted in an exception (e.g. due to interference from a virus scanner), the files that had their ref counts decremented successfully could later have their ref counts deleted again, incorrectly, resulting in files being physically deleted before their time. This is fixed in Lucene 4.10. [discrete] === Using Lucene Checksums to verify shards during snapshot/restore (STATUS:DONE, v1.3.3) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/README.asciidoc b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/README.asciidoc index 6608914bc76f..1711b91879a9 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/README.asciidoc +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/README.asciidoc @@ -5,7 +5,7 @@ Test Suite: .Required settings ======================================= Certain tests require specific settings to be applied to the -Elasticsearch instance in order to pass. You should run +Elasticsearch instance in order to pass. You should run Elasticsearch as follows: [source,sh] @@ -65,7 +65,7 @@ order, but individual test sections may be run in any order, as follows: Dot notation: ------------- -Dot notation is used for (1) method calls and (2) hierarchical data structures. For +Dot notation is used for (1) method calls and (2) hierarchical data structures. For instance, a method call like `cluster.health` would do the equivalent of: client.cluster.health(...params...) 
@@ -87,7 +87,7 @@ Skipping tests: If a test section should only be run on certain versions of Elasticsearch, then the first entry in the section (after the title) should be called `skip`, and should contain the range of versions to be -skipped, and the reason why the tests are skipped. For instance: +skipped, and the reason why the tests are skipped. For instance: .... "Parent": @@ -308,7 +308,7 @@ The response from the `do` operator should be stored in the `response` var, whic is reset (1) at the beginning of a file or (2) on the next `do`. If the arguments to `do` include `catch`, then we are expecting an error, which should -be caught and tested. For instance: +be caught and tested. For instance: .... - do: @@ -408,7 +408,7 @@ Looks like: === `set` For some tests, it is necessary to extract a value from the previous `response`, in -order to reuse it in a subsequent `do` and other tests. For instance, when +order to reuse it in a subsequent `do` and other tests. For instance, when testing indexing a document without a specified ID: .... @@ -500,7 +500,7 @@ The specified key doesn't exist or has a false value (ie `0`, `false`, `undefine === `match` -Used to compare two variables (could be scalars, arrays or hashes). The two variables +Used to compare two variables (could be scalars, arrays or hashes). The two variables should be identical, eg: .... diff --git a/x-pack/docs/en/rest-api/security/create-users.asciidoc b/x-pack/docs/en/rest-api/security/create-users.asciidoc index c74e45f787ab..22b6bcaeaa60 100644 --- a/x-pack/docs/en/rest-api/security/create-users.asciidoc +++ b/x-pack/docs/en/rest-api/security/create-users.asciidoc @@ -95,7 +95,7 @@ used in the same request. `roles`:: (Required, list) A set of roles the user has. The roles determine the user's -access permissions. To create a user without any roles, specify an empty list: +access permissions. To create a user without any roles, specify an empty list: `[]`. -- diff --git a/x-pack/docs/en/rest-api/watcher/query-watches.asciidoc b/x-pack/docs/en/rest-api/watcher/query-watches.asciidoc index c46785d360dd..4a469af852b5 100644 --- a/x-pack/docs/en/rest-api/watcher/query-watches.asciidoc +++ b/x-pack/docs/en/rest-api/watcher/query-watches.asciidoc @@ -37,7 +37,7 @@ This API supports the following fields: | `size` | no | 10 | The number of hits to return. Needs to be non-negative. -| `query` | no | null | Optional, <> filter watches to be returned. +| `query` | no | null | Optional, <> filter watches to be returned. | `sort` | no | null | Optional <>. diff --git a/x-pack/docs/en/security/auditing/event-types.asciidoc b/x-pack/docs/en/security/auditing/event-types.asciidoc index b1048f9230c4..dd67f027f7be 100644 --- a/x-pack/docs/en/security/auditing/event-types.asciidoc +++ b/x-pack/docs/en/security/auditing/event-types.asciidoc @@ -180,7 +180,7 @@ show more details about the requesting client: the address of another cluster node, or the local node's bound address, if the request originated locally. Unless the remote client connects directly to the cluster, the - _client address_ will actually be the address of the first + _client address_ will actually be the address of the first OSI layer 3 proxy in front of the cluster. `origin.type` :: The origin type of the request associated with this event: `rest` (request originated from a REST API request), @@ -327,7 +327,7 @@ that have been previously described: `user.name` :: The name of the _effective_ user. 
This is usually the same as the _authenticated_ user, but if using the <> - this instead denotes the name of the _impersonated_ user. + this instead denotes the name of the _impersonated_ user. If authenticated using an API key, this is the name of the API key owner. `user.realm` :: Name of the realm to which the _effective_ user @@ -335,7 +335,7 @@ that have been previously described: the name of the realm to which the API key owner belongs. `user.run_by.name` :: This attribute is present only if the request is using the <> - and denotes the name of the _authenticated_ user, + and denotes the name of the _authenticated_ user, which is also known as the _impersonator_. `user.run_by.realm` :: Name of the realm to which the _authenticated_ (_impersonator_) user belongs. @@ -375,7 +375,7 @@ that have been previously described: `user.name` :: The name of the _effective_ user. This is usually the same as the _authenticated_ user, but if using the <> - this instead denotes the name of the _impersonated_ user. + this instead denotes the name of the _impersonated_ user. If authenticated using an API key, this is the name of the API key owner. `user.realm` :: Name of the realm to which the _effective_ user diff --git a/x-pack/docs/en/security/authentication/configuring-active-directory-realm.asciidoc b/x-pack/docs/en/security/authentication/configuring-active-directory-realm.asciidoc index 57d4c46374df..87b07b48de1a 100644 --- a/x-pack/docs/en/security/authentication/configuring-active-directory-realm.asciidoc +++ b/x-pack/docs/en/security/authentication/configuring-active-directory-realm.asciidoc @@ -89,7 +89,7 @@ Directory servers. + -- The `load_balance.type` setting can be used at the realm level. Two modes of -operation are supported: failover and load balancing. See <>. +operation are supported: failover and load balancing. See <>. -- . (Optional) To protect passwords, diff --git a/x-pack/docs/en/security/authentication/configuring-pki-realm.asciidoc b/x-pack/docs/en/security/authentication/configuring-pki-realm.asciidoc index a6d658109e88..54118027e59a 100644 --- a/x-pack/docs/en/security/authentication/configuring-pki-realm.asciidoc +++ b/x-pack/docs/en/security/authentication/configuring-pki-realm.asciidoc @@ -131,7 +131,7 @@ xpack: ------------------------------------------------------------ If the truststore is password protected, the password should be configured by -adding the appropriate `secure_password` setting to the {es} keystore. For +adding the appropriate `secure_password` setting to the {es} keystore. For example, the following command adds the password for the example realm above: [source, shell] @@ -213,7 +213,7 @@ alternative to role mapping. By default, the PKI realm relies on the node's network interface to perform the SSL/TLS handshake and extract the client certificate. This behaviour requires that clients connect directly to {es} so that their SSL connection is terminated -by the {es} node. If SSL/TLS authentication is to be performed by {kib}, the +by the {es} node. If SSL/TLS authentication is to be performed by {kib}, the PKI realm must be configured to permit delegation. 
Specifically, when clients presenting X.509 certificates connect to {kib}, diff --git a/x-pack/docs/en/security/authentication/ldap-realm.asciidoc b/x-pack/docs/en/security/authentication/ldap-realm.asciidoc index 6e5f437bb675..34c91c6bc13a 100644 --- a/x-pack/docs/en/security/authentication/ldap-realm.asciidoc +++ b/x-pack/docs/en/security/authentication/ldap-realm.asciidoc @@ -60,7 +60,7 @@ This metadata is returned in the <>, and can be used with <> in roles. -Additional fields can be included in the user's metadata by configuring +Additional fields can be included in the user's metadata by configuring the `metadata` setting on the LDAP realm. This metadata is available for use with the <> or in <>. diff --git a/x-pack/docs/en/security/authentication/oidc-guide.asciidoc b/x-pack/docs/en/security/authentication/oidc-guide.asciidoc index 84ac891d079b..09d919b8791d 100644 --- a/x-pack/docs/en/security/authentication/oidc-guide.asciidoc +++ b/x-pack/docs/en/security/authentication/oidc-guide.asciidoc @@ -38,7 +38,7 @@ RP that you commonly need to provide for registration are the following: nor the Elastic Stack implementation impose any constraints on this value. - `Redirect URI`: This is the URI where the OP will redirect the user's browser after authentication. The appropriate value for this will depend on your setup and whether or not {kib} sits behind a proxy or -load balancer. It will typically be +$\{kibana-url}/api/security/oidc/callback+ (for the authorization code flow) or +$\{kibana-url}/api/security/oidc/implicit+ (for the implicit flow) where _$\{kibana-url}_ is the base URL for your {kib} instance. You might also see this +load balancer. It will typically be +$\{kibana-url}/api/security/oidc/callback+ (for the authorization code flow) or +$\{kibana-url}/api/security/oidc/implicit+ (for the implicit flow) where _$\{kibana-url}_ is the base URL for your {kib} instance. You might also see this called `Callback URI`. At the end of the registration process, the OP will assign a Client Identifier and a Client Secret for the RP ({stack}) to use. @@ -68,7 +68,7 @@ For more information, see [[oidc-enable-token]] ==== Enable the token service -The {es} OpenID Connect implementation makes use of the {es} Token Service. This service +The {es} OpenID Connect implementation makes use of the {es} Token Service. This service is automatically enabled if you configure TLS on the HTTP interface, and can be explicitly configured by including the following in your `elasticsearch.yml` file: @@ -157,7 +157,7 @@ op.authorization_endpoint:: op.token_endpoint:: The URL for the Token Endpoint in the OpenID Connect Provider. This is the endpoint where - {es} will send a request to exchange the code for an ID Token. This setting is optional when + {es} will send a request to exchange the code for an ID Token. This setting is optional when you use the implicit flow. The value for this setting should be provided by your OpenID Connect Provider. op.jwkset_path:: diff --git a/x-pack/docs/en/security/authentication/realms.asciidoc b/x-pack/docs/en/security/authentication/realms.asciidoc index 7d77c15317f3..3bf8d4778e7b 100644 --- a/x-pack/docs/en/security/authentication/realms.asciidoc +++ b/x-pack/docs/en/security/authentication/realms.asciidoc @@ -39,7 +39,7 @@ of username and password and is always available. See <>. _saml_:: A realm that facilitates authentication using the SAML 2.0 Web SSO protocol. This realm is designed to support authentication through {kib} and is not -intended for use in the REST API. 
See <>. +intended for use in the REST API. See <>. _kerberos_:: A realm that authenticates a user using Kerberos authentication. Users are diff --git a/x-pack/docs/en/security/authentication/saml-guide.asciidoc b/x-pack/docs/en/security/authentication/saml-guide.asciidoc index bad8536cf9e9..b04d3bc29b90 100644 --- a/x-pack/docs/en/security/authentication/saml-guide.asciidoc +++ b/x-pack/docs/en/security/authentication/saml-guide.asciidoc @@ -14,7 +14,7 @@ performs that actual authentication of users. If you are interested in configuring SSO into {kib}, then you will need to provide {es} with information about your _Identity Provider_, and you will need to register the Elastic Stack as a known _Service Provider_ within that -Identity Provider. There are also a few configuration changes that are +Identity Provider. There are also a few configuration changes that are required in {kib} to activate the SAML authentication provider. NOTE: The SAML support in {kib} is designed on the expectation that it will be @@ -49,7 +49,7 @@ read the metadata document to find it - look for the `entityID` attribute on the `EntityDescriptor` element. Most IdPs will provide an appropriate metadata file with all the features that -the Elastic Stack requires, and should only require the configuration steps +the Elastic Stack requires, and should only require the configuration steps described below. For completeness sake, the minimum requirements that the Elastic Stack has for the IdP's metadata are: @@ -96,7 +96,7 @@ For more information, see [[saml-enable-token]] ==== Enable the token service -The {es} SAML implementation makes use of the {es} Token Service. This service +The {es} SAML implementation makes use of the {es} Token Service. This service is automatically enabled if you configure TLS on the HTTP interface, and can be explicitly configured by including the following in your `elasticsearch.yml` file: @@ -233,7 +233,7 @@ The recommended steps for configuring these SAML attributes are as follows: your IdP. At a _minimum_, the `principal` attribute is required. . Configure your IdP to "release" those attributes to your {kib} SAML service - provider. This process varies by provider - some will provide a user interface + provider. This process varies by provider - some will provide a user interface for this, while others may require that you edit configuration files. Usually the IdP (or your local administrator) will have suggestions about what URI to use for each attribute. You can simply accept those suggestions, as the @@ -265,7 +265,7 @@ additional names that can be used: This uses the SAML `NameID` value, but only if the NameID format is `urn:oasis:names:tc:SAML:2.0:nameid-format:persistent`. A SAML `NameID` element has an optional `Format` attribute that indicates - the semantics of the provided name. It is common for IdPs to be configured + the semantics of the provided name. It is common for IdPs to be configured with "transient" NameIDs that present a new identifier for each session. Since it is rarely useful to use a transient NameID as part of an attribute mapping, the "nameid:persistent" attribute name can be used as a safety @@ -950,7 +950,7 @@ POST /_security/saml/prepare . Handle the response from `/_security/saml/prepare`. The response from {es} will contain 3 parameters: `redirect`, `realm` and `id`. The custom web application would need to store the value for `id` in the user's session (client side in a cookie or server side if session information is -persisted this way). 
It must also redirect the user's browser to the URL that was returned in the +persisted this way). It must also redirect the user's browser to the URL that was returned in the `redirect` parameter. The `id` value should not be disregarded as it is used as a nonce in SAML in order to mitigate against replay attacks. . Handle a subsequent response from the SAML IdP. After the user is successfully authenticated with the diff --git a/x-pack/docs/en/security/authorization/custom-authorization.asciidoc b/x-pack/docs/en/security/authorization/custom-authorization.asciidoc index 7e4dccf9b809..3a6111ee1505 100644 --- a/x-pack/docs/en/security/authorization/custom-authorization.asciidoc +++ b/x-pack/docs/en/security/authorization/custom-authorization.asciidoc @@ -18,7 +18,7 @@ To create a custom roles provider: which are the role names to resolve, and an ActionListener, on which the set of resolved role descriptors are passed on as the response. . The custom roles provider implementation must take special care to not block on any I/O - operations. It is the responsibility of the implementation to ensure asynchronous behavior + operations. It is the responsibility of the implementation to ensure asynchronous behavior and non-blocking calls, which is made easier by the fact that the `ActionListener` is provided on which to send the response when the roles have been resolved and the response is ready. @@ -40,8 +40,8 @@ getRolesProviders(Settings settings, ResourceWatcherService resourceWatcherServi + The `getRolesProviders` method is used to provide a list of custom roles providers that will be used to resolve role names, if the role names could not be resolved by the reserved -roles or native roles stores. The list should be returned in the order that the custom role -providers should be invoked to resolve roles. For example, if `getRolesProviders` returns two +roles or native roles stores. The list should be returned in the order that the custom role +providers should be invoked to resolve roles. For example, if `getRolesProviders` returns two instances of roles providers, and both of them are able to resolve role `A`, then the resolved role descriptor that will be used for role `A` will be the one resolved by the first roles provider in the list. @@ -109,7 +109,7 @@ bin/elasticsearch-plugin install file:////my-extension-plugin-1.0.zip ---------------------------------------- . Add any configuration parameters for implementations in the extension to the -`elasticsearch.yml` file. The settings are not namespaced and you have access to any +`elasticsearch.yml` file. The settings are not namespaced and you have access to any settings when constructing the extensions, although it is recommended to have a namespacing convention for extensions to keep your `elasticsearch.yml` configuration easy to understand. diff --git a/x-pack/docs/en/security/authorization/managing-roles.asciidoc b/x-pack/docs/en/security/authorization/managing-roles.asciidoc index c23a566aacad..101bf4804747 100644 --- a/x-pack/docs/en/security/authorization/managing-roles.asciidoc +++ b/x-pack/docs/en/security/authorization/managing-roles.asciidoc @@ -27,7 +27,7 @@ A role is defined by the following JSON structure: makes authorization decisions based solely on the action being executed. A global privilege also considers the parameters included in the request. Support for global privileges is currently limited to the management of - application privileges. This field is optional. + application privileges. This field is optional. 
<4> A list of indices permissions entries. This field is optional (missing `indices` privileges effectively mean no index level permissions). <5> A list of application privilege entries. This field is optional. @@ -70,7 +70,7 @@ The following describes the structure of an indices permissions entry: **Toggling this flag is most discouraged because it could effectively grant superuser privileges.** If however, for administrative purposes, you need to create a role with privileges covering restricted indices, you must set - this field to `true` (default is `false`), and then the `names` field will + this field to `true` (default is `false`), and then the `names` field will cover the restricted indices as well. [TIP] @@ -186,7 +186,7 @@ TIP: For a complete list of available <>. diff --git a/x-pack/docs/en/security/authorization/privileges.asciidoc b/x-pack/docs/en/security/authorization/privileges.asciidoc index bed41689fc65..0c532de72e7e 100644 --- a/x-pack/docs/en/security/authorization/privileges.asciidoc +++ b/x-pack/docs/en/security/authorization/privileges.asciidoc @@ -171,7 +171,7 @@ All read-only {slm-init} actions, such as getting policies and checking the {slm-init} status. `transport_client`:: -All privileges necessary for a transport client to connect. Required by the remote +All privileges necessary for a transport client to connect. Required by the remote cluster to enable <>. [[privileges-list-indices]] diff --git a/x-pack/docs/en/security/authorization/set-security-user.asciidoc b/x-pack/docs/en/security/authorization/set-security-user.asciidoc index d80134e94d1a..5da5033baad2 100644 --- a/x-pack/docs/en/security/authorization/set-security-user.asciidoc +++ b/x-pack/docs/en/security/authorization/set-security-user.asciidoc @@ -2,7 +2,7 @@ ==== Pre-processing documents to add security details // If an index is shared by many small users it makes sense to put all these users -// into the same index. Having a dedicated index or shard per user is wasteful. +// into the same index. Having a dedicated index or shard per user is wasteful. // TBD: It's unclear why we're putting users in an index here. To guarantee that a user reads only their own documents, it makes sense to set up diff --git a/x-pack/docs/en/security/ccs-clients-integrations/cross-cluster.asciidoc b/x-pack/docs/en/security/ccs-clients-integrations/cross-cluster.asciidoc index ced5c6b5b1a3..55449e6ef918 100644 --- a/x-pack/docs/en/security/ccs-clients-integrations/cross-cluster.asciidoc +++ b/x-pack/docs/en/security/ccs-clients-integrations/cross-cluster.asciidoc @@ -31,7 +31,7 @@ For more information about the `xpack.security.enabled` setting, see <> on every node. * Enable a trust relationship between the cluster used for performing cross - cluster search (the local cluster) and all remote clusters. This can be done + cluster search (the local cluster) and all remote clusters. This can be done either by: + ** Using the same certificate authority to generate certificates for all @@ -41,7 +41,7 @@ For more information about the `xpack.security.enabled` setting, see * On the local cluster, ensure that users are assigned to (at least) one role - that exists on the remote clusters. On the remote clusters, use that role + that exists on the remote clusters. On the remote clusters, use that role to define which indices the user may access. (See <>). 
* Configure the local cluster to connect to remote clusters as described diff --git a/x-pack/docs/en/security/configuring-es.asciidoc b/x-pack/docs/en/security/configuring-es.asciidoc index a39b9e285c6d..7130753f45d3 100644 --- a/x-pack/docs/en/security/configuring-es.asciidoc +++ b/x-pack/docs/en/security/configuring-es.asciidoc @@ -30,7 +30,7 @@ For more information, see <>. -- NOTE: This requirement applies to clusters with more than one node and to clusters with a single node that listens on an external interface. Single-node -clusters that use a loopback interface do not have this requirement. For more +clusters that use a loopback interface do not have this requirement. For more information, see <>. diff --git a/x-pack/docs/en/security/get-started-kibana-users.asciidoc b/x-pack/docs/en/security/get-started-kibana-users.asciidoc index 1c24444c5a2d..b701e77c7d6f 100644 --- a/x-pack/docs/en/security/get-started-kibana-users.asciidoc +++ b/x-pack/docs/en/security/get-started-kibana-users.asciidoc @@ -43,7 +43,7 @@ keystore and add the secure settings: ---------------------------------------------------------------------- When prompted, specify the `kibana_system` built-in user and its password for these -setting values. The settings are automatically applied when you start {kib}. +setting values. The settings are automatically applied when you start {kib}. To learn more, see {kibana-ref}/secure-settings.html[Secure settings]. // end::store-kibana-user[] -- diff --git a/x-pack/docs/en/security/get-started-security.asciidoc b/x-pack/docs/en/security/get-started-security.asciidoc index 3cd3c38966e4..43ba8a30d9d5 100644 --- a/x-pack/docs/en/security/get-started-security.asciidoc +++ b/x-pack/docs/en/security/get-started-security.asciidoc @@ -139,7 +139,7 @@ choose a role yet--we'll come back to that in subsequent steps. -- In {stack-gs}/get-started-elastic-stack.html[Getting started with the {stack}], you configured {ls} to listen for {metricbeat} -input and to send the events to {es}. You therefore need to create a user +input and to send the events to {es}. You therefore need to create a user that {ls} can use to communicate with {es}. For example: [role="screenshot"] @@ -275,7 +275,7 @@ configuration. To learn more, see {logstash-ref}/keystore.html[Secrets keystore for secure settings]. You can now use these `ES_USER` and `ES_PWD` keys in your configuration -file. For example, add the `user` and `password` settings in the +file. For example, add the `user` and `password` settings in the `demo-metrics-pipeline.conf` file as follows: [source,ruby] diff --git a/x-pack/docs/en/security/limitations.asciidoc b/x-pack/docs/en/security/limitations.asciidoc index 69f6db27701c..13caca5567ce 100644 --- a/x-pack/docs/en/security/limitations.asciidoc +++ b/x-pack/docs/en/security/limitations.asciidoc @@ -86,7 +86,7 @@ confidential or sensitive information. === LDAP realm The <> does not currently support the discovery of nested -LDAP Groups. For example, if a user is a member of `group_1` and `group_1` is a +LDAP Groups. For example, if a user is a member of `group_1` and `group_1` is a member of `group_2`, only `group_1` will be discovered. However, the <> *does* support transitive group membership. 
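The get-started-security.asciidoc hunk above stops at its `[source,ruby]` marker, so the pipeline block it introduces is not visible in this patch. As a rough, hedged sketch only (not part of this change), a `demo-metrics-pipeline.conf` output that resolves the `ES_USER` and `ES_PWD` keystore keys could look like the following; the Beats input, port, host, and overall pipeline shape are assumptions rather than the file's actual contents:

[source,ruby]
----
# Minimal sketch of a Logstash pipeline that authenticates to a secured {es}
# cluster using keys stored in the Logstash keystore. "${ES_USER}" and
# "${ES_PWD}" are resolved from the keystore (or environment) at startup.
input {
  beats {
    port => 5044                  # assumed Beats listener port
  }
}
output {
  elasticsearch {
    hosts => ["localhost:9200"]   # assumed single local node
    user => "${ES_USER}"          # keystore key added in the step above
    password => "${ES_PWD}"       # keystore key added in the step above
  }
}
----

Referencing the keystore keys this way keeps the credentials out of the plain-text pipeline file, which is the point of the secrets-keystore step described in that hunk.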
diff --git a/x-pack/docs/en/security/securing-communications/security-basic-setup.asciidoc b/x-pack/docs/en/security/securing-communications/security-basic-setup.asciidoc index c05c1a20fd9a..a78ff8cd11c5 100644 --- a/x-pack/docs/en/security/securing-communications/security-basic-setup.asciidoc +++ b/x-pack/docs/en/security/securing-communications/security-basic-setup.asciidoc @@ -116,7 +116,7 @@ update your cluster to use these files. NOTE: Complete the following steps for each node in your cluster. To join the same cluster, all nodes must share the same `cluster.name` value. -. Open the `ES_PATH_CONF/elasticsearch.yml` file and make the following +. Open the `ES_PATH_CONF/elasticsearch.yml` file and make the following changes: a. Add the `cluster-name` setting and enter a name for your cluster: @@ -147,7 +147,7 @@ xpack.security.transport.ssl.keystore.path: .p12 xpack.security.transport.ssl.truststore.path: .p12 ---- -. If you entered a password when creating the node certificate, run the following commands to store the password in the {es} keystore: +. If you entered a password when creating the node certificate, run the following commands to store the password in the {es} keystore: + -- [source,shell] @@ -161,9 +161,9 @@ xpack.security.transport.ssl.truststore.path: .p12 ---- -- -. Complete the previous steps for each node in your cluster. +. Complete the previous steps for each node in your cluster. -. Restart {es}. The method for <> and <> {es} varies depending on how you installed it. +. Restart {es}. The method for <> and <> {es} varies depending on how you installed it. + For example, if you installed {es} with an archive distribution (`tar.gz` or `.zip`), you can enter `Ctrl+C` on the command line to stop diff --git a/x-pack/docs/en/security/troubleshooting.asciidoc b/x-pack/docs/en/security/troubleshooting.asciidoc index b442146e3754..a4d9b47aace0 100644 --- a/x-pack/docs/en/security/troubleshooting.asciidoc +++ b/x-pack/docs/en/security/troubleshooting.asciidoc @@ -91,7 +91,7 @@ this error. |_group identification_ | Groups are located by either an LDAP search or by the "memberOf" attribute on -the user. Also, If subtree search is turned off, it will search only one +the user. Also, If subtree search is turned off, it will search only one level deep. For all the options, see <>. There are many options here and sticking to the defaults will not work for all scenarios. diff --git a/x-pack/docs/en/watcher/actions/index.asciidoc b/x-pack/docs/en/watcher/actions/index.asciidoc index 78c99a9797cb..da6fd9e7f837 100644 --- a/x-pack/docs/en/watcher/actions/index.asciidoc +++ b/x-pack/docs/en/watcher/actions/index.asciidoc @@ -55,7 +55,7 @@ The following snippet shows a simple `index` action definition: | `timeout` | no | 60s | The timeout for waiting for the index api call to return. If no response is returned within this time, the index action times out and fails. This setting - overrides the default timeouts. + overrides the default timeouts. | `refresh` | no | - | Optional setting of the {ref}/docs-refresh.html[refresh policy] for the write request diff --git a/x-pack/docs/en/watcher/actions/jira.asciidoc b/x-pack/docs/en/watcher/actions/jira.asciidoc index 318eb3c251bd..db686ae55503 100644 --- a/x-pack/docs/en/watcher/actions/jira.asciidoc +++ b/x-pack/docs/en/watcher/actions/jira.asciidoc @@ -5,7 +5,7 @@ Jira action ++++ -Use the `jira` action to create issues in https://www.atlassian.com/software/jira[Atlassian's Jira Software]. 
+Use the `jira` action to create issues in https://www.atlassian.com/software/jira[Atlassian's Jira Software]. To create issues you need to <> in `elasticsearch.yml`. [[configuring-jira-actions]] diff --git a/x-pack/docs/en/watcher/actions/slack.asciidoc b/x-pack/docs/en/watcher/actions/slack.asciidoc index b4f72422529a..df135f3d3fdc 100644 --- a/x-pack/docs/en/watcher/actions/slack.asciidoc +++ b/x-pack/docs/en/watcher/actions/slack.asciidoc @@ -149,7 +149,7 @@ aggregation and the Slack action: |====== | Name |Required | Description -| `message.from` | no | The sender name to display in the Slack message. +| `message.from` | no | The sender name to display in the Slack message. Overrides the incoming webhook's configured name. | `message.to` | yes | The channels and users you want to send the message diff --git a/x-pack/docs/en/watcher/how-watcher-works.asciidoc b/x-pack/docs/en/watcher/how-watcher-works.asciidoc index a82e10458dff..ed6e49b72e9c 100644 --- a/x-pack/docs/en/watcher/how-watcher-works.asciidoc +++ b/x-pack/docs/en/watcher/how-watcher-works.asciidoc @@ -143,7 +143,7 @@ bound to the watcher lifecycle runs. Even though all primaries and replicas are taken into account, when a watch is triggered, watcher also ensures, that each watch is only triggered on one of those shards. The more replica shards you add, the more distributed the watches can be executed. If you add or remove -replicas, all watches need to be reloaded. If a shard is relocated, the +replicas, all watches need to be reloaded. If a shard is relocated, the primary and all replicas of this particular shard will reload. Because the watches are executed on the node, where the watch shards are, you can create diff --git a/x-pack/docs/en/watcher/input/http.asciidoc b/x-pack/docs/en/watcher/input/http.asciidoc index 47edd378ca94..023884dd8e53 100644 --- a/x-pack/docs/en/watcher/input/http.asciidoc +++ b/x-pack/docs/en/watcher/input/http.asciidoc @@ -164,7 +164,7 @@ and restrict the results to documents added within the last five minutes: ==== Accessing the HTTP response -If the response body is formatted in JSON or YAML, it is parsed and loaded into +If the response body is formatted in JSON or YAML, it is parsed and loaded into the execution context. If the response body is not formatted in JSON or YAML, it is loaded into the payload's `_value` field. diff --git a/x-pack/plugin/identity-provider/docs/en/rest-api/idp-saml-init.asciidoc b/x-pack/plugin/identity-provider/docs/en/rest-api/idp-saml-init.asciidoc index fb79af551967..561b5abd8a66 100644 --- a/x-pack/plugin/identity-provider/docs/en/rest-api/idp-saml-init.asciidoc +++ b/x-pack/plugin/identity-provider/docs/en/rest-api/idp-saml-init.asciidoc @@ -103,7 +103,7 @@ code indicating that the authentication request failed and the reason for that f `urn:oasis:names:tc:SAML:2.0:status:Requester` indicates that the error is on the side of the SP or the user, while a `saml_status` of `urn:oasis:names:tc:SAML:2.0:status:Responder` indicates that something went wrong in the IDP side. The `error` field contains a short human friendly interpretation of the error that is outside the SAML standard and is meant to be communicated to the user, especially -if the user is not redirected back the SP with the `saml_response` +if the user is not redirected back the SP with the `saml_response` [source, console-result] --------------------------------------------------------------------