From 32d2f60f8aec4d2a2291c67a508c9796cb74bf82 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Tue, 10 Aug 2021 14:07:53 +0200 Subject: [PATCH] Emit multiple fields from a runtime field script (#75108) We have recently introduced support for grok and dissect to the runtime fields Painless context, which allows splitting a field into multiple fields. However, each runtime field can only emit values for a single field. This commit introduces support for emitting multiple fields from the same script. The API call to define a runtime field that emits multiple fields is the following: ``` PUT localhost:9200/logs/_mappings { "runtime" : { "log" : { "type" : "composite", "script" : "emit(grok(\"%{COMMONAPACHELOG}\").extract(doc[\"message.keyword\"].value))", "fields" : { "clientip" : { "type" : "ip" }, "response" : { "type" : "long" } } } } } ``` The script context for this new field type accepts two emit signatures: * `emit(String, Object)` * `emit(Map)` Sub-fields need to be declared under `fields` in order to be discoverable through the field_caps API and accessible through the search API. A composite runtime field emits multiple fields by returning multiple MappedFieldTypes from RuntimeField#asMappedFieldTypes. The sub-fields are instances of the runtime fields that are already supported, with a little tweak to adapt the script defined by their parent to an artificial script factory for each of the sub-fields that makes its corresponding sub-field accessible. This approach allows reusing all of the existing runtime fields code for the sub-fields. The runtime section has been flat so far as it has not supported objects until now. That stays the same, meaning that runtime fields can have dots in their names. 
However, because the ability to emit multiple fields introduces two ways to create the same field, we have to make sure that a runtime field with a certain name cannot be defined twice, which is why the following mappings are rejected with the error `Found two runtime fields with same name [log.response]`: ``` PUT localhost:9200/logs/_mappings { "runtime" : { "log.response" : { "type" : "keyword" }, "log" : { "type" : "composite", "script" : "emit(\"response\", grok(\"%{COMMONAPACHELOG}\").extract(doc[\"message.keyword\"].value)?.response)", "fields" : { "response" : { "type" : "long" } } } } } ``` Closes #68203 --- .../action/PainlessExecuteAction.java | 11 +- ...g.elasticsearch.script.composite_field.txt | 21 + .../action/PainlessExecuteApiTests.java | 14 + .../test/runtime_fields/110_composite.yml | 104 +++++ .../index/mapper/AbstractScriptFieldType.java | 44 +- .../index/mapper/BooleanScriptFieldType.java | 15 +- .../index/mapper/CompositeRuntimeField.java | 134 ++++++ .../index/mapper/DateScriptFieldType.java | 18 +- .../index/mapper/DoubleScriptFieldType.java | 14 +- .../index/mapper/DynamicFieldsBuilder.java | 4 +- .../index/mapper/FieldMapper.java | 2 +- .../index/mapper/GeoPointScriptFieldType.java | 15 +- .../index/mapper/IpScriptFieldType.java | 14 +- .../index/mapper/KeywordScriptFieldType.java | 14 +- .../index/mapper/LeafRuntimeField.java | 15 +- .../index/mapper/LongScriptFieldType.java | 14 +- .../index/mapper/MappingLookup.java | 4 +- .../index/mapper/RootObjectMapper.java | 7 +- .../index/mapper/RuntimeField.java | 61 ++- .../index/query/SearchExecutionContext.java | 8 +- .../elasticsearch/indices/IndicesModule.java | 16 +- .../script/AbstractFieldScript.java | 12 +- .../script/BooleanFieldScript.java | 21 + .../script/CompositeFieldScript.java | 116 +++++ .../elasticsearch/script/DateFieldScript.java | 21 + .../script/DoubleFieldScript.java | 21 + .../script/GeoPointFieldScript.java | 21 + 
.../elasticsearch/script/IpFieldScript.java | 21 + .../elasticsearch/script/LongFieldScript.java | 21 + .../elasticsearch/script/ScriptModule.java | 11 +- .../script/StringFieldScript.java | 21 + .../mapper/CompositeRuntimeFieldTests.java | 438 ++++++++++++++++++ .../index/mapper/DocumentParserTests.java | 16 +- .../index/mapper/TestRuntimeField.java | 5 +- .../script/MockScriptEngine.java | 9 + 35 files changed, 1237 insertions(+), 66 deletions(-) create mode 100644 modules/lang-painless/src/main/resources/org/elasticsearch/painless/org.elasticsearch.script.composite_field.txt create mode 100644 modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/110_composite.yml create mode 100644 server/src/main/java/org/elasticsearch/index/mapper/CompositeRuntimeField.java create mode 100644 server/src/main/java/org/elasticsearch/script/CompositeFieldScript.java create mode 100644 server/src/test/java/org/elasticsearch/index/mapper/CompositeRuntimeFieldTests.java diff --git a/modules/lang-painless/src/main/java/org/elasticsearch/painless/action/PainlessExecuteAction.java b/modules/lang-painless/src/main/java/org/elasticsearch/painless/action/PainlessExecuteAction.java index 7b72b8cab142..6899d58b17cf 100644 --- a/modules/lang-painless/src/main/java/org/elasticsearch/painless/action/PainlessExecuteAction.java +++ b/modules/lang-painless/src/main/java/org/elasticsearch/painless/action/PainlessExecuteAction.java @@ -68,6 +68,7 @@ import org.elasticsearch.rest.BaseRestHandler; import org.elasticsearch.rest.RestRequest; import org.elasticsearch.rest.action.RestToXContentListener; import org.elasticsearch.script.BooleanFieldScript; +import org.elasticsearch.script.CompositeFieldScript; import org.elasticsearch.script.DateFieldScript; import org.elasticsearch.script.DocValuesDocReader; import org.elasticsearch.script.DoubleFieldScript; @@ -624,12 +625,20 @@ public class PainlessExecuteAction extends ActionType { StringFieldScript.Factory factory 
= scriptService.compile(request.script, StringFieldScript.CONTEXT); StringFieldScript.LeafFactory leafFactory = - factory.newFactory(StringFieldScript.CONTEXT.name, request.getScript().getParams(), context.lookup()); + factory.newFactory(StringFieldScript.CONTEXT.name, request.getScript().getParams(), context.lookup()); StringFieldScript stringFieldScript = leafFactory.newInstance(leafReaderContext); List keywords = new ArrayList<>(); stringFieldScript.runForDoc(0, keywords::add); return new Response(keywords); }, indexService); + } else if (scriptContext == CompositeFieldScript.CONTEXT) { + return prepareRamIndex(request, (context, leafReaderContext) -> { + CompositeFieldScript.Factory factory = scriptService.compile(request.script, CompositeFieldScript.CONTEXT); + CompositeFieldScript.LeafFactory leafFactory = + factory.newFactory(CompositeFieldScript.CONTEXT.name, request.getScript().getParams(), context.lookup()); + CompositeFieldScript compositeFieldScript = leafFactory.newInstance(leafReaderContext); + return new Response(compositeFieldScript.runForDoc(0)); + }, indexService); } else { throw new UnsupportedOperationException("unsupported context [" + scriptContext.name + "]"); } diff --git a/modules/lang-painless/src/main/resources/org/elasticsearch/painless/org.elasticsearch.script.composite_field.txt b/modules/lang-painless/src/main/resources/org/elasticsearch/painless/org.elasticsearch.script.composite_field.txt new file mode 100644 index 000000000000..b5c499abb877 --- /dev/null +++ b/modules/lang-painless/src/main/resources/org/elasticsearch/painless/org.elasticsearch.script.composite_field.txt @@ -0,0 +1,21 @@ +# +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License +# 2.0 and the Server Side Public License, v 1; you may not use this file except +# in compliance with, at your election, the Elastic License 2.0 or the Server +# Side Public License, v 1. 
+# + +# The whitelist for composite runtime fields + +# These two whitelists are required for painless to find the classes +class org.elasticsearch.script.CompositeFieldScript @no_import { +} +class org.elasticsearch.script.CompositeFieldScript$Factory @no_import { +} + +static_import { + # The `emit` callback to collect values for the fields + void emit(org.elasticsearch.script.CompositeFieldScript, String, Object) bound_to org.elasticsearch.script.CompositeFieldScript$EmitField + void emit(org.elasticsearch.script.CompositeFieldScript, Map) bound_to org.elasticsearch.script.CompositeFieldScript$EmitMap +} diff --git a/modules/lang-painless/src/test/java/org/elasticsearch/painless/action/PainlessExecuteApiTests.java b/modules/lang-painless/src/test/java/org/elasticsearch/painless/action/PainlessExecuteApiTests.java index a91cd4858dff..1ea75f511a1d 100644 --- a/modules/lang-painless/src/test/java/org/elasticsearch/painless/action/PainlessExecuteApiTests.java +++ b/modules/lang-painless/src/test/java/org/elasticsearch/painless/action/PainlessExecuteApiTests.java @@ -288,6 +288,20 @@ public class PainlessExecuteApiTests extends ESSingleNodeTestCase { assertEquals(Arrays.asList("test", "baz was not here", "Data", "-10", "20", "9"), response.getResult()); } + public void testCompositeExecutionContext() throws IOException { + ScriptService scriptService = getInstanceFromNode(ScriptService.class); + IndexService indexService = createIndex("index", Settings.EMPTY, "doc", "rank", "type=long", "text", "type=keyword"); + + Request.ContextSetup contextSetup = new Request.ContextSetup("index", new BytesArray("{}"), new MatchAllQueryBuilder()); + contextSetup.setXContentType(XContentType.JSON); + Request request = new Request(new Script(ScriptType.INLINE, "painless", + "emit(\"foo\", \"bar\"); emit(\"foo2\", 2);", emptyMap()), "composite_field", contextSetup); + Response response = innerShardOperation(request, scriptService, indexService); + assertEquals(Map.of( + 
"composite_field.foo", List.of("bar"), + "composite_field.foo2", List.of(2)), response.getResult()); + } + public void testContextWhitelists() throws IOException { ScriptService scriptService = getInstanceFromNode(ScriptService.class); // score diff --git a/modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/110_composite.yml b/modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/110_composite.yml new file mode 100644 index 000000000000..2c80545050bd --- /dev/null +++ b/modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/110_composite.yml @@ -0,0 +1,104 @@ +--- +setup: + - do: + indices.create: + index: http_logs + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + runtime: + http: + type: composite + script: + source: | + emit(grok('%{COMMONAPACHELOG}').extract(doc["message"].value)); + fields: + clientip: + type: ip + verb: + type: keyword + response: + type: long + properties: + timestamp: + type: date + message: + type: keyword + - do: + bulk: + index: http_logs + refresh: true + body: | + {"index":{}} + {"timestamp": "1998-04-30T14:30:17-05:00", "message" : "40.135.0.0 - - [30/Apr/1998:14:30:17 -0500] \"GET /images/hm_bg.jpg HTTP/1.0\" 200 24736"} + {"index":{}} + {"timestamp": "1998-04-30T14:30:53-05:00", "message" : "232.0.0.0 - - [30/Apr/1998:14:30:53 -0500] \"GET /images/hm_bg.jpg HTTP/1.0\" 200 24736"} + {"index":{}} + {"timestamp": "1998-04-30T14:31:12-05:00", "message" : "26.1.0.0 - - [30/Apr/1998:14:31:12 -0500] \"GET /images/hm_bg.jpg HTTP/1.0\" 200 24736"} + {"index":{}} + {"timestamp": "1998-04-30T14:31:19-05:00", "message" : "247.37.0.0 - - [30/Apr/1998:14:31:19 -0500] \"GET /french/splash_inet.html HTTP/1.0\" 200 3781"} + {"index":{}} + {"timestamp": "1998-04-30T14:31:22-05:00", "message" : "247.37.0.0 - - [30/Apr/1998:14:31:22 -0500] \"GET /images/hm_nbg.jpg HTTP/1.0\" 304 0"} + {"index":{}} + 
{"timestamp": "1998-04-30T14:31:27-05:00", "message" : "252.0.0.0 - - [30/Apr/1998:14:31:27 -0500] \"GET /images/hm_bg.jpg HTTP/1.0\" 200 24736"} + {"index":{}} + {"timestamp": "1998-04-30T14:31:28-05:00", "message" : "not a valid apache log"} + +--- +fetch: + - do: + search: + index: http_logs + body: + sort: timestamp + fields: + - http.clientip + - http.verb + - http.response + - match: {hits.total.value: 7} + - match: {hits.hits.0.fields.http\.clientip: [40.135.0.0] } + - match: {hits.hits.0.fields.http\.verb: [GET] } + - match: {hits.hits.0.fields.http\.response: [200] } + - is_false: hits.hits.6.fields.http\.clientip + - is_false: hits.hits.6.fields.http\.verb + - is_false: hits.hits.6.fields.http\.response + +--- +query: + - do: + search: + index: http_logs + body: + query: + term: + http.verb: GET + - match: { hits.total.value: 6 } + + - do: + search: + index: http_logs + body: + query: + range: + http.clientip: + from: 232.0.0.0 + to: 253.0.0.0 + - match: { hits.total.value: 4 } + +--- +"terms agg": + - do: + search: + index: http_logs + body: + aggs: + response: + terms: + field: http.response + - match: {hits.total.value: 7} + - match: {aggregations.response.buckets.0.key: 200 } + - match: {aggregations.response.buckets.0.doc_count: 5 } + - match: {aggregations.response.buckets.1.key: 304 } + - match: {aggregations.response.buckets.1.doc_count: 1 } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/AbstractScriptFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/AbstractScriptFieldType.java index 4bd34abceb59..35146ebca63e 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/AbstractScriptFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/AbstractScriptFieldType.java @@ -18,9 +18,9 @@ import org.elasticsearch.common.geo.ShapeRelation; import org.elasticsearch.common.time.DateMathParser; import org.elasticsearch.common.unit.Fuzziness; import 
org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.script.CompositeFieldScript; import org.elasticsearch.script.Script; import org.elasticsearch.script.ScriptContext; -import org.elasticsearch.script.ScriptType; import org.elasticsearch.search.lookup.SearchLookup; import java.time.ZoneId; @@ -194,31 +194,49 @@ abstract class AbstractScriptFieldType extends MappedFieldType { abstract static class Builder extends RuntimeField.Builder { private final ScriptContext scriptContext; - private final Factory parseFromSourceFactory; final FieldMapper.Parameter