Use flattened names in ignored source (#115822)

* Use flattened names in ignored source

* spotless

* fix rest compat

* fix unittests

* expand dots
This commit is contained in:
Kostas Krikellas 2024-10-29 21:12:43 +02:00 committed by GitHub
parent 853f51fa05
commit 06eb0727c2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 211 additions and 17 deletions

View file

@ -59,6 +59,10 @@ tasks.named("yamlRestCompatTestTransform").configure ({ task ->
task.replaceValueInMatch("profile.shards.0.dfs.knn.0.query.0.description", "DocAndScoreQuery[0,...][0.009673266,...],0.009673266", "dfs knn vector profiling with vector_operations_count") task.replaceValueInMatch("profile.shards.0.dfs.knn.0.query.0.description", "DocAndScoreQuery[0,...][0.009673266,...],0.009673266", "dfs knn vector profiling with vector_operations_count")
task.skipTest("indices.sort/10_basic/Index Sort", "warning does not exist for compatibility") task.skipTest("indices.sort/10_basic/Index Sort", "warning does not exist for compatibility")
task.skipTest("search/330_fetch_fields/Test search rewrite", "warning does not exist for compatibility") task.skipTest("search/330_fetch_fields/Test search rewrite", "warning does not exist for compatibility")
task.skipTest("indices.create/20_synthetic_source/object with dynamic override", "temporary until backported")
task.skipTest("indices.create/20_synthetic_source/object with unmapped fields", "temporary until backported")
task.skipTest("indices.create/20_synthetic_source/empty object with unmapped fields", "temporary until backported")
task.skipTest("indices.create/20_synthetic_source/nested object with unmapped fields", "temporary until backported")
task.skipTest("indices.create/21_synthetic_source_stored/object param - nested object with stored array", "temporary until backported") task.skipTest("indices.create/21_synthetic_source_stored/object param - nested object with stored array", "temporary until backported")
task.skipTest("cat.aliases/10_basic/Deprecated local parameter", "CAT APIs not covered by compatibility policy") task.skipTest("cat.aliases/10_basic/Deprecated local parameter", "CAT APIs not covered by compatibility policy")
}) })

View file

@ -1,6 +1,6 @@
object with unmapped fields: object with unmapped fields:
- requires: - requires:
cluster_features: ["mapper.track_ignored_source"] cluster_features: ["mapper.track_ignored_source", "mapper.bwc_workaround_9_0"]
reason: requires tracking ignored source reason: requires tracking ignored source
- do: - do:
@ -41,13 +41,13 @@ object with unmapped fields:
- match: { hits.hits.0._source.some_string: AaAa } - match: { hits.hits.0._source.some_string: AaAa }
- match: { hits.hits.0._source.some_int: 1000 } - match: { hits.hits.0._source.some_int: 1000 }
- match: { hits.hits.0._source.some_double: 123.456789 } - match: { hits.hits.0._source.some_double: 123.456789 }
- match: { hits.hits.0._source.a.very.deeply.nested.field: AAAA } - match: { hits.hits.0._source.a: { very.deeply.nested.field: AAAA } }
- match: { hits.hits.0._source.some_bool: true } - match: { hits.hits.0._source.some_bool: true }
- match: { hits.hits.1._source.name: bbbb } - match: { hits.hits.1._source.name: bbbb }
- match: { hits.hits.1._source.some_string: BbBb } - match: { hits.hits.1._source.some_string: BbBb }
- match: { hits.hits.1._source.some_int: 2000 } - match: { hits.hits.1._source.some_int: 2000 }
- match: { hits.hits.1._source.some_double: 321.987654 } - match: { hits.hits.1._source.some_double: 321.987654 }
- match: { hits.hits.1._source.a.very.deeply.nested.field: BBBB } - match: { hits.hits.1._source.a: { very.deeply.nested.field: BBBB } }
--- ---
@ -100,7 +100,7 @@ unmapped arrays:
--- ---
nested object with unmapped fields: nested object with unmapped fields:
- requires: - requires:
cluster_features: ["mapper.track_ignored_source"] cluster_features: ["mapper.track_ignored_source", "mapper.bwc_workaround_9_0"]
reason: requires tracking ignored source reason: requires tracking ignored source
- do: - do:
@ -143,16 +143,16 @@ nested object with unmapped fields:
- match: { hits.total.value: 2 } - match: { hits.total.value: 2 }
- match: { hits.hits.0._source.path.to.name: aaaa } - match: { hits.hits.0._source.path.to.name: aaaa }
- match: { hits.hits.0._source.path.to.surname: AaAa } - match: { hits.hits.0._source.path.to.surname: AaAa }
- match: { hits.hits.0._source.path.some.other.name: AaAaAa } - match: { hits.hits.0._source.path.some.other\.name: AaAaAa }
- match: { hits.hits.1._source.path.to.name: bbbb } - match: { hits.hits.1._source.path.to.name: bbbb }
- match: { hits.hits.1._source.path.to.surname: BbBb } - match: { hits.hits.1._source.path.to.surname: BbBb }
- match: { hits.hits.1._source.path.some.other.name: BbBbBb } - match: { hits.hits.1._source.path.some.other\.name: BbBbBb }
--- ---
empty object with unmapped fields: empty object with unmapped fields:
- requires: - requires:
cluster_features: ["mapper.track_ignored_source"] cluster_features: ["mapper.track_ignored_source", "mapper.bwc_workaround_9_0"]
reason: requires tracking ignored source reason: requires tracking ignored source
- do: - do:
@ -191,7 +191,7 @@ empty object with unmapped fields:
- match: { hits.total.value: 1 } - match: { hits.total.value: 1 }
- match: { hits.hits.0._source.path.to.surname: AaAa } - match: { hits.hits.0._source.path.to.surname: AaAa }
- match: { hits.hits.0._source.path.some.other.name: AaAaAa } - match: { hits.hits.0._source.path.some.other\.name: AaAaAa }
--- ---
@ -434,7 +434,7 @@ mixed disabled and enabled objects:
--- ---
object with dynamic override: object with dynamic override:
- requires: - requires:
cluster_features: ["mapper.ignored_source.dont_expand_dots"] cluster_features: ["mapper.ignored_source.dont_expand_dots", "mapper.bwc_workaround_9_0"]
reason: requires tracking ignored source reason: requires tracking ignored source
- do: - do:
@ -475,7 +475,7 @@ object with dynamic override:
- match: { hits.hits.0._source.path_no.to: { a.very.deeply.nested.field: A } } - match: { hits.hits.0._source.path_no.to: { a.very.deeply.nested.field: A } }
- match: { hits.hits.0._source.path_runtime.name: bar } - match: { hits.hits.0._source.path_runtime.name: bar }
- match: { hits.hits.0._source.path_runtime.some_int: 20 } - match: { hits.hits.0._source.path_runtime.some_int: 20 }
- match: { hits.hits.0._source.path_runtime.to.a.very.deeply.nested.field: B } - match: { hits.hits.0._source.path_runtime.to: { a.very.deeply.nested.field: B } }
--- ---

View file

@ -1249,3 +1249,52 @@ index param - nested object with stored array:
- match: { hits.hits.1._source.nested.0.b.1.c: 300 } - match: { hits.hits.1._source.nested.0.b.1.c: 300 }
- match: { hits.hits.1._source.nested.1.b.0.c: 40 } - match: { hits.hits.1._source.nested.1.b.0.c: 40 }
- match: { hits.hits.1._source.nested.1.b.1.c: 400 } - match: { hits.hits.1._source.nested.1.b.1.c: 400 }
---
index param - flattened fields:
- requires:
cluster_features: ["mapper.synthetic_source_keep", "mapper.bwc_workaround_9_0"]
reason: requires keeping array source
- do:
indices.create:
index: test
body:
settings:
index:
mapping:
synthetic_source_keep: arrays
mappings:
_source:
mode: synthetic
properties:
name:
type: keyword
outer:
properties:
inner:
type: object
- do:
bulk:
index: test
refresh: true
body:
- '{ "create": { } }'
- '{ "name": "A", "outer": { "inner": [ { "a.b": "AA", "a.c": "AAA" } ] } }'
- '{ "create": { } }'
- '{ "name": "B", "outer": { "inner": [ { "a.x.y.z": "BB", "a.z.y.x": "BBB" } ] } }'
- match: { errors: false }
- do:
search:
index: test
sort: name
- match: { hits.total.value: 2 }
- match: { hits.hits.0._source.name: A }
- match: { hits.hits.0._source.outer.inner: [{ a.b: AA, a.c: AAA }] }
- match: { hits.hits.1._source.name: B }
- match: { hits.hits.1._source.outer.inner: [{ a.x.y.z: BB, a.z.y.x: BBB }] }

View file

@ -528,11 +528,7 @@ public abstract class DocumentParserContext {
if (canAddIgnoredField()) { if (canAddIgnoredField()) {
try { try {
addIgnoredField( addIgnoredField(
IgnoredSourceFieldMapper.NameValue.fromContext( IgnoredSourceFieldMapper.NameValue.fromContext(this, mapper.fullPath(), encodeFlattenedToken())
this,
mapper.fullPath(),
XContentDataHelper.encodeToken(parser())
)
); );
} catch (IOException e) { } catch (IOException e) {
throw new IllegalArgumentException("failed to parse field [" + mapper.fullPath() + " ]", e); throw new IllegalArgumentException("failed to parse field [" + mapper.fullPath() + " ]", e);

View file

@ -34,6 +34,10 @@ import java.util.function.Supplier;
*/ */
class DotExpandingXContentParser extends FilterXContentParserWrapper { class DotExpandingXContentParser extends FilterXContentParserWrapper {
static boolean isInstance(XContentParser parser) {
return parser instanceof WrappingParser;
}
private static final class WrappingParser extends FilterXContentParser { private static final class WrappingParser extends FilterXContentParser {
private final ContentPath contentPath; private final ContentPath contentPath;

View file

@ -221,8 +221,11 @@ public final class XContentDataHelper {
private static Tuple<XContentParserConfiguration, XContentBuilder> cloneSubContextParserConfiguration(DocumentParserContext context) private static Tuple<XContentParserConfiguration, XContentBuilder> cloneSubContextParserConfiguration(DocumentParserContext context)
throws IOException { throws IOException {
XContentParser parser = context.parser(); XContentParser parser = context.parser();
var oldValue = context.path().isWithinLeafObject();
context.path().setWithinLeafObject(true);
XContentBuilder builder = XContentBuilder.builder(parser.contentType().xContent()); XContentBuilder builder = XContentBuilder.builder(parser.contentType().xContent());
builder.copyCurrentStructure(parser); builder.copyCurrentStructure(parser);
context.path().setWithinLeafObject(oldValue);
XContentParserConfiguration configuration = XContentParserConfiguration.EMPTY.withRegistry(parser.getXContentRegistry()) XContentParserConfiguration configuration = XContentParserConfiguration.EMPTY.withRegistry(parser.getXContentRegistry())
.withDeprecationHandler(parser.getDeprecationHandler()) .withDeprecationHandler(parser.getDeprecationHandler())
@ -235,9 +238,17 @@ public final class XContentDataHelper {
XContentParserConfiguration configuration, XContentParserConfiguration configuration,
XContentBuilder builder XContentBuilder builder
) throws IOException { ) throws IOException {
DocumentParserContext subcontext = context.switchParser( XContentParser newParser = XContentHelper.createParserNotCompressed(
XContentHelper.createParserNotCompressed(configuration, BytesReference.bytes(builder), context.parser().contentType()) configuration,
BytesReference.bytes(builder),
context.parser().contentType()
); );
if (DotExpandingXContentParser.isInstance(context.parser())) {
// If we performed dot expanding originally we need to continue to do so when we replace the parser.
newParser = DotExpandingXContentParser.expandDots(newParser, context.path());
}
DocumentParserContext subcontext = context.switchParser(newParser);
subcontext.setRecordedSource(); // Avoids double-storing parts of the source for the same parser subtree. subcontext.setRecordedSource(); // Avoids double-storing parts of the source for the same parser subtree.
subcontext.parser().nextToken(); subcontext.parser().nextToken();
return subcontext; return subcontext;

View file

@ -2075,6 +2075,136 @@ public class IgnoredSourceFieldMapperTests extends MapperServiceTestCase {
{"top":[{"file.name":"A","file.line":10},{"file.name":"B","file.line":20}]}""", syntheticSourceWithArray); {"top":[{"file.name":"A","file.line":10},{"file.name":"B","file.line":20}]}""", syntheticSourceWithArray);
} }
public void testRegularObjectWithFlatFields() throws IOException {
DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> {
b.startObject("top").field("type", "object").field("synthetic_source_keep", "all").endObject();
})).documentMapper();
CheckedConsumer<XContentBuilder, IOException> document = b -> {
b.startObject("top");
b.field("file.name", "A");
b.field("file.line", 10);
b.endObject();
};
var syntheticSource = syntheticSource(documentMapper, document);
assertEquals("{\"top\":{\"file.name\":\"A\",\"file.line\":10}}", syntheticSource);
CheckedConsumer<XContentBuilder, IOException> documentWithArray = b -> {
b.startArray("top");
b.startObject();
b.field("file.name", "A");
b.field("file.line", 10);
b.endObject();
b.startObject();
b.field("file.name", "B");
b.field("file.line", 20);
b.endObject();
b.endArray();
};
var syntheticSourceWithArray = syntheticSource(documentMapper, documentWithArray);
assertEquals("""
{"top":[{"file.name":"A","file.line":10},{"file.name":"B","file.line":20}]}""", syntheticSourceWithArray);
}
public void testRegularObjectWithFlatFieldsInsideAnArray() throws IOException {
DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> {
b.startObject("top");
b.startObject("properties");
{
b.startObject("inner").field("type", "object").field("synthetic_source_keep", "all").endObject();
}
b.endObject();
b.endObject();
})).documentMapper();
CheckedConsumer<XContentBuilder, IOException> document = b -> {
b.startArray("top");
b.startObject();
{
b.startObject("inner");
b.field("file.name", "A");
b.field("file.line", 10);
b.endObject();
}
b.endObject();
b.endArray();
};
var syntheticSource = syntheticSource(documentMapper, document);
assertEquals("{\"top\":{\"inner\":{\"file.name\":\"A\",\"file.line\":10}}}", syntheticSource);
CheckedConsumer<XContentBuilder, IOException> documentWithArray = b -> {
b.startArray("top");
b.startObject();
{
b.startObject("inner");
b.field("file.name", "A");
b.field("file.line", 10);
b.endObject();
}
b.endObject();
b.startObject();
{
b.startObject("inner");
b.field("file.name", "B");
b.field("file.line", 20);
b.endObject();
}
b.endObject();
b.endArray();
};
var syntheticSourceWithArray = syntheticSource(documentMapper, documentWithArray);
assertEquals("""
{"top":{"inner":[{"file.name":"A","file.line":10},{"file.name":"B","file.line":20}]}}""", syntheticSourceWithArray);
}
public void testIgnoredDynamicObjectWithFlatFields() throws IOException {
var syntheticSource = getSyntheticSourceWithFieldLimit(b -> {
b.startObject("top");
b.field("file.name", "A");
b.field("file.line", 10);
b.endObject();
});
assertEquals("{\"top\":{\"file.name\":\"A\",\"file.line\":10}}", syntheticSource);
var syntheticSourceWithArray = getSyntheticSourceWithFieldLimit(b -> {
b.startArray("top");
b.startObject();
b.field("file.name", "A");
b.field("file.line", 10);
b.endObject();
b.startObject();
b.field("file.name", "B");
b.field("file.line", 20);
b.endObject();
b.endArray();
});
assertEquals("""
{"top":[{"file.name":"A","file.line":10},{"file.name":"B","file.line":20}]}""", syntheticSourceWithArray);
}
public void testStoredArrayWithFlatFields() throws IOException {
DocumentMapper documentMapper = createMapperServiceWithStoredArraySource(syntheticSourceMapping(b -> {
b.startObject("outer").startObject("properties");
{
b.startObject("inner").field("type", "object").endObject();
}
b.endObject().endObject();
})).documentMapper();
var syntheticSource = syntheticSource(documentMapper, b -> {
b.startObject("outer").startArray("inner");
{
b.startObject().field("a.b", "a.b").field("a.c", "a.c").endObject();
}
b.endArray().endObject();
});
assertEquals("""
{"outer":{"inner":[{"a.b":"a.b","a.c":"a.c"}]}}""", syntheticSource);
}
protected void validateRoundTripReader(String syntheticSource, DirectoryReader reader, DirectoryReader roundTripReader) protected void validateRoundTripReader(String syntheticSource, DirectoryReader reader, DirectoryReader roundTripReader)
throws IOException { throws IOException {
// We exclude ignored source field since in some cases it contains an exact copy of a part of document source. // We exclude ignored source field since in some cases it contains an exact copy of a part of document source.