elasticsearch/rest-api-spec/build.gradle
Martijn van Groningen 43665f0a35
Store arrays offsets for keyword fields natively with synthetic source (#113757)
The keyword doc values field gets an extra sorted doc values field, that encodes the order of how array values were specified at index time. This also captures duplicate values. This is stored in an offset to ordinal array that gets zigzag vint encoded into a sorted doc values field.

For example, in case of the following string array for a keyword field: ["c", "b", "a", "c"].
Sorted set doc values: ["a", "b", "c"] with ordinals: 0, 1 and 2. The offset array will be: [2, 1, 0, 2]

Null values are also supported. For example ["c", "b", null, "c"] results into sorted set doc values: ["b", "c"] with ordinals: 0 and 1. The offset array will be: [1, 0, -1, 1]

Empty arrays are also supported by encoding a zigzag vint array of zero elements.

Limitations:

currently only doc values based array support for keyword field mapper.
multi level leaf arrays are flattened. For example: [[b], [c]] -> [b, c]
arrays are always synthesized as one type. In case of keyword field, [1, 2] gets synthesized as ["1", "2"].
These limitations can be addressed, but some require more complexity and or additional storage.

With this PR, keyword field array will no longer be stored in ignored source, but array offsets are kept track of in an adjacent sorted doc value field. This only applies if index.mapping.synthetic_source_keep is set to arrays (default for logsdb).
2025-02-20 09:20:49 +01:00

90 lines
5.4 KiB
Groovy

apply plugin: 'elasticsearch.build'
apply plugin: 'elasticsearch.publish'
apply plugin: 'elasticsearch.rest-resources'
apply plugin: 'elasticsearch.validate-rest-spec'
apply plugin: 'elasticsearch.internal-yaml-rest-test'
apply plugin: 'elasticsearch.yaml-rest-compat-test'
apply plugin: 'elasticsearch.internal-test-artifact'
restResources {
restTests {
includeCore '*'
}
}
// REST API specifications are published under the Apache 2.0 License
ext.projectLicenses.set(['The Apache Software License, Version 2.0': providers.provider(() -> 'http://www.apache.org/licenses/LICENSE-2.0')])
licenseFile.set(rootProject.file('licenses/APACHE-LICENSE-2.0.txt'))
configurations {
// configuration to make use by external yaml rest test plugin in our examples
// easy and efficient
basicRestSpecs {
attributes {
attribute(ArtifactTypeDefinition.ARTIFACT_TYPE_ATTRIBUTE, ArtifactTypeDefinition.DIRECTORY_TYPE)
}
}
}
artifacts {
basicRestSpecs(new File(projectDir, "src/main/resources"))
restSpecs(new File(projectDir, "src/main/resources/rest-api-spec/api"))
restTests(new File(projectDir, "src/yamlRestTest/resources/rest-api-spec/test"))
}
dependencies {
clusterModules project(":modules:mapper-extras")
clusterModules project(":modules:rest-root")
clusterModules project(":modules:reindex")
clusterModules project(':modules:analysis-common')
clusterModules project(':modules:health-shards-availability')
clusterModules project(':modules:data-streams')
}
tasks.register('enforceYamlTestConvention').configure {
def tree = fileTree('src/main/resources/rest-api-spec/test')
doLast {
if (tree.files) {
throw new GradleException("There are YAML tests in src/main source set. These should be moved to src/yamlRestTest.")
}
}
}
tasks.named("precommit").configure {
dependsOn 'enforceYamlTestConvention'
}
tasks.named("yamlRestCompatTestTransform").configure ({ task ->
task.replaceValueInMatch("profile.shards.0.dfs.knn.0.query.0.description", "DocAndScoreQuery[0,...][0.009673266,...],0.009673266", "dfs knn vector profiling")
task.replaceValueInMatch("profile.shards.0.dfs.knn.0.query.0.description", "DocAndScoreQuery[0,...][0.009673266,...],0.009673266", "dfs knn vector profiling with vector_operations_count")
task.replaceValueInMatch("profile.shards.0.dfs.knn.0.collector.0.name", "TopScoreDocCollector", "dfs knn vector profiling")
task.replaceValueInMatch("profile.shards.0.dfs.knn.0.collector.0.name", "TopScoreDocCollector", "dfs knn vector profiling with vector_operations_count")
task.skipTest("cat.aliases/10_basic/Deprecated local parameter", "CAT APIs not covered by compatibility policy")
task.skipTest("cat.shards/10_basic/Help", "sync_id is removed in 9.0")
task.skipTest("search/500_date_range/from, to, include_lower, include_upper deprecated", "deprecated parameters are removed in 9.0")
task.skipTest("search.highlight/30_max_analyzed_offset/Plain highlighter with max_analyzed_offset < 0 should FAIL", "semantics of test has changed")
task.skipTest("range/20_synthetic_source/Double range", "_source.mode mapping attribute is no-op since 9.0.0")
task.skipTest("range/20_synthetic_source/Float range", "_source.mode mapping attribute is no-op since 9.0.0")
task.skipTest("range/20_synthetic_source/Integer range", "_source.mode mapping attribute is no-op since 9.0.0")
task.skipTest("range/20_synthetic_source/IP range", "_source.mode mapping attribute is no-op since 9.0.0")
task.skipTest("range/20_synthetic_source/Long range", "_source.mode mapping attribute is no-op since 9.0.0")
task.skipTest("range/20_synthetic_source/Date range Rounding Fixes", "_source.mode mapping attribute is no-op since 9.0.0")
task.skipTest("index/92_metrics_auto_subobjects/Metrics object indexing with synthetic source", "_source.mode mapping attribute is no-op since 9.0.0")
task.skipTest("index/92_metrics_auto_subobjects/Root without subobjects with synthetic source", "_source.mode mapping attribute is no-op since 9.0.0")
task.skipTest("index/91_metrics_no_subobjects/Metrics object indexing with synthetic source", "_source.mode mapping attribute is no-op since 9.0.0")
task.skipTest("index/91_metrics_no_subobjects/Root without subobjects with synthetic source", "_source.mode mapping attribute is no-op since 9.0.0")
task.skipTest("indices.create/20_synthetic_source/synthetic_source with copy_to inside nested object", "temporary until backported")
task.skipTest(
"cluster.desired_nodes/10_basic/Test delete desired nodes with node_version generates a warning",
"node_version warning is removed in 9.0"
)
task.skipTest(
"cluster.desired_nodes/10_basic/Test update desired nodes with node_version generates a warning",
"node_version warning is removed in 9.0"
)
task.skipTest("tsdb/20_mapping/nested fields", "nested field support in tsdb indices is now supported")
task.skipTest("logsdb/10_settings/routing path allowed in logs mode with routing on sort fields", "Unknown feature routing.logsb_route_on_sort_fields")
task.skipTest("indices.create/21_synthetic_source_stored/index param - field ordering", "Synthetic source keep arrays now stores leaf arrays natively")
task.skipTest("indices.create/21_synthetic_source_stored/field param - keep nested array", "Synthetic source keep arrays now stores leaf arrays natively")
task.skipTest("indices.create/21_synthetic_source_stored/field param - keep root array", "Synthetic source keep arrays now stores leaf arrays natively")
})