Non existing synonyms sets do not fail shard recovery (#125659)

This commit is contained in:
Carlos Delgado 2025-03-27 17:04:20 +01:00 committed by GitHub
parent 6a9d765408
commit 968bddc462
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 199 additions and 35 deletions

View file

@ -0,0 +1,6 @@
pr: 125659
summary: Non-existing synonyms sets do not fail shard recovery for indices
area: "Analysis"
type: bug
issues:
- 125603

View file

@ -72,7 +72,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
);
} else {
reader = new ReaderWithOrigin(
Analysis.getReaderFromIndex(synonymsSet, factory.synonymsManagementAPIService),
Analysis.getReaderFromIndex(synonymsSet, factory.synonymsManagementAPIService, factory.lenient),
"[" + synonymsSet + "] synonyms_set in .synonyms index",
synonymsSet
);

View file

@ -70,9 +70,6 @@ excludeList.add('aggregations/percentiles_hdr_metric/Negative values test')
// sync_id is removed in 9.0
excludeList.add("cat.shards/10_basic/Help")
// Can't work until auto-expand replicas is 0-1 for synonyms index
excludeList.add("synonyms/90_synonyms_reloading_for_synset/Reload analyzers for specific synonym set")
def clusterPath = getPath()
buildParams.bwcVersions.withWireCompatible { bwcVersion, baseName ->

View file

@ -88,4 +88,5 @@ tasks.named("yamlRestCompatTestTransform").configure ({ task ->
task.skipTest("indices.create/21_synthetic_source_stored/field param - keep nested array", "Synthetic source keep arrays now stores leaf arrays natively")
task.skipTest("indices.create/21_synthetic_source_stored/field param - keep root array", "Synthetic source keep arrays now stores leaf arrays natively")
task.skipTest("cluster.info/30_info_thread_pool/Cluster HTTP Info", "The search_throttled thread pool has been removed")
task.skipTest("synonyms/80_synonyms_from_index/Fail loading synonyms from index if synonyms_set doesn't exist", "Synonyms do no longer fail if the synonyms_set doesn't exist")
})

View file

@ -313,3 +313,159 @@ setup:
indices.stats: { index: test_index }
- length: { indices: 0 }
---
# Checks that an index whose synonym filter references synonyms set "set1" can be
# created and reach green health before that set is created in this test, and that
# the synonyms take effect once the set is put.
"Load index with non existent synonyms set":
- requires:
cluster_features: [ index.synonyms_set_lenient_on_non_existing ]
reason: "requires synonyms_set_lenient_on_non_existing bug fix"
# Create the index; the filter points at set1, which this test only creates further below
- do:
indices.create:
index: test_index
body:
settings:
index:
number_of_shards: 1
number_of_replicas: 0
analysis:
filter:
my_synonym_filter:
type: synonym
synonyms_set: set1
updateable: true
analyzer:
my_analyzer:
type: custom
tokenizer: whitespace
filter: [ lowercase, my_synonym_filter ]
mappings:
properties:
my_field:
type: text
search_analyzer: my_analyzer
# Index creation is expected to be fully acknowledged, including its shards
- match: { acknowledged: true }
- match: { shards_acknowledged: true }
- do:
indices.stats: { index: test_index }
- match: { indices.test_index.health: "green" }
# Synonyms are not applied
- do:
indices.analyze:
index: test_index
body:
analyzer: my_analyzer
text: foo
# Only the original token comes back while the set does not exist
- length: { tokens: 1 }
- match: { tokens.0.token: foo }
# Create synonyms set and check synonyms are applied
- do:
synonyms.put_synonym:
id: set1
body:
synonyms_set:
synonyms: "foo => bar, baz"
# This is to ensure that all index shards (write and read) are available. In serverless this can take some time.
- do:
cluster.health:
index: .synonyms
wait_for_status: green
- do:
indices.stats: { index: test_index }
- match: { indices.test_index.health: "green" }
# Synonyms are applied
- do:
indices.analyze:
index: test_index
body:
analyzer: my_analyzer
text: foo
# Analyzing "foo" is now expected to yield two tokens
- length: { tokens: 2 }
---
# Checks the strict case: with "lenient: false" on the synonym filter, creating the
# index before synonyms set "set1" is created in this test leaves its shards
# unacknowledged; after the set is put and failed allocations are retried, the index
# is expected to go green.
"Load index with non existent synonyms set and lenient set to false":
- requires:
test_runner_features: [ allowed_warnings ]
- do:
indices.create:
index: test_index
body:
settings:
index:
number_of_shards: 1
number_of_replicas: 0
analysis:
filter:
my_synonym_filter:
type: synonym
synonyms_set: set1
updateable: true
lenient: false
analyzer:
my_analyzer:
type: custom
tokenizer: whitespace
filter: [ lowercase, my_synonym_filter ]
mappings:
properties:
my_field:
type: text
search_analyzer: my_analyzer
# The create request is acknowledged, but the shards are expected not to be
- match: { acknowledged: true }
- match: { shards_acknowledged: false }
- do:
indices.stats: { index: test_index }
# Stats are expected to report no indices at this point
- length: { indices: 0 }
# Create synonyms set and check synonyms are applied
- do:
synonyms.put_synonym:
id: set1
body:
synonyms_set:
synonyms: "foo => bar, baz"
# This is to ensure that all index shards (write and read) are available. In serverless this can take some time.
- do:
cluster.health:
index: .synonyms
wait_for_status: green
# Ask the cluster to retry failed shard allocations now that set1 exists
- do:
# Warning issued in previous versions
allowed_warnings:
- "The [state] field in the response to the reroute API is deprecated and will be removed in a future version. Specify ?metric=none to adopt the future behaviour."
cluster.reroute:
retry_failed: true
- do:
cluster.health:
index: test_index
wait_for_status: green
# Synonyms are applied
- do:
indices.analyze:
index: test_index
body:
analyzer: my_analyzer
text: foo
# Analyzing "foo" is expected to yield two tokens
- length: { tokens: 2 }

View file

@ -165,34 +165,6 @@ setup:
query: hola
- match: { hits.total.value: 1 }
---
"Fail loading synonyms from index if synonyms_set doesn't exist":
- do:
indices.create:
index: another_index
body:
settings:
index:
number_of_shards: 1
analysis:
filter:
my_synonym_filter:
type: synonym
synonyms_set: set_missing
updateable: true
analyzer:
my_analyzer:
type: custom
tokenizer: standard
filter: [ lowercase, my_synonym_filter ]
mappings:
properties:
my_field:
type: text
search_analyzer: my_analyzer
- match: { acknowledged: true }
- match: { shards_acknowledged: false }
---
"Load empty synonyms set from index for an analyzer":
- do:

View file

@ -23,8 +23,10 @@ public class IndexFeatures implements FeatureSpecification {
public static final NodeFeature LOGSDB_NO_HOST_NAME_FIELD = new NodeFeature("index.logsdb_no_host_name_field");
private static final NodeFeature SYNONYMS_SET_LENIENT_ON_NON_EXISTING = new NodeFeature("index.synonyms_set_lenient_on_non_existing");
@Override
public Set<NodeFeature> getTestFeatures() {
return Set.of(LOGSDB_NO_HOST_NAME_FIELD);
return Set.of(LOGSDB_NO_HOST_NAME_FIELD, SYNONYMS_SET_LENIENT_ON_NON_EXISTING);
}
}

View file

@ -48,6 +48,7 @@ import org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.apache.lucene.analysis.th.ThaiAnalyzer;
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
import org.apache.lucene.analysis.util.CSVUtil;
import org.elasticsearch.ResourceNotFoundException;
import org.elasticsearch.action.support.PlainActionFuture;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.Settings;
@ -353,10 +354,39 @@ public class Analysis {
}
}
public static Reader getReaderFromIndex(String synonymsSet, SynonymsManagementAPIService synonymsManagementAPIService) {
public static Reader getReaderFromIndex(
String synonymsSet,
SynonymsManagementAPIService synonymsManagementAPIService,
boolean ignoreMissing
) {
final PlainActionFuture<PagedResult<SynonymRule>> synonymsLoadingFuture = new PlainActionFuture<>();
synonymsManagementAPIService.getSynonymSetRules(synonymsSet, synonymsLoadingFuture);
PagedResult<SynonymRule> results = synonymsLoadingFuture.actionGet();
PagedResult<SynonymRule> results;
try {
results = synonymsLoadingFuture.actionGet();
} catch (Exception e) {
if (ignoreMissing == false) {
throw e;
}
boolean notFound = e instanceof ResourceNotFoundException;
String message = String.format(
Locale.ROOT,
"Synonyms set %s %s. Synonyms will not be applied to search results on indices that use this synonym set",
synonymsSet,
notFound ? "not found" : "could not be loaded"
);
if (notFound) {
logger.warn(message);
} else {
logger.error(message, e);
}
results = new PagedResult<>(0, new SynonymRule[0]);
}
SynonymRule[] synonymRules = results.pageResults();
StringBuilder sb = new StringBuilder();