[ML] Fix NPE in trained model assignment updater (#108942)

The NPE causes the rebalance to fail, later retires would have corrected 
the situation
This commit is contained in:
David Kyle 2024-05-24 11:40:30 +01:00 committed by GitHub
parent e88de19843
commit 103633aa7b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 20 additions and 8 deletions

View file

@ -0,0 +1,5 @@
pr: 108942
summary: Fix NPE in trained model assignment updater
area: Machine Learning
type: bug
issues: []

View file

@ -1047,15 +1047,22 @@ public class TrainedModelAssignmentClusterService implements ClusterStateListene
if (event.changedCustomMetadataSet().contains(PersistentTasksCustomMetadata.TYPE) == false) {
return Optional.empty();
}
final PersistentTasksCustomMetadata previousPersistentTasks = event.previousState()
.getMetadata()
.custom(PersistentTasksCustomMetadata.TYPE);
final PersistentTasksCustomMetadata currentPersistentTasks = event.state().getMetadata().custom(PersistentTasksCustomMetadata.TYPE);
Set<String> previousMlTaskIds = findMlProcessTaskIds(previousPersistentTasks);
PersistentTasksCustomMetadata previousPersistentTasks = PersistentTasksCustomMetadata.getPersistentTasksCustomMetadata(
event.previousState()
);
if (previousPersistentTasks == null) { // no previous jobs so nothing has stopped
return Optional.empty();
}
PersistentTasksCustomMetadata currentPersistentTasks = PersistentTasksCustomMetadata.getPersistentTasksCustomMetadata(
event.state()
);
Set<String> currentMlTaskIds = findMlProcessTaskIds(currentPersistentTasks);
Set<String> stoppedTaskTypes = previousMlTaskIds.stream()
.filter(id -> currentMlTaskIds.contains(id) == false) // remove the tasks that are still present. Stopped Ids only.
.map(previousPersistentTasks::getTask)
Set<PersistentTasksCustomMetadata.PersistentTask<?>> previousMlTasks = MlTasks.findMlProcessTasks(previousPersistentTasks);
Set<String> stoppedTaskTypes = previousMlTasks.stream()
.filter(task -> currentMlTaskIds.contains(task.getId()) == false) // remove the tasks that are still present. Stopped Ids only.
.map(PersistentTasksCustomMetadata.PersistentTask::getTaskName)
.map(MlTasks::prettyPrintTaskName)
.collect(Collectors.toSet());