Skip to content

[ML] Autoscaling context's current capacity could be null; this commit handles that #74822

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact the project's maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public class AutoscalingIT extends MlNativeAutodetectIntegTestCase {

// This test assumes that xpack.ml.max_machine_memory_percent is 30
// and that xpack.ml.use_auto_machine_memory_percent is false
public void testMLAutoscalingCapacity() {
public void testMLAutoscalingCapacity() throws Exception {
SortedMap<String, Settings> deciders = new TreeMap<>();
deciders.put(MlAutoscalingDeciderService.NAME,
Settings.builder().put(MlAutoscalingDeciderService.DOWN_SCALE_DELAY.getKey(), TimeValue.ZERO).build());
Expand All @@ -57,14 +57,15 @@ public void testMLAutoscalingCapacity() {
);
assertAcked(client().execute(PutAutoscalingPolicyAction.INSTANCE, request).actionGet());

assertMlCapacity(
assertBusy(() -> assertMlCapacity(
client().execute(
GetAutoscalingCapacityAction.INSTANCE,
new GetAutoscalingCapacityAction.Request()
).actionGet(),
"Requesting scale down as tier and/or node size could be smaller",
0L,
0L);
0L)
);

putJob("job1", 100);
putJob("job2", 200);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -534,9 +534,13 @@ public AutoscalingDeciderResult scale(Settings configuration, AutoscalingDecider
// Due to weird rounding errors, it may be that a scale down result COULD cause a scale up
// Ensuring the scaleDown here forces the scale down result to always be lower than the current capacity.
// This is safe as we know that ALL jobs are assigned at the current capacity
.map(result -> new AutoscalingDeciderResult(
ensureScaleDown(result.requiredCapacity(), context.currentCapacity()), result.reason()
));
.map(result -> {
AutoscalingCapacity capacity = ensureScaleDown(result.requiredCapacity(), context.currentCapacity());
if (capacity == null) {
return null;
}
return new AutoscalingDeciderResult(capacity, result.reason());
});

if (maybeScaleDown.isPresent()) {
final AutoscalingDeciderResult scaleDownDecisionResult = maybeScaleDown.get();
Expand Down Expand Up @@ -599,6 +603,9 @@ public AutoscalingDeciderResult scale(Settings configuration, AutoscalingDecider
}

static AutoscalingCapacity ensureScaleDown(AutoscalingCapacity scaleDownResult, AutoscalingCapacity currentCapacity) {
if (scaleDownResult == null || currentCapacity == null) {
return null;
}
AutoscalingCapacity newCapacity = new AutoscalingCapacity(
new AutoscalingCapacity.AutoscalingResources(
currentCapacity.total().storage(),
Expand Down