[ML] Inference API rate limit queuing logic refactor (#107706)

* Adding new executor

* Adding in queuing logic

* working tests

* Added cleanup task

* Update docs/changelog/107706.yaml

* Updating yml

* deregistering callbacks for settings changes

* Cleaning up code

* Update docs/changelog/107706.yaml

* Fixing rate limit settings bug and only sleeping for the least amount of time

* Removing debug logging

* Removing commented code

* Renaming feedback

* fixing tests

* Updating docs and validation

* Fixing source blocks

* Adjusting cancel logic

* Reformatting ascii

* Addressing feedback

* adding rate limiting for google embeddings and mistral

---------

Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
This commit is contained in:
Jonathan Buttner 2024-06-05 08:25:25 -04:00 committed by GitHub
parent cd84749d87
commit fdb5058b13
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
102 changed files with 1499 additions and 937 deletions

View file

@ -88,6 +88,13 @@ public class TimeValue implements Comparable<TimeValue> {
return new TimeValue(days, TimeUnit.DAYS);
}
/**
 * Selects the smaller of two {@link TimeValue} instances, as ordered by {@link #compareTo}.
 *
 * @param time1 the first candidate
 * @param time2 the second candidate
 * @return {@code time1} if it compares strictly less than {@code time2}; otherwise {@code time2}
 *         (which is also the result when the two compare as equal)
 */
public static TimeValue min(TimeValue time1, TimeValue time2) {
    if (time1.compareTo(time2) < 0) {
        return time1;
    }
    // Either time2 is strictly smaller or the two compare as equal.
    return time2;
}
/**
* @return the unit used for this time value, see {@link #duration()}
*/

View file

@ -17,6 +17,7 @@ import static org.hamcrest.CoreMatchers.instanceOf;
import static org.hamcrest.CoreMatchers.not;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.lessThan;
import static org.hamcrest.object.HasToString.hasToString;
@ -231,6 +232,12 @@ public class TimeValueTests extends ESTestCase {
assertThat(ex.getMessage(), containsString("duration cannot be negative"));
}
/**
 * Checks {@link TimeValue#min} against three pairs: zero vs. one nanosecond,
 * {@code MAX_VALUE} vs. one nanosecond, and {@code MINUS_ONE} vs. one hour.
 */
public void testMin() {
    // Smaller value passed as the first argument.
    TimeValue zeroVersusNano = TimeValue.min(TimeValue.ZERO, TimeValue.timeValueNanos(1));
    assertThat(zeroVersusNano, is(TimeValue.timeValueNanos(0)));

    // Smaller value passed as the second argument.
    TimeValue maxVersusNano = TimeValue.min(TimeValue.MAX_VALUE, TimeValue.timeValueNanos(1));
    assertThat(maxVersusNano, is(TimeValue.timeValueNanos(1)));

    // MINUS_ONE is expected to come back when paired with one hour.
    TimeValue minusOneVersusHour = TimeValue.min(TimeValue.MINUS_ONE, TimeValue.timeValueHours(1));
    assertThat(minusOneVersusHour, is(TimeValue.MINUS_ONE));
}
private TimeUnit randomTimeUnitObject() {
return randomFrom(
TimeUnit.NANOSECONDS,