Mirror of https://github.com/elastic/kibana.git, synced 2025-04-24 17:59:23 -04:00

[ML] Use a new ML endpoint to estimate a model memory (#60376)

* [ML] refactor calculate_model_memory_limit route, use estimateModelMemory endpoint
* [ML] refactor validate_model_memory_limit, migrate tests to jest
* [ML] fix typing issue
* [ML] start estimateModelMemory url with /
* [ML] fix typo, filter mlcategory
* [ML] extract getCardinalities function
* [ML] fields_service.ts
* [ML] wip getMaxBucketCardinality
* [ML] refactor and comments
* [ML] fix aggs keys with special characters, fix integration tests
* [ML] use pre-defined job types
* [ML] fallback to 0 in case max bucket cardinality receives null
* [ML] calculateModelMemoryLimit on influencers change
* [ML] fix maxModelMemoryLimit
* [ML] cap aggregation to max 1000 buckets
* [ML] rename intervalDuration

This commit is contained in:
parent ae0e35041e
commit 7aa4651292

19 changed files with 817 additions and 632 deletions
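Before the file-by-file diff, it may help to see the shape of the Elasticsearch `_estimate_model_memory` call this PR switches to. A minimal sketch of the request and response bodies (field names are taken from the diff below; the detector, influencer and cardinality values are hypothetical):

```ts
// POST /_ml/anomaly_detectors/_estimate_model_memory
// The request carries the job's analysis_config plus cardinalities that Kibana
// now computes up front via the fields service.
const estimateModelMemoryRequest = {
  analysis_config: {
    bucket_span: '15m',
    detectors: [
      { function: 'avg', field_name: 'products.base_price', partition_field_name: 'geoip.city_name' },
    ],
    influencers: ['geoip.city_name'],
  },
  // overall cardinality of by/partition/over fields across the requested time range
  overall_cardinality: { 'geoip.city_name': 1037 }, // hypothetical value
  // max per-bucket cardinality of influencer-only fields
  max_bucket_cardinality: {},
};

// The endpoint responds with a single estimate, which calculateModelMemoryLimit
// then caps at the cluster-wide max_model_memory_limit if one is configured.
const estimateModelMemoryResponse = { model_memory_estimate: '24mb' };
```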
@ -81,7 +81,7 @@ export interface ModelPlotConfig {

// TODO, finish this when it's needed
export interface CustomRule {
  actions: any;
  scope: object;
  conditions: object;
  actions: string[];
  scope?: object;
  conditions: any[];
}

@ -88,16 +88,13 @@ export class MultiMetricJobCreator extends JobCreator {

  // called externally to set the model memory limit based on the current detector configuration
  public async calculateModelMemoryLimit() {
    if (this._splitField === null) {
      // no split field, use the default
    if (this.jobConfig.analysis_config.detectors.length === 0) {
      this.modelMemoryLimit = DEFAULT_MODEL_MEMORY_LIMIT;
    } else {
      const { modelMemoryLimit } = await ml.calculateModelMemoryLimit({
        analysisConfig: this.jobConfig.analysis_config,
        indexPattern: this._indexPatternTitle,
        splitFieldName: this._splitField.name,
        query: this._datafeed_config.query,
        fieldNames: this.fields.map(f => f.id),
        influencerNames: this._influencers,
        timeFieldName: this._job_config.data_description.time_field,
        earliestMs: this._start,
        latestMs: this._end,

@ -22,6 +22,7 @@ import {
  Datafeed,
  CombinedJob,
  Detector,
  AnalysisConfig,
} from '../../../../common/types/anomaly_detection_jobs';
import { ES_AGGREGATION } from '../../../../common/constants/aggregation_types';
import { FieldRequestConfig } from '../../datavisualizer/index_based/common';

@ -532,30 +533,24 @@ export const ml = {
  },

  calculateModelMemoryLimit({
    analysisConfig,
    indexPattern,
    splitFieldName,
    query,
    fieldNames,
    influencerNames,
    timeFieldName,
    earliestMs,
    latestMs,
  }: {
    analysisConfig: AnalysisConfig;
    indexPattern: string;
    splitFieldName: string;
    query: any;
    fieldNames: string[];
    influencerNames: string[];
    timeFieldName: string;
    earliestMs: number;
    latestMs: number;
  }) {
    const body = JSON.stringify({
      analysisConfig,
      indexPattern,
      splitFieldName,
      query,
      fieldNames,
      influencerNames,
      timeFieldName,
      earliestMs,
      latestMs,

@ -413,6 +413,14 @@ export const elasticsearchJsPlugin = (Client: any, config: any, components: any)
      method: 'POST',
    });

    ml.estimateModelMemory = ca({
      url: {
        fmt: '/_ml/anomaly_detectors/_estimate_model_memory',
      },
      needBody: true,
      method: 'POST',
    });

    ml.datafeedPreview = ca({
      url: {
        fmt: '/_ml/datafeeds/<%=datafeedId%>/_preview',

@ -1,20 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
|
||||
import { APICaller } from 'kibana/server';
|
||||
|
||||
export function calculateModelMemoryLimitProvider(
|
||||
callAsCurrentUser: APICaller
|
||||
): (
|
||||
indexPattern: string,
|
||||
splitFieldName: string,
|
||||
query: any,
|
||||
fieldNames: any,
|
||||
influencerNames: any, // string[] ?
|
||||
timeFieldName: string,
|
||||
earliestMs: number,
|
||||
latestMs: number
|
||||
) => Promise<any>;
|
|
@ -1,117 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
|
||||
// calculates the size of the model memory limit used in the job config
|
||||
// based on the cardinality of the field being used to split the data.
|
||||
// the limit should be 10MB plus 20kB per series, rounded up to the nearest MB.
|
||||
import numeral from '@elastic/numeral';
|
||||
import { fieldsServiceProvider } from '../fields_service';
|
||||
|
||||
export function calculateModelMemoryLimitProvider(callAsCurrentUser) {
|
||||
const fieldsService = fieldsServiceProvider(callAsCurrentUser);
|
||||
|
||||
return function calculateModelMemoryLimit(
|
||||
indexPattern,
|
||||
splitFieldName,
|
||||
query,
|
||||
fieldNames,
|
||||
influencerNames,
|
||||
timeFieldName,
|
||||
earliestMs,
|
||||
latestMs,
|
||||
allowMMLGreaterThanMax = false
|
||||
) {
|
||||
return new Promise((response, reject) => {
|
||||
const limits = {};
|
||||
callAsCurrentUser('ml.info')
|
||||
.then(resp => {
|
||||
if (resp.limits !== undefined && resp.limits.max_model_memory_limit !== undefined) {
|
||||
limits.max_model_memory_limit = resp.limits.max_model_memory_limit;
|
||||
}
|
||||
})
|
||||
.catch(error => {
|
||||
reject(error);
|
||||
});
|
||||
|
||||
// find the cardinality of the split field
|
||||
function splitFieldCardinality() {
|
||||
return fieldsService.getCardinalityOfFields(
|
||||
indexPattern,
|
||||
[splitFieldName],
|
||||
query,
|
||||
timeFieldName,
|
||||
earliestMs,
|
||||
latestMs
|
||||
);
|
||||
}
|
||||
|
||||
// find the cardinality of an influencer field
|
||||
function influencerCardinality(influencerName) {
|
||||
return fieldsService.getCardinalityOfFields(
|
||||
indexPattern,
|
||||
[influencerName],
|
||||
query,
|
||||
timeFieldName,
|
||||
earliestMs,
|
||||
latestMs
|
||||
);
|
||||
}
|
||||
|
||||
const calculations = [
|
||||
splitFieldCardinality(),
|
||||
...influencerNames.map(inf => influencerCardinality(inf)),
|
||||
];
|
||||
|
||||
Promise.all(calculations)
|
||||
.then(responses => {
|
||||
let mmlMB = 0;
|
||||
const MB = 1000;
|
||||
responses.forEach((resp, i) => {
|
||||
let mmlKB = 0;
|
||||
if (i === 0) {
|
||||
// first in the list is the basic calculation.
|
||||
// a base of 10MB plus 64KB per series per detector
|
||||
// i.e. 10000KB + (64KB * cardinality of split field * number or detectors)
|
||||
const cardinality = resp[splitFieldName];
|
||||
mmlKB = 10000;
|
||||
const SERIES_MULTIPLIER = 64;
|
||||
const numberOfFields = fieldNames.length;
|
||||
|
||||
if (cardinality !== undefined) {
|
||||
mmlKB += SERIES_MULTIPLIER * cardinality * numberOfFields;
|
||||
}
|
||||
} else {
|
||||
// the rest of the calculations are for influencers fields
|
||||
// 10KB per series of influencer field
|
||||
// i.e. 10KB * cardinality of influencer field
|
||||
const cardinality = resp[splitFieldName];
|
||||
mmlKB = 0;
|
||||
const SERIES_MULTIPLIER = 10;
|
||||
if (cardinality !== undefined) {
|
||||
mmlKB = SERIES_MULTIPLIER * cardinality;
|
||||
}
|
||||
}
|
||||
// convert the total to MB, rounding up.
|
||||
mmlMB += Math.ceil(mmlKB / MB);
|
||||
});
|
||||
|
||||
// if max_model_memory_limit has been set,
|
||||
// make sure the estimated value is not greater than it.
|
||||
if (allowMMLGreaterThanMax === false && limits.max_model_memory_limit !== undefined) {
|
||||
const maxBytes = numeral(limits.max_model_memory_limit.toUpperCase()).value();
|
||||
const mmlBytes = numeral(`${mmlMB}MB`).value();
|
||||
if (mmlBytes > maxBytes) {
|
||||
mmlMB = Math.floor(maxBytes / numeral('1MB').value());
|
||||
}
|
||||
}
|
||||
response({ modelMemoryLimit: `${mmlMB}MB` });
|
||||
})
|
||||
.catch(error => {
|
||||
reject(error);
|
||||
});
|
||||
});
|
||||
};
|
||||
}
|
|
@ -0,0 +1,187 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */

import numeral from '@elastic/numeral';
import { APICaller } from 'kibana/server';
import { AnalysisConfig } from '../../../common/types/anomaly_detection_jobs';
import { fieldsServiceProvider } from '../fields_service';

interface ModelMemoryEstimationResult {
  /**
   * Result model memory limit
   */
  modelMemoryLimit: string;
  /**
   * Estimated model memory by elasticsearch ml endpoint
   */
  estimatedModelMemoryLimit: string;
  /**
   * Maximum model memory limit
   */
  maxModelMemoryLimit?: string;
}

/**
 * Response of the _estimate_model_memory endpoint.
 */
export interface ModelMemoryEstimate {
  model_memory_estimate: string;
}

/**
 * Retrieves overall and max bucket cardinalities.
 */
async function getCardinalities(
  callAsCurrentUser: APICaller,
  analysisConfig: AnalysisConfig,
  indexPattern: string,
  query: any,
  timeFieldName: string,
  earliestMs: number,
  latestMs: number
): Promise<{
  overallCardinality: { [key: string]: number };
  maxBucketCardinality: { [key: string]: number };
}> {
  /**
   * Fields not involved in cardinality check
   */
  const excludedKeywords = new Set<string>(
    /**
     * The keyword which is used to mean the output of categorization,
     * so it will have cardinality zero in the actual input data.
     */
    'mlcategory'
  );

  const fieldsService = fieldsServiceProvider(callAsCurrentUser);

  const { detectors, influencers, bucket_span: bucketSpan } = analysisConfig;

  let overallCardinality = {};
  let maxBucketCardinality = {};
  const overallCardinalityFields: Set<string> = detectors.reduce(
    (
      acc,
      {
        by_field_name: byFieldName,
        partition_field_name: partitionFieldName,
        over_field_name: overFieldName,
      }
    ) => {
      [byFieldName, partitionFieldName, overFieldName]
        .filter(field => field !== undefined && field !== '' && !excludedKeywords.has(field))
        .forEach(key => {
          acc.add(key as string);
        });
      return acc;
    },
    new Set<string>()
  );

  const maxBucketFieldCardinalities: string[] = influencers.filter(
    influencerField =>
      typeof influencerField === 'string' &&
      !excludedKeywords.has(influencerField) &&
      !!influencerField &&
      !overallCardinalityFields.has(influencerField)
  ) as string[];

  if (overallCardinalityFields.size > 0) {
    overallCardinality = await fieldsService.getCardinalityOfFields(
      indexPattern,
      [...overallCardinalityFields],
      query,
      timeFieldName,
      earliestMs,
      latestMs
    );
  }

  if (maxBucketFieldCardinalities.length > 0) {
    maxBucketCardinality = await fieldsService.getMaxBucketCardinalities(
      indexPattern,
      maxBucketFieldCardinalities,
      query,
      timeFieldName,
      earliestMs,
      latestMs,
      bucketSpan
    );
  }

  return {
    overallCardinality,
    maxBucketCardinality,
  };
}

export function calculateModelMemoryLimitProvider(callAsCurrentUser: APICaller) {
  /**
   * Retrieves an estimated size of the model memory limit used in the job config
   * based on the cardinality of the fields being used to split the data
   * and influencers.
   */
  return async function calculateModelMemoryLimit(
    analysisConfig: AnalysisConfig,
    indexPattern: string,
    query: any,
    timeFieldName: string,
    earliestMs: number,
    latestMs: number,
    allowMMLGreaterThanMax = false
  ): Promise<ModelMemoryEstimationResult> {
    let maxModelMemoryLimit;
    try {
      const resp = await callAsCurrentUser('ml.info');
      if (resp?.limits?.max_model_memory_limit !== undefined) {
        maxModelMemoryLimit = resp.limits.max_model_memory_limit.toUpperCase();
      }
    } catch (e) {
      throw new Error('Unable to retrieve max model memory limit');
    }

    const { overallCardinality, maxBucketCardinality } = await getCardinalities(
      callAsCurrentUser,
      analysisConfig,
      indexPattern,
      query,
      timeFieldName,
      earliestMs,
      latestMs
    );

    const estimatedModelMemoryLimit = (
      await callAsCurrentUser<ModelMemoryEstimate>('ml.estimateModelMemory', {
        body: {
          analysis_config: analysisConfig,
          overall_cardinality: overallCardinality,
          max_bucket_cardinality: maxBucketCardinality,
        },
      })
    ).model_memory_estimate.toUpperCase();

    let modelMemoryLimit: string = estimatedModelMemoryLimit;
    // if max_model_memory_limit has been set,
    // make sure the estimated value is not greater than it.
    if (!allowMMLGreaterThanMax && maxModelMemoryLimit !== undefined) {
      // @ts-ignore
      const maxBytes = numeral(maxModelMemoryLimit).value();
      // @ts-ignore
      const mmlBytes = numeral(estimatedModelMemoryLimit).value();
      if (mmlBytes > maxBytes) {
        // @ts-ignore
        modelMemoryLimit = `${Math.floor(maxBytes / numeral('1MB').value())}MB`;
      }
    }

    return {
      estimatedModelMemoryLimit,
      modelMemoryLimit,
      ...(maxModelMemoryLimit ? { maxModelMemoryLimit } : {}),
    };
  };
}
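A minimal usage sketch of the provider added above, assuming the caller already has a scoped `callAsCurrentUser` client and a job's `analysis_config` in hand; the index pattern, query, time field and time range below are hypothetical, and the import path is the one used by the validation code in this PR:

```ts
import { APICaller } from 'kibana/server';
import { AnalysisConfig } from '../../../common/types/anomaly_detection_jobs';
import { calculateModelMemoryLimitProvider } from '../calculate_model_memory_limit';

// Argument order follows the provider signature above:
// analysisConfig, indexPattern, query, timeFieldName, earliestMs, latestMs, allowMMLGreaterThanMax.
async function estimateMml(callAsCurrentUser: APICaller, analysisConfig: AnalysisConfig) {
  const calculateModelMemoryLimit = calculateModelMemoryLimitProvider(callAsCurrentUser);
  // Resolves to { estimatedModelMemoryLimit, modelMemoryLimit, maxModelMemoryLimit? }.
  return calculateModelMemoryLimit(
    analysisConfig,
    'ecommerce',          // hypothetical index pattern
    { match_all: {} },    // hypothetical datafeed query
    'order_date',         // hypothetical time field
    1560297859000,
    1562975136000
  );
}
```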
@ -1,21 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
|
||||
import { APICaller } from 'kibana/server';
|
||||
|
||||
export function fieldsServiceProvider(
|
||||
callAsCurrentUser: APICaller
|
||||
): {
|
||||
getCardinalityOfFields: (
|
||||
index: string[] | string,
|
||||
fieldNames: string[],
|
||||
query: any,
|
||||
timeFieldName: string,
|
||||
earliestMs: number,
|
||||
latestMs: number
|
||||
) => Promise<any>;
|
||||
getTimeFieldRange: (index: string[] | string, timeFieldName: string, query: any) => Promise<any>;
|
||||
};
|
|
@ -1,148 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
|
||||
// Service for carrying out queries to obtain data
|
||||
// specific to fields in Elasticsearch indices.
|
||||
|
||||
export function fieldsServiceProvider(callAsCurrentUser) {
|
||||
// Obtains the cardinality of one or more fields.
|
||||
// Returns an Object whose keys are the names of the fields,
|
||||
// with values equal to the cardinality of the field.
|
||||
// Any of the supplied fieldNames which are not aggregatable will
|
||||
// be omitted from the returned Object.
|
||||
function getCardinalityOfFields(index, fieldNames, query, timeFieldName, earliestMs, latestMs) {
|
||||
// First check that each of the supplied fieldNames are aggregatable,
|
||||
// then obtain the cardinality for each of the aggregatable fields.
|
||||
return new Promise((resolve, reject) => {
|
||||
callAsCurrentUser('fieldCaps', {
|
||||
index,
|
||||
fields: fieldNames,
|
||||
})
|
||||
.then(fieldCapsResp => {
|
||||
const aggregatableFields = [];
|
||||
fieldNames.forEach(fieldName => {
|
||||
const fieldInfo = fieldCapsResp.fields[fieldName];
|
||||
const typeKeys = fieldInfo !== undefined ? Object.keys(fieldInfo) : [];
|
||||
if (typeKeys.length > 0) {
|
||||
const fieldType = typeKeys[0];
|
||||
const isFieldAggregatable = fieldInfo[fieldType].aggregatable;
|
||||
if (isFieldAggregatable === true) {
|
||||
aggregatableFields.push(fieldName);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
if (aggregatableFields.length > 0) {
|
||||
// Build the criteria to use in the bool filter part of the request.
|
||||
// Add criteria for the time range and the datafeed config query.
|
||||
const mustCriteria = [
|
||||
{
|
||||
range: {
|
||||
[timeFieldName]: {
|
||||
gte: earliestMs,
|
||||
lte: latestMs,
|
||||
format: 'epoch_millis',
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
if (query) {
|
||||
mustCriteria.push(query);
|
||||
}
|
||||
|
||||
const aggs = aggregatableFields.reduce((obj, field) => {
|
||||
obj[field] = { cardinality: { field } };
|
||||
return obj;
|
||||
}, {});
|
||||
|
||||
const body = {
|
||||
query: {
|
||||
bool: {
|
||||
must: mustCriteria,
|
||||
},
|
||||
},
|
||||
size: 0,
|
||||
_source: {
|
||||
excludes: [],
|
||||
},
|
||||
aggs,
|
||||
};
|
||||
|
||||
callAsCurrentUser('search', {
|
||||
index,
|
||||
body,
|
||||
})
|
||||
.then(resp => {
|
||||
const aggregations = resp.aggregations;
|
||||
if (aggregations !== undefined) {
|
||||
const results = aggregatableFields.reduce((obj, field) => {
|
||||
obj[field] = (aggregations[field] || { value: 0 }).value;
|
||||
return obj;
|
||||
}, {});
|
||||
resolve(results);
|
||||
} else {
|
||||
resolve({});
|
||||
}
|
||||
})
|
||||
.catch(resp => {
|
||||
reject(resp);
|
||||
});
|
||||
} else {
|
||||
// None of the fields are aggregatable. Return empty Object.
|
||||
resolve({});
|
||||
}
|
||||
})
|
||||
.catch(resp => {
|
||||
reject(resp);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
function getTimeFieldRange(index, timeFieldName, query) {
|
||||
return new Promise((resolve, reject) => {
|
||||
const obj = { success: true, start: { epoch: 0, string: '' }, end: { epoch: 0, string: '' } };
|
||||
|
||||
callAsCurrentUser('search', {
|
||||
index,
|
||||
size: 0,
|
||||
body: {
|
||||
query,
|
||||
aggs: {
|
||||
earliest: {
|
||||
min: {
|
||||
field: timeFieldName,
|
||||
},
|
||||
},
|
||||
latest: {
|
||||
max: {
|
||||
field: timeFieldName,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
.then(resp => {
|
||||
if (resp.aggregations && resp.aggregations.earliest && resp.aggregations.latest) {
|
||||
obj.start.epoch = resp.aggregations.earliest.value;
|
||||
obj.start.string = resp.aggregations.earliest.value_as_string;
|
||||
|
||||
obj.end.epoch = resp.aggregations.latest.value;
|
||||
obj.end.string = resp.aggregations.latest.value_as_string;
|
||||
}
|
||||
resolve(obj);
|
||||
})
|
||||
.catch(resp => {
|
||||
reject(resp);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
return {
|
||||
getCardinalityOfFields,
|
||||
getTimeFieldRange,
|
||||
};
|
||||
}
|
296 x-pack/plugins/ml/server/models/fields_service/fields_service.ts Normal file
@ -0,0 +1,296 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
|
||||
import Boom from 'boom';
|
||||
import { APICaller } from 'kibana/server';
|
||||
import { parseInterval } from '../../../common/util/parse_interval';
|
||||
|
||||
/**
|
||||
* Service for carrying out queries to obtain data
|
||||
* specific to fields in Elasticsearch indices.
|
||||
*/
|
||||
export function fieldsServiceProvider(callAsCurrentUser: APICaller) {
|
||||
/**
|
||||
* Gets aggregatable fields.
|
||||
*/
|
||||
async function getAggregatableFields(
|
||||
index: string | string[],
|
||||
fieldNames: string[]
|
||||
): Promise<string[]> {
|
||||
const fieldCapsResp = await callAsCurrentUser('fieldCaps', {
|
||||
index,
|
||||
fields: fieldNames,
|
||||
});
|
||||
const aggregatableFields: string[] = [];
|
||||
fieldNames.forEach(fieldName => {
|
||||
const fieldInfo = fieldCapsResp.fields[fieldName];
|
||||
const typeKeys = fieldInfo !== undefined ? Object.keys(fieldInfo) : [];
|
||||
if (typeKeys.length > 0) {
|
||||
const fieldType = typeKeys[0];
|
||||
const isFieldAggregatable = fieldInfo[fieldType].aggregatable;
|
||||
if (isFieldAggregatable === true) {
|
||||
aggregatableFields.push(fieldName);
|
||||
}
|
||||
}
|
||||
});
|
||||
return aggregatableFields;
|
||||
}
|
||||
|
||||
// Obtains the cardinality of one or more fields.
|
||||
// Returns an Object whose keys are the names of the fields,
|
||||
// with values equal to the cardinality of the field.
|
||||
// Any of the supplied fieldNames which are not aggregatable will
|
||||
// be omitted from the returned Object.
|
||||
async function getCardinalityOfFields(
|
||||
index: string[] | string,
|
||||
fieldNames: string[],
|
||||
query: any,
|
||||
timeFieldName: string,
|
||||
earliestMs: number,
|
||||
latestMs: number
|
||||
): Promise<{ [key: string]: number }> {
|
||||
const aggregatableFields = await getAggregatableFields(index, fieldNames);
|
||||
|
||||
if (aggregatableFields.length === 0) {
|
||||
return {};
|
||||
}
|
||||
|
||||
// Build the criteria to use in the bool filter part of the request.
|
||||
// Add criteria for the time range and the datafeed config query.
|
||||
const mustCriteria = [
|
||||
{
|
||||
range: {
|
||||
[timeFieldName]: {
|
||||
gte: earliestMs,
|
||||
lte: latestMs,
|
||||
format: 'epoch_millis',
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
if (query) {
|
||||
mustCriteria.push(query);
|
||||
}
|
||||
|
||||
const aggs = aggregatableFields.reduce((obj, field) => {
|
||||
obj[field] = { cardinality: { field } };
|
||||
return obj;
|
||||
}, {} as { [field: string]: { cardinality: { field: string } } });
|
||||
|
||||
const body = {
|
||||
query: {
|
||||
bool: {
|
||||
must: mustCriteria,
|
||||
},
|
||||
},
|
||||
size: 0,
|
||||
_source: {
|
||||
excludes: [],
|
||||
},
|
||||
aggs,
|
||||
};
|
||||
|
||||
const aggregations = (
|
||||
await callAsCurrentUser('search', {
|
||||
index,
|
||||
body,
|
||||
})
|
||||
)?.aggregations;
|
||||
|
||||
if (!aggregations) {
|
||||
return {};
|
||||
}
|
||||
|
||||
return aggregatableFields.reduce((obj, field) => {
|
||||
obj[field] = (aggregations[field] || { value: 0 }).value;
|
||||
return obj;
|
||||
}, {} as { [field: string]: number });
|
||||
}
|
||||
|
||||
function getTimeFieldRange(
|
||||
index: string[] | string,
|
||||
timeFieldName: string,
|
||||
query: any
|
||||
): Promise<any> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const obj = { success: true, start: { epoch: 0, string: '' }, end: { epoch: 0, string: '' } };
|
||||
|
||||
callAsCurrentUser('search', {
|
||||
index,
|
||||
size: 0,
|
||||
body: {
|
||||
query,
|
||||
aggs: {
|
||||
earliest: {
|
||||
min: {
|
||||
field: timeFieldName,
|
||||
},
|
||||
},
|
||||
latest: {
|
||||
max: {
|
||||
field: timeFieldName,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
.then(resp => {
|
||||
if (resp.aggregations && resp.aggregations.earliest && resp.aggregations.latest) {
|
||||
obj.start.epoch = resp.aggregations.earliest.value;
|
||||
obj.start.string = resp.aggregations.earliest.value_as_string;
|
||||
|
||||
obj.end.epoch = resp.aggregations.latest.value;
|
||||
obj.end.string = resp.aggregations.latest.value_as_string;
|
||||
}
|
||||
resolve(obj);
|
||||
})
|
||||
.catch(resp => {
|
||||
reject(resp);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Caps provided time boundaries based on the interval.
|
||||
* @param earliestMs
|
||||
* @param latestMs
|
||||
* @param interval
|
||||
*/
|
||||
function getSafeTimeRange(
|
||||
earliestMs: number,
|
||||
latestMs: number,
|
||||
interval: string
|
||||
): { start: number; end: number } {
|
||||
const maxNumberOfBuckets = 1000;
|
||||
const end = latestMs;
|
||||
|
||||
const intervalDuration = parseInterval(interval);
|
||||
|
||||
if (intervalDuration === null) {
|
||||
throw Boom.badRequest('Interval is invalid');
|
||||
}
|
||||
|
||||
const start = Math.max(
|
||||
earliestMs,
|
||||
latestMs - maxNumberOfBuckets * intervalDuration.asMilliseconds()
|
||||
);
|
||||
|
||||
return { start, end };
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves max cardinalities for provided fields from date interval buckets
|
||||
* using max bucket pipeline aggregation.
|
||||
*
|
||||
* @param index
|
||||
* @param fieldNames - fields to perform cardinality aggregation on
|
||||
* @param query
|
||||
* @param timeFieldName
|
||||
* @param earliestMs
|
||||
* @param latestMs
|
||||
* @param interval - a fixed interval for the date histogram aggregation
|
||||
*/
|
||||
async function getMaxBucketCardinalities(
|
||||
index: string[] | string,
|
||||
fieldNames: string[],
|
||||
query: any,
|
||||
timeFieldName: string,
|
||||
earliestMs: number,
|
||||
latestMs: number,
|
||||
interval: string | undefined
|
||||
): Promise<{ [key: string]: number }> {
|
||||
if (!interval) {
|
||||
throw new Error('Interval is required to retrieve max bucket cardinalities.');
|
||||
}
|
||||
|
||||
const aggregatableFields = await getAggregatableFields(index, fieldNames);
|
||||
|
||||
if (aggregatableFields.length === 0) {
|
||||
return {};
|
||||
}
|
||||
|
||||
const { start, end } = getSafeTimeRange(earliestMs, latestMs, interval);
|
||||
|
||||
const mustCriteria = [
|
||||
{
|
||||
range: {
|
||||
[timeFieldName]: {
|
||||
gte: start,
|
||||
lte: end,
|
||||
format: 'epoch_millis',
|
||||
},
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
if (query) {
|
||||
mustCriteria.push(query);
|
||||
}
|
||||
|
||||
const dateHistogramAggKey = 'bucket_span_buckets';
|
||||
/**
|
||||
* Replace any non-word characters
|
||||
*/
|
||||
const getSafeAggName = (field: string) => field.replace(/\W/g, '');
|
||||
const getMaxBucketAggKey = (field: string) => `max_bucket_${field}`;
|
||||
|
||||
const fieldsCardinalityAggs = aggregatableFields.reduce((obj, field) => {
|
||||
obj[getSafeAggName(field)] = { cardinality: { field } };
|
||||
return obj;
|
||||
}, {} as { [field: string]: { cardinality: { field: string } } });
|
||||
|
||||
const maxBucketCardinalitiesAggs = Object.keys(fieldsCardinalityAggs).reduce((acc, field) => {
|
||||
acc[getMaxBucketAggKey(field)] = {
|
||||
max_bucket: {
|
||||
buckets_path: `${dateHistogramAggKey}>${field}`,
|
||||
},
|
||||
};
|
||||
return acc;
|
||||
}, {} as { [key: string]: { max_bucket: { buckets_path: string } } });
|
||||
|
||||
const body = {
|
||||
query: {
|
||||
bool: {
|
||||
filter: mustCriteria,
|
||||
},
|
||||
},
|
||||
size: 0,
|
||||
aggs: {
|
||||
[dateHistogramAggKey]: {
|
||||
date_histogram: {
|
||||
field: timeFieldName,
|
||||
fixed_interval: interval,
|
||||
},
|
||||
aggs: fieldsCardinalityAggs,
|
||||
},
|
||||
...maxBucketCardinalitiesAggs,
|
||||
},
|
||||
};
|
||||
|
||||
const aggregations = (
|
||||
await callAsCurrentUser('search', {
|
||||
index,
|
||||
body,
|
||||
})
|
||||
)?.aggregations;
|
||||
|
||||
if (!aggregations) {
|
||||
return {};
|
||||
}
|
||||
|
||||
return aggregatableFields.reduce((obj, field) => {
|
||||
obj[field] = (aggregations[getMaxBucketAggKey(field)] || { value: 0 }).value ?? 0;
|
||||
return obj;
|
||||
}, {} as { [field: string]: number });
|
||||
}
|
||||
|
||||
return {
|
||||
getCardinalityOfFields,
|
||||
getTimeFieldRange,
|
||||
getMaxBucketCardinalities,
|
||||
};
|
||||
}
|
|
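One detail worth calling out from the new fields_service.ts above: `getSafeTimeRange` caps the date histogram used for the max-bucket-cardinality search at 1000 buckets (the "cap aggregation to max 1000 buckets" item in the commit message). A small worked sketch of that arithmetic, with a hypothetical 15m bucket span and time range:

```ts
// With a 15m bucket span, 1000 buckets cover 1000 * 15 * 60 * 1000 ms = 900,000,000 ms (~10.4 days).
const maxNumberOfBuckets = 1000;
const bucketSpanMs = 15 * 60 * 1000;  // hypothetical '15m' bucket span
const earliestMs = 1560297859000;     // hypothetical datafeed start
const latestMs = 1562975136000;       // hypothetical datafeed end, ~31 days later

// Same capping rule as getSafeTimeRange: keep the end, pull the start forward if needed.
const end = latestMs;
const start = Math.max(earliestMs, latestMs - maxNumberOfBuckets * bucketSpanMs);
// start === 1562075136000 here, i.e. only the last ~10.4 days are scanned.
```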
@ -5,8 +5,9 @@
 */

import { i18n } from '@kbn/i18n';
import { CombinedJob } from '../../../common/types/anomaly_detection_jobs';

export function validateJobObject(job) {
export function validateJobObject(job: CombinedJob | null) {
  if (job === null || typeof job !== 'object') {
    throw new Error(
      i18n.translate('xpack.ml.models.jobValidation.validateJobObject.jobIsNotObjectErrorMessage', {

@ -1,170 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License;
|
||||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
|
||||
import numeral from '@elastic/numeral';
|
||||
import { validateJobObject } from './validate_job_object';
|
||||
import { calculateModelMemoryLimitProvider } from '../../models/calculate_model_memory_limit';
|
||||
import { ALLOWED_DATA_UNITS } from '../../../common/constants/validation';
|
||||
|
||||
// The minimum value the backend expects is 1MByte
|
||||
const MODEL_MEMORY_LIMIT_MINIMUM_BYTES = 1048576;
|
||||
|
||||
export async function validateModelMemoryLimit(callWithRequest, job, duration) {
|
||||
validateJobObject(job);
|
||||
|
||||
// retrieve the max_model_memory_limit value from the server
|
||||
// this will be unset unless the user has set this on their cluster
|
||||
const mlInfo = await callWithRequest('ml.info');
|
||||
const maxModelMemoryLimit =
|
||||
typeof mlInfo.limits === 'undefined' ? undefined : mlInfo.limits.max_model_memory_limit;
|
||||
|
||||
// retrieve the model memory limit specified by the user in the job config.
|
||||
// note, this will probably be the auto generated value, unless the user has
|
||||
// over written it.
|
||||
const mml =
|
||||
typeof job.analysis_limits !== 'undefined' &&
|
||||
typeof job.analysis_limits.model_memory_limit !== 'undefined'
|
||||
? job.analysis_limits.model_memory_limit.toUpperCase()
|
||||
: null;
|
||||
|
||||
const splitFieldNames = {};
|
||||
let splitFieldName = '';
|
||||
const fieldNames = [];
|
||||
let runCalcModelMemoryTest = true;
|
||||
let validModelMemoryLimit = true;
|
||||
|
||||
// extract the field names and partition field names from the detectors
|
||||
// we only want to estimate the mml for multi-metric jobs.
|
||||
// a multi-metric job will have one partition field, one or more field names
|
||||
// and no over or by fields
|
||||
job.analysis_config.detectors.forEach(d => {
|
||||
if (typeof d.field_name !== 'undefined') {
|
||||
fieldNames.push(d.field_name);
|
||||
}
|
||||
|
||||
// create a deduplicated list of partition field names.
|
||||
if (typeof d.partition_field_name !== 'undefined') {
|
||||
splitFieldNames[d.partition_field_name] = null;
|
||||
}
|
||||
|
||||
// if an over or by field is present, do not run the estimate test
|
||||
if (typeof d.over_field_name !== 'undefined' || typeof d.by_field_name !== 'undefined') {
|
||||
runCalcModelMemoryTest = false;
|
||||
}
|
||||
});
|
||||
|
||||
// if there are no or more than one partition fields, do not run the test
|
||||
if (Object.keys(splitFieldNames).length === 1) {
|
||||
splitFieldName = Object.keys(splitFieldNames)[0];
|
||||
} else {
|
||||
runCalcModelMemoryTest = false;
|
||||
}
|
||||
|
||||
// if there is no duration, do not run the estimate test
|
||||
if (
|
||||
typeof duration === 'undefined' ||
|
||||
typeof duration.start === 'undefined' ||
|
||||
typeof duration.end === 'undefined'
|
||||
) {
|
||||
runCalcModelMemoryTest = false;
|
||||
}
|
||||
|
||||
const messages = [];
|
||||
|
||||
// check that mml is a valid data format
|
||||
if (mml !== null) {
|
||||
const mmlSplit = mml.match(/\d+(\w+)/);
|
||||
const unit = mmlSplit && mmlSplit.length === 2 ? mmlSplit[1] : null;
|
||||
|
||||
if (ALLOWED_DATA_UNITS.indexOf(unit) === -1) {
|
||||
messages.push({
|
||||
id: 'mml_value_invalid',
|
||||
mml,
|
||||
});
|
||||
// mml is not a valid data format.
|
||||
// abort all other tests
|
||||
validModelMemoryLimit = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (validModelMemoryLimit) {
|
||||
if (runCalcModelMemoryTest) {
|
||||
const mmlEstimate = await calculateModelMemoryLimitProvider(callWithRequest)(
|
||||
job.datafeed_config.indices.join(','),
|
||||
splitFieldName,
|
||||
job.datafeed_config.query,
|
||||
fieldNames,
|
||||
job.analysis_config.influencers,
|
||||
job.data_description.time_field,
|
||||
duration.start,
|
||||
duration.end,
|
||||
true
|
||||
);
|
||||
const mmlEstimateBytes = numeral(mmlEstimate.modelMemoryLimit).value();
|
||||
|
||||
let runEstimateGreaterThenMml = true;
|
||||
// if max_model_memory_limit has been set,
|
||||
// make sure the estimated value is not greater than it.
|
||||
if (typeof maxModelMemoryLimit !== 'undefined') {
|
||||
const maxMmlBytes = numeral(maxModelMemoryLimit.toUpperCase()).value();
|
||||
if (mmlEstimateBytes > maxMmlBytes) {
|
||||
runEstimateGreaterThenMml = false;
|
||||
messages.push({
|
||||
id: 'estimated_mml_greater_than_max_mml',
|
||||
maxModelMemoryLimit,
|
||||
mmlEstimate,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// check to see if the estimated mml is greater that the user
|
||||
// specified mml
|
||||
// do not run this if we've already found that it's larger than
|
||||
// the max mml
|
||||
if (runEstimateGreaterThenMml && mml !== null) {
|
||||
const mmlBytes = numeral(mml).value();
|
||||
if (mmlBytes < MODEL_MEMORY_LIMIT_MINIMUM_BYTES) {
|
||||
messages.push({
|
||||
id: 'mml_value_invalid',
|
||||
mml,
|
||||
});
|
||||
} else if (mmlEstimateBytes / 2 > mmlBytes) {
|
||||
messages.push({
|
||||
id: 'half_estimated_mml_greater_than_mml',
|
||||
maxModelMemoryLimit,
|
||||
mml,
|
||||
});
|
||||
} else if (mmlEstimateBytes > mmlBytes) {
|
||||
messages.push({
|
||||
id: 'estimated_mml_greater_than_mml',
|
||||
maxModelMemoryLimit,
|
||||
mml,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// if max_model_memory_limit has been set,
|
||||
// make sure the user defined MML is not greater than it
|
||||
if (maxModelMemoryLimit !== undefined && mml !== null) {
|
||||
const maxMmlBytes = numeral(maxModelMemoryLimit.toUpperCase()).value();
|
||||
const mmlBytes = numeral(mml).value();
|
||||
if (mmlBytes > maxMmlBytes) {
|
||||
messages.push({
|
||||
id: 'mml_greater_than_max_mml',
|
||||
maxModelMemoryLimit,
|
||||
mml,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (messages.length === 0 && runCalcModelMemoryTest === true) {
|
||||
messages.push({ id: 'success_mml' });
|
||||
}
|
||||
|
||||
return Promise.resolve(messages);
|
||||
}
|
|
@ -4,8 +4,10 @@
|
|||
* you may not use this file except in compliance with the Elastic License.
|
||||
*/
|
||||
|
||||
import expect from '@kbn/expect';
|
||||
import { validateModelMemoryLimit } from '../validate_model_memory_limit';
|
||||
import { APICaller } from 'kibana/server';
|
||||
import { CombinedJob, Detector } from '../../../common/types/anomaly_detection_jobs';
|
||||
import { ModelMemoryEstimate } from '../calculate_model_memory_limit/calculate_model_memory_limit';
|
||||
import { validateModelMemoryLimit } from './validate_model_memory_limit';
|
||||
|
||||
describe('ML - validateModelMemoryLimit', () => {
|
||||
// mock info endpoint response
|
||||
|
@ -61,29 +63,43 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
},
|
||||
};
|
||||
|
||||
// mock estimate model memory
|
||||
const modelMemoryEstimateResponse: ModelMemoryEstimate = {
|
||||
model_memory_estimate: '40mb',
|
||||
};
|
||||
|
||||
interface MockAPICallResponse {
|
||||
'ml.estimateModelMemory'?: ModelMemoryEstimate;
|
||||
}
|
||||
|
||||
// mock callWithRequest
|
||||
// used in three places:
|
||||
// - to retrieve the info endpoint
|
||||
// - to search for cardinality of split field
|
||||
// - to retrieve field capabilities used in search for split field cardinality
|
||||
function callWithRequest(call) {
|
||||
if (typeof call === undefined) {
|
||||
return Promise.reject();
|
||||
}
|
||||
const getMockCallWithRequest = ({
|
||||
'ml.estimateModelMemory': estimateModelMemory,
|
||||
}: MockAPICallResponse = {}) =>
|
||||
((call: string) => {
|
||||
if (typeof call === undefined) {
|
||||
return Promise.reject();
|
||||
}
|
||||
|
||||
let response = {};
|
||||
if (call === 'ml.info') {
|
||||
response = mlInfoResponse;
|
||||
} else if (call === 'search') {
|
||||
response = cardinalitySearchResponse;
|
||||
} else if (call === 'fieldCaps') {
|
||||
response = fieldCapsResponse;
|
||||
}
|
||||
return Promise.resolve(response);
|
||||
}
|
||||
let response = {};
|
||||
if (call === 'ml.info') {
|
||||
response = mlInfoResponse;
|
||||
} else if (call === 'search') {
|
||||
response = cardinalitySearchResponse;
|
||||
} else if (call === 'fieldCaps') {
|
||||
response = fieldCapsResponse;
|
||||
} else if (call === 'ml.estimateModelMemory') {
|
||||
response = estimateModelMemory || modelMemoryEstimateResponse;
|
||||
}
|
||||
return Promise.resolve(response);
|
||||
}) as APICaller;
|
||||
|
||||
function getJobConfig(influencers = [], detectors = []) {
|
||||
return {
|
||||
function getJobConfig(influencers: string[] = [], detectors: Detector[] = []) {
|
||||
return ({
|
||||
analysis_config: { detectors, influencers },
|
||||
data_description: { time_field: '@timestamp' },
|
||||
datafeed_config: {
|
||||
|
@ -92,11 +108,11 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
analysis_limits: {
|
||||
model_memory_limit: '20mb',
|
||||
},
|
||||
};
|
||||
} as unknown) as CombinedJob;
|
||||
}
|
||||
|
||||
// create a specified number of mock detectors
|
||||
function createDetectors(numberOfDetectors) {
|
||||
function createDetectors(numberOfDetectors: number): Detector[] {
|
||||
const dtrs = [];
|
||||
for (let i = 0; i < numberOfDetectors; i++) {
|
||||
dtrs.push({
|
||||
|
@ -105,28 +121,28 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
partition_field_name: 'instance',
|
||||
});
|
||||
}
|
||||
return dtrs;
|
||||
return dtrs as Detector[];
|
||||
}
|
||||
|
||||
// tests
|
||||
it('Called with no duration or split and mml within limit', () => {
|
||||
const job = getJobConfig();
|
||||
const duration = undefined;
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql([]);
|
||||
expect(ids).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
it('Called with no duration or split and mml above limit', () => {
|
||||
const job = getJobConfig();
|
||||
const duration = undefined;
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '31mb';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['mml_greater_than_max_mml']);
|
||||
expect(ids).toEqual(['mml_greater_than_max_mml']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -134,11 +150,16 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(10);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '20mb';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(
|
||||
getMockCallWithRequest({ 'ml.estimateModelMemory': { model_memory_estimate: '66mb' } }),
|
||||
job,
|
||||
duration
|
||||
).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['estimated_mml_greater_than_max_mml']);
|
||||
expect(ids).toEqual(['estimated_mml_greater_than_max_mml']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -146,11 +167,16 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(2);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '30mb';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(
|
||||
getMockCallWithRequest({ 'ml.estimateModelMemory': { model_memory_estimate: '24mb' } }),
|
||||
job,
|
||||
duration
|
||||
).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['success_mml']);
|
||||
expect(ids).toEqual(['success_mml']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -158,11 +184,16 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(2);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '10mb';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(
|
||||
getMockCallWithRequest({ 'ml.estimateModelMemory': { model_memory_estimate: '22mb' } }),
|
||||
job,
|
||||
duration
|
||||
).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['half_estimated_mml_greater_than_mml']);
|
||||
expect(ids).toEqual(['half_estimated_mml_greater_than_mml']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -171,11 +202,12 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
delete mlInfoResponse.limits.max_model_memory_limit;
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '10mb';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['half_estimated_mml_greater_than_mml']);
|
||||
expect(ids).toEqual(['half_estimated_mml_greater_than_mml']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -183,11 +215,16 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(1);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '20mb';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(
|
||||
getMockCallWithRequest({ 'ml.estimateModelMemory': { model_memory_estimate: '19mb' } }),
|
||||
job,
|
||||
duration
|
||||
).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['success_mml']);
|
||||
expect(ids).toEqual(['success_mml']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -195,11 +232,12 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(1);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '0mb';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['mml_value_invalid']);
|
||||
expect(ids).toEqual(['mml_value_invalid']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -207,11 +245,12 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(1);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '10mbananas';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['mml_value_invalid']);
|
||||
expect(ids).toEqual(['mml_value_invalid']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -219,11 +258,12 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(1);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '10';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['mml_value_invalid']);
|
||||
expect(ids).toEqual(['mml_value_invalid']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -231,11 +271,12 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(1);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = 'mb';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['mml_value_invalid']);
|
||||
expect(ids).toEqual(['mml_value_invalid']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -243,11 +284,12 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(1);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = 'asdf';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['mml_value_invalid']);
|
||||
expect(ids).toEqual(['mml_value_invalid']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -255,11 +297,12 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(1);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '1023KB';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['mml_value_invalid']);
|
||||
expect(ids).toEqual(['mml_value_invalid']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -267,11 +310,12 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(1);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '1024KB';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['half_estimated_mml_greater_than_mml']);
|
||||
expect(ids).toEqual(['half_estimated_mml_greater_than_mml']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -279,11 +323,12 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(1);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '6MB';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(getMockCallWithRequest(), job, duration).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['half_estimated_mml_greater_than_mml']);
|
||||
expect(ids).toEqual(['half_estimated_mml_greater_than_mml']);
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -291,11 +336,16 @@ describe('ML - validateModelMemoryLimit', () => {
|
|||
const dtrs = createDetectors(1);
|
||||
const job = getJobConfig(['instance'], dtrs);
|
||||
const duration = { start: 0, end: 1 };
|
||||
// @ts-ignore
|
||||
job.analysis_limits.model_memory_limit = '20MB';
|
||||
|
||||
return validateModelMemoryLimit(callWithRequest, job, duration).then(messages => {
|
||||
return validateModelMemoryLimit(
|
||||
getMockCallWithRequest({ 'ml.estimateModelMemory': { model_memory_estimate: '20mb' } }),
|
||||
job,
|
||||
duration
|
||||
).then(messages => {
|
||||
const ids = messages.map(m => m.id);
|
||||
expect(ids).to.eql(['success_mml']);
|
||||
expect(ids).toEqual(['success_mml']);
|
||||
});
|
||||
});
|
||||
});
|
|
@ -0,0 +1,135 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */

import numeral from '@elastic/numeral';
import { APICaller } from 'kibana/server';
import { CombinedJob } from '../../../common/types/anomaly_detection_jobs';
import { validateJobObject } from './validate_job_object';
import { calculateModelMemoryLimitProvider } from '../calculate_model_memory_limit';
import { ALLOWED_DATA_UNITS } from '../../../common/constants/validation';

// The minimum value the backend expects is 1MByte
const MODEL_MEMORY_LIMIT_MINIMUM_BYTES = 1048576;

export async function validateModelMemoryLimit(
  callWithRequest: APICaller,
  job: CombinedJob,
  duration?: { start?: number; end?: number }
) {
  validateJobObject(job);

  // retrieve the model memory limit specified by the user in the job config.
  // note, this will probably be the auto generated value, unless the user has
  // over written it.
  const mml = job?.analysis_limits?.model_memory_limit?.toUpperCase() ?? null;

  const messages = [];

  // check that mml is a valid data format
  if (mml !== null) {
    const mmlSplit = mml.match(/\d+(\w+)/);
    const unit = mmlSplit && mmlSplit.length === 2 ? mmlSplit[1] : null;

    if (unit === null || !ALLOWED_DATA_UNITS.includes(unit)) {
      messages.push({
        id: 'mml_value_invalid',
        mml,
      });
      // mml is not a valid data format.
      // abort all other tests
      return messages;
    }
  }

  // if there is no duration, do not run the estimate test
  const runCalcModelMemoryTest =
    duration && typeof duration?.start !== undefined && duration?.end !== undefined;

  // retrieve the max_model_memory_limit value from the server
  // this will be unset unless the user has set this on their cluster
  const maxModelMemoryLimit: string | undefined = (
    await callWithRequest('ml.info')
  )?.limits?.max_model_memory_limit?.toUpperCase();

  if (runCalcModelMemoryTest) {
    const { modelMemoryLimit } = await calculateModelMemoryLimitProvider(callWithRequest)(
      job.analysis_config,
      job.datafeed_config.indices.join(','),
      job.datafeed_config.query,
      job.data_description.time_field,
      duration!.start as number,
      duration!.end as number,
      true
    );
    // @ts-ignore
    const mmlEstimateBytes: number = numeral(modelMemoryLimit).value();

    let runEstimateGreaterThenMml = true;
    // if max_model_memory_limit has been set,
    // make sure the estimated value is not greater than it.
    if (typeof maxModelMemoryLimit !== 'undefined') {
      // @ts-ignore
      const maxMmlBytes: number = numeral(maxModelMemoryLimit).value();
      if (mmlEstimateBytes > maxMmlBytes) {
        runEstimateGreaterThenMml = false;
        messages.push({
          id: 'estimated_mml_greater_than_max_mml',
          maxModelMemoryLimit,
          modelMemoryLimit,
        });
      }
    }

    // check to see if the estimated mml is greater that the user
    // specified mml
    // do not run this if we've already found that it's larger than
    // the max mml
    if (runEstimateGreaterThenMml && mml !== null) {
      // @ts-ignore
      const mmlBytes: number = numeral(mml).value();
      if (mmlBytes < MODEL_MEMORY_LIMIT_MINIMUM_BYTES) {
        messages.push({
          id: 'mml_value_invalid',
          mml,
        });
      } else if (mmlEstimateBytes / 2 > mmlBytes) {
        messages.push({
          id: 'half_estimated_mml_greater_than_mml',
          maxModelMemoryLimit,
          mml,
        });
      } else if (mmlEstimateBytes > mmlBytes) {
        messages.push({
          id: 'estimated_mml_greater_than_mml',
          maxModelMemoryLimit,
          mml,
        });
      }
    }
  }

  // if max_model_memory_limit has been set,
  // make sure the user defined MML is not greater than it
  if (maxModelMemoryLimit !== undefined && mml !== null) {
    // @ts-ignore
    const maxMmlBytes = numeral(maxModelMemoryLimit).value();
    // @ts-ignore
    const mmlBytes = numeral(mml).value();
    if (mmlBytes > maxMmlBytes) {
      messages.push({
        id: 'mml_greater_than_max_mml',
        maxModelMemoryLimit,
        mml,
      });
    }
  }

  if (messages.length === 0 && runCalcModelMemoryTest === true) {
    messages.push({ id: 'success_mml' });
  }

  return messages;
}
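A short sketch of how a caller might consume the validator above; `callWithRequest` and `job` are assumed to come from the job validation route, and the time range is hypothetical. The returned messages carry ids such as 'success_mml', 'mml_value_invalid', 'mml_greater_than_max_mml', 'estimated_mml_greater_than_mml', 'half_estimated_mml_greater_than_mml' and 'estimated_mml_greater_than_max_mml':

```ts
import { APICaller } from 'kibana/server';
import { CombinedJob } from '../../../common/types/anomaly_detection_jobs';
import { validateModelMemoryLimit } from './validate_model_memory_limit';

async function checkMml(callWithRequest: APICaller, job: CombinedJob) {
  const messages = await validateModelMemoryLimit(callWithRequest, job, {
    start: 1560297859000, // hypothetical datafeed start
    end: 1562975136000,   // hypothetical datafeed end
  });
  // e.g. ['success_mml'] when the configured mml passes all checks
  return messages.map(m => m.id);
}
```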
@ -148,7 +148,7 @@ export function jobRoutes({ router, mlLicense }: RouteInitialization) {
        params: schema.object({
          jobId: schema.string(),
        }),
        body: schema.object({ ...anomalyDetectionJobSchema }),
        body: schema.object(anomalyDetectionJobSchema),
      },
    },
    mlLicense.fullLicenseAPIGuard(async (context, request, response) => {

@ -7,6 +7,7 @@
import Boom from 'boom';
import { RequestHandlerContext } from 'kibana/server';
import { schema, TypeOf } from '@kbn/config-schema';
import { AnalysisConfig } from '../../common/types/anomaly_detection_jobs';
import { wrapError } from '../client/error_wrapper';
import { RouteInitialization } from '../types';
import {

@ -29,23 +30,12 @@ export function jobValidationRoutes({ router, mlLicense }: RouteInitialization,
    context: RequestHandlerContext,
    payload: CalculateModelMemoryLimitPayload
  ) {
    const {
      indexPattern,
      splitFieldName,
      query,
      fieldNames,
      influencerNames,
      timeFieldName,
      earliestMs,
      latestMs,
    } = payload;
    const { analysisConfig, indexPattern, query, timeFieldName, earliestMs, latestMs } = payload;

    return calculateModelMemoryLimitProvider(context.ml!.mlClient.callAsCurrentUser)(
      analysisConfig as AnalysisConfig,
      indexPattern,
      splitFieldName,
      query,
      fieldNames,
      influencerNames,
      timeFieldName,
      earliestMs,
      latestMs

@ -102,7 +92,7 @@ export function jobValidationRoutes({ router, mlLicense }: RouteInitialization,
   *
   * @api {post} /api/ml/validate/calculate_model_memory_limit Calculates model memory limit
   * @apiName CalculateModelMemoryLimit
   * @apiDescription Calculates the model memory limit
   * @apiDescription Calls _estimate_model_memory endpoint to retrieve model memory estimation.
   *
   * @apiSuccess {String} modelMemoryLimit
   */

@ -63,14 +63,16 @@ export const anomalyDetectionUpdateJobSchema = {
  groups: schema.maybe(schema.arrayOf(schema.maybe(schema.string()))),
};

export const analysisConfigSchema = schema.object({
  bucket_span: schema.maybe(schema.string()),
  summary_count_field_name: schema.maybe(schema.string()),
  detectors: schema.arrayOf(detectorSchema),
  influencers: schema.arrayOf(schema.maybe(schema.string())),
  categorization_field_name: schema.maybe(schema.string()),
});

export const anomalyDetectionJobSchema = {
  analysis_config: schema.object({
    bucket_span: schema.maybe(schema.string()),
    summary_count_field_name: schema.maybe(schema.string()),
    detectors: schema.arrayOf(detectorSchema),
    influencers: schema.arrayOf(schema.maybe(schema.string())),
    categorization_field_name: schema.maybe(schema.string()),
  }),
  analysis_config: analysisConfigSchema,
  analysis_limits: schema.maybe(
    schema.object({
      categorization_examples_limit: schema.maybe(schema.number()),

@@ -5,7 +5,7 @@
 */

import { schema } from '@kbn/config-schema';
import { anomalyDetectionJobSchema } from './anomaly_detectors_schema';
import { analysisConfigSchema, anomalyDetectionJobSchema } from './anomaly_detectors_schema';
import { datafeedConfigSchema } from './datafeeds_schema';

export const estimateBucketSpanSchema = schema.object({
@@ -20,11 +20,9 @@ export const estimateBucketSpanSchema = schema.object({
});

export const modelMemoryLimitSchema = schema.object({
  analysisConfig: analysisConfigSchema,
  indexPattern: schema.string(),
  splitFieldName: schema.string(),
  query: schema.any(),
  fieldNames: schema.arrayOf(schema.string()),
  influencerNames: schema.arrayOf(schema.maybe(schema.string())),
  timeFieldName: schema.string(),
  earliestMs: schema.number(),
  latestMs: schema.number(),
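Since the request schema now nests analysisConfigSchema and drops the flat splitFieldName, fieldNames and influencerNames properties, payloads built for the old shape should fail validation. A minimal sketch of that check is below; it assumes the default behavior of @kbn/config-schema object types (unknown keys are rejected and required keys must be present), and the relative import path is illustrative.

// Illustrative unit-test style check; the import path is hypothetical.
import { modelMemoryLimitSchema } from './job_validation_schema';

// A legacy-style payload that still sends the removed flat fields and no analysisConfig.
const legacyPayload = {
  indexPattern: 'ecommerce',
  splitFieldName: 'geoip.city_name',
  fieldNames: ['products.base_price'],
  influencerNames: ['geoip.city_name'],
  query: { bool: { must: [{ match_all: {} }], filter: [], must_not: [] } },
  timeFieldName: 'order_date',
  earliestMs: 1560297859000,
  latestMs: 1562975136000,
};

try {
  // Assuming the default @kbn/config-schema behavior, this throws: analysisConfig is
  // required and the flat splitFieldName/fieldNames/influencerNames keys are unknown.
  modelMemoryLimitSchema.validate(legacyPayload);
} catch (e) {
  console.log('legacy payload rejected:', (e as Error).message);
}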
@@ -21,14 +21,20 @@ export default ({ getService }: FtrProviderContext) => {

  const testDataList = [
    {
      testTitleSuffix: 'with 0 metrics, 0 influencers and no split field',
      testTitleSuffix: 'when no partition field is provided with regular function',
      user: USER.ML_POWERUSER,
      requestBody: {
        indexPattern: 'ecommerce',
        splitFieldName: '',
        analysisConfig: {
          bucket_span: '15m',
          detectors: [
            {
              function: 'mean',
            },
          ],
          influencers: [],
        },
        query: { bool: { must: [{ match_all: {} }], filter: [], must_not: [] } },
        fieldNames: ['__ml_event_rate_count__'],
        influencerNames: [],
        timeFieldName: 'order_date',
        earliestMs: 1560297859000,
        latestMs: 1562975136000,
@@ -38,7 +44,8 @@ export default ({ getService }: FtrProviderContext) => {
        responseBody: {
          statusCode: 400,
          error: 'Bad Request',
          message: "[illegal_argument_exception] specified fields can't be null or empty",
          message:
            '[status_exception] Unless a count or temporal function is used one of field_name, by_field_name or over_field_name must be set',
        },
      },
    },
@@ -47,72 +54,79 @@ export default ({ getService }: FtrProviderContext) => {
      user: USER.ML_POWERUSER,
      requestBody: {
        indexPattern: 'ecommerce',
        splitFieldName: 'geoip.city_name',
        analysisConfig: {
          bucket_span: '15m',
          detectors: [
            {
              function: 'avg',
              field_name: 'geoip.city_name',
              by_field_name: 'geoip.city_name',
            },
          ],
          influencers: ['geoip.city_name'],
        },
        query: { bool: { must: [{ match_all: {} }], filter: [], must_not: [] } },
        fieldNames: ['products.base_price'],
        influencerNames: ['geoip.city_name'],
        timeFieldName: 'order_date',
        earliestMs: 1560297859000,
        latestMs: 1562975136000,
      },
      expected: {
        responseCode: 200,
        responseBody: { modelMemoryLimit: '12MB' },
        responseBody: { modelMemoryLimit: '11MB', estimatedModelMemoryLimit: '11MB' },
      },
    },
    {
      testTitleSuffix: 'with 3 metrics, 3 influencers, split by city',
      testTitleSuffix: 'with 3 influencers, split by city',
      user: USER.ML_POWERUSER,
      requestBody: {
        indexPattern: 'ecommerce',
        splitFieldName: 'geoip.city_name',
        analysisConfig: {
          bucket_span: '15m',
          detectors: [
            {
              function: 'mean',
              by_field_name: 'geoip.city_name',
              field_name: 'geoip.city_name',
            },
          ],
          influencers: ['geoip.city_name', 'customer_gender', 'customer_full_name.keyword'],
        },
        query: { bool: { must: [{ match_all: {} }], filter: [], must_not: [] } },
        fieldNames: ['products.base_price', 'taxful_total_price', 'products.discount_amount'],
        influencerNames: ['geoip.city_name', 'customer_gender', 'customer_full_name.keyword'],
        timeFieldName: 'order_date',
        earliestMs: 1560297859000,
        latestMs: 1562975136000,
      },
      expected: {
        responseCode: 200,
        responseBody: { modelMemoryLimit: '14MB' },
        responseBody: { estimatedModelMemoryLimit: '11MB', modelMemoryLimit: '11MB' },
      },
    },
    {
      testTitleSuffix: 'with 4 metrics, 4 influencers, split by customer_id',
      testTitleSuffix: '4 influencers, split by customer_id and filtering by country code',
      user: USER.ML_POWERUSER,
      requestBody: {
        indexPattern: 'ecommerce',
        splitFieldName: 'customer_id',
        query: { bool: { must: [{ match_all: {} }], filter: [], must_not: [] } },
        fieldNames: [
          'geoip.country_iso_code',
          'taxless_total_price',
          'taxful_total_price',
          'products.discount_amount',
        ],
        influencerNames: [
          'customer_id',
          'geoip.country_iso_code',
          'products.discount_percentage',
          'products.discount_amount',
        ],
        timeFieldName: 'order_date',
        earliestMs: 1560297859000,
        latestMs: 1562975136000,
      },
      expected: {
        responseCode: 200,
        responseBody: { modelMemoryLimit: '23MB' },
      },
    },
    {
      testTitleSuffix:
        'with 4 metrics, 4 influencers, split by customer_id and filtering by country code',
      user: USER.ML_POWERUSER,
      requestBody: {
        indexPattern: 'ecommerce',
        splitFieldName: 'customer_id',
        analysisConfig: {
          bucket_span: '2d',
          detectors: [
            {
              function: 'mean',
              by_field_name: 'customer_id.city_name',
              field_name: 'customer_id.city_name',
            },
            {
              function: 'avg',
              by_field_name: 'manufacturer.keyword',
              field_name: 'manufacturer.keyword',
            },
          ],
          influencers: [
            'geoip.country_iso_code',
            'products.discount_percentage',
            'products.discount_amount',
            'day_of_week',
          ],
        },
        query: {
          bool: {
            filter: {
@@ -122,25 +136,13 @@ export default ({ getService }: FtrProviderContext) => {
            },
          },
        },
        fieldNames: [
          'geoip.country_iso_code',
          'taxless_total_price',
          'taxful_total_price',
          'products.discount_amount',
        ],
        influencerNames: [
          'customer_id',
          'geoip.country_iso_code',
          'products.discount_percentage',
          'products.discount_amount',
        ],
        timeFieldName: 'order_date',
        earliestMs: 1560297859000,
        latestMs: 1562975136000,
      },
      expected: {
        responseCode: 200,
        responseBody: { modelMemoryLimit: '14MB' },
        responseBody: { estimatedModelMemoryLimit: '12MB', modelMemoryLimit: '12MB' },
      },
    },
  ];
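The expected responses above return both estimatedModelMemoryLimit, the raw value coming back from the _estimate_model_memory endpoint, and modelMemoryLimit, the value actually suggested for the job. A plausible reading of how the two relate when a cluster-wide max_model_memory_limit is configured is sketched below; the capping rule and the helper names are inferred from the commit message and the test data, not quoted from the source.

// Sketch only: derive the suggested MML from the raw estimate, honouring an optional cluster cap.
declare function toBytes(mml: string): number; // same hypothetical helper as in the earlier sketch

function suggestModelMemoryLimit(
  estimatedModelMemoryLimit: string,
  maxModelMemoryLimit?: string
): { estimatedModelMemoryLimit: string; modelMemoryLimit: string } {
  const capped =
    maxModelMemoryLimit !== undefined &&
    toBytes(estimatedModelMemoryLimit) > toBytes(maxModelMemoryLimit);

  return {
    estimatedModelMemoryLimit,
    // Never suggest more than the cluster allows.
    modelMemoryLimit: capped ? maxModelMemoryLimit! : estimatedModelMemoryLimit,
  };
}

// With no cap both values match, as in the test expectations:
// suggestModelMemoryLimit('11MB') -> { estimatedModelMemoryLimit: '11MB', modelMemoryLimit: '11MB' }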