[ML] Replace all use of date_histogram interval with fixed_interval (#76876)

* [ML] Replace all use of date_histogram interval with fixed_interval

* [ML] Fix data visualizer API test
Pete Harverson 2020-09-09 14:49:22 +01:00 committed by GitHub
parent bb0b8f80a4
commit a0495090c0
28 changed files with 99 additions and 74 deletions
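Elasticsearch 7.x deprecated the combined date_histogram interval parameter in favour of the explicit fixed_interval and calendar_interval parameters. The pattern repeated across the files below is, roughly, the following sketch (values taken from the first hunk; the surrounding request body is omitted):

// Before (removed): deprecated interval, here a numeric epoch-millisecond value.
const legacyAgg = {
  date_histogram: { field: '@timestamp', interval: 900000 },
};

// After (added): an explicit fixed-length bucket expressed as a duration string.
const updatedAgg = {
  date_histogram: { field: '@timestamp', fixed_interval: '15m' },
};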

View file

@ -69,9 +69,7 @@
"datafeed_id": "datafeed-farequote",
"job_id": "farequote",
"query_delay": "115823ms",
"indices": [
"farequote"
],
"indices": ["farequote"],
"query": {
"bool": {
"must": [
@ -103,7 +101,7 @@
"buckets": {
"date_histogram": {
"field": "@timestamp",
"interval": 900000,
"fixed_interval": "15m",
"offset": 0,
"order": {
"_key": "asc"

View file

@ -80,7 +80,7 @@ export class DataLoader {
earliest: number | undefined,
latest: number | undefined,
fields: FieldRequestConfig[],
interval?: string
interval?: number
): Promise<any[]> {
const stats = await ml.getVisualizerFieldStats({
indexPatternTitle: this._indexPatternTitle,

View file

@ -348,7 +348,7 @@ export const Page: FC = () => {
earliest,
latest,
existMetricFields,
aggInterval.expression
aggInterval.asMilliseconds()
);
// Add the metric stats to the existing stats in the corresponding config.
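
On the client side the change boils down to passing the bucket span as a number of milliseconds instead of its textual expression. A minimal sketch, assuming the interval is held as a moment duration (the commit's aggInterval object exposes the same asMilliseconds() accessor):

import moment from 'moment';

// Stand-in for the chart/visualizer aggregation interval.
const bucketSpan = moment.duration(15, 'minutes');
const intervalMs = bucketSpan.asMilliseconds(); // 900000 — now passed to the stats services
// previously the textual expression of the interval (e.g. '15m') was passed instead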

View file

@ -111,7 +111,7 @@ export const anomalyDataChange = function (
// Query 1 - load the raw metric data.
function getMetricData(config, range) {
const { jobId, detectorIndex, entityFields, interval } = config;
const { jobId, detectorIndex, entityFields, bucketSpanSeconds } = config;
const job = mlJobService.getJob(jobId);
@ -122,14 +122,14 @@ export const anomalyDataChange = function (
return mlResultsService
.getMetricData(
config.datafeedConfig.indices,
config.entityFields,
entityFields,
datafeedQuery,
config.metricFunction,
config.metricFieldName,
config.timeField,
range.min,
range.max,
config.interval
bucketSpanSeconds * 1000
)
.toPromise();
} else {
@ -175,7 +175,14 @@ export const anomalyDataChange = function (
};
return mlResultsService
.getModelPlotOutput(jobId, detectorIndex, criteriaFields, range.min, range.max, interval)
.getModelPlotOutput(
jobId,
detectorIndex,
criteriaFields,
range.min,
range.max,
bucketSpanSeconds * 1000
)
.toPromise()
.then((resp) => {
// Return data in format required by the explorer charts.
@ -218,7 +225,7 @@ export const anomalyDataChange = function (
[config.jobId],
range.min,
range.max,
config.interval,
config.bucketSpanSeconds * 1000,
1,
MAX_SCHEDULED_EVENTS
)
@ -252,7 +259,7 @@ export const anomalyDataChange = function (
config.timeField,
range.min,
range.max,
config.interval
config.bucketSpanSeconds * 1000
);
}
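
The explorer chart configs already carry the job's bucket span in seconds, so the millisecond interval the results service now expects is derived with a single multiplication. A sketch with an illustrative 15-minute bucket span:

// Illustrative config: a 15-minute bucket span stored in seconds.
const config = { bucketSpanSeconds: 900 };
const intervalMs = config.bucketSpanSeconds * 1000; // 900000 ms, passed to mlResultsService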

View file

@ -161,7 +161,7 @@ export class ResultsLoader {
[],
this._lastModelTimeStamp,
this._jobCreator.end,
`${this._chartInterval.getInterval().asMilliseconds()}ms`,
this._chartInterval.getInterval().asMilliseconds(),
agg.mlModelPlotAgg
)
.toPromise();
@ -211,7 +211,7 @@ export class ResultsLoader {
[this._jobCreator.jobId],
this._jobCreator.start,
this._jobCreator.end,
`${this._chartInterval.getInterval().asMilliseconds()}ms`,
this._chartInterval.getInterval().asMilliseconds(),
1
);
@ -233,7 +233,7 @@ export class ResultsLoader {
this._jobCreator.jobId,
this._jobCreator.start,
this._jobCreator.end,
`${this._chartInterval.getInterval().asMilliseconds()}ms`,
this._chartInterval.getInterval().asMilliseconds(),
this._detectorSplitFieldFilters
);

View file

@ -32,7 +32,7 @@ export function getScoresByRecord(
jobId: string,
earliestMs: number,
latestMs: number,
interval: string,
intervalMs: number,
firstSplitField: SplitFieldWithValue | null
): Promise<ProcessedResults> {
return new Promise((resolve, reject) => {
@ -104,7 +104,7 @@ export function getScoresByRecord(
byTime: {
date_histogram: {
field: 'timestamp',
interval,
fixed_interval: `${intervalMs}ms`,
min_doc_count: 1,
extended_bounds: {
min: earliestMs,
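
Callers now hand over a plain number, and the duration string is rendered once, next to the aggregation itself. A hypothetical helper (not part of the commit) showing the shape each of the query builders below now produces:

// Hypothetical sketch: render the millisecond interval into a fixed_interval string.
function buildDateHistogram(field: string, intervalMs: number) {
  return {
    date_histogram: {
      field,
      fixed_interval: `${intervalMs}ms`,
      min_doc_count: 1,
    },
  };
}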

View file

@ -180,7 +180,7 @@ export class AnomalyTimelineService {
// Pass the interval in seconds as the swim lane relies on a fixed number of seconds between buckets
// which wouldn't be the case if e.g. '1M' was used.
const interval = `${swimlaneBucketInterval.asSeconds()}s`;
const intervalMs = swimlaneBucketInterval.asMilliseconds();
let response;
if (viewBySwimlaneFieldName === VIEW_BY_JOB_LABEL) {
@ -190,7 +190,7 @@ export class AnomalyTimelineService {
jobIds,
searchBounds.min.valueOf(),
searchBounds.max.valueOf(),
interval,
intervalMs,
perPage,
fromPage
);
@ -201,7 +201,7 @@ export class AnomalyTimelineService {
fieldValues,
searchBounds.min.valueOf(),
searchBounds.max.valueOf(),
interval,
intervalMs,
swimlaneLimit,
perPage,
fromPage,
@ -269,7 +269,7 @@ export class AnomalyTimelineService {
selectedJobIds,
earliestMs,
latestMs,
this.getSwimlaneBucketInterval(selectedJobs, swimlaneContainerWidth).asSeconds() + 's',
this.getSwimlaneBucketInterval(selectedJobs, swimlaneContainerWidth).asMilliseconds(),
swimlaneLimit
);
return Object.keys(resp.results);

View file

@ -25,7 +25,7 @@ export const mlForecastService: {
entityFields: any[],
earliestMs: number,
latestMs: number,
interval: string,
intervalMs: number,
aggType: any
) => Observable<ForecastData>;

View file

@ -153,7 +153,7 @@ function getForecastData(
entityFields,
earliestMs,
latestMs,
interval,
intervalMs,
aggType
) {
// Extract the partition, by, over fields on which to filter.
@ -257,7 +257,7 @@ function getForecastData(
times: {
date_histogram: {
field: 'timestamp',
interval: interval,
fixed_interval: `${intervalMs}ms`,
min_doc_count: 1,
},
aggs: {

View file

@ -485,7 +485,7 @@ export function mlApiServicesProvider(httpService: HttpService) {
earliest?: number;
latest?: number;
samplerShardSize?: number;
interval?: string;
interval?: number;
fields?: FieldRequestConfig[];
maxExamples?: number;
}) {

View file

@ -70,7 +70,7 @@ export function resultsServiceRxProvider(mlApiServices: MlApiServices) {
timeFieldName: string,
earliestMs: number,
latestMs: number,
interval: string
intervalMs: number
): Observable<MetricData> {
// Build the criteria to use in the bool filter part of the request.
// Add criteria for the time range, entity fields,
@ -136,7 +136,7 @@ export function resultsServiceRxProvider(mlApiServices: MlApiServices) {
byTime: {
date_histogram: {
field: timeFieldName,
interval,
fixed_interval: `${intervalMs}ms`,
min_doc_count: 0,
},
},
@ -202,7 +202,7 @@ export function resultsServiceRxProvider(mlApiServices: MlApiServices) {
criteriaFields: any[],
earliestMs: number,
latestMs: number,
interval: string,
intervalMs: number,
aggType?: { min: any; max: any }
): Observable<ModelPlotOutput> {
const obj: ModelPlotOutput = {
@ -291,7 +291,7 @@ export function resultsServiceRxProvider(mlApiServices: MlApiServices) {
times: {
date_histogram: {
field: 'timestamp',
interval,
fixed_interval: `${intervalMs}ms`,
min_doc_count: 0,
},
aggs: {
@ -446,7 +446,7 @@ export function resultsServiceRxProvider(mlApiServices: MlApiServices) {
jobIds: string[] | undefined,
earliestMs: number,
latestMs: number,
interval: string,
intervalMs: number,
maxJobs: number,
maxEvents: number
): Observable<ScheduledEventsByBucket> {
@ -518,7 +518,7 @@ export function resultsServiceRxProvider(mlApiServices: MlApiServices) {
times: {
date_histogram: {
field: 'timestamp',
interval,
fixed_interval: `${intervalMs}ms`,
min_doc_count: 1,
},
aggs: {

View file

@ -13,7 +13,7 @@ export function resultsServiceProvider(
jobIds: string[],
earliestMs: number,
latestMs: number,
interval: string | number,
intervalMs: number,
perPage?: number,
fromPage?: number
): Promise<any>;
@ -41,7 +41,7 @@ export function resultsServiceProvider(
influencerFieldValues: string[],
earliestMs: number,
latestMs: number,
interval: string,
intervalMs: number,
maxResults: number,
perPage: number,
fromPage: number,
@ -57,8 +57,25 @@ export function resultsServiceProvider(
timeFieldName: string,
earliestMs: number,
latestMs: number,
interval: string | number
intervalMs: number
): Promise<any>;
getEventDistributionData(
index: string,
splitField: string,
filterField: string,
query: any,
metricFunction: string, // ES aggregation name
metricFieldName: string,
timeFieldName: string,
earliestMs: number,
latestMs: number,
intervalMs: number
): Promise<any>;
getRecordMaxScoreByTime(
jobId: string,
criteriaFields: any[],
earliestMs: number,
latestMs: number,
intervalMs: number
): Promise<any>;
getEventDistributionData(): Promise<any>;
getRecordMaxScoreByTime(): Promise<any>;
};

View file

@ -28,7 +28,7 @@ export function resultsServiceProvider(mlApiServices) {
// Pass an empty array or ['*'] to search over all job IDs.
// Returned response contains a results property, with a key for job
// which has results for the specified time range.
getScoresByBucket(jobIds, earliestMs, latestMs, interval, perPage = 10, fromPage = 1) {
getScoresByBucket(jobIds, earliestMs, latestMs, intervalMs, perPage = 10, fromPage = 1) {
return new Promise((resolve, reject) => {
const obj = {
success: true,
@ -116,7 +116,7 @@ export function resultsServiceProvider(mlApiServices) {
byTime: {
date_histogram: {
field: 'timestamp',
interval: interval,
fixed_interval: `${intervalMs}ms`,
min_doc_count: 1,
extended_bounds: {
min: earliestMs,
@ -492,7 +492,7 @@ export function resultsServiceProvider(mlApiServices) {
influencerFieldValues,
earliestMs,
latestMs,
interval,
intervalMs,
maxResults = ANOMALY_SWIM_LANE_HARD_LIMIT,
perPage = SWIM_LANE_DEFAULT_PAGE_SIZE,
fromPage = 1,
@ -615,7 +615,7 @@ export function resultsServiceProvider(mlApiServices) {
byTime: {
date_histogram: {
field: 'timestamp',
interval,
fixed_interval: `${intervalMs}ms`,
min_doc_count: 1,
},
aggs: {
@ -1033,7 +1033,7 @@ export function resultsServiceProvider(mlApiServices) {
// Extra query object can be supplied, or pass null if no additional query.
// Returned response contains a results property, which is an object
// of document counts against time (epoch millis).
getEventRateData(index, query, timeFieldName, earliestMs, latestMs, interval) {
getEventRateData(index, query, timeFieldName, earliestMs, latestMs, intervalMs) {
return new Promise((resolve, reject) => {
const obj = { success: true, results: {} };
@ -1074,7 +1074,7 @@ export function resultsServiceProvider(mlApiServices) {
eventRate: {
date_histogram: {
field: timeFieldName,
interval: interval,
fixed_interval: `${intervalMs}ms`,
min_doc_count: 0,
extended_bounds: {
min: earliestMs,
@ -1118,7 +1118,7 @@ export function resultsServiceProvider(mlApiServices) {
timeFieldName,
earliestMs,
latestMs,
interval
intervalMs
) {
return new Promise((resolve, reject) => {
if (splitField === undefined) {
@ -1187,7 +1187,7 @@ export function resultsServiceProvider(mlApiServices) {
byTime: {
date_histogram: {
field: timeFieldName,
interval: interval,
fixed_interval: `${intervalMs}ms`,
min_doc_count: AGGREGATION_MIN_DOC_COUNT,
},
aggs: {
@ -1277,7 +1277,7 @@ export function resultsServiceProvider(mlApiServices) {
// criteria, time range, and aggregation interval.
// criteriaFields parameter must be an array, with each object in the array having 'fieldName'
// 'fieldValue' properties.
getRecordMaxScoreByTime(jobId, criteriaFields, earliestMs, latestMs, interval) {
getRecordMaxScoreByTime(jobId, criteriaFields, earliestMs, latestMs, intervalMs) {
return new Promise((resolve, reject) => {
const obj = {
success: true,
@ -1331,7 +1331,7 @@ export function resultsServiceProvider(mlApiServices) {
times: {
date_histogram: {
field: 'timestamp',
interval: interval,
fixed_interval: `${intervalMs}ms`,
min_doc_count: 1,
},
aggs: {

View file

@ -29,7 +29,7 @@ function getMetricData(
entityFields: EntityField[],
earliestMs: number,
latestMs: number,
interval: string
intervalMs: number
): Observable<ModelPlotOutput> {
if (
isModelPlotChartableForDetector(job, detectorIndex) &&
@ -76,7 +76,7 @@ function getMetricData(
criteriaFields,
earliestMs,
latestMs,
interval
intervalMs
);
} else {
const obj: ModelPlotOutput = {
@ -96,7 +96,7 @@ function getMetricData(
chartConfig.timeField,
earliestMs,
latestMs,
interval
intervalMs
)
.pipe(
map((resp) => {

View file

@ -629,7 +629,7 @@ export class TimeSeriesExplorer extends React.Component {
nonBlankEntities,
searchBounds.min.valueOf(),
searchBounds.max.valueOf(),
stateUpdate.contextAggregationInterval.expression
stateUpdate.contextAggregationInterval.asMilliseconds()
)
.toPromise()
.then((resp) => {
@ -652,7 +652,7 @@ export class TimeSeriesExplorer extends React.Component {
this.getCriteriaFields(detectorIndex, entityControls),
searchBounds.min.valueOf(),
searchBounds.max.valueOf(),
stateUpdate.contextAggregationInterval.expression
stateUpdate.contextAggregationInterval.asMilliseconds()
)
.then((resp) => {
const fullRangeRecordScoreData = processRecordScoreResults(resp.results);
@ -703,7 +703,7 @@ export class TimeSeriesExplorer extends React.Component {
nonBlankEntities,
searchBounds.min.valueOf(),
searchBounds.max.valueOf(),
stateUpdate.contextAggregationInterval.expression,
stateUpdate.contextAggregationInterval.asMilliseconds(),
aggType
)
.toPromise()

View file

@ -61,7 +61,7 @@ export function getFocusData(
nonBlankEntities,
searchBounds.min.valueOf(),
searchBounds.max.valueOf(),
focusAggregationInterval.expression
focusAggregationInterval.asMilliseconds()
),
// Query 2 - load all the records across selected time range for the chart anomaly markers.
mlResultsService.getRecordsForCriteria(
@ -77,7 +77,7 @@ export function getFocusData(
[selectedJob.job_id],
searchBounds.min.valueOf(),
searchBounds.max.valueOf(),
focusAggregationInterval.expression,
focusAggregationInterval.asMilliseconds(),
1,
MAX_SCHEDULED_EVENTS
),
@ -123,7 +123,7 @@ export function getFocusData(
nonBlankEntities,
searchBounds.min.valueOf(),
searchBounds.max.valueOf(),
focusAggregationInterval.expression,
focusAggregationInterval.asMilliseconds(),
aggType
);
})()

View file

@ -56,7 +56,7 @@ export function polledDataCheckerFactory({ asCurrentUser }) {
date_histogram: {
min_doc_count: 1,
field: this.timeField,
interval: `${intervalMs}ms`,
fixed_interval: `${intervalMs}ms`,
},
},
},

View file

@ -166,7 +166,7 @@ export function singleSeriesCheckerFactory({ asCurrentUser }) {
non_empty_buckets: {
date_histogram: {
field: this.timeField,
interval: `${intervalMs}ms`,
fixed_interval: `${intervalMs}ms`,
},
},
},

View file

@ -10,7 +10,7 @@
"buckets": {
"date_histogram": {
"field": "timestamp",
"interval": 3600000
"fixed_interval": "1h"
},
"aggregations": {
"timestamp": {

View file

@ -468,7 +468,7 @@ export class DataVisualizer {
timeFieldName: string,
earliestMs: number,
latestMs: number,
interval: number,
intervalMs: number,
maxExamples: number
): Promise<BatchStats[]> {
// Batch up fields by type, getting stats for multiple fields at a time.
@ -526,7 +526,7 @@ export class DataVisualizer {
timeFieldName,
earliestMs,
latestMs,
interval
intervalMs
);
batchStats.push(stats);
}
@ -710,7 +710,7 @@ export class DataVisualizer {
timeFieldName: string,
earliestMs: number,
latestMs: number,
interval: number
intervalMs: number
): Promise<DocumentCountStats> {
const index = indexPatternTitle;
const size = 0;
@ -718,11 +718,12 @@ export class DataVisualizer {
// Don't use the sampler aggregation as this can lead to some potentially
// confusing date histogram results depending on the date range of data amongst shards.
const aggs = {
eventRate: {
date_histogram: {
field: timeFieldName,
interval,
fixed_interval: `${intervalMs}ms`,
min_doc_count: 1,
},
},
@ -756,7 +757,7 @@ export class DataVisualizer {
return {
documentCounts: {
interval,
interval: intervalMs,
buckets,
},
};
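
On the server, the data visualizer now renders the millisecond value into the aggregation and echoes the same number back to the caller. An illustrative result shape, with values borrowed from the updated API test at the end of this diff (not a complete response):

const documentCountStats = {
  documentCounts: {
    interval: 86400000, // the intervalMs that was passed in, echoed back unchanged
    buckets: { '1454803200000': 846, '1454889600000': 846 },
  },
};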

View file

@ -114,7 +114,7 @@ function getSearchJsonFromConfig(
times: {
date_histogram: {
field: timeField,
interval: intervalMs,
fixed_interval: `${intervalMs}ms`,
min_doc_count: 0,
extended_bounds: {
min: start,

View file

@ -142,7 +142,7 @@ function getPopulationSearchJsonFromConfig(
times: {
date_histogram: {
field: timeField,
interval: intervalMs,
fixed_interval: `${intervalMs}ms`,
min_doc_count: 0,
extended_bounds: {
min: start,

View file

@ -20,7 +20,7 @@
"type": "index-pattern",
"id": "be14ceb0-66b1-11e9-91c9-ffa52374d341",
"attributes": {
"typeMeta": "{\"params\":{\"rollup_index\":\"cloud_roll_index\"},\"aggs\":{\"histogram\":{\"NetworkOut\":{\"agg\":\"histogram\",\"interval\":5},\"CPUUtilization\":{\"agg\":\"histogram\",\"interval\":5},\"NetworkIn\":{\"agg\":\"histogram\",\"interval\":5}},\"avg\":{\"NetworkOut\":{\"agg\":\"avg\"},\"CPUUtilization\":{\"agg\":\"avg\"},\"NetworkIn\":{\"agg\":\"avg\"},\"DiskReadBytes\":{\"agg\":\"avg\"}},\"min\":{\"NetworkOut\":{\"agg\":\"min\"},\"NetworkIn\":{\"agg\":\"min\"}},\"value_count\":{\"NetworkOut\":{\"agg\":\"value_count\"},\"DiskReadBytes\":{\"agg\":\"value_count\"},\"CPUUtilization\":{\"agg\":\"value_count\"},\"NetworkIn\":{\"agg\":\"value_count\"}},\"max\":{\"CPUUtilization\":{\"agg\":\"max\"},\"DiskReadBytes\":{\"agg\":\"max\"}},\"date_histogram\":{\"@timestamp\":{\"agg\":\"date_histogram\",\"delay\":\"1d\",\"interval\":\"5m\",\"time_zone\":\"UTC\"}},\"terms\":{\"instance\":{\"agg\":\"terms\"},\"sourcetype.keyword\":{\"agg\":\"terms\"},\"region\":{\"agg\":\"terms\"}},\"sum\":{\"DiskReadBytes\":{\"agg\":\"sum\"},\"NetworkOut\":{\"agg\":\"sum\"}}}}",
"typeMeta": "{\"params\":{\"rollup_index\":\"cloud_roll_index\"},\"aggs\":{\"histogram\":{\"NetworkOut\":{\"agg\":\"histogram\",\"interval\":5},\"CPUUtilization\":{\"agg\":\"histogram\",\"interval\":5},\"NetworkIn\":{\"agg\":\"histogram\",\"interval\":5}},\"avg\":{\"NetworkOut\":{\"agg\":\"avg\"},\"CPUUtilization\":{\"agg\":\"avg\"},\"NetworkIn\":{\"agg\":\"avg\"},\"DiskReadBytes\":{\"agg\":\"avg\"}},\"min\":{\"NetworkOut\":{\"agg\":\"min\"},\"NetworkIn\":{\"agg\":\"min\"}},\"value_count\":{\"NetworkOut\":{\"agg\":\"value_count\"},\"DiskReadBytes\":{\"agg\":\"value_count\"},\"CPUUtilization\":{\"agg\":\"value_count\"},\"NetworkIn\":{\"agg\":\"value_count\"}},\"max\":{\"CPUUtilization\":{\"agg\":\"max\"},\"DiskReadBytes\":{\"agg\":\"max\"}},\"date_histogram\":{\"@timestamp\":{\"agg\":\"date_histogram\",\"delay\":\"1d\",\"fixed_interval\":\"5m\",\"time_zone\":\"UTC\"}},\"terms\":{\"instance\":{\"agg\":\"terms\"},\"sourcetype.keyword\":{\"agg\":\"terms\"},\"region\":{\"agg\":\"terms\"}},\"sum\":{\"DiskReadBytes\":{\"agg\":\"sum\"},\"NetworkOut\":{\"agg\":\"sum\"}}}}",
"title": "cloud_roll_index",
"type": "rollup"
},

View file

@ -37,7 +37,7 @@
{
"agg": "date_histogram",
"delay": "1d",
"interval": "5m",
"fixed_interval": "5m",
"time_zone": "UTC"
}
],
@ -123,7 +123,7 @@
{
"agg": "date_histogram",
"delay": "1d",
"interval": "5m",
"fixed_interval": "5m",
"time_zone": "UTC"
}
],
@ -174,7 +174,7 @@
{
"agg": "date_histogram",
"delay": "1d",
"interval": "5m",
"fixed_interval": "5m",
"time_zone": "UTC"
}
]
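
The rollup fixtures change for the same underlying reason: rollup job date_histogram group configurations use fixed_interval in place of the removed interval key. The renamed group entry, as it appears in the fixture above:

const rollupDateHistogramGroup = {
  agg: 'date_histogram',
  delay: '1d',
  fixed_interval: '5m',
  time_zone: 'UTC',
};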

View file

@ -20,6 +20,7 @@
"DataVisualizer",
"GetOverallStats",
"GetStatsForFields",
"GetHistogramsForFields",
"AnomalyDetectors",
"CreateAnomalyDetectors",

View file

@ -84,7 +84,7 @@ export function dataVisualizerRoutes({ router, mlLicense }: RouteInitialization)
/**
* @apiGroup DataVisualizer
*
* @api {post} /api/ml/data_visualizer/get_field_stats/:indexPatternTitle Get histograms for fields
* @api {post} /api/ml/data_visualizer/get_field_histograms/:indexPatternTitle Get histograms for fields
* @apiName GetHistogramsForFields
* @apiDescription Returns the histograms on a list fields in the specified index pattern.
*

View file

@ -32,8 +32,8 @@ export const dataVisualizerFieldStatsSchema = schema.object({
earliest: schema.maybe(schema.number()),
/** Latest timestamp for search, as epoch ms (optional). */
latest: schema.maybe(schema.number()),
/** Aggregation interval to use for obtaining document counts over time (optional). */
interval: schema.maybe(schema.string()),
/** Aggregation interval, in milliseconds, to use for obtaining document counts over time (optional). */
interval: schema.maybe(schema.number()),
/** Maximum number of examples to return for text type fields. */
maxExamples: schema.number(),
});
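
With the schema switched to a number, requests to the field stats endpoint now supply the interval in milliseconds. An abbreviated, illustrative request body under the new schema (only the fields relevant to this change; not a complete payload):

const body = {
  timeFieldName: '@timestamp',
  samplerShardSize: -1, // no sampling
  interval: 86400000, // one day in milliseconds (previously the string '1d')
  maxExamples: 10,
};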

View file

@ -33,7 +33,7 @@ export default ({ getService }: FtrProviderContext) => {
],
samplerShardSize: -1, // No sampling, as otherwise counts could vary on each run.
timeFieldName: '@timestamp',
interval: '1d',
interval: 86400000,
maxExamples: 10,
},
expected: {
@ -41,7 +41,7 @@ export default ({ getService }: FtrProviderContext) => {
responseBody: [
{
documentCounts: {
interval: '1d',
interval: 86400000,
buckets: {
'1454803200000': 846,
'1454889600000': 846,
@ -145,6 +145,7 @@ export default ({ getService }: FtrProviderContext) => {
],
samplerShardSize: -1, // No sampling, as otherwise counts could vary on each run.
timeFieldName: '@timestamp',
interval: 86400000,
maxExamples: 10,
},
expected: {
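
For reference, the numeric interval used by the updated functional test is simply one day expressed in milliseconds:

const ONE_DAY_MS = 24 * 60 * 60 * 1000; // 86400000, matching the interval and documentCounts.interval values above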