[ML] Consolidate query_utils into package @kbn/ml-query-utils (#149224)

Consolidates more `query_utils` into package `@kbn/ml-query-utils`.
This commit is contained in:
Walter Rafelsberger 2023-01-24 13:07:43 +01:00 committed by GitHub
parent 83d799a9c7
commit 05d04ce1e0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
24 changed files with 127 additions and 203 deletions

View file

@ -1,3 +1,3 @@
# @kbn/ml-query-utils
Query utilities.
Query utilities to be used in UIs maintained by the @elastic/ml-ui team.

View file

@ -6,3 +6,5 @@
*/
export { addExcludeFrozenToQuery } from './src/add_exclude_frozen_to_query';
export { buildBaseFilterCriteria } from './src/build_base_filter_criteria';
export { getSafeAggregationName } from './src/get_safe_aggregation_name';

View file

@ -7,7 +7,7 @@
import { addExcludeFrozenToQuery } from './add_exclude_frozen_to_query';
describe('Util: addExcludeFrozenToQuery()', () => {
describe('addExcludeFrozenToQuery', () => {
test('Validation checks.', () => {
expect(
addExcludeFrozenToQuery({

View file

@ -0,0 +1,49 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { buildBaseFilterCriteria } from './build_base_filter_criteria';
describe('buildBaseFilterCriteria', () => {
  // Bounds of the time range used by every test, in epoch milliseconds.
  const earliestMs = 1483228800000; // 1 Jan 2017 00:00:00
  const latestMs = 1485907199000; // 31 Jan 2017 23:59:59

  // Example query clause that is expected to be passed through untouched.
  const query = {
    query_string: {
      query: 'region:sa-east-1',
      analyze_wildcard: true,
      default_field: '*',
    },
  };

  // Range clause expected for the `timestamp` field in both tests.
  const expectedRangeFilter = {
    range: {
      timestamp: {
        gte: earliestMs,
        lte: latestMs,
        format: 'epoch_millis',
      },
    },
  };

  test('returns correct criteria for time range', () => {
    const criteria = buildBaseFilterCriteria('timestamp', earliestMs, latestMs);

    expect(criteria).toEqual([expectedRangeFilter]);
  });

  test('returns correct criteria for time range and query', () => {
    const criteria = buildBaseFilterCriteria('timestamp', earliestMs, latestMs, query);

    // The range clause comes first, followed by the supplied query.
    expect(criteria).toEqual([expectedRangeFilter, query]);
  });
});

View file

@ -8,12 +8,16 @@
import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { Query } from '@kbn/es-query';
/*
* Contains utility functions for building and processing queries.
/**
* Builds the base filter criteria used in queries,
* adding criteria for the time range and an optional query.
*
* @param timeFieldName - optional time field name of the data view
* @param earliestMs - optional earliest timestamp of the selected time range
* @param latestMs - optional latest timestamp of the selected time range
* @param query - optional query
* @returns filter criteria
*/
// Builds the base filter criteria used in queries,
// adding criteria for the time range and an optional query.
export function buildBaseFilterCriteria(
timeFieldName?: string,
earliestMs?: number,
@ -21,6 +25,7 @@ export function buildBaseFilterCriteria(
query?: Query['query']
): estypes.QueryDslQueryContainer[] {
const filterCriteria = [];
if (timeFieldName && earliestMs && latestMs) {
filterCriteria.push({
range: {
@ -39,11 +44,3 @@ export function buildBaseFilterCriteria(
return filterCriteria;
}
// Produces a name that can be used as an elasticsearch aggregation name for the
// supplied field. A field name made up solely of ASCII letters, digits, '-', '_'
// and '.' is handed back unchanged; anything else (including an empty string) is
// replaced by a 'dummy' name built from the provided index, e.g. field_0, field_1.
export function getSafeAggregationName(fieldName: string, index: number): string {
  const isAlreadySafe = /^[a-zA-Z0-9-_.]+$/.test(fieldName);
  return isAlreadySafe ? fieldName : `field_${index}`;
}

View file

@ -0,0 +1,20 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { getSafeAggregationName } from './get_safe_aggregation_name';
describe('getSafeAggregationName', () => {
  // Each row is [supplied field name, name expected back when the index is 0].
  const cases: Array<[string, string]> = [
    ['foo', 'foo'],
    ['foo.bar', 'foo.bar'],
    ['foo&bar', 'field_0'],
  ];

  test.each(cases)('"%s" should be "%s"', (fieldName, expectedName) => {
    expect(getSafeAggregationName(fieldName, 0)).toBe(expectedName);
  });
});

View file

@ -0,0 +1,20 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
/**
 * Derives a name that is safe to use for an elasticsearch aggregation from
 * the supplied field name.
 *
 * A field name consisting only of ASCII letters, digits, '-', '_' and '.' is
 * returned as is. Any other field name (including an empty string) is swapped
 * for a 'dummy' name in the format 'field_index', e.g. field_0, field_1.
 *
 * @param fieldName - the field name to check
 * @param index - the number appended to `field_` when a dummy name is needed
 * @returns the original field name or the generated dummy name
 */
export function getSafeAggregationName(fieldName: string, index: number): string {
  const safeNamePattern = /^[a-zA-Z0-9-_.]+$/;

  if (safeNamePattern.test(fieldName)) {
    return fieldName;
  }

  return `field_${index}`;
}

View file

@ -17,5 +17,6 @@
],
"kbn_references": [
"@kbn/ml-is-populated-object",
"@kbn/es-query",
]
}

View file

@ -9,7 +9,7 @@ import type { ChangePoint } from '@kbn/ml-agg-utils';
import type { GroupTableItem } from '../../components/spike_analysis_table/types';
import { buildBaseFilterCriteria } from './query_utils';
import { buildExtendedBaseFilterCriteria } from './build_extended_base_filter_criteria';
const selectedChangePointMock: ChangePoint = {
doc_count: 53408,
@ -41,9 +41,9 @@ const selectedGroupMock: GroupTableItem = {
};
describe('query_utils', () => {
describe('buildBaseFilterCriteria', () => {
describe('buildExtendedBaseFilterCriteria', () => {
it('returns range filter based on minimum supplied arguments', () => {
const baseFilterCriteria = buildBaseFilterCriteria('the-time-field-name', 1234, 5678);
const baseFilterCriteria = buildExtendedBaseFilterCriteria('the-time-field-name', 1234, 5678);
expect(baseFilterCriteria).toEqual([
{
@ -59,7 +59,7 @@ describe('query_utils', () => {
});
it('returns filters including default query with supplied arguments provided via UI', () => {
const baseFilterCriteria = buildBaseFilterCriteria(
const baseFilterCriteria = buildExtendedBaseFilterCriteria(
'@timestamp',
1640082000012,
1640103600906,
@ -81,7 +81,7 @@ describe('query_utils', () => {
});
it('includes a term filter when including a selectedChangePoint', () => {
const baseFilterCriteria = buildBaseFilterCriteria(
const baseFilterCriteria = buildExtendedBaseFilterCriteria(
'@timestamp',
1640082000012,
1640103600906,
@ -105,7 +105,7 @@ describe('query_utils', () => {
});
it('includes a term filter with must_not when excluding a selectedChangePoint', () => {
const baseFilterCriteria = buildBaseFilterCriteria(
const baseFilterCriteria = buildExtendedBaseFilterCriteria(
'@timestamp',
1640082000012,
1640103600906,
@ -130,7 +130,7 @@ describe('query_utils', () => {
});
it('includes multiple term filters when including a selectedGroupMock', () => {
const baseFilterCriteria = buildBaseFilterCriteria(
const baseFilterCriteria = buildExtendedBaseFilterCriteria(
'@timestamp',
1640082000012,
1640103600906,
@ -185,7 +185,7 @@ describe('query_utils', () => {
});
it('includes a must_not with nested term filters when excluding a selectedGroup', () => {
const baseFilterCriteria = buildBaseFilterCriteria(
const baseFilterCriteria = buildExtendedBaseFilterCriteria(
'@timestamp',
1640082000012,
1640103600906,

View file

@ -12,6 +12,9 @@ import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import type { Query } from '@kbn/es-query';
import type { ChangePoint, FieldValuePair } from '@kbn/ml-agg-utils';
import { buildBaseFilterCriteria } from '@kbn/ml-query-utils';
import type { GroupTableItem } from '../../components/spike_analysis_table/types';
/*
@ -20,7 +23,7 @@ import type { GroupTableItem } from '../../components/spike_analysis_table/types
// Builds the base filter criteria used in queries,
// adding criteria for the time range and an optional query.
export function buildBaseFilterCriteria(
export function buildExtendedBaseFilterCriteria(
timeFieldName?: string,
earliestMs?: number,
latestMs?: number,
@ -29,22 +32,7 @@ export function buildBaseFilterCriteria(
includeSelectedChangePoint = true,
selectedGroup?: GroupTableItem | null
): estypes.QueryDslQueryContainer[] {
const filterCriteria = [];
if (timeFieldName && earliestMs && latestMs) {
filterCriteria.push({
range: {
[timeFieldName]: {
gte: earliestMs,
lte: latestMs,
format: 'epoch_millis',
},
},
});
}
if (query && typeof query === 'object') {
filterCriteria.push(query);
}
const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, query);
const groupFilter = [];
if (selectedGroup) {

View file

@ -13,7 +13,7 @@ import { isPopulatedObject } from '@kbn/ml-is-populated-object';
import type { ChangePoint } from '@kbn/ml-agg-utils';
import type { Query } from '@kbn/es-query';
import { buildBaseFilterCriteria } from './application/utils/query_utils';
import { buildExtendedBaseFilterCriteria } from './application/utils/build_extended_base_filter_criteria';
import { GroupTableItem } from './components/spike_analysis_table/types';
export interface DocumentCountStats {
@ -54,7 +54,7 @@ export const getDocumentCountStatsRequest = (params: DocumentStatsSearchStrategy
} = params;
const size = 0;
const filterCriteria = buildBaseFilterCriteria(
const filterCriteria = buildExtendedBaseFilterCriteria(
timeFieldName,
earliestMs,
latestMs,

View file

@ -46,6 +46,7 @@
"@kbn/ml-local-storage",
"@kbn/ml-date-picker",
"@kbn/ml-local-storage",
"@kbn/ml-query-utils",
],
"exclude": [
"target/**/*",

View file

@ -1,38 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
/*
* Contains utility functions for building and processing queries.
*/
// Assembles the list of filter clauses shared by queries: a range clause on the
// time field, followed by the caller's query. Each clause is only added when its
// inputs are supplied. The checks are truthiness-based, so an empty field name or
// a timestamp of 0 is treated the same as a missing value.
export function buildBaseFilterCriteria(
  timeFieldName?: string,
  earliestMs?: number,
  latestMs?: number,
  query?: object
) {
  const criteria = [];

  if (timeFieldName && earliestMs && latestMs) {
    // Both bounds are passed along as epoch milliseconds.
    const timeRange = {
      gte: earliestMs,
      lte: latestMs,
      format: 'epoch_millis',
    };
    criteria.push({ range: { [timeFieldName]: timeRange } });
  }

  if (query) {
    // The query object is appended as is, not copied.
    criteria.push(query);
  }

  return criteria;
}

View file

@ -12,6 +12,7 @@ import { last, cloneDeep } from 'lodash';
import { mergeMap, switchMap } from 'rxjs/operators';
import { Comparators } from '@elastic/eui';
import type { ISearchOptions } from '@kbn/data-plugin/common';
import { buildBaseFilterCriteria, getSafeAggregationName } from '@kbn/ml-query-utils';
import type {
DataStatsFetchProgress,
FieldStatsSearchStrategyReturnBase,
@ -22,10 +23,6 @@ import type {
import { useDataVisualizerKibana } from '../../kibana_context';
import type { FieldRequestConfig } from '../../../../common/types';
import type { DataVisualizerIndexBasedAppState } from '../types/index_data_visualizer_state';
import {
buildBaseFilterCriteria,
getSafeAggregationName,
} from '../../../../common/utils/query_utils';
import type { FieldStats, FieldStatsError } from '../../../../common/types/field_stats';
import { getInitialProgress, getReducer } from '../progress_utils';
import { MAX_EXAMPLES_DEFAULT } from '../search_strategy/requests/constants';

View file

@ -11,8 +11,8 @@ import { isPopulatedObject } from '@kbn/ml-is-populated-object';
import { DataPublicPluginStart, ISearchOptions } from '@kbn/data-plugin/public';
import seedrandom from 'seedrandom';
import { isDefined } from '@kbn/ml-is-defined';
import { buildBaseFilterCriteria } from '@kbn/ml-query-utils';
import { RANDOM_SAMPLER_PROBABILITIES } from '../../constants/random_sampler';
import { buildBaseFilterCriteria } from '../../../../../common/utils/query_utils';
import type {
DocumentCountStats,
OverallStatsSearchStrategyParams,

View file

@ -16,8 +16,8 @@ import type {
} from '@kbn/data-plugin/public';
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
import type { SearchHit } from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { buildBaseFilterCriteria } from '@kbn/ml-query-utils';
import { getUniqGeoOrStrExamples } from '../../../common/util/example_utils';
import { buildBaseFilterCriteria } from '../../../../../common/utils/query_utils';
import type {
Field,
FieldExamples,

View file

@ -11,11 +11,8 @@ import { Query } from '@kbn/es-query';
import type { IKibanaSearchResponse } from '@kbn/data-plugin/common';
import type { AggCardinality } from '@kbn/ml-agg-utils';
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
import { buildBaseFilterCriteria, getSafeAggregationName } from '@kbn/ml-query-utils';
import { buildAggregationWithSamplingOption } from './build_random_sampler_agg';
import {
buildBaseFilterCriteria,
getSafeAggregationName,
} from '../../../../../common/utils/query_utils';
import { getDatafeedAggregations } from '../../../../../common/utils/datafeed_utils';
import { AggregatableField, NonAggregatableField } from '../../types/overall_stats';
import { Aggs, SamplingOption } from '../../../../../common/types/field_stats';

View file

@ -55,6 +55,7 @@
"@kbn/ml-local-storage",
"@kbn/ml-date-picker",
"@kbn/ml-is-defined",
"@kbn/ml-query-utils",
],
"exclude": [
"target/**/*",

View file

@ -17,7 +17,6 @@ import {
mlFunctionToESAggregation,
isJobIdValid,
prefixDatafeedId,
getSafeAggregationName,
getLatestDataOrBucketTimestamp,
getEarliestDatafeedStartTime,
resolveMaxTimeInterval,
@ -566,18 +565,6 @@ describe('ML - job utils', () => {
});
});
describe('getSafeAggregationName', () => {
  // Pairs of supplied field name and the name expected back for index 0.
  const expectations: Array<{ fieldName: string; expected: string }> = [
    { fieldName: 'foo', expected: 'foo' },
    { fieldName: 'foo.bar', expected: 'foo.bar' },
    { fieldName: 'foo&bar', expected: 'field_0' },
  ];

  for (const { fieldName, expected } of expectations) {
    test(`"${fieldName}" should be "${expected}"`, () => {
      expect(getSafeAggregationName(fieldName, 0)).toBe(expected);
    });
  }
});
describe('getLatestDataOrBucketTimestamp', () => {
test('returns expected value when no gap in data at end of bucket processing', () => {
expect(getLatestDataOrBucketTimestamp(1549929594000, 1549928700000)).toBe(1549929594000);

View file

@ -424,14 +424,6 @@ export function createDatafeedId(jobId: string) {
return `datafeed-${jobId}`;
}
// Maps a field name to a name usable as an elasticsearch aggregation name.
// When the field name contains only ASCII letters, digits, '-', '_' and '.',
// it is returned unchanged. Otherwise the provided index identifier is used to
// build a 'dummy' name in the format 'field_index', e.g. field_0, field_1.
export function getSafeAggregationName(fieldName: string, index: number): string {
  const safeMatch = fieldName.match(/^[a-zA-Z0-9-_.]+$/);

  // `match` yields null when the name is empty or has a disallowed character.
  if (safeMatch === null) {
    return `field_${index}`;
  }

  return fieldName;
}
export function uniqWithIsEqual<T extends any[]>(arr: T): T {
return arr.reduce((dedupedArray, value) => {
if (dedupedArray.filter((compareValue: any) => isEqual(compareValue, value)).length === 0) {

View file

@ -16,7 +16,7 @@ const completeData: ChartDataWithNullValues = [
[1666846800000, 6],
];
describe('buildBaseFilterCriteria', () => {
describe('fillMissingChartData', () => {
it('returns chart data with missing timestamps in middle of dataset filled in to null', () => {
const dataWithMissingValues: ChartDataWithNullValues = [
[1666828800000, 7],

View file

@ -1,51 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { buildBaseFilterCriteria } from './query_utils';
describe('ML - query utils', () => {
  describe('buildBaseFilterCriteria', () => {
    // Bounds of the time range used by every test, in epoch milliseconds.
    const earliestMs = 1483228800000; // 1 Jan 2017 00:00:00
    const latestMs = 1485907199000; // 31 Jan 2017 23:59:59

    // Example query clause that is expected to be passed through untouched.
    const query = {
      query_string: {
        query: 'region:sa-east-1',
        analyze_wildcard: true,
        default_field: '*',
      },
    };

    // Range clause expected for the `timestamp` field in both tests.
    const timeRangeCriteria = {
      range: {
        timestamp: {
          gte: earliestMs,
          lte: latestMs,
          format: 'epoch_millis',
        },
      },
    };

    test('returns correct criteria for time range', () => {
      const result = buildBaseFilterCriteria('timestamp', earliestMs, latestMs);

      expect(result).toEqual([timeRangeCriteria]);
    });

    test('returns correct criteria for time range and query', () => {
      const result = buildBaseFilterCriteria('timestamp', earliestMs, latestMs, query);

      // The range clause comes first, followed by the supplied query.
      expect(result).toEqual([timeRangeCriteria, query]);
    });
  });
});

View file

@ -1,38 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
/*
* Contains utility functions for building and processing queries.
*/
// Collects the filter clauses that queries start from. A range clause for the
// time field is added when the field name and both timestamps are truthy (so a
// timestamp of 0 or an empty field name skips it), and the optional query is
// appended afterwards when one is given.
export function buildBaseFilterCriteria(
  timeFieldName?: string,
  earliestMs?: number,
  latestMs?: number,
  query?: object
) {
  const filters = [];

  if (timeFieldName && earliestMs && latestMs) {
    // The time field name becomes the key of the range clause.
    const rangeClause = {
      range: {
        [timeFieldName]: { gte: earliestMs, lte: latestMs, format: 'epoch_millis' },
      },
    };
    filters.push(rangeClause);
  }

  if (query) {
    filters.push(query);
  }

  return filters;
}

View file

@ -15,9 +15,8 @@ import {
} from '@kbn/ml-agg-utils';
import type { AggCardinality, FieldsForHistograms } from '@kbn/ml-agg-utils';
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
import { buildBaseFilterCriteria, getSafeAggregationName } from '@kbn/ml-query-utils';
import { ML_JOB_FIELD_TYPES } from '../../../common/constants/field_types';
import { getSafeAggregationName } from '../../../common/util/job_utils';
import { buildBaseFilterCriteria } from '../../lib/query_utils';
import { RuntimeMappings } from '../../../common/types/fields';
import { getDatafeedAggregations } from '../../../common/util/datafeed_utils';
import { Datafeed } from '../../../common/types/anomaly_detection_jobs';