[ML] Explain Log Rate Spikes: rename frequent_items to frequent_item_sets (#150150)

## Summary

The `frequent_items` aggregation has been renamed to `frequent_item_sets` in Elasticsearch. This PR follows up on that rename and updates the Explain Log Rate Spikes code accordingly (aggregation usage, function and variable names, and test mocks).
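
For reference, a minimal sketch of a query against the renamed aggregation. The aggregation parameters (`minimum_set_size`, `size`, `minimum_support`) mirror the body updated in this PR and the field names come from the artificial-logs test mocks; the client setup and index name are placeholders, not the production code in `fetch_frequent_item_sets.ts`:

```ts
import { Client } from '@elastic/elasticsearch';

// Illustrative client setup; the node URL is a placeholder.
const client = new Client({ node: 'http://localhost:9200' });

async function run() {
  const body = await client.search({
    index: 'web-logs', // hypothetical index
    size: 0,
    track_total_hits: true,
    aggs: {
      fi: {
        // On client versions where `frequent_item_sets` is not yet part of the
        // aggregation typings (as noted in this PR), a `// @ts-expect-error`
        // may be needed here.
        frequent_item_sets: {
          minimum_set_size: 2,
          size: 200,
          minimum_support: 0.1,
          fields: [{ field: 'response_code' }, { field: 'url' }, { field: 'user' }],
        },
      },
    },
  });

  // Each bucket is a set of co-occurring field/value pairs with doc_count and support.
  console.log(JSON.stringify(body.aggregations, null, 2));
}

run().catch(console.error);
```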

### Checklist

Delete any items that are not applicable to this PR.

- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios

### For maintainers

- [ ] This was checked for breaking API changes and was [labeled
appropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)

Hendrik Muhs 2023-02-06 16:24:26 +01:00 committed by GitHub
parent 447de664fa
commit d2f5bcc71a
19 changed files with 75 additions and 75 deletions

View file

@@ -7,7 +7,7 @@
import type { ItemsetResult } from '../../types';
-export const filteredFrequentItems: ItemsetResult[] = [
+export const filteredFrequentItemSets: ItemsetResult[] = [
{
set: { response_code: '500', url: 'home.php' },
size: 2,

View file

@@ -7,7 +7,7 @@
import type { ItemsetResult } from '../../types';
-export const frequentItems: ItemsetResult[] = [
+export const frequentItemSets: ItemsetResult[] = [
{
set: { response_code: '500', url: 'home.php' },
size: 2,

View file

@@ -44,7 +44,7 @@ import type { AiopsLicense } from '../types';
import { duplicateIdentifier } from './queries/duplicate_identifier';
import { fetchChangePointPValues } from './queries/fetch_change_point_p_values';
import { fetchIndexInfo } from './queries/fetch_index_info';
-import { dropDuplicates, fetchFrequentItems } from './queries/fetch_frequent_items';
+import { dropDuplicates, fetchFrequentItemSets } from './queries/fetch_frequent_item_sets';
import { getHistogramQuery } from './queries/get_histogram_query';
import { getGroupFilter } from './queries/get_group_filter';
import { getChangePointGroups } from './queries/get_change_point_groups';
@@ -423,11 +423,11 @@ export const defineExplainLogRateSpikesRoute = (
})
);
-// Deduplicated change points we pass to the `frequent_items` aggregation.
+// Deduplicated change points we pass to the `frequent_item_sets` aggregation.
const deduplicatedChangePoints = dropDuplicates(changePoints, duplicateIdentifier);
try {
-const { fields, df } = await fetchFrequentItems(
+const { fields, df } = await fetchFrequentItemSets(
client,
request.body.index,
JSON.parse(request.body.searchQuery) as estypes.QueryDslQueryContainer,
@@ -442,7 +442,7 @@ export const defineExplainLogRateSpikesRoute = (
);
if (shouldStop) {
-logDebugMessage('shouldStop after fetching frequent_items.');
+logDebugMessage('shouldStop after fetching frequent_item_sets.');
end();
return;
}

View file

@@ -7,7 +7,7 @@
import type { ChangePoint } from '@kbn/ml-agg-utils';
-// To optimize the `frequent_items` query, we identify duplicate change points by count attributes.
+// To optimize the `frequent_item_sets` query, we identify duplicate change points by count attributes.
// Note this is a compromise and not 100% accurate because there could be change points that
// have the exact same counts but still don't co-occur.
export const duplicateIdentifier: Array<keyof ChangePoint> = [

View file

@@ -16,16 +16,16 @@ import { isPopulatedObject } from '@kbn/ml-is-populated-object';
import type { ChangePointDuplicateGroup, ItemsetResult } from '../../../common/types';
-const FREQUENT_ITEMS_FIELDS_LIMIT = 15;
+const FREQUENT_ITEM_SETS_FIELDS_LIMIT = 15;
-interface FrequentItemsAggregation extends estypes.AggregationsSamplerAggregation {
+interface FrequentItemSetsAggregation extends estypes.AggregationsSamplerAggregation {
fi: {
buckets: Array<{ key: Record<string, string[]>; doc_count: number; support: number }>;
};
}
interface RandomSamplerAggregation {
-sample: FrequentItemsAggregation;
+sample: FrequentItemSetsAggregation;
}
function isRandomSamplerAggregation(arg: unknown): arg is RandomSamplerAggregation {
@@ -56,7 +56,7 @@ export function groupDuplicates(cps: ChangePoint[], uniqueFields: Array<keyof Ch
return groups;
}
-export async function fetchFrequentItems(
+export async function fetchFrequentItemSets(
client: ElasticsearchClient,
index: string,
searchQuery: estypes.QueryDslQueryContainer,
@@ -77,7 +77,7 @@ export async function fetchFrequentItems(
// Get up to 15 unique fields from change points with retained order
const fields = sortedChangePoints.reduce<string[]>((p, c) => {
-if (p.length < FREQUENT_ITEMS_FIELDS_LIMIT && !p.some((d) => d === c.fieldName)) {
+if (p.length < FREQUENT_ITEM_SETS_FIELDS_LIMIT && !p.some((d) => d === c.fieldName)) {
p.push(c.fieldName);
}
return p;
@@ -107,10 +107,10 @@ export async function fetchFrequentItems(
field,
}));
-const frequentItemsAgg: Record<string, estypes.AggregationsAggregationContainer> = {
+const frequentItemSetsAgg: Record<string, estypes.AggregationsAggregationContainer> = {
fi: {
-// @ts-expect-error `frequent_items` is not yet part of `AggregationsAggregationContainer`
-frequent_items: {
+// @ts-expect-error `frequent_item_sets` is not yet part of `AggregationsAggregationContainer`
+frequent_item_sets: {
minimum_set_size: 2,
size: 200,
minimum_support: 0.1,
@@ -127,20 +127,20 @@ export async function fetchFrequentItems(
probability: sampleProbability,
seed: RANDOM_SAMPLER_SEED,
},
-aggs: frequentItemsAgg,
+aggs: frequentItemSetsAgg,
},
};
const esBody = {
query,
-aggs: sampleProbability < 1 ? randomSamplerAgg : frequentItemsAgg,
+aggs: sampleProbability < 1 ? randomSamplerAgg : frequentItemSetsAgg,
size: 0,
track_total_hits: true,
};
const body = await client.search<
unknown,
-{ sample: FrequentItemsAggregation } | FrequentItemsAggregation
+{ sample: FrequentItemSetsAggregation } | FrequentItemSetsAggregation
>(
{
index,
@@ -151,8 +151,8 @@ export async function fetchFrequentItems(
);
if (body.aggregations === undefined) {
-logger.error(`Failed to fetch frequent_items, got: \n${JSON.stringify(body, null, 2)}`);
-emitError(`Failed to fetch frequent_items.`);
+logger.error(`Failed to fetch frequent_item_sets, got: \n${JSON.stringify(body, null, 2)}`);
+emitError(`Failed to fetch frequent_item_sets.`);
return {
fields: [],
df: [],
@@ -162,17 +162,17 @@ export async function fetchFrequentItems(
const totalDocCountFi = (body.hits.total as estypes.SearchTotalHits).value;
-const frequentItems = isRandomSamplerAggregation(body.aggregations)
+const frequentItemSets = isRandomSamplerAggregation(body.aggregations)
? body.aggregations.sample.fi
: body.aggregations.fi;
-const shape = frequentItems.buckets.length;
+const shape = frequentItemSets.buckets.length;
let maximum = shape;
if (maximum > 50000) {
maximum = 50000;
}
-const fiss = frequentItems.buckets;
+const fiss = frequentItemSets.buckets;
fiss.length = maximum;
const results: ItemsetResult[] = [];

View file

@@ -6,7 +6,7 @@
*/
import { fields } from '../../../common/__mocks__/artificial_logs/fields';
-import { frequentItems } from '../../../common/__mocks__/artificial_logs/frequent_items';
+import { frequentItemSets } from '../../../common/__mocks__/artificial_logs/frequent_item_sets';
import { changePoints } from '../../../common/__mocks__/artificial_logs/change_points';
import { finalChangePointGroups } from '../../../common/__mocks__/artificial_logs/final_change_point_groups';
@@ -14,7 +14,7 @@ import { getChangePointGroups } from './get_change_point_groups';
describe('getChangePointGroups', () => {
it('gets change point groups', () => {
-const changePointGroups = getChangePointGroups(frequentItems, changePoints, fields);
+const changePointGroups = getChangePointGroups(frequentItemSets, changePoints, fields);
expect(changePointGroups).toEqual(finalChangePointGroups);
});

View file

@@ -8,12 +8,12 @@
import type { ChangePoint, ChangePointGroup } from '@kbn/ml-agg-utils';
import { duplicateIdentifier } from './duplicate_identifier';
-import { dropDuplicates, groupDuplicates } from './fetch_frequent_items';
+import { dropDuplicates, groupDuplicates } from './fetch_frequent_item_sets';
import { getFieldValuePairCounts } from './get_field_value_pair_counts';
import { getMarkedDuplicates } from './get_marked_duplicates';
import { getSimpleHierarchicalTree } from './get_simple_hierarchical_tree';
import { getSimpleHierarchicalTreeLeaves } from './get_simple_hierarchical_tree_leaves';
-import { getFilteredFrequentItems } from './get_filtered_frequent_items';
+import { getFilteredFrequentItemSets } from './get_filtered_frequent_item_sets';
import { getGroupsWithReaddedDuplicates } from './get_groups_with_readded_duplicates';
import { getMissingChangePoints } from './get_missing_change_points';
import { transformChangePointToGroup } from './transform_change_point_to_group';
@@ -24,22 +24,22 @@ export function getChangePointGroups(
changePoints: ChangePoint[],
fields: string[]
): ChangePointGroup[] {
-// These are the deduplicated change points we pass to the `frequent_items` aggregation.
+// These are the deduplicated change points we pass to the `frequent_item_sets` aggregation.
const deduplicatedChangePoints = dropDuplicates(changePoints, duplicateIdentifier);
// We use the grouped change points to later repopulate
-// the `frequent_items` result with the missing duplicates.
+// the `frequent_item_sets` result with the missing duplicates.
const groupedChangePoints = groupDuplicates(changePoints, duplicateIdentifier).filter(
(g) => g.group.length > 1
);
-const filteredDf = getFilteredFrequentItems(itemsets, changePoints);
+const filteredDf = getFilteredFrequentItemSets(itemsets, changePoints);
-// `frequent_items` returns lot of different small groups of field/value pairs that co-occur.
+// `frequent_item_sets` returns lot of different small groups of field/value pairs that co-occur.
// The following steps analyse these small groups, identify overlap between these groups,
// and then summarize them in larger groups where possible.
-// Get a tree structure based on `frequent_items`.
+// Get a tree structure based on `frequent_item_sets`.
const { root } = getSimpleHierarchicalTree(filteredDf, true, false, fields);
// Each leave of the tree will be a summarized group of co-occuring field/value pairs.
@@ -48,7 +48,7 @@ export function getChangePointGroups(
// To be able to display a more cleaned up results table in the UI, we identify field/value pairs
// that occur in multiple groups. This will allow us to highlight field/value pairs that are
// unique to a group in a better way. This step will also re-add duplicates we identified in the
-// beginning and didn't pass on to the `frequent_items` agg.
+// beginning and didn't pass on to the `frequent_item_sets` agg.
const fieldValuePairCounts = getFieldValuePairCounts(treeLeaves);
const changePointGroupsWithMarkedDuplicates = getMarkedDuplicates(
treeLeaves,
@@ -59,7 +59,7 @@ export function getChangePointGroups(
groupedChangePoints
);
-// Some field/value pairs might not be part of the `frequent_items` result set, for example
+// Some field/value pairs might not be part of the `frequent_item_sets` result set, for example
// because they don't co-occur with other field/value pairs or because of the limits we set on the query.
// In this next part we identify those missing pairs and add them as individual groups.
const missingChangePoints = getMissingChangePoints(deduplicatedChangePoints, changePointGroups);

View file

@@ -7,7 +7,7 @@
import { changePointGroups } from '../../../common/__mocks__/farequote/change_point_groups';
import { fields } from '../../../common/__mocks__/artificial_logs/fields';
-import { filteredFrequentItems } from '../../../common/__mocks__/artificial_logs/filtered_frequent_items';
+import { filteredFrequentItemSets } from '../../../common/__mocks__/artificial_logs/filtered_frequent_item_sets';
import { getFieldValuePairCounts } from './get_field_value_pair_counts';
import { getSimpleHierarchicalTree } from './get_simple_hierarchical_tree';
@@ -30,7 +30,7 @@ describe('getFieldValuePairCounts', () => {
it('returns a nested record with field/value pair counts for artificial logs', () => {
const simpleHierarchicalTree = getSimpleHierarchicalTree(
-filteredFrequentItems,
+filteredFrequentItemSets,
true,
false,
fields

View file

@@ -0,0 +1,20 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+import { changePoints } from '../../../common/__mocks__/artificial_logs/change_points';
+import { frequentItemSets } from '../../../common/__mocks__/artificial_logs/frequent_item_sets';
+import { filteredFrequentItemSets } from '../../../common/__mocks__/artificial_logs/filtered_frequent_item_sets';
+import { getFilteredFrequentItemSets } from './get_filtered_frequent_item_sets';
+describe('getFilteredFrequentItemSets', () => {
+it('filter frequent item set based on provided change points', () => {
+expect(getFilteredFrequentItemSets(frequentItemSets, changePoints)).toStrictEqual(
+filteredFrequentItemSets
+);
+});
+});

View file

@@ -11,10 +11,10 @@ import type { ChangePoint } from '@kbn/ml-agg-utils';
import type { ItemsetResult } from '../../../common/types';
-// The way the `frequent_items` aggregation works could return item sets that include
+// The way the `frequent_item_sets` aggregation works could return item sets that include
// field/value pairs that are not part of the original list of significant change points.
// This cleans up groups and removes those unrelated field/value pairs.
-export function getFilteredFrequentItems(
+export function getFilteredFrequentItemSets(
itemsets: ItemsetResult[],
changePoints: ChangePoint[]
): ItemsetResult[] {

View file

@@ -1,20 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0; you may not use this file except in compliance with the Elastic License
- * 2.0.
- */
-import { changePoints } from '../../../common/__mocks__/artificial_logs/change_points';
-import { frequentItems } from '../../../common/__mocks__/artificial_logs/frequent_items';
-import { filteredFrequentItems } from '../../../common/__mocks__/artificial_logs/filtered_frequent_items';
-import { getFilteredFrequentItems } from './get_filtered_frequent_items';
-describe('getFilteredFrequentItems', () => {
-it('filter frequent item set based on provided change points', () => {
-expect(getFilteredFrequentItems(frequentItems, changePoints)).toStrictEqual(
-filteredFrequentItems
-);
-});
-});

View file

@@ -10,7 +10,7 @@ import { changePoints } from '../../../common/__mocks__/artificial_logs/change_p
import { duplicateIdentifier } from './duplicate_identifier';
import { getGroupsWithReaddedDuplicates } from './get_groups_with_readded_duplicates';
-import { groupDuplicates } from './fetch_frequent_items';
+import { groupDuplicates } from './fetch_frequent_item_sets';
import { getFieldValuePairCounts } from './get_field_value_pair_counts';
import { getMarkedDuplicates } from './get_marked_duplicates';

View file

@@ -7,7 +7,7 @@
import { changePointGroups } from '../../../common/__mocks__/farequote/change_point_groups';
import { fields } from '../../../common/__mocks__/artificial_logs/fields';
-import { filteredFrequentItems } from '../../../common/__mocks__/artificial_logs/filtered_frequent_items';
+import { filteredFrequentItemSets } from '../../../common/__mocks__/artificial_logs/filtered_frequent_item_sets';
import { getFieldValuePairCounts } from './get_field_value_pair_counts';
import { getMarkedDuplicates } from './get_marked_duplicates';
@@ -59,7 +59,7 @@ describe('markDuplicates', () => {
it('marks duplicates based on change point groups for artificial logs', () => {
const simpleHierarchicalTree = getSimpleHierarchicalTree(
-filteredFrequentItems,
+filteredFrequentItemSets,
true,
false,
fields

View file

@@ -10,7 +10,7 @@ import { changePoints } from '../../../common/__mocks__/artificial_logs/change_p
import { duplicateIdentifier } from './duplicate_identifier';
import { getGroupsWithReaddedDuplicates } from './get_groups_with_readded_duplicates';
-import { dropDuplicates, groupDuplicates } from './fetch_frequent_items';
+import { dropDuplicates, groupDuplicates } from './fetch_frequent_item_sets';
import { getFieldValuePairCounts } from './get_field_value_pair_counts';
import { getMarkedDuplicates } from './get_marked_duplicates';
import { getMissingChangePoints } from './get_missing_change_points';

View file

@@ -6,7 +6,7 @@
*/
import { fields } from '../../../common/__mocks__/artificial_logs/fields';
-import { filteredFrequentItems } from '../../../common/__mocks__/artificial_logs/filtered_frequent_items';
+import { filteredFrequentItemSets } from '../../../common/__mocks__/artificial_logs/filtered_frequent_item_sets';
import { getSimpleHierarchicalTree } from './get_simple_hierarchical_tree';
@@ -16,7 +16,7 @@ describe('getSimpleHierarchicalTree', () => {
// and make it comparable against a static representation.
expect(
JSON.parse(
-JSON.stringify(getSimpleHierarchicalTree(filteredFrequentItems, true, false, fields))
+JSON.stringify(getSimpleHierarchicalTree(filteredFrequentItemSets, true, false, fields))
)
).toEqual({
root: {

View file

@@ -6,7 +6,7 @@
*/
import { fields } from '../../../common/__mocks__/artificial_logs/fields';
-import { filteredFrequentItems } from '../../../common/__mocks__/artificial_logs/filtered_frequent_items';
+import { filteredFrequentItemSets } from '../../../common/__mocks__/artificial_logs/filtered_frequent_item_sets';
import { getSimpleHierarchicalTree } from './get_simple_hierarchical_tree';
import { getSimpleHierarchicalTreeLeaves } from './get_simple_hierarchical_tree_leaves';
@@ -14,7 +14,7 @@ import { getSimpleHierarchicalTreeLeaves } from './get_simple_hierarchical_tree_
describe('getSimpleHierarchicalTreeLeaves', () => {
it('returns the hierarchical tree leaves', () => {
const simpleHierarchicalTree = getSimpleHierarchicalTree(
-filteredFrequentItems,
+filteredFrequentItemSets,
true,
false,
fields

View file

@@ -5,12 +5,12 @@
* 2.0.
*/
-import { frequentItems } from '../../../common/__mocks__/artificial_logs/frequent_items';
+import { frequentItemSets } from '../../../common/__mocks__/artificial_logs/frequent_item_sets';
import { getValueCounts } from './get_value_counts';
describe('getValueCounts', () => {
it('get value counts for field response_code', () => {
-expect(getValueCounts(frequentItems, 'response_code')).toEqual({
+expect(getValueCounts(frequentItemSets, 'response_code')).toEqual({
'200': 1,
'404': 1,
'500': 3,
@@ -18,11 +18,11 @@ describe('getValueCounts', () => {
});
it('get value counts for field url', () => {
-expect(getValueCounts(frequentItems, 'url')).toEqual({ 'home.php': 6 });
+expect(getValueCounts(frequentItemSets, 'url')).toEqual({ 'home.php': 6 });
});
it('get value counts for field user', () => {
-expect(getValueCounts(frequentItems, 'user')).toEqual({
+expect(getValueCounts(frequentItemSets, 'user')).toEqual({
Mary: 1,
Paul: 1,
Peter: 3,

View file

@@ -5,19 +5,19 @@
* 2.0.
*/
-import { frequentItems } from '../../../common/__mocks__/artificial_logs/frequent_items';
+import { frequentItemSets } from '../../../common/__mocks__/artificial_logs/frequent_item_sets';
import { getValuesDescending } from './get_values_descending';
describe('getValuesDescending', () => {
it('get descending values for field response_code', () => {
-expect(getValuesDescending(frequentItems, 'response_code')).toEqual(['500', '200', '404']);
+expect(getValuesDescending(frequentItemSets, 'response_code')).toEqual(['500', '200', '404']);
});
it('get descending values for field url', () => {
-expect(getValuesDescending(frequentItems, 'url')).toEqual(['home.php']);
+expect(getValuesDescending(frequentItemSets, 'url')).toEqual(['home.php']);
});
it('get descending values for field user', () => {
-expect(getValuesDescending(frequentItems, 'user')).toEqual(['Peter', 'Mary', 'Paul']);
+expect(getValuesDescending(frequentItemSets, 'user')).toEqual(['Peter', 'Mary', 'Paul']);
});
});

View file

@@ -10,7 +10,7 @@ import { changePoints } from '../../../common/__mocks__/artificial_logs/change_p
import { duplicateIdentifier } from './duplicate_identifier';
import { getGroupsWithReaddedDuplicates } from './get_groups_with_readded_duplicates';
-import { dropDuplicates, groupDuplicates } from './fetch_frequent_items';
+import { dropDuplicates, groupDuplicates } from './fetch_frequent_item_sets';
import { getFieldValuePairCounts } from './get_field_value_pair_counts';
import { getMarkedDuplicates } from './get_marked_duplicates';
import { getMissingChangePoints } from './get_missing_change_points';