[ML] Explain log rate spikes: Add field histograms to analysis result. (#136295)

- Extends the analysis API to return histogram data.
- Adds a column with MiniHistogram components to the analysis results table.
- Moves/consolidates fetchHistogramsForFields to @kbn/ml-agg-utils (also used in Data Visualizer and Data Grid Mini Histograms).
- So far fetchHistogramsForFields auto-identified the necessary interval and min/max. To be able to generate histogram data for the log rate spikes charts an options was added to use that information up front for the data to be fetched. This allows the buckets for the chart data for the overall (green bars) and the field/value-filtered (orange bars) histogram to have the exact same buckets.
This commit is contained in:
Walter Rafelsberger 2022-07-20 11:44:19 +02:00 committed by GitHub
parent 5aad18fa35
commit 092fb354ec
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
26 changed files with 672 additions and 314 deletions

View file

@ -7,7 +7,6 @@
import { get } from 'lodash';
import type { Client } from '@elastic/elasticsearch';
import * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { KBN_FIELD_TYPES } from '@kbn/field-types';
@ -16,39 +15,14 @@ import { stringHash } from '@kbn/ml-string-hash';
import { buildSamplerAggregation } from './build_sampler_aggregation';
import { getSamplerAggregationsResponsePath } from './get_sampler_aggregations_response_path';
// TODO Temporary type definition until we can import from `@kbn/core`.
// Copied from src/core/server/elasticsearch/client/types.ts
// as these types aren't part of any package yet. Once they are, remove this completely
/**
* Client used to query the elasticsearch cluster.
* @deprecated At some point use the one from src/core/server/elasticsearch/client/types.ts when it is made into a package. If it never is, then keep using this one.
* @public
*/
type ElasticsearchClient = Omit<
Client,
'connectionPool' | 'serializer' | 'extend' | 'close' | 'diagnostic'
>;
import type { ElasticsearchClient, HistogramField, NumericColumnStatsMap } from './types';
const MAX_CHART_COLUMNS = 20;
interface HistogramField {
fieldName: string;
type: string;
}
interface NumericColumnStats {
interval: number;
min: number;
max: number;
}
type NumericColumnStatsMap = Record<string, NumericColumnStats>;
/**
* Returns aggregation intervals for the supplied document fields.
*/
export const getAggIntervals = async (
export const fetchAggIntervals = async (
client: ElasticsearchClient,
indexPattern: string,
query: estypes.QueryDslQueryContainer,

View file

@ -0,0 +1,254 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import get from 'lodash/get';
import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { KBN_FIELD_TYPES } from '@kbn/field-types';
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
import { stringHash } from '@kbn/ml-string-hash';
import { buildSamplerAggregation } from './build_sampler_aggregation';
import { fetchAggIntervals } from './fetch_agg_intervals';
import { getSamplerAggregationsResponsePath } from './get_sampler_aggregations_response_path';
import type {
AggCardinality,
ElasticsearchClient,
HistogramField,
NumericColumnStats,
NumericColumnStatsMap,
} from './types';
const MAX_CHART_COLUMNS = 20;
interface AggHistogram {
histogram: {
field: string;
interval: number;
};
}
interface AggTerms {
terms: {
field: string;
size: number;
};
}
interface NumericDataItem {
key: number;
key_as_string?: string;
doc_count: number;
}
/**
* Interface to describe the data structure returned for numeric based charts.
*/
export interface NumericChartData {
data: NumericDataItem[];
id: string;
interval: number;
stats: [number, number];
type: 'numeric';
}
/**
* Numeric based histogram field interface, limited to `date` and `number`.
*/
export interface NumericHistogramField extends HistogramField {
type: KBN_FIELD_TYPES.DATE | KBN_FIELD_TYPES.NUMBER;
}
type NumericHistogramFieldWithColumnStats = NumericHistogramField & NumericColumnStats;
function isNumericHistogramField(arg: unknown): arg is NumericHistogramField {
return (
isPopulatedObject(arg, ['fieldName', 'type']) &&
(arg.type === KBN_FIELD_TYPES.DATE || arg.type === KBN_FIELD_TYPES.NUMBER)
);
}
function isNumericHistogramFieldWithColumnStats(
arg: unknown
): arg is NumericHistogramFieldWithColumnStats {
return (
isPopulatedObject(arg, ['fieldName', 'type', 'min', 'max', 'interval']) &&
(arg.type === KBN_FIELD_TYPES.DATE || arg.type === KBN_FIELD_TYPES.NUMBER)
);
}
interface OrdinalDataItem {
key: string;
key_as_string?: string;
doc_count: number;
}
interface OrdinalChartData {
type: 'ordinal' | 'boolean';
cardinality: number;
data: OrdinalDataItem[];
id: string;
}
interface OrdinalHistogramField extends HistogramField {
type: KBN_FIELD_TYPES.STRING | KBN_FIELD_TYPES.BOOLEAN;
}
function isOrdinalHistogramField(arg: unknown): arg is OrdinalHistogramField {
return (
isPopulatedObject(arg, ['fieldName', 'type']) &&
(arg.type === KBN_FIELD_TYPES.STRING || arg.type === KBN_FIELD_TYPES.BOOLEAN)
);
}
interface UnsupportedChartData {
id: string;
type: 'unsupported';
}
interface UnsupportedHistogramField extends HistogramField {
type: Exclude<
KBN_FIELD_TYPES,
KBN_FIELD_TYPES.STRING | KBN_FIELD_TYPES.BOOLEAN | KBN_FIELD_TYPES.DATE | KBN_FIELD_TYPES.NUMBER
>;
}
type ChartRequestAgg = AggHistogram | AggCardinality | AggTerms;
/**
* All types of histogram field definitions for fetching histogram data.
*/
export type FieldsForHistograms = Array<
| NumericHistogramField
| NumericHistogramFieldWithColumnStats
| OrdinalHistogramField
| UnsupportedHistogramField
>;
/**
* Fetches data to be used in mini histogram charts. Supports auto-identifying
* the histogram interval and min/max values.
*
* @param client Elasticsearch Client
* @param indexPattern index pattern to be queried
* @param query Elasticsearch query
* @param fields the fields the histograms should be generated for
* @param samplerShardSize shard_size parameter of the sampler aggregation
* @param runtimeMappings optional runtime mappings
* @returns an array of histogram data for each supplied field
*/
export const fetchHistogramsForFields = async (
client: ElasticsearchClient,
indexPattern: string,
query: any,
fields: FieldsForHistograms,
samplerShardSize: number,
runtimeMappings?: estypes.MappingRuntimeFields
) => {
const aggIntervals = {
...(await fetchAggIntervals(
client,
indexPattern,
query,
fields.filter((f) => !isNumericHistogramFieldWithColumnStats(f)),
samplerShardSize,
runtimeMappings
)),
...fields.filter(isNumericHistogramFieldWithColumnStats).reduce((p, field) => {
const { interval, min, max, fieldName } = field;
p[stringHash(fieldName)] = { interval, min, max };
return p;
}, {} as NumericColumnStatsMap),
};
const chartDataAggs = fields.reduce((aggs, field) => {
const id = stringHash(field.fieldName);
if (isNumericHistogramField(field)) {
if (aggIntervals[id] !== undefined) {
aggs[`${id}_histogram`] = {
histogram: {
field: field.fieldName,
interval: aggIntervals[id].interval !== 0 ? aggIntervals[id].interval : 1,
},
};
}
} else if (isOrdinalHistogramField(field)) {
if (field.type === KBN_FIELD_TYPES.STRING) {
aggs[`${id}_cardinality`] = {
cardinality: {
field: field.fieldName,
},
};
}
aggs[`${id}_terms`] = {
terms: {
field: field.fieldName,
size: MAX_CHART_COLUMNS,
},
};
}
return aggs;
}, {} as Record<string, ChartRequestAgg>);
if (Object.keys(chartDataAggs).length === 0) {
return [];
}
const body = await client.search(
{
index: indexPattern,
size: 0,
body: {
query,
aggs: buildSamplerAggregation(chartDataAggs, samplerShardSize),
size: 0,
...(isPopulatedObject(runtimeMappings) ? { runtime_mappings: runtimeMappings } : {}),
},
},
{ maxRetries: 0 }
);
const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
const aggregations = aggsPath.length > 0 ? get(body.aggregations, aggsPath) : body.aggregations;
return fields.map((field) => {
const id = stringHash(field.fieldName);
if (isNumericHistogramField(field)) {
if (aggIntervals[id] === undefined) {
return {
type: 'numeric',
data: [],
interval: 0,
stats: [0, 0],
id: field.fieldName,
} as NumericChartData;
}
return {
data: aggregations[`${id}_histogram`].buckets,
interval: aggIntervals[id].interval,
stats: [aggIntervals[id].min, aggIntervals[id].max],
type: 'numeric',
id: field.fieldName,
} as NumericChartData;
} else if (isOrdinalHistogramField(field)) {
return {
type: field.type === KBN_FIELD_TYPES.STRING ? 'ordinal' : 'boolean',
cardinality:
field.type === KBN_FIELD_TYPES.STRING ? aggregations[`${id}_cardinality`].value : 2,
data: aggregations[`${id}_terms`].buckets,
id: field.fieldName,
} as OrdinalChartData;
}
return {
type: 'unsupported',
id: field.fieldName,
} as UnsupportedChartData;
});
};

View file

@ -6,7 +6,19 @@
*/
export { buildSamplerAggregation } from './build_sampler_aggregation';
export { getAggIntervals } from './get_agg_intervals';
export { fetchAggIntervals } from './fetch_agg_intervals';
export { fetchHistogramsForFields } from './fetch_histograms_for_fields';
export { getSamplerAggregationsResponsePath } from './get_sampler_aggregations_response_path';
export type { NumberValidationResult } from './validate_number';
export { numberValidator } from './validate_number';
export type { FieldsForHistograms } from './fetch_histograms_for_fields';
export type {
AggCardinality,
ChangePoint,
ChangePointHistogram,
ChangePointHistogramItem,
HistogramField,
NumericColumnStats,
NumericColumnStatsMap,
} from './types';
export type { NumberValidationResult } from './validate_number';

View file

@ -0,0 +1,101 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { Client } from '@elastic/elasticsearch';
import { KBN_FIELD_TYPES } from '@kbn/field-types';
// TODO Temporary type definition until we can import from `@kbn/core`.
// Copied from src/core/server/elasticsearch/client/types.ts
// as these types aren't part of any package yet. Once they are, remove this completely
/**
* Client used to query the elasticsearch cluster.
* @deprecated At some point use the one from src/core/server/elasticsearch/client/types.ts when it is made into a package. If it never is, then keep using this one.
* @public
*/
export type ElasticsearchClient = Omit<
Client,
'connectionPool' | 'serializer' | 'extend' | 'close' | 'diagnostic'
>;
interface FieldAggCardinality {
field: string;
percent?: any;
}
interface ScriptAggCardinality {
script: any;
}
/**
* Interface for cardinality aggregation.
*/
export interface AggCardinality {
cardinality: FieldAggCardinality | ScriptAggCardinality;
}
/**
* Field/value pair definition.
*/
export interface FieldValuePair {
fieldName: string;
// For dynamic fieldValues we only identify fields as `string`,
// but for example `http.response.status_code` which is part of
// of the list of predefined field candidates is of type long/number.
fieldValue: string | number;
}
/**
* Interface to describe attributes used for histograms.
*/
export interface NumericColumnStats {
interval: number;
min: number;
max: number;
}
/**
* Record/Map of histogram attributes where the key is the aggregation name.
*/
export type NumericColumnStatsMap = Record<string, NumericColumnStats>;
/**
* Parameters to identify which histogram data needs to be generated for a field.
*/
export interface HistogramField {
fieldName: string;
type: KBN_FIELD_TYPES;
}
/**
* Change point meta data for a field/value pair.
*/
export interface ChangePoint extends FieldValuePair {
doc_count: number;
bg_count: number;
score: number;
pValue: number | null;
normalizedScore: number;
histogram?: ChangePointHistogramItem[];
}
/**
* Change point histogram data item.
*/
export interface ChangePointHistogramItem {
doc_count_overall: number;
doc_count_change_point: number;
key: number;
key_as_string: string;
}
/**
* Change point histogram data for a field/value pair.
*/
export interface ChangePointHistogram extends FieldValuePair {
histogram: ChangePointHistogramItem[];
}

View file

@ -5,10 +5,11 @@
* 2.0.
*/
import type { ChangePoint } from '../../types';
import type { ChangePoint, ChangePointHistogram } from '@kbn/ml-agg-utils';
export const API_ACTION_NAME = {
ADD_CHANGE_POINTS: 'add_change_points',
ADD_CHANGE_POINTS_HISTOGRAM: 'add_change_points_histogram',
ERROR: 'error',
RESET: 'reset',
UPDATE_LOADING_STATE: 'update_loading_state',
@ -29,6 +30,20 @@ export function addChangePointsAction(
};
}
interface ApiActionAddChangePointsHistogram {
type: typeof API_ACTION_NAME.ADD_CHANGE_POINTS_HISTOGRAM;
payload: ChangePointHistogram[];
}
export function addChangePointsHistogramAction(
payload: ApiActionAddChangePointsHistogram['payload']
): ApiActionAddChangePointsHistogram {
return {
type: API_ACTION_NAME.ADD_CHANGE_POINTS_HISTOGRAM,
payload,
};
}
interface ApiActionError {
type: typeof API_ACTION_NAME.ERROR;
payload: string;
@ -69,6 +84,7 @@ export function updateLoadingStateAction(
export type AiopsExplainLogRateSpikesApiAction =
| ApiActionAddChangePoints
| ApiActionAddChangePointsHistogram
| ApiActionError
| ApiActionReset
| ApiActionUpdateLoadingState;

View file

@ -7,6 +7,7 @@
export {
addChangePointsAction,
addChangePointsHistogramAction,
errorAction,
resetAction,
updateLoadingStateAction,

View file

@ -38,6 +38,7 @@ describe('streamReducer', () => {
bg_count: 100,
score: 0.1,
pValue: 0.01,
normalizedScore: 0.123,
},
])
);

View file

@ -5,7 +5,7 @@
* 2.0.
*/
import type { ChangePoint } from '../types';
import type { ChangePoint } from '@kbn/ml-agg-utils';
import { API_ACTION_NAME, AiopsExplainLogRateSpikesApiAction } from './explain_log_rate_spikes';
@ -34,6 +34,17 @@ export function streamReducer(
switch (action.type) {
case API_ACTION_NAME.ADD_CHANGE_POINTS:
return { ...state, changePoints: [...state.changePoints, ...action.payload] };
case API_ACTION_NAME.ADD_CHANGE_POINTS_HISTOGRAM:
const changePoints = state.changePoints.map((cp) => {
const cpHistogram = action.payload.find(
(h) => h.fieldName === cp.fieldName && h.fieldValue && cp.fieldName
);
if (cpHistogram) {
cp.histogram = cpHistogram.histogram;
}
return cp;
});
return { ...state, changePoints };
case API_ACTION_NAME.RESET:
return initialState;
case API_ACTION_NAME.UPDATE_LOADING_STATE:

View file

@ -1,19 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
export interface FieldValuePair {
fieldName: string;
fieldValue: string;
isFallbackResult?: boolean;
}
export interface ChangePoint extends FieldValuePair {
doc_count: number;
bg_count: number;
score: number;
pValue: number | null;
}

View file

@ -5,4 +5,4 @@
* 2.0.
*/
export { getHistogramsForFields } from '@kbn/ml-plugin/server';
export { MiniHistogram } from './mini_histogram';

View file

@ -0,0 +1,70 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import React, { FC } from 'react';
import { Chart, BarSeries, PartialTheme, ScaleType, Settings } from '@elastic/charts';
import type { ChangePointHistogramItem } from '@kbn/ml-agg-utils';
interface MiniHistogramProps {
chartData: ChangePointHistogramItem[];
label: string;
}
export const MiniHistogram: FC<MiniHistogramProps> = ({ chartData, label }) => {
const theme: PartialTheme = {
chartMargins: {
left: 0,
right: 0,
top: 0,
bottom: 0,
},
chartPaddings: {
left: 0,
right: 0,
top: 0,
bottom: 0,
},
scales: {
barsPadding: 0.1,
},
};
return (
<div
style={{
width: '80px',
height: '24px',
margin: '0px',
}}
>
<Chart>
<Settings theme={theme} showLegend={false} />
<BarSeries
id="doc_count_overall"
xScaleType={ScaleType.Time}
yScaleType={ScaleType.Linear}
xAccessor={'key'}
yAccessors={['doc_count_overall']}
data={chartData}
stackAccessors={[0]}
/>
<BarSeries
id={label}
xScaleType={ScaleType.Time}
yScaleType={ScaleType.Linear}
xAccessor={'key'}
yAccessors={['doc_count_change_point']}
data={chartData}
stackAccessors={[0]}
color={['orange']}
/>
</Chart>
</div>
);
};

View file

@ -6,10 +6,12 @@
*/
import React, { FC, useCallback, useMemo, useState } from 'react';
import { EuiBadge, EuiBasicTable, EuiBasicTableColumn, RIGHT_ALIGNMENT } from '@elastic/eui';
import { EuiBadge, EuiBasicTable, EuiBasicTableColumn } from '@elastic/eui';
import { i18n } from '@kbn/i18n';
import { ChangePoint } from '../../../common/types';
import { ImpactBar } from './impact_bar';
import type { ChangePoint } from '@kbn/ml-agg-utils';
import { MiniHistogram } from '../mini_histogram';
import { getFailedTransactionsCorrelationImpactLabel } from './get_failed_transactions_correlation_impact_label';
const PAGINATION_SIZE_OPTIONS = [5, 10, 20, 50];
@ -29,25 +31,45 @@ export const SpikeAnalysisTable: FC<Props> = ({ changePointData, error, loading
const columns: Array<EuiBasicTableColumn<ChangePoint>> = [
{
field: 'score',
field: 'fieldName',
name: i18n.translate(
'xpack.aiops.correlations.failedTransactions.correlationsTable.fieldNameLabel',
{ defaultMessage: 'Field name' }
),
sortable: true,
},
{
field: 'fieldValue',
name: i18n.translate(
'xpack.aiops.correlations.failedTransactions.correlationsTable.fieldValueLabel',
{ defaultMessage: 'Field value' }
),
render: (_, { fieldValue }) => String(fieldValue).slice(0, 50),
sortable: true,
},
{
field: 'pValue',
name: (
<>
{i18n.translate(
'xpack.aiops.correlations.failedTransactions.correlationsTable.pValueLabel',
'xpack.aiops.correlations.failedTransactions.correlationsTable.logRateLabel',
{
defaultMessage: 'Score',
defaultMessage: 'Log rate',
}
)}
</>
),
align: RIGHT_ALIGNMENT,
render: (_, { score }) => {
return (
<>
<ImpactBar size="m" value={Number(score.toFixed(2))} label={score.toFixed(2)} />
</>
);
render: (_, { histogram, fieldName, fieldValue }) => {
return histogram ? (
<MiniHistogram chartData={histogram} label={`${fieldName}:${fieldValue}`} />
) : null;
},
sortable: false,
},
{
field: 'pValue',
name: 'p-value',
render: (pValue: number) => pValue.toPrecision(3),
sortable: true,
},
{
@ -68,29 +90,6 @@ export const SpikeAnalysisTable: FC<Props> = ({ changePointData, error, loading
},
sortable: true,
},
{
field: 'fieldName',
name: i18n.translate(
'xpack.aiops.correlations.failedTransactions.correlationsTable.fieldNameLabel',
{ defaultMessage: 'Field name' }
),
sortable: true,
},
{
field: 'fieldValue',
name: i18n.translate(
'xpack.aiops.correlations.failedTransactions.correlationsTable.fieldValueLabel',
{ defaultMessage: 'Field value' }
),
render: (_, { fieldValue }) => String(fieldValue).slice(0, 50),
sortable: true,
},
{
field: 'pValue',
name: 'p-value',
render: (pValue: number) => pValue.toPrecision(3),
sortable: true,
},
];
const onChange = useCallback((tableSettings) => {
@ -105,7 +104,12 @@ export const SpikeAnalysisTable: FC<Props> = ({ changePointData, error, loading
const itemCount = changePointData?.length ?? 0;
return {
pageOfItems: changePointData?.slice(pageStart, pageStart + pageSize),
pageOfItems: changePointData
// Temporary default sorting by ascending pValue until we add native table sorting
?.sort((a, b) => {
return (a?.pValue ?? 1) - (b?.pValue ?? 0);
})
.slice(pageStart, pageStart + pageSize),
pagination: {
pageIndex,
pageSize,
@ -119,7 +123,7 @@ export const SpikeAnalysisTable: FC<Props> = ({ changePointData, error, loading
<EuiBasicTable
compressed
columns={columns}
items={pageOfItems ?? []}
items={pageOfItems}
noItemsMessage={noDataText}
onChange={onChange}
pagination={pagination}

View file

@ -7,13 +7,18 @@
import { chunk } from 'lodash';
import { asyncForEach } from '@kbn/std';
import type { IRouter } from '@kbn/core/server';
import { KBN_FIELD_TYPES } from '@kbn/field-types';
import type { Logger } from '@kbn/logging';
import type { DataRequestHandlerContext } from '@kbn/data-plugin/server';
import { streamFactory } from '@kbn/aiops-utils';
import type { ChangePoint, NumericChartData, NumericHistogramField } from '@kbn/ml-agg-utils';
import { fetchHistogramsForFields } from '@kbn/ml-agg-utils';
import {
addChangePointsAction,
addChangePointsHistogramAction,
aiopsExplainLogRateSpikesSchema,
errorAction,
resetAction,
@ -21,7 +26,6 @@ import {
AiopsExplainLogRateSpikesApiAction,
} from '../../common/api/explain_log_rate_spikes';
import { API_ENDPOINT } from '../../common/api';
import type { ChangePoint } from '../../common/types';
import type { AiopsLicense } from '../types';
@ -30,7 +34,8 @@ import { fetchChangePointPValues } from './queries/fetch_change_point_p_values';
// Overall progress is a float from 0 to 1.
const LOADED_FIELD_CANDIDATES = 0.2;
const PROGRESS_STEP_P_VALUES = 0.8;
const PROGRESS_STEP_P_VALUES = 0.6;
const PROGRESS_STEP_HISTOGRAMS = 0.2;
export const defineExplainLogRateSpikesRoute = (
router: IRouter<DataRequestHandlerContext>,
@ -151,6 +156,105 @@ export const defineExplainLogRateSpikesRoute = (
}
}
if (changePoints?.length === 0) {
push(
updateLoadingStateAction({
ccsWarning: false,
loaded: 1,
loadingState: `Done.`,
})
);
end();
return;
}
const histogramFields: [NumericHistogramField] = [
{ fieldName: request.body.timeFieldName, type: KBN_FIELD_TYPES.DATE },
];
const [overallTimeSeries] = (await fetchHistogramsForFields(
client,
request.body.index,
{ match_all: {} },
// fields
histogramFields,
// samplerShardSize
-1,
undefined
)) as [NumericChartData];
// time series filtered by fields
if (changePoints) {
await asyncForEach(changePoints, async (cp, index) => {
if (changePoints) {
const histogramQuery = {
bool: {
filter: [
{
term: { [cp.fieldName]: cp.fieldValue },
},
],
},
};
const [cpTimeSeries] = (await fetchHistogramsForFields(
client,
request.body.index,
histogramQuery,
// fields
[
{
fieldName: request.body.timeFieldName,
type: KBN_FIELD_TYPES.DATE,
interval: overallTimeSeries.interval,
min: overallTimeSeries.stats[0],
max: overallTimeSeries.stats[1],
},
],
// samplerShardSize
-1,
undefined
)) as [NumericChartData];
const histogram =
overallTimeSeries.data.map((o, i) => {
const current = cpTimeSeries.data.find(
(d1) => d1.key_as_string === o.key_as_string
) ?? {
doc_count: 0,
};
return {
key: o.key,
key_as_string: o.key_as_string ?? '',
doc_count_change_point: current.doc_count,
doc_count_overall: Math.max(0, o.doc_count - current.doc_count),
};
}) ?? [];
const { fieldName, fieldValue } = cp;
loaded += (1 / changePoints.length) * PROGRESS_STEP_HISTOGRAMS;
push(
updateLoadingStateAction({
ccsWarning: false,
loaded,
loadingState: `Loading histogram data.`,
})
);
push(
addChangePointsHistogramAction([
{
fieldName,
fieldValue,
histogram,
},
])
);
}
});
}
end();
})();

View file

@ -8,9 +8,9 @@ import { uniqBy } from 'lodash';
import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { ElasticsearchClient } from '@kbn/core/server';
import { ChangePoint } from '@kbn/ml-agg-utils';
import { SPIKE_ANALYSIS_THRESHOLD } from '../../../common/constants';
import type { AiopsExplainLogRateSpikesSchema } from '../../../common/api/explain_log_rate_spikes';
import { ChangePoint } from '../../../common/types';
import { getQueryWithParams } from './get_query_with_params';
import { getRequestBase } from './get_request_base';
@ -109,6 +109,13 @@ export const fetchChangePointPValues = async (
for (const bucket of overallResult.buckets) {
const pValue = Math.exp(-bucket.score);
// Scale the score into a value from 0 - 1
// using a concave piecewise linear function in -log(p-value)
const normalizedScore =
0.5 * Math.min(Math.max((bucket.score - 3.912) / 2.995, 0), 1) +
0.25 * Math.min(Math.max((bucket.score - 6.908) / 6.908, 0), 1) +
0.25 * Math.min(Math.max((bucket.score - 13.816) / 101.314, 0), 1);
if (typeof pValue === 'number' && pValue < SPIKE_ANALYSIS_THRESHOLD) {
result.push({
fieldName,
@ -117,6 +124,7 @@ export const fetchChangePointPValues = async (
bg_count: bucket.doc_count,
score: bucket.score,
pValue,
normalizedScore,
});
}
}

View file

@ -7,7 +7,7 @@
import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import type { FieldValuePair } from '../../../common/types';
import type { FieldValuePair } from '@kbn/ml-agg-utils';
import type { AiopsExplainLogRateSpikesSchema } from '../../../common/api/explain_log_rate_spikes';
import { getFilters } from './get_filters';

View file

@ -32,11 +32,6 @@ export function isValidField(arg: unknown): arg is Field {
return isPopulatedObject(arg, ['fieldName', 'type']) && typeof arg.fieldName === 'string';
}
export interface HistogramField {
fieldName: string;
type: string;
}
export interface Distribution {
percentiles: Array<{ value?: number; percent: number; minValue: number; maxValue: number }>;
minPercentile: number;
@ -107,13 +102,6 @@ export interface FieldExamples {
examples: unknown[];
}
export interface NumericColumnStats {
interval: number;
min: number;
max: number;
}
export type NumericColumnStatsMap = Record<string, NumericColumnStats>;
export interface AggHistogram {
histogram: estypes.AggregationsHistogramAggregation;
}

View file

@ -9,6 +9,7 @@ import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { get } from 'lodash';
import { Query } from '@kbn/es-query';
import { IKibanaSearchResponse } from '@kbn/data-plugin/common';
import type { AggCardinality } from '@kbn/ml-agg-utils';
import { buildSamplerAggregation, getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
import {
@ -17,7 +18,7 @@ import {
} from '../../../../../common/utils/query_utils';
import { getDatafeedAggregations } from '../../../../../common/utils/datafeed_utils';
import { AggregatableField, NonAggregatableField } from '../../types/overall_stats';
import { AggCardinality, Aggs } from '../../../../../common/types/field_stats';
import { Aggs } from '../../../../../common/types/field_stats';
export const checkAggregatableFieldsExistRequest = (
dataViewTitle: string,

View file

@ -97,19 +97,6 @@ export const mlCategory: Field = {
aggregatable: false,
};
export interface FieldAggCardinality {
field: string;
percent?: any;
}
export interface ScriptAggCardinality {
script: any;
}
export interface AggCardinality {
cardinality: FieldAggCardinality | ScriptAggCardinality;
}
export type RollupFields = Record<FieldId, [Record<'agg', ES_AGGREGATION>]>;
export type RuntimeMappings = estypes.MappingRuntimeFields;

View file

@ -21,7 +21,6 @@ export {
UnknownMLCapabilitiesError,
InsufficientMLCapabilities,
MLPrivilegesUninitialized,
getHistogramsForFields,
} from './shared';
export const plugin = (ctx: PluginInitializerContext) => new MlServerPlugin(ctx);

View file

@ -8,18 +8,17 @@
import { get, each, last, find } from 'lodash';
import { IScopedClusterClient } from '@kbn/core/server';
import { KBN_FIELD_TYPES } from '@kbn/data-plugin/server';
import {
buildSamplerAggregation,
getAggIntervals,
fetchHistogramsForFields,
getSamplerAggregationsResponsePath,
} from '@kbn/ml-agg-utils';
import { stringHash } from '@kbn/ml-string-hash';
import type { AggCardinality, FieldsForHistograms } from '@kbn/ml-agg-utils';
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
import { ML_JOB_FIELD_TYPES } from '../../../common/constants/field_types';
import { getSafeAggregationName } from '../../../common/util/job_utils';
import { buildBaseFilterCriteria } from '../../lib/query_utils';
import { AggCardinality, RuntimeMappings } from '../../../common/types/fields';
import { RuntimeMappings } from '../../../common/types/fields';
import { getDatafeedAggregations } from '../../../common/util/datafeed_utils';
import { Datafeed } from '../../../common/types/anomaly_detection_jobs';
@ -28,8 +27,6 @@ const SAMPLER_TOP_TERMS_SHARD_SIZE = 5000;
const AGGREGATABLE_EXISTS_REQUEST_BATCH_SIZE = 200;
const FIELDS_REQUEST_BATCH_SIZE = 10;
const MAX_CHART_COLUMNS = 20;
interface FieldData {
fieldName: string;
existsInDocs: boolean;
@ -46,11 +43,6 @@ export interface Field {
cardinality: number;
}
export interface HistogramField {
fieldName: string;
type: string;
}
interface Distribution {
percentiles: any[];
minPercentile: number;
@ -114,57 +106,6 @@ interface FieldExamples {
examples: any[];
}
interface AggHistogram {
histogram: {
field: string;
interval: number;
};
}
interface AggTerms {
terms: {
field: string;
size: number;
};
}
interface NumericDataItem {
key: number;
key_as_string?: string;
doc_count: number;
}
interface NumericChartData {
data: NumericDataItem[];
id: string;
interval: number;
stats: [number, number];
type: 'numeric';
}
interface OrdinalDataItem {
key: string;
key_as_string?: string;
doc_count: number;
}
interface OrdinalChartData {
type: 'ordinal' | 'boolean';
cardinality: number;
data: OrdinalDataItem[];
id: string;
}
interface UnsupportedChartData {
id: string;
type: 'unsupported';
}
type ChartRequestAgg = AggHistogram | AggCardinality | AggTerms;
// type ChartDataItem = NumericDataItem | OrdinalDataItem;
type ChartData = NumericChartData | OrdinalChartData | UnsupportedChartData;
type BatchStats =
| NumericFieldStats
| StringFieldStats
@ -173,126 +114,11 @@ type BatchStats =
| DocumentCountStats
| FieldExamples;
// export for re-use by transforms plugin
export const getHistogramsForFields = async (
client: IScopedClusterClient,
indexPattern: string,
query: any,
fields: HistogramField[],
samplerShardSize: number,
runtimeMappings?: RuntimeMappings
) => {
const { asCurrentUser } = client;
const aggIntervals = await getAggIntervals(
client.asCurrentUser,
indexPattern,
query,
fields,
samplerShardSize,
runtimeMappings
);
const chartDataAggs = fields.reduce((aggs, field) => {
const fieldName = field.fieldName;
const fieldType = field.type;
const id = stringHash(fieldName);
if (fieldType === KBN_FIELD_TYPES.NUMBER || fieldType === KBN_FIELD_TYPES.DATE) {
if (aggIntervals[id] !== undefined) {
aggs[`${id}_histogram`] = {
histogram: {
field: fieldName,
interval: aggIntervals[id].interval !== 0 ? aggIntervals[id].interval : 1,
},
};
}
} else if (fieldType === KBN_FIELD_TYPES.STRING || fieldType === KBN_FIELD_TYPES.BOOLEAN) {
if (fieldType === KBN_FIELD_TYPES.STRING) {
aggs[`${id}_cardinality`] = {
cardinality: {
field: fieldName,
},
};
}
aggs[`${id}_terms`] = {
terms: {
field: fieldName,
size: MAX_CHART_COLUMNS,
},
};
}
return aggs;
}, {} as Record<string, ChartRequestAgg>);
if (Object.keys(chartDataAggs).length === 0) {
return [];
}
const body = await asCurrentUser.search(
{
index: indexPattern,
size: 0,
body: {
query,
aggs: buildSamplerAggregation(chartDataAggs, samplerShardSize),
size: 0,
...(isPopulatedObject(runtimeMappings) ? { runtime_mappings: runtimeMappings } : {}),
},
},
{ maxRetries: 0 }
);
const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
const aggregations = aggsPath.length > 0 ? get(body.aggregations, aggsPath) : body.aggregations;
const chartsData: ChartData[] = fields.map((field): ChartData => {
const fieldName = field.fieldName;
const fieldType = field.type;
const id = stringHash(field.fieldName);
if (fieldType === KBN_FIELD_TYPES.NUMBER || fieldType === KBN_FIELD_TYPES.DATE) {
if (aggIntervals[id] === undefined) {
return {
type: 'numeric',
data: [],
interval: 0,
stats: [0, 0],
id: fieldName,
};
}
return {
data: aggregations[`${id}_histogram`].buckets,
interval: aggIntervals[id].interval,
stats: [aggIntervals[id].min, aggIntervals[id].max],
type: 'numeric',
id: fieldName,
};
} else if (fieldType === KBN_FIELD_TYPES.STRING || fieldType === KBN_FIELD_TYPES.BOOLEAN) {
return {
type: fieldType === KBN_FIELD_TYPES.STRING ? 'ordinal' : 'boolean',
cardinality:
fieldType === KBN_FIELD_TYPES.STRING ? aggregations[`${id}_cardinality`].value : 2,
data: aggregations[`${id}_terms`].buckets,
id: fieldName,
};
}
return {
type: 'unsupported',
id: fieldName,
};
});
return chartsData;
};
export class DataVisualizer {
private _client: IScopedClusterClient;
private _asCurrentUser: IScopedClusterClient['asCurrentUser'];
constructor(client: IScopedClusterClient) {
this._asCurrentUser = client.asCurrentUser;
this._client = client;
}
// Obtains overall stats on the fields in the supplied index pattern, returning an object
@ -388,12 +214,12 @@ export class DataVisualizer {
async getHistogramsForFields(
indexPattern: string,
query: any,
fields: HistogramField[],
fields: FieldsForHistograms,
samplerShardSize: number,
runtimeMappings?: RuntimeMappings
): Promise<any> {
return await getHistogramsForFields(
this._client,
return await fetchHistogramsForFields(
this._asCurrentUser,
indexPattern,
query,
fields,

View file

@ -5,4 +5,4 @@
* 2.0.
*/
export { getHistogramsForFields, DataVisualizer } from './data_visualizer';
export { DataVisualizer } from './data_visualizer';

View file

@ -9,10 +9,10 @@ import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import Boom from '@hapi/boom';
import { IScopedClusterClient } from '@kbn/core/server';
import { duration } from 'moment';
import type { AggCardinality } from '@kbn/ml-agg-utils';
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
import { parseInterval } from '../../../common/util/parse_interval';
import { initCardinalityFieldsCache } from './fields_aggs_cache';
import { AggCardinality } from '../../../common/types/fields';
import { isValidAggregationField } from '../../../common/util/validation_utils';
import { getDatafeedAggregations } from '../../../common/util/datafeed_utils';
import { Datafeed, IndicesOptions } from '../../../common/types/anomaly_detection_jobs';

View file

@ -6,9 +6,9 @@
*/
import { IScopedClusterClient } from '@kbn/core/server';
import { FieldsForHistograms } from '@kbn/ml-agg-utils';
import { wrapError } from '../client/error_wrapper';
import { DataVisualizer } from '../models/data_visualizer';
import { HistogramField } from '../models/data_visualizer/data_visualizer';
import {
dataVisualizerFieldHistogramsSchema,
indexPatternSchema,
@ -20,7 +20,7 @@ function getHistogramsForFields(
client: IScopedClusterClient,
indexPattern: string,
query: any,
fields: HistogramField[],
fields: FieldsForHistograms,
samplerShardSize: number,
runtimeMappings: RuntimeMappings
) {

View file

@ -9,4 +9,3 @@ export * from '../common/types/anomalies';
export * from '../common/types/anomaly_detection_jobs';
export * from './lib/capabilities/errors';
export type { ModuleSetupPayload } from './shared_services/providers/modules';
export { getHistogramsForFields } from './models/data_visualizer';

View file

@ -5,12 +5,13 @@
* 2.0.
*/
import { fetchHistogramsForFields } from '@kbn/ml-agg-utils';
import { dataViewTitleSchema, DataViewTitleSchema } from '../../../common/api_schemas/common';
import {
fieldHistogramsRequestSchema,
FieldHistogramsRequestSchema,
} from '../../../common/api_schemas/field_histograms';
import { getHistogramsForFields } from '../../shared_imports';
import { RouteDependencies } from '../../types';
import { addBasePath } from '..';
@ -33,8 +34,8 @@ export function registerFieldHistogramsRoutes({ router, license }: RouteDependen
try {
const esClient = (await ctx.core).elasticsearch.client;
const resp = await getHistogramsForFields(
esClient,
const resp = await fetchHistogramsForFields(
esClient.asCurrentUser,
dataViewTitle,
query,
fields,

View file

@ -34,11 +34,12 @@ export default ({ getService }: FtrProviderContext) => {
};
const expected = {
chunksLength: 8,
actionsLength: 7,
chunksLength: 12,
actionsLength: 11,
noIndexChunksLength: 4,
noIndexActionsLength: 3,
actionFilter: 'add_change_points',
changePointFilter: 'add_change_points',
histogramFilter: 'add_change_points_histogram',
errorFilter: 'error',
changePoints: [
{
@ -60,6 +61,7 @@ export default ({ getService }: FtrProviderContext) => {
normalizedScore: 0.7661649691018979,
},
],
histogramLength: 20,
};
describe('POST /internal/aiops/explain_log_rate_spikes', () => {
@ -100,7 +102,7 @@ export default ({ getService }: FtrProviderContext) => {
expect(typeof d.type).to.be('string');
});
const addChangePointsActions = data.filter((d) => d.type === expected.actionFilter);
const addChangePointsActions = data.filter((d) => d.type === expected.changePointFilter);
expect(addChangePointsActions.length).to.greaterThan(0);
const changePoints = addChangePointsActions
@ -120,6 +122,15 @@ export default ({ getService }: FtrProviderContext) => {
expect(cp.doc_count).to.equal(ecp.doc_count);
expect(cp.bg_count).to.equal(ecp.bg_count);
});
const histogramActions = data.filter((d) => d.type === expected.histogramFilter);
const histograms = histogramActions.flatMap((d) => d.payload);
// for each change point we should get a histogram
expect(histogramActions.length).to.be(changePoints.length);
// each histogram should have a length of 20 items.
histograms.forEach((h, index) => {
expect(h.histogram.length).to.be(20);
});
});
it('should return data in chunks with streaming', async () => {
@ -148,7 +159,7 @@ export default ({ getService }: FtrProviderContext) => {
}
expect(data.length).to.be(expected.actionsLength);
const addChangePointsActions = data.filter((d) => d.type === expected.actionFilter);
const addChangePointsActions = data.filter((d) => d.type === expected.changePointFilter);
expect(addChangePointsActions.length).to.greaterThan(0);
const changePoints = addChangePointsActions
@ -168,6 +179,15 @@ export default ({ getService }: FtrProviderContext) => {
expect(cp.doc_count).to.equal(ecp.doc_count);
expect(cp.bg_count).to.equal(ecp.bg_count);
});
const histogramActions = data.filter((d) => d.type === expected.histogramFilter);
const histograms = histogramActions.flatMap((d) => d.payload);
// for each change point we should get a histogram
expect(histogramActions.length).to.be(changePoints.length);
// each histogram should have a length of 20 items.
histograms.forEach((h, index) => {
expect(h.histogram.length).to.be(20);
});
}
});