[ML] AIOps: Adds cardinality check to Log Rate Analysis (#181129)

## Summary

Part of #181111.

This filters out field/value items from the results if the field has a
cardinality of just 1, since such an item wouldn't be useful as a result.

Before (fields with a cardinality of 1 are easy to spot: the mini
histogram in the table is a single color, which means the value is
present in all logs):


![image](2904c026-5a69-43b7-b80f-87923368b506)

After:


![image](7a9bffae-9991-4584-91f1-cff9fdc1eaf1)

### Checklist

- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios
- [x] This was checked for breaking API changes and was [labeled
appropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)
This commit is contained in:
Walter Rafelsberger 2024-05-21 11:32:57 +02:00 committed by GitHub
parent df74eb609c
commit d43c8f94f3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -56,8 +56,18 @@ export const getSignificantTermRequest = (
];
}
const pValueAgg: Record<'change_point_p_value', estypes.AggregationsAggregationContainer> = {
change_point_p_value: {
const pValueAgg: Record<
'sig_term_p_value' | 'distinct_count',
estypes.AggregationsAggregationContainer
> = {
// Used to identify fields with only one distinct value which we'll ignore in the analysis.
distinct_count: {
cardinality: {
field: fieldName,
},
},
// Used to calculate the p-value for terms of the field.
sig_term_p_value: {
significant_terms: {
field: fieldName,
background_filter: {
@ -158,13 +168,26 @@ export const fetchSignificantTermPValues = async (
}
const overallResult = (
randomSamplerWrapper.unwrap(resp.aggregations) as Record<'change_point_p_value', Aggs>
).change_point_p_value;
randomSamplerWrapper.unwrap(resp.aggregations) as Record<'sig_term_p_value', Aggs>
).sig_term_p_value;
const distinctCount = (
randomSamplerWrapper.unwrap(resp.aggregations) as Record<
'distinct_count',
estypes.AggregationsCardinalityAggregate
>
).distinct_count.value;
for (const bucket of overallResult.buckets) {
const pValue = Math.exp(-bucket.score);
if (typeof pValue === 'number' && pValue < LOG_RATE_ANALYSIS_SETTINGS.P_VALUE_THRESHOLD) {
if (
typeof pValue === 'number' &&
// Skip items where the p-value is not significant.
pValue < LOG_RATE_ANALYSIS_SETTINGS.P_VALUE_THRESHOLD &&
// Skip items where the field has only one distinct value.
distinctCount > 1
) {
result.push({
key: `${fieldName}:${String(bucket.key)}`,
type: SIGNIFICANT_ITEM_TYPE.KEYWORD,