[ML] Add comments to anomaly detection types and constants in public API (#123813)

* [ML] Add comments to anomaly detection types and constants in public API

* [ML] Clarify comment for job groups

* [ML] More edits to summary_job docs

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
Pete Harverson 2022-01-28 14:25:08 +00:00 committed by GitHub
parent c3934477bb
commit d80e3194a1
5 changed files with 489 additions and 48 deletions


@@ -5,29 +5,106 @@
* 2.0.
*/
/**
* Labels displayed in the ML UI to indicate the severity of the anomaly according
* to the normalized anomaly score.
*/
export enum ANOMALY_SEVERITY {
/**
* Anomalies are displayed as critical severity when the score is greater than or equal to 75.
*/
CRITICAL = 'critical',
/**
* Anomalies are displayed as major severity when the score is greater than or equal to 50 and less than 75.
*/
MAJOR = 'major',
/**
* Anomalies are displayed as minor severity when the score is greater than or equal to 25 and less than 50.
*/
MINOR = 'minor',
/**
* Anomalies are displayed as warning severity when the score is greater than or equal to 3 and less than 25.
* Note that in some parts of the UI, warning severity is used when the score is greater than or equal to 0.
*/
WARNING = 'warning',
/**
* Anomalies are displayed as low severity in some parts of the ML UI when the score is greater than or equal to 0 and less than 3.
*/
LOW = 'low',
/**
* Anomalies are displayed as unknown severity if the anomaly score is not known.
*/
UNKNOWN = 'unknown',
}
/**
* Anomaly score numeric thresholds to indicate the severity of the anomaly.
*/
export enum ANOMALY_THRESHOLD {
/**
* Threshold at which anomalies are labelled in the UI as critical.
*/
CRITICAL = 75,
/**
* Threshold at which anomalies are labelled in the UI as major.
*/
MAJOR = 50,
/**
* Threshold at which anomalies are labelled in the UI as minor.
*/
MINOR = 25,
/**
* Threshold at which anomalies are labelled in the UI as warning.
*/
WARNING = 3,
/**
* Threshold at which anomalies are labelled in the UI as low.
*/
LOW = 0,
}
/**
* RGB hex codes used to indicate the severity of an anomaly according to its anomaly score.
*/
export const SEVERITY_COLORS = {
/**
* Color used in the UI to indicate a critical anomaly, with a score greater than or equal to 75.
*/
CRITICAL: '#fe5050',
/**
* Color used in the UI to indicate a major anomaly, with a score greater than or equal to 50 and less than 75.
*/
MAJOR: '#fba740',
/**
* Color used in the UI to indicate a minor anomaly, with a score greater than or equal to 25 and less than 50.
*/
MINOR: '#fdec25',
/**
* Color used in the UI to indicate a warning anomaly, with a score greater than or equal to 3 and less than 25.
* Note that in some parts of the UI, warning severity is used when the score is greater than or equal to 0.
*/
WARNING: '#8bc8fb',
/**
* Color used in some parts of the UI to indicate a low severity anomaly, with a score greater than or equal to 0 and less than 3.
*/
LOW: '#d2e9f7',
/**
* Color used in the UI to indicate an anomaly for which the score is unknown.
*/
BLANK: '#ffffff',
};
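For reference, a minimal usage sketch (not part of this commit; severityForScore is a hypothetical helper) showing how the thresholds, labels and colors above line up:

function severityForScore(score: number): { label: ANOMALY_SEVERITY; color: string } {
  // Walk the thresholds from highest to lowest, mirroring how the UI buckets scores.
  if (score >= ANOMALY_THRESHOLD.CRITICAL) {
    return { label: ANOMALY_SEVERITY.CRITICAL, color: SEVERITY_COLORS.CRITICAL };
  }
  if (score >= ANOMALY_THRESHOLD.MAJOR) {
    return { label: ANOMALY_SEVERITY.MAJOR, color: SEVERITY_COLORS.MAJOR };
  }
  if (score >= ANOMALY_THRESHOLD.MINOR) {
    return { label: ANOMALY_SEVERITY.MINOR, color: SEVERITY_COLORS.MINOR };
  }
  if (score >= ANOMALY_THRESHOLD.WARNING) {
    return { label: ANOMALY_SEVERITY.WARNING, color: SEVERITY_COLORS.WARNING };
  }
  return { label: ANOMALY_SEVERITY.LOW, color: SEVERITY_COLORS.LOW };
}

severityForScore(80); // { label: 'critical', color: '#fe5050' }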


@@ -5,6 +5,9 @@
* 2.0.
*/
/**
* The status of the datafeed.
*/
export enum DATAFEED_STATE {
STARTED = 'started',
STARTING = 'starting',
@@ -13,6 +16,9 @@ export enum DATAFEED_STATE {
DELETED = 'deleted',
}
/**
* The status of the anomaly detection job forecast.
*/
export enum FORECAST_REQUEST_STATE {
FAILED = 'failed',
FINISHED = 'finished',
@@ -20,6 +26,9 @@ export enum FORECAST_REQUEST_STATE {
STARTED = 'started',
}
/**
* The status of the anomaly detection job.
*/
export enum JOB_STATE {
CLOSED = 'closed',
CLOSING = 'closing',


@@ -8,37 +8,158 @@
import { PARTITION_FIELDS, ANOMALY_RESULT_TYPE } from '../constants/anomalies';
import type { KibanaUrlConfig } from './custom_urls';
/**
* Influencers are the entities that have contributed to, or are to blame for, the anomalies.
* Influencer results are available only if an influencer_field_name is specified in the job configuration.
*/
export interface Influencer {
/**
* The field name of the influencer.
*/
influencer_field_name: string;
/**
* The entities that influenced, contributed to, or were to blame for the anomaly.
*/
influencer_field_values: string[];
}
export type MLAnomalyDoc = AnomalyRecordDoc;
/**
* Anomaly record document. Records contain the detailed analytical results.
* They describe the anomalous activity that has been identified in the input data based on the detector configuration.
*/
export interface AnomalyRecordDoc {
/**
* Index signature to cover dynamic attributes added to the record depending on the fields being analyzed.
* For example, if the job is analyzing hostname as a by field, then a field hostname is added to the result document.
*/
[key: string]: any;
/**
* The identifier for the anomaly detection job.
*/
job_id: string;
/**
* The type of the result document, which is 'record' for record level results.
*/
result_type: string;
/**
* The probability of the individual anomaly occurring, in the range 0 to 1.
* This value can be held to a high precision of over 300 decimal places,
* so the record_score is provided as a human-readable and friendly interpretation of this.
*/
probability: number;
/**
* A normalized score between 0-100, which is based on the probability of the anomalousness of this record.
* Unlike initial_record_score, this value will be updated by a re-normalization process as new data is analyzed.
*/
record_score: number;
/**
* A normalized score between 0-100, which is based on the probability of the anomalousness of this record.
* This is the initial value that was calculated at the time the bucket was processed.
*/
initial_record_score: number;
/**
* The length of the bucket in seconds. This value matches the bucket_span that is specified in the job.
*/
bucket_span: number;
/**
* A unique identifier for the detector. This identifier is based on the order of the detectors
* in the analysis configuration, starting at zero.
*/
detector_index: number;
/**
* If true, this is an interim result. In other words, the results are calculated based on partial input data.
*/
is_interim: boolean;
/**
* The start time of the bucket for which these results were calculated.
*/
timestamp: number;
/**
* The field used to segment the analysis.
* When you use this property, you have completely independent baselines for each value of this field.
*/
partition_field_name?: string;
/**
* The value of the partition field.
*/
partition_field_value?: string | number;
/**
* The function in which the anomaly occurs, as specified in the detector configuration. For example, max.
*/
function: string;
/**
* The description of the function in which the anomaly occurs, as specified in the detector configuration.
*/
function_description: string;
/**
* Certain functions require a field to operate on, for example, sum().
* For those functions, this value is the name of the field to be analyzed.
*/
field_name?: string;
/**
* The typical value for the bucket, according to analytical modeling.
*/
typical?: number[];
/**
* The actual value for the bucket.
*/
actual?: number[];
/**
* If influencers was specified in the detector configuration, this array contains influencers
* that contributed to or were to blame for an anomaly.
*/
influencers?: Influencer[];
/**
* The field used to split the data. In particular, this property is used for analyzing the splits
* with respect to their own history. It is used for finding unusual values in the context of the split.
*/
by_field_name?: string;
/**
* The value of the by field.
*/
by_field_value?: string;
/**
* The field used to split the data. In particular, this property is used for analyzing
* the splits with respect to the history of all splits.
* It is used for finding unusual values in the population of all splits.
*/
over_field_name?: string;
/**
* The value of the over field.
*/
over_field_value?: string;
/**
* For population analysis, this property contains an array of anomaly records that are the causes
* for the anomaly that has been identified for the over field. If no over fields exist, this field is not present.
* This sub-resource contains the most anomalous records for the over_field_name.
* The causes resource contains similar elements to the record resource.
* Probability and scores are not applicable to causes.
*/
causes?: Array<{
function: string;
function_description: string;
@@ -53,24 +174,107 @@ export interface AnomalyRecordDoc {
partition_field_name?: string;
partition_field_value?: string | number;
}>;
/**
* An indication of how strongly an anomaly is multi bucket or single bucket.
* The value is on a scale of -5.0 to +5.0 where -5.0 means the anomaly is
* purely single bucket and +5.0 means the anomaly is purely multi bucket.
*/
multi_bucket_impact?: number;
}
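As an illustration (a hypothetical document, not taken from this commit), a minimal record for a job analyzing mean(responsetime) partitioned on airline might look like:

const exampleRecord: AnomalyRecordDoc = {
  job_id: 'farequote_response_times', // hypothetical job id
  result_type: 'record',
  probability: 0.0000012,
  record_score: 86.2,
  initial_record_score: 90.1,
  bucket_span: 900, // 15 minute buckets
  detector_index: 0,
  is_interim: false,
  timestamp: 1643370000000,
  function: 'mean',
  function_description: 'mean',
  field_name: 'responsetime',
  partition_field_name: 'airline',
  partition_field_value: 'AAL',
  typical: [99.2], // value expected by the model
  actual: [1042.3], // observed value for the bucket
};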
/**
* Anomaly table record, representing the fields shown in the ML UI anomalies table.
*/
export interface AnomaliesTableRecord {
/**
* The start time of the interval for which the anomaly data in the table is being aggregated.
* Anomalies in the table are commonly aggregated by day, hour, or at the bucket span of the job.
*/
time: number;
/**
* The source anomaly record document, containing the full source anomaly record fields.
*/
source: AnomalyRecordDoc;
/**
* Unique identifier for the table row.
*/
rowId: string;
/**
* Identifier for the anomaly detection job.
*/
jobId: string;
/**
* A unique identifier for the detector.
* This identifier is based on the order of the detectors in the analysis configuration, starting at zero.
*/
detectorIndex: number;
/**
* Severity of the anomaly, displayed as the record_score: a normalized score between 0-100,
* which is based on the probability of the anomalousness of this record.
*/
severity: number;
/**
* The entity name of the anomaly, looking first for a by_field, then over_field,
* then partition_field, returning undefined if none of these fields are present.
*/
entityName?: string;
/**
* The value of the entity field.
*/
entityValue?: any;
/**
* If influencers was specified in the detector configuration, this array contains influencers
* that contributed to or were to blame for an anomaly.
*/
influencers?: Array<{ [key: string]: any }>;
/**
* The actual value for the anomaly.
*/
actual?: number[];
/**
* Property used by the table to sort anomalies by their actual value,
* which is a single numeric value rather than the underlying arrays.
*/
actualSort?: any;
/**
* The typical value for the anomaly.
*/
typical?: number[];
/**
* Property used by the table to sort anomalies by their typical value,
* which is a single numeric value rather than the underlying arrays.
*/
typicalSort?: any;
/**
* Property used by the table to sort anomalies by the description of how the
* actual value compares to the typical value.
*/
metricDescriptionSort?: number;
/**
* List of custom URL drilldowns from the table row to other pages such as
* Discover, Dashboard or other web pages.
*/
customUrls?: KibanaUrlConfig[];
/**
* Returns true if the anomaly record represented by the table row is for a time series
* which can be plotted by the ML UI in an anomaly chart.
*/
isTimeSeriesViewRecord?: boolean;
}
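A rough sketch (the rowId format and field mapping here are assumptions for illustration, not the UI's actual implementation) of how a source record could populate a table row; getEntityFieldName and getEntityFieldValue are the helpers documented later in this commit:

function toTableRecord(source: AnomalyRecordDoc): AnomaliesTableRecord {
  return {
    time: source.timestamp,
    source,
    rowId: `${source.job_id}_${source.timestamp}_${source.detector_index}`, // assumed format
    jobId: source.job_id,
    detectorIndex: source.detector_index,
    severity: source.record_score,
    entityName: getEntityFieldName(source),
    entityValue: getEntityFieldValue(source),
    actual: source.actual,
    typical: source.typical,
  };
}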
@@ -95,4 +299,7 @@ export interface AnomalyCategorizerStatsDoc {
export type EntityFieldType = 'partition_field' | 'over_field' | 'by_field';
/**
* The type of the anomaly result, such as bucket, influencer or record.
*/
export type AnomalyResultType = typeof ANOMALY_RESULT_TYPE[keyof typeof ANOMALY_RESULT_TYPE];
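To illustrate the derived type: assuming ANOMALY_RESULT_TYPE is declared as const with members such as { BUCKET: 'bucket', INFLUENCER: 'influencer', RECORD: 'record' }, AnomalyResultType resolves to the union 'bucket' | 'influencer' | 'record'.

const resultType: AnomalyResultType = 'record'; // narrows to one member of the union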


@@ -14,32 +14,142 @@ import type { MlJobBlocked } from './job';
export type { Datafeed } from './datafeed';
export type { DatafeedStats } from './datafeed_stats';
/**
* A summary of an anomaly detection job.
*/
export interface MlSummaryJob {
/**
* The identifier for the anomaly detection job.
*/
id: string;
/**
* A description of the job.
*/
description: string;
/**
* A list of job groups. A job can belong to no groups, one or many.
*/
groups: string[];
/**
* The number of input documents that have been processed by the anomaly detection job.
* This value includes documents with missing fields, since they are nonetheless analyzed.
*/
processed_record_count?: number;
/**
* The status of the mathematical models, which can take the values ok, soft_limit or hard_limit.
*/
memory_status?: string;
/**
* The status of the job.
*/
jobState: string;
/**
* An array of index names used by the datafeed. Wildcards are supported.
*/
datafeedIndices: string[];
/**
* Flag indicating whether a datafeed exists for the job.
*/
hasDatafeed: boolean;
/**
* The identifier for the datafeed.
*/
datafeedId: string;
/**
* The status of the datafeed.
*/
datafeedState: string;
/**
* The timestamp of the latest chronologically input document.
*/
latestTimestampMs?: number;
/**
* The timestamp of the earliest chronologically input document.
*/
earliestTimestampMs?: number;
/**
* The latest of the timestamp of the latest chronologically input document or the latest bucket that was processed.
*/
latestResultsTimestampMs?: number;
/**
* Used in older implementations of the job config, where the datafeed was placed inside the job for convenience.
* This will be populated if the job's id has been passed to the /api/ml/jobs/jobs_summary endpoint.
*/
fullJob?: CombinedJob;
/**
* The name of the node that runs the job.
*/
nodeName?: string;
/**
* Audit message for the job.
*/
auditMessage?: Partial<AuditMessage>;
/**
* Flag indicating whether results of the job can be viewed in the Single Metric Viewer.
*/
isSingleMetricViewerJob: boolean;
/**
* For jobs which cannot be viewed in the Single Metric Viewer, a message indicating the reason why
* results for the job cannot be viewed in the Single Metric Viewer.
*/
isNotSingleMetricViewerJobMessage?: string;
/**
* When present, it explains that a task is currently running on the job, which is stopping
* any other actions from being performed on the job.
*/
blocked?: MlJobBlocked;
/**
* Value of the latest timestamp for the job used for sorting.
*/
latestTimestampSortValue?: number;
/**
* The earliest of the timestamp of the earliest chronologically input document or the earliest bucket that was processed.
*/
earliestStartTimestampMs?: number;
/**
* Indicates whether the job is currently awaiting assignment to a node before opening.
*/
awaitingNodeAssignment: boolean;
/**
* List of anomaly detection alerting rules configured for the job.
*/
alertingRules?: MlAnomalyDetectionAlertRule[];
/**
* List of tags that have been added to the job.
*/
jobTags: Record<string, string>;
/**
* The size of the interval that the analysis is aggregated into, typically between 5m and 1h.
*/
bucketSpanSeconds: number;
/**
* Advanced configuration option. Contains custom meta data about the job. For example, it can contain custom URL information.
*/
customSettings?: MlCustomSettings;
}
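For illustration, a minimal object (all values invented) satisfying the required fields of MlSummaryJob:

const exampleSummaryJob: MlSummaryJob = {
  id: 'farequote_response_times',
  description: 'Mean response time by airline',
  groups: ['farequote'],
  jobState: 'closed',
  datafeedIndices: ['farequote-*'],
  hasDatafeed: true,
  datafeedId: 'datafeed-farequote_response_times',
  datafeedState: 'stopped',
  isSingleMetricViewerJob: true,
  awaitingNodeAssignment: false,
  jobTags: {},
  bucketSpanSeconds: 900,
};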


@@ -124,14 +124,18 @@ export function isCategorizationAnomaly(anomaly: AnomaliesTableRecord): boolean
}
/**
* Returns formatted severity score.
* @param score - A normalized score between 0-100, which is based on the probability of the anomalousness of this record
*/
export function getFormattedSeverityScore(score: number): string {
return score < 1 ? '< 1' : String(parseInt(String(score), 10));
}
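Example behavior (illustrative):

getFormattedSeverityScore(0.3); // '< 1'
getFormattedSeverityScore(86.4); // '86' (truncated, not rounded)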
/**
* Returns a severity label (one of critical, major, minor, warning or unknown)
* for the supplied normalized anomaly score (a value between 0 and 100).
* @param normalizedScore - A normalized score between 0-100, which is based on the probability of the anomalousness of this record
*/
export function getSeverity(normalizedScore: number): SeverityType {
const severityTypesList = getSeverityTypes();
@@ -148,6 +152,11 @@ export function getSeverity(normalizedScore: number): SeverityType {
}
}
/**
* Returns a severity type (indicating a critical, major, minor, warning or low severity anomaly)
* for the supplied normalized anomaly score (a value between 0 and 100).
* @param normalizedScore - A normalized score between 0-100, which is based on the probability of the anomalousness of this record
*/
export function getSeverityType(normalizedScore: number): ANOMALY_SEVERITY {
if (normalizedScore >= 75) {
return ANOMALY_SEVERITY.CRITICAL;
@@ -164,9 +173,12 @@ export function getSeverityType(normalizedScore: number): ANOMALY_SEVERITY {
}
}
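Illustrative calls (only the critical branch is visible in this hunk; the remaining branches follow the thresholds documented above):

getSeverityType(80); // ANOMALY_SEVERITY.CRITICAL
getSeverityType(60); // ANOMALY_SEVERITY.MAJOR, per the MAJOR threshold of 50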
/**
* Returns a severity label (one of critical, major, minor, warning, low or unknown)
* for the supplied normalized anomaly score (a value between 0 and 100), where scores
* less than 3 are assigned a severity of 'low'.
* @param normalizedScore - A normalized score between 0-100, which is based on the probability of the anomalousness of this record
*/
export function getSeverityWithLow(normalizedScore: number): SeverityType {
const severityTypesList = getSeverityTypes();
@@ -185,8 +197,11 @@ export function getSeverityWithLow(normalizedScore: number): SeverityType {
}
}
/**
* Returns a severity RGB color (one of critical, major, minor, warning, low or blank)
* for the supplied normalized anomaly score (a value between 0 and 100).
* @param normalizedScore - A normalized score between 0-100, which is based on the probability of the anomalousness of this record
*/
export function getSeverityColor(normalizedScore: number): string {
if (normalizedScore >= ANOMALY_THRESHOLD.CRITICAL) {
return SEVERITY_COLORS.CRITICAL;
@@ -203,9 +218,12 @@ export function getSeverityColor(normalizedScore: number): string {
}
}
/**
* Returns a label to use for the multi-bucket impact of an anomaly
* according to the value of the multi_bucket_impact field of a record,
* which ranges from -5 to +5.
* @param multiBucketImpact - Value of the multi_bucket_impact field of a record, from -5 to +5
*/
export function getMultiBucketImpactLabel(multiBucketImpact: number): string {
if (multiBucketImpact >= MULTI_BUCKET_IMPACT.HIGH) {
return i18n.translate('xpack.ml.anomalyUtils.multiBucketImpact.highLabel', {
@@ -226,9 +244,12 @@ export function getMultiBucketImpactLabel(multiBucketImpact: number): string {
}
}
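Illustrative usage (the MULTI_BUCKET_IMPACT thresholds and the exact label strings are defined elsewhere in this file, so the outputs shown are indicative only):

getMultiBucketImpactLabel(5); // e.g. the 'high' impact label
getMultiBucketImpactLabel(-5); // e.g. the 'low' impact label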
/**
* Returns the name of the field to use as the entity name from the source record
* obtained from Elasticsearch. The function looks first for a by_field, then over_field,
* then partition_field, returning undefined if none of these fields are present.
* @param record - anomaly record result for which to obtain the entity field name.
*/
export function getEntityFieldName(record: AnomalyRecordDoc): string | undefined {
// Analyses with by and over fields will have a top-level by_field_name, but
// the by_field_value(s) will be in the nested causes array.
@@ -245,9 +266,12 @@ export function getEntityFieldName(record: AnomalyRecordDoc): string | undefined
}
}
/**
* Returns the value of the field to use as the entity value from the source record
* obtained from Elasticsearch. The function looks first for a by_field, then over_field,
* then partition_field, returning undefined if none of these fields are present.
* @param record - anomaly record result for which to obtain the entity field value.
*/
export function getEntityFieldValue(record: AnomalyRecordDoc): string | number | undefined {
if (record.by_field_value !== undefined) {
return record.by_field_value;
@@ -262,8 +286,11 @@ export function getEntityFieldValue(record: AnomalyRecordDoc): string | number |
}
}
/**
* Returns the list of partitioning entity fields for the source record as a list
* of objects in the form { fieldName: airline, fieldValue: AAL, fieldType: partition }
* @param record - anomaly record result for which to obtain the entity field list.
*/
export function getEntityFieldList(record: AnomalyRecordDoc): EntityField[] {
const entityFields: EntityField[] = [];
if (record.partition_field_name !== undefined) {
@@ -296,21 +323,30 @@ export function getEntityFieldList(record: AnomalyRecordDoc): EntityField[] {
return entityFields;
}
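Illustrative usage for a record partitioned on airline (a minimal record, cast for brevity; the fieldType value follows the doc comment above):

const rec = { partition_field_name: 'airline', partition_field_value: 'AAL' } as AnomalyRecordDoc;
getEntityFieldName(rec); // 'airline'
getEntityFieldValue(rec); // 'AAL'
getEntityFieldList(rec); // [{ fieldName: 'airline', fieldValue: 'AAL', fieldType: 'partition' }]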
/**
* Returns whether actual values should be displayed for a record with the specified function description.
* Note that the 'function' field in a record contains what the user entered, e.g. 'high_count',
* whereas the 'function_description' field holds an ML-built display hint for the function, e.g. 'count'.
* @param functionDescription - function_description value for the anomaly record
*/
export function showActualForFunction(functionDescription: string): boolean {
return DISPLAY_ACTUAL_FUNCTIONS.indexOf(functionDescription) > -1;
}
/**
* Returns whether typical values should be displayed for a record with the specified function description.
* Note that the 'function' field in a record contains what the user entered, e.g. 'high_count',
* whereas the 'function_description' field holds an ML-built display hint for the function, e.g. 'count'.
* @param functionDescription - function_description value for the anomaly record
*/
export function showTypicalForFunction(functionDescription: string): boolean {
return DISPLAY_TYPICAL_FUNCTIONS.indexOf(functionDescription) > -1;
}
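A hedged usage sketch (DISPLAY_ACTUAL_FUNCTIONS and DISPLAY_TYPICAL_FUNCTIONS are defined elsewhere in this file; `record` is any AnomalyRecordDoc):

const fd = record.function_description; // e.g. 'mean'
if (showActualForFunction(fd) && showTypicalForFunction(fd)) {
  // render both the actual and typical columns for this anomaly
}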
/**
* Returns whether a rule can be configured against the specified anomaly.
* @param record - anomaly record result
*/
export function isRuleSupported(record: AnomalyRecordDoc): boolean {
// A rule can be configured with a numeric condition if the function supports it,
// and/or with scope if there are partitioning fields.
@@ -320,23 +356,25 @@ export function isRuleSupported(record: AnomalyRecordDoc): boolean {
);
}
/**
* Two functions for converting aggregation type names.
* ML and ES use different names for the same function.
* Possible values for ML aggregation type are (defined in lib/model/CAnomalyDetector.cc):
* count
* distinct_count
* rare
* info_content
* mean
* median
* min
* max
* varp
* sum
* lat_long
* time
* The input to toES and the output from toML correspond to the value of the
* function_description field of anomaly records.
*/
export const aggregationTypeTransform = {
toES(oldAggType: string): string {
let newAggType = oldAggType;