[ML] AIOps: Update fields filter popover to be able to filter fields from analysis (not just grouping) (#188913)

## Summary

Part of #187684.

Until now, the popover to filter fields was only available when grouping
was enabled. This PR makes it available at all times so it can be used to
exclude field candidates from the analysis. If the index is detected to be
ECS compliant, a predefined set of fields is auto-selected.
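
For illustration, here is a condensed sketch of the preselection logic
added in `fetch_field_candidates.ts` (see the diff below). The wrapper
function name is made up for readability:

```ts
import { containsECSIdentifierField, filterByECSFields } from './ecs_fields';

// Illustrative wrapper around the preselection logic in fetch_field_candidates.ts.
function preselectKeywordFieldCandidates(keywordFieldCandidates: string[]): string[] {
  // An index is treated as ECS compliant if `ecs.version` shows up among the candidates.
  const isECS = containsECSIdentifierField(keywordFieldCandidates);

  // If ECS is detected, preselect only fields from the predefined ECS safe list,
  // otherwise fall back to the first 100 keyword field candidates.
  return isECS
    ? filterByECSFields(keywordFieldCandidates).sort()
    : keywordFieldCandidates.sort().slice(0, 100);
}
```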

Changes in this PR:

- Creates a new route
`/internal/aiops/log_rate_analysis/field_candidates` so field candidates
can be fetched independently of the main streaming API call (see the
sketch after this list).
- Fixes the handling of "remaining" field candidates so that text field
candidates are also considered. This option was originally developed to
allow resuming an analysis that errored; we now also use it to pass on
the custom field list from the field selection popover.
- Fetches the field candidates in a new Redux slice
`logRateAnalysisFieldCandidatesSlice` using an async thunk.
- Filters the list of field candidates by a predefined list of allowed
fields when an ECS schema is detected.
- Renames `fieldCandidates` to `keywordFieldCandidates` for a clearer
distinction from `textFieldCandidates`.
- Refactors `getLogRateAnalysisTypeForCounts` args to a config object.
- Bumps the API version of the full log rate analysis endpoint to
version 3. We missed bumping the version in
https://github.com/elastic/kibana/pull/188648. This update handles
proper versioning between v2 and v3, and the API integration tests
cover both versions.
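
For reference, here is a minimal sketch of how the new route can be called
from the browser side. It mirrors the new async thunk in
`log_rate_analysis_field_candidates_slice.ts`; the index name and time
range values are placeholders and the inline response type mirrors the new
`FetchFieldCandidatesResponse` interface:

```ts
import type { HttpSetup } from '@kbn/core/public';
import { AIOPS_API_ENDPOINT } from '@kbn/aiops-common/constants';

// Mirrors the new `FetchFieldCandidatesResponse` interface.
interface FieldCandidatesResponse {
  isECS: boolean;
  keywordFieldCandidates: string[];
  selectedKeywordFieldCandidates: string[];
  textFieldCandidates: string[];
  selectedTextFieldCandidates: string[];
}

// Fetches field candidates independently of the streaming analysis call.
async function getFieldCandidates(http: HttpSetup): Promise<FieldCandidatesResponse> {
  return http.post<FieldCandidatesResponse>(
    AIOPS_API_ENDPOINT.LOG_RATE_ANALYSIS_FIELD_CANDIDATES,
    {
      version: '1',
      body: JSON.stringify({
        // Placeholder values for illustration only.
        index: 'my-logs-index',
        timeFieldName: '@timestamp',
        searchQuery: JSON.stringify({ match_all: {} }),
        start: 0,
        end: 0,
        baselineMin: 0,
        baselineMax: 0,
        deviationMin: 0,
        deviationMax: 0,
        grouping: true,
        flushFix: true,
        sampleProbability: 1,
      }),
    }
  );
}
```

The `selected*` arrays drive the preselection in the fields filter
popover, while the remaining candidates can be toggled there manually.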


[aiops-log-rate-analysis-fields-filter-0001.webm](https://github.com/user-attachments/assets/e3ed8d5b-f01c-42ef-8033-caa7135b8cc0)

### Checklist

- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios
- [ ] [Flaky Test
Runner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was
used on any tests changed
- [x] This was checked for breaking API changes and was [labeled
appropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)
Walter Rafelsberger 2024-07-30 10:12:53 +02:00 committed by GitHub
parent c6548c806d
commit f391ed6bcf
38 changed files with 2006 additions and 634 deletions

View file

@ -11,6 +11,7 @@
export const AIOPS_PLUGIN_ID = 'aiops';
export const AIOPS_API_ENDPOINT = {
LOG_RATE_ANALYSIS_FIELD_CANDIDATES: '/internal/aiops/log_rate_analysis/field_candidates',
LOG_RATE_ANALYSIS: '/internal/aiops/log_rate_analysis',
CATEGORIZATION_FIELD_VALIDATION: '/internal/aiops/categorization_field_validation',
} as const;

View file

@ -6,11 +6,16 @@
*/
import type { AiopsLogRateAnalysisSchemaV2 } from './schema_v2';
import type { AiopsLogRateAnalysisSchemaV3 } from './schema_v3';
export type AiopsLogRateAnalysisApiVersion = '2';
export type AiopsLogRateAnalysisApiVersion = '2' | '3';
const LATEST_API_VERSION: AiopsLogRateAnalysisApiVersion = '2';
const LATEST_API_VERSION: AiopsLogRateAnalysisApiVersion = '3';
export type AiopsLogRateAnalysisSchema<
T extends AiopsLogRateAnalysisApiVersion = typeof LATEST_API_VERSION
> = T extends '2' ? AiopsLogRateAnalysisSchemaV2 : never;
> = T extends '2'
? AiopsLogRateAnalysisSchemaV2
: T extends '3'
? AiopsLogRateAnalysisSchemaV3
: never;

View file

@ -8,7 +8,7 @@
import type { TypeOf } from '@kbn/config-schema';
import { schema } from '@kbn/config-schema';
const significantItem = schema.object({
export const significantItem = schema.object({
key: schema.string(),
type: schema.oneOf([schema.literal('keyword'), schema.literal('log_pattern')]),
fieldName: schema.string(),
@ -33,7 +33,14 @@ const significantItem = schema.object({
unique: schema.maybe(schema.boolean()),
});
export const aiopsLogRateAnalysisSchemaV2 = schema.object({
const overridesV2 = schema.object({
loaded: schema.maybe(schema.number()),
remainingFieldCandidates: schema.maybe(schema.arrayOf(schema.string())),
significantItems: schema.maybe(schema.arrayOf(significantItem)),
regroupOnly: schema.maybe(schema.boolean()),
});
export const aiopsLogRateAnalysisBase = schema.object({
start: schema.number(),
end: schema.number(),
searchQuery: schema.string(),
@ -50,18 +57,15 @@ export const aiopsLogRateAnalysisSchemaV2 = schema.object({
/** Settings to override headers derived compression and flush fix */
compressResponse: schema.maybe(schema.boolean()),
flushFix: schema.maybe(schema.boolean()),
/** Overrides to skip steps of the analysis with existing data */
overrides: schema.maybe(
schema.object({
loaded: schema.maybe(schema.number()),
remainingFieldCandidates: schema.maybe(schema.arrayOf(schema.string())),
significantItems: schema.maybe(schema.arrayOf(significantItem)),
regroupOnly: schema.maybe(schema.boolean()),
})
),
/** Probability used for the random sampler aggregations */
sampleProbability: schema.maybe(schema.number()),
});
export const aiopsLogRateAnalysisSchemaV2 = schema.intersection([
aiopsLogRateAnalysisBase,
/** Overrides to skip steps of the analysis with existing data */
schema.object({ overrides: schema.maybe(overridesV2) }),
]);
export type AiopsLogRateAnalysisSchemaV2 = TypeOf<typeof aiopsLogRateAnalysisSchemaV2>;
export type AiopsLogRateAnalysisSchemaSignificantItem = TypeOf<typeof significantItem>;

View file

@ -0,0 +1,28 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { TypeOf } from '@kbn/config-schema';
import { schema } from '@kbn/config-schema';
import { aiopsLogRateAnalysisBase, significantItem } from './schema_v2';
const overridesV3 = schema.object({
loaded: schema.maybe(schema.number()),
remainingKeywordFieldCandidates: schema.maybe(schema.arrayOf(schema.string())),
remainingTextFieldCandidates: schema.maybe(schema.arrayOf(schema.string())),
significantItems: schema.maybe(schema.arrayOf(significantItem)),
regroupOnly: schema.maybe(schema.boolean()),
});
export const aiopsLogRateAnalysisSchemaV3 = schema.intersection([
aiopsLogRateAnalysisBase,
/** Overrides to skip steps of the analysis with existing data */
schema.object({ overrides: schema.maybe(overridesV3) }),
]);
export type AiopsLogRateAnalysisSchemaV3 = TypeOf<typeof aiopsLogRateAnalysisSchemaV3>;
export type AiopsLogRateAnalysisSchemaSignificantItem = TypeOf<typeof significantItem>;

View file

@ -27,7 +27,8 @@ export interface StreamState {
errors: string[];
loaded: number;
loadingState: string;
remainingFieldCandidates?: string[];
remainingKeywordFieldCandidates?: string[];
remainingTextFieldCandidates?: string[];
groupsMissing?: boolean;
zeroDocsFallback: boolean;
}
@ -97,7 +98,8 @@ export const logRateAnalysisResultsSlice = createSlice({
ccsWarning: boolean;
loaded: number;
loadingState: string;
remainingFieldCandidates?: string[];
remainingKeywordFieldCandidates?: string[];
remainingTextFieldCandidates?: string[];
groupsMissing?: boolean;
}>
) => {

View file

@ -0,0 +1,49 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { containsECSIdentifierField, filterByECSFields } from './ecs_fields';
describe('containsECSIdentifierFields', () => {
it('should return true if the array contains all ECS identifier fields', () => {
const fields = ['host.name', 'service.name', 'log.level', 'other.field', 'ecs.version'];
expect(containsECSIdentifierField(fields)).toBe(true);
});
it('should return false if the array does not contain all ECS identifier fields', () => {
const fields = ['host.name', 'service.name', 'non.ecs.field1', 'another.non.ecs.field'];
expect(containsECSIdentifierField(fields)).toBe(false);
});
it('should return false for an empty array', () => {
const fields: string[] = [];
expect(containsECSIdentifierField(fields)).toBe(false);
});
});
describe('filterByECSFields', () => {
it('should filter out non-ECS fields', () => {
const fields = ['event.dataset', 'host.name', 'random.field', 'other.field'];
const expected = ['event.dataset', 'host.name'];
expect(filterByECSFields(fields)).toEqual(expected);
});
it('should include fields prefixed with label.', () => {
const fields = ['event.dataset', 'host.name', 'random.field', 'label.customField'];
const expected = ['event.dataset', 'host.name', 'label.customField'];
expect(filterByECSFields(fields)).toEqual(expected);
});
it('should return an empty array if no ECS or label. prefixed fields are present', () => {
const fields = ['random.field', 'another.field'];
expect(filterByECSFields(fields)).toEqual([]);
});
it('should handle an empty array input', () => {
const fields: string[] = [];
expect(filterByECSFields(fields)).toEqual([]);
});
});

View file

@ -0,0 +1,242 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
// A selection of fields derived from
// https://www.elastic.co/guide/en/ecs/current/ecs-field-reference.html
const ecsFields = [
// Base fields
// https://www.elastic.co/guide/en/ecs/current/ecs-base.html
// Only picking `tags` here since `@timestamp` is not applicable,
// `message` is part of text field candidates and `label` acts
// as a prefix which we'll handle in filterByECSFields.
'tags',
// Agent fields
// https://www.elastic.co/guide/en/ecs/current/ecs-agent.html
'agent.name',
'agent.type',
'agent.version',
// Client fields
// https://www.elastic.co/guide/en/ecs/current/ecs-client.html
'client.domain',
'client.ip', // of type IP
'client.port',
// Cloud fields
// https://www.elastic.co/guide/en/ecs/current/ecs-cloud.html
'cloud.account.id',
'cloud.account.name',
'cloud.availability_zone',
'cloud.instance.id', // Instance ID in the cloud environment
'cloud.machine.type', // Type of the cloud machine
'cloud.project.id',
'cloud.provider', // Name of the cloud provider
'cloud.region', // Region in which the host is running
'cloud.service.name',
// Container fields
// https://www.elastic.co/guide/en/ecs/current/ecs-container.html
'container.id', // Unique identifier of the container
'container.image.hash.all',
'container.image.name',
'container.image.tag',
'container.name', // Name of the container
'container.runtime',
'container.security_context.privileged', // boolean type
// Data Stream fields
// https://www.elastic.co/guide/en/ecs/current/ecs-data_stream.html
'data_stream.dataset',
'data_stream.namespace',
'data_stream.type',
// Destination fields
// https://www.elastic.co/guide/en/ecs/current/ecs-destination.html
'destination.address',
// ECS fields
// https://www.elastic.co/guide/en/ecs/current/ecs-ecs.html
'ecs.version',
// Error fields
// https://www.elastic.co/guide/en/ecs/current/ecs-error.html
'error.code',
'error.id',
'error.type',
// Event fields
// https://www.elastic.co/guide/en/ecs/current/ecs-event.html
'event.action',
'event.agent_id_status',
'event.category',
'event.code',
'event.dataset',
'event.kind',
'event.module',
'event.outcome',
'event.provider',
'event.reason',
'event.reference',
'event.timezone',
'event.type',
'event.url',
// File fields
// https://www.elastic.co/guide/en/ecs/current/ecs-file.html
'file.attributes',
'file.path',
'file.type',
// Host fields
// https://www.elastic.co/guide/en/ecs/current/ecs-host.html
'host.architecture',
'host.domain',
'host.ip',
'host.name',
// HTTP fields
// https://www.elastic.co/guide/en/ecs/current/ecs-http.html
'http.request.method',
'http.request.mime_type',
'http.request.referrer',
'http.response.mime_type',
'http.response.status_code',
'http.version',
// Log fields
// https://www.elastic.co/guide/en/ecs/current/ecs-log.html
'log.file.path',
'log.level',
'log.logger',
'log.origin.file.name',
'log.origin.function',
// Network fields
// https://www.elastic.co/guide/en/ecs/current/ecs-network.html
'network.application',
'network.direction',
'network.name',
'network.protocol',
'network.type',
// Orchestrator fields
// https://www.elastic.co/guide/en/ecs/current/ecs-orchestrator.html
'orchestrator.cluster.name',
'orchestrator.cluster.version',
'orchestrator.namespace',
'orchestrator.organization',
'orchestrator.type',
// Process fields
// https://www.elastic.co/guide/en/ecs/current/ecs-process.html
'process.command_line',
'process.env_vars',
'process.name',
// Rule fields
// https://www.elastic.co/guide/en/ecs/current/ecs-rule.html
'rule.author',
'rule.category',
'rule.description',
'rule.name',
'rule.ruleset',
'rule.version',
// Server fields
// https://www.elastic.co/guide/en/ecs/current/ecs-server.html
'server.domain',
'server.ip', // IP field
'server.port',
// Service fields
// https://www.elastic.co/guide/en/ecs/current/ecs-service.html
'service.environment',
'service.name',
'service.node.name',
'service.node.role', // deprecated
'service.node.roles',
'service.state',
'service.type',
'service.version',
// Source fields
// https://www.elastic.co/guide/en/ecs/current/ecs-source.html
'source.domain',
'source.ip', // IP field
'source.port',
// URL fields
// https://www.elastic.co/guide/en/ecs/current/ecs-url.html
'url.domain',
'url.full',
// User fields
// https://www.elastic.co/guide/en/ecs/current/ecs-user.html
'user.name',
'user.domain',
'user.email',
'user.roles',
// User agent fields
// https://www.elastic.co/guide/en/ecs/current/ecs-user_agent.html
'user_agent.device.name',
'user_agent.name',
'user_agent.original',
'user_agent.version',
];
// Must-have field to identify an index as ECS compliant.
// The only other field that's required is `@timestamp` but that's not
// part of the list since it's not a field that can be used for analysis.
// https://www.elastic.co/guide/en/ecs/1.12/ecs-ecs.html
const ecsIdentifierField = 'ecs.version';
// These are the fields that are expected to be nested within other ECS fields.
const ecsPostfixes = [
// For geo fields, we default to the more human readable variants.
// Those are supposed to be used nested within e.g. `client.*` that's
// why we cannot have them in the `ecsFields` array.
// https://www.elastic.co/guide/en/ecs/current/ecs-geo.html
'.geo.city_name',
'.geo.continent_name',
'.geo.country_name',
'.geo.region_name',
'.geo.timezone',
// The os fields are expected to be nested at: host.os, observer.os, user_agent.os
// https://www.elastic.co/guide/en/ecs/current/ecs-os.html
'.os.family',
'.os.full',
'.os.kernel',
'.os.name',
'.os.platform',
'.os.version',
// The risk fields are expected to be nested at: host.risk, user.risk
// https://www.elastic.co/guide/en/ecs/current/ecs-risk.html
'.risk.calculated_level',
'.risk.static_level',
// User fields are also in the ECS fields array but can be nested too.
'.user.domain',
'.user.email',
'.user.roles',
];
export function containsECSIdentifierField(fieldsArray: string[]): boolean {
return fieldsArray.includes(ecsIdentifierField);
}
export function filterByECSFields(fieldsArray: string[]) {
return fieldsArray.filter(
(field) =>
ecsFields.includes(field) ||
field.startsWith('label.') ||
ecsPostfixes.some((postfix) => field.endsWith(postfix))
);
}

View file

@ -17,38 +17,41 @@ const windowParameters = {
describe('getLogRateAnalysisTypeForCounts', () => {
it('returns SPIKE when normalized deviation count is higher than baseline count', () => {
const baselineCount = 100;
const deviationCount = 200;
const result = getLogRateAnalysisTypeForCounts(baselineCount, deviationCount, windowParameters);
const result = getLogRateAnalysisTypeForCounts({
baselineCount: 100,
deviationCount: 200,
windowParameters,
});
expect(result).toEqual(LOG_RATE_ANALYSIS_TYPE.SPIKE);
});
it('returns DIP when normalized deviation count is lower than baseline count', () => {
const baselineCount = 20000;
const deviationCount = 10;
const result = getLogRateAnalysisTypeForCounts(baselineCount, deviationCount, windowParameters);
const result = getLogRateAnalysisTypeForCounts({
baselineCount: 20000,
deviationCount: 10,
windowParameters,
});
expect(result).toEqual(LOG_RATE_ANALYSIS_TYPE.DIP);
});
it('handles zero baseline count without throwing error', () => {
const baselineCount = 0;
const deviationCount = 100;
const result = getLogRateAnalysisTypeForCounts(baselineCount, deviationCount, windowParameters);
const result = getLogRateAnalysisTypeForCounts({
baselineCount: 0,
deviationCount: 100,
windowParameters,
});
expect(result).toBe(LOG_RATE_ANALYSIS_TYPE.SPIKE);
});
it('handles zero deviation count without throwing error', () => {
const baselineCount = 100;
const deviationCount = 0;
const result = getLogRateAnalysisTypeForCounts(baselineCount, deviationCount, windowParameters);
const result = getLogRateAnalysisTypeForCounts({
baselineCount: 100,
deviationCount: 0,
windowParameters,
});
expect(result).toBe(LOG_RATE_ANALYSIS_TYPE.DIP);
});
});

View file

@ -8,19 +8,20 @@
import { LOG_RATE_ANALYSIS_TYPE, type LogRateAnalysisType } from './log_rate_analysis_type';
import type { WindowParameters } from './window_parameters';
interface GetLogRateAnalysisTypeForCountsParams {
baselineCount: number;
deviationCount: number;
windowParameters: WindowParameters;
}
/**
* Identify the log rate analysis type based on the baseline/deviation doc counts.
*
* @param baselineCount The baseline doc count.
* @param deviationCount The deviation doc count.
* @param windowParameters The window parameters with baseline and deviation time range.
* @returns The log rate analysis type.
*/
export function getLogRateAnalysisTypeForCounts(
baselineCount: number,
deviationCount: number,
windowParameters: WindowParameters
): LogRateAnalysisType {
export function getLogRateAnalysisTypeForCounts({
baselineCount,
deviationCount,
windowParameters,
}: GetLogRateAnalysisTypeForCountsParams): LogRateAnalysisType {
const { baselineMin, baselineMax, deviationMin, deviationMax } = windowParameters;
const deviationDuration = deviationMax - deviationMin;

View file

@ -10,7 +10,7 @@ export const fieldCapsPgBenchMock = {
fields: {
// The next two fields are not in the original field caps response,
// but are added here to test the logic to ignore fields that are not
// in the white list. It's based on a real world example where the mapping
// in the safe list. It's based on a real world example where the mapping
// included a double mapping of text+integer.
ignore_this_text_field: {
text: { type: 'text', metadata_field: false, searchable: true, aggregatable: false },

View file

@ -0,0 +1,354 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { ElasticsearchClient } from '@kbn/core/server';
import { paramsSearchQueryMock } from './__mocks__/params_search_query';
import { fieldCapsPgBenchMock } from './__mocks__/field_caps_pgbench';
import { fieldCapsEcommerceMock } from './__mocks__/field_caps_ecommerce';
import { fieldCapsLargeArraysMock } from './__mocks__/field_caps_large_arrays';
import { fetchFieldCandidates } from './fetch_field_candidates';
describe('fetchFieldCandidates', () => {
it('returns field candidates for "my" fields', async () => {
const esClientFieldCapsMock = jest.fn(() => ({
fields: {
// Should end up as a field candidate
myIpFieldName: { ip: { aggregatable: true } },
// Should end up as a field candidate
myKeywordFieldName: { keyword: { aggregatable: true } },
// Should not end up as a field candidate, it's a keyword but non-aggregatable
myKeywordFieldNameToBeIgnored: { keyword: { aggregatable: false } },
// Should not end up as a field candidate since fields of type number will not be considered
myNumericFieldName: { number: {} },
// Should end up as a text field candidate
message: { text: { aggregatable: false } },
// Should not end up as a text field candidate
myTextField: { text: { aggregatable: false } },
},
}));
const esClientMock = {
fieldCaps: esClientFieldCapsMock,
} as unknown as ElasticsearchClient;
const {
keywordFieldCandidates,
textFieldCandidates,
selectedKeywordFieldCandidates,
selectedTextFieldCandidates,
} = await fetchFieldCandidates({
esClient: esClientMock,
arguments: { ...paramsSearchQueryMock, textFieldCandidatesOverrides: [] },
});
expect(keywordFieldCandidates).toEqual(['myIpFieldName', 'myKeywordFieldName']);
expect(textFieldCandidates).toEqual(['message']);
expect(selectedKeywordFieldCandidates).toEqual(['myIpFieldName', 'myKeywordFieldName']);
expect(selectedTextFieldCandidates).toEqual(['message']);
expect(esClientFieldCapsMock).toHaveBeenCalledTimes(1);
});
it('returns field candidates for pgBench mappings', async () => {
const esClientFieldCapsMock = jest.fn(() => fieldCapsPgBenchMock);
const esClientMock = {
fieldCaps: esClientFieldCapsMock,
} as unknown as ElasticsearchClient;
const {
keywordFieldCandidates,
textFieldCandidates,
selectedKeywordFieldCandidates,
selectedTextFieldCandidates,
} = await fetchFieldCandidates({
esClient: esClientMock,
arguments: { ...paramsSearchQueryMock, textFieldCandidatesOverrides: [] },
});
expect(keywordFieldCandidates).toEqual([
'_metadata.elastic_apm_trace_id',
'_metadata.elastic_apm_transaction_id',
'_metadata.message_template',
'_metadata.metadata_event_dataset',
'_metadata.user_id',
'agent.ephemeral_id',
'agent.hostname',
'agent.id',
'agent.name',
'agent.type',
'agent.version',
'client.geo.city_name',
'client.geo.continent_name',
'client.geo.country_iso_code',
'client.geo.country_name',
'client.geo.region_iso_code',
'client.geo.region_name',
'client.ip',
'cloud.account.id',
'cloud.availability_zone',
'cloud.instance.id',
'cloud.instance.name',
'cloud.machine.type',
'cloud.project.id',
'cloud.provider',
'cloud.service.name',
'container.id',
'container.image.name',
'container.labels.annotation_io_kubernetes_container_hash',
'container.labels.annotation_io_kubernetes_container_restartCount',
'container.labels.annotation_io_kubernetes_container_terminationMessagePath',
'container.labels.annotation_io_kubernetes_container_terminationMessagePolicy',
'container.labels.annotation_io_kubernetes_pod_terminationGracePeriod',
'container.labels.io_kubernetes_container_logpath',
'container.labels.io_kubernetes_container_name',
'container.labels.io_kubernetes_docker_type',
'container.labels.io_kubernetes_pod_name',
'container.labels.io_kubernetes_pod_namespace',
'container.labels.io_kubernetes_pod_uid',
'container.labels.io_kubernetes_sandbox_id',
'container.name',
'container.runtime',
'data_stream.dataset',
'data_stream.namespace',
'data_stream.type',
'details',
'ecs.version',
'elasticapm_labels.span.id',
'elasticapm_labels.trace.id',
'elasticapm_labels.transaction.id',
'elasticapm_span_id',
'elasticapm_trace_id',
'elasticapm_transaction_id',
'event.category',
'event.dataset',
'event.kind',
'event.module',
'event.timezone',
'event.type',
'fileset.name',
'host.architecture',
'host.containerized',
'host.hostname',
'host.ip',
'host.mac',
'host.name',
'host.os.codename',
'host.os.family',
'host.os.kernel',
'host.os.name',
'host.os.platform',
'host.os.type',
'host.os.version',
'hostname',
'input.type',
'kubernetes.container.name',
'kubernetes.labels.app',
'kubernetes.labels.pod-template-hash',
'kubernetes.namespace',
'kubernetes.namespace_labels.kubernetes_io/metadata_name',
'kubernetes.namespace_uid',
'kubernetes.node.hostname',
'kubernetes.node.labels.addon_gke_io/node-local-dns-ds-ready',
'kubernetes.node.labels.beta_kubernetes_io/arch',
'kubernetes.node.labels.beta_kubernetes_io/instance-type',
'kubernetes.node.labels.beta_kubernetes_io/os',
'kubernetes.node.labels.cloud_google_com/gke-boot-disk',
'kubernetes.node.labels.cloud_google_com/gke-container-runtime',
'kubernetes.node.labels.cloud_google_com/gke-nodepool',
'kubernetes.node.labels.cloud_google_com/gke-os-distribution',
'kubernetes.node.labels.cloud_google_com/machine-family',
'kubernetes.node.labels.failure-domain_beta_kubernetes_io/region',
'kubernetes.node.labels.failure-domain_beta_kubernetes_io/zone',
'kubernetes.node.labels.kubernetes_io/arch',
'kubernetes.node.labels.kubernetes_io/hostname',
'kubernetes.node.labels.kubernetes_io/os',
'kubernetes.node.labels.node_kubernetes_io/instance-type',
'kubernetes.node.labels.node_type',
'kubernetes.node.labels.topology_kubernetes_io/region',
'kubernetes.node.labels.topology_kubernetes_io/zone',
'kubernetes.node.name',
'kubernetes.node.uid',
'kubernetes.pod.ip',
'kubernetes.pod.name',
'kubernetes.pod.uid',
'kubernetes.replicaset.name',
'labels.userId',
'log.file.path',
'log.flags',
'log.level',
'log.logger',
'log.origin.file.name',
'log.origin.function',
'log.original',
'name',
'postgresql.log.database',
'postgresql.log.query',
'postgresql.log.query_step',
'postgresql.log.timestamp',
'process.executable',
'process.name',
'process.thread.name',
'related.user',
'req.headers.accept',
'req.headers.accept-encoding',
'req.headers.cache-control',
'req.headers.connection',
'req.headers.content-length',
'req.headers.content-type',
'req.headers.cookie',
'req.headers.host',
'req.headers.origin',
'req.headers.pragma',
'req.headers.referer',
'req.headers.traceparent',
'req.headers.tracestate',
'req.headers.user-agent',
'req.headers.x-real-ip',
'req.method',
'req.remoteAddress',
'req.url',
'service.name',
'service.type',
'span.id',
'stack',
'stream',
'trace.id',
'transaction.id',
'type',
'user.name',
]);
expect(selectedKeywordFieldCandidates).toEqual([
'agent.name',
'agent.type',
'agent.version',
'client.geo.city_name',
'client.geo.continent_name',
'client.geo.country_name',
'client.geo.region_name',
'client.ip',
'cloud.account.id',
'cloud.availability_zone',
'cloud.instance.id',
'cloud.machine.type',
'cloud.project.id',
'cloud.provider',
'cloud.service.name',
'container.id',
'container.image.name',
'container.name',
'container.runtime',
'data_stream.dataset',
'data_stream.namespace',
'data_stream.type',
'ecs.version',
'event.category',
'event.dataset',
'event.kind',
'event.module',
'event.timezone',
'event.type',
'host.architecture',
'host.ip',
'host.name',
'host.os.family',
'host.os.kernel',
'host.os.name',
'host.os.platform',
'host.os.version',
'log.file.path',
'log.level',
'log.logger',
'log.origin.file.name',
'log.origin.function',
'process.name',
'service.name',
'service.type',
'user.name',
]);
expect(textFieldCandidates).toEqual(['error.message', 'message']);
expect(selectedTextFieldCandidates).toEqual(['error.message', 'message']);
expect(esClientFieldCapsMock).toHaveBeenCalledTimes(1);
});
it('returns field candidates for ecommerce mappings', async () => {
const esClientFieldCapsMock = jest.fn(() => fieldCapsEcommerceMock);
const esClientMock = {
fieldCaps: esClientFieldCapsMock,
} as unknown as ElasticsearchClient;
const {
keywordFieldCandidates,
textFieldCandidates,
selectedKeywordFieldCandidates,
selectedTextFieldCandidates,
} = await fetchFieldCandidates({
esClient: esClientMock,
arguments: { ...paramsSearchQueryMock, textFieldCandidatesOverrides: [] },
});
const expectedKeywordFieldCandidates = [
'category.keyword',
'currency',
'customer_first_name.keyword',
'customer_full_name.keyword',
'customer_gender',
'customer_id',
'customer_last_name.keyword',
'customer_phone',
'day_of_week',
'email',
'event.dataset',
'geoip.city_name',
'geoip.continent_name',
'geoip.country_iso_code',
'geoip.region_name',
'manufacturer.keyword',
'order_id',
'products._id.keyword',
'products.category.keyword',
'products.manufacturer.keyword',
'products.product_name.keyword',
'products.sku',
'sku',
'type',
'user',
];
expect(keywordFieldCandidates).toEqual(expectedKeywordFieldCandidates);
expect(textFieldCandidates).toEqual([]);
expect(selectedKeywordFieldCandidates).toEqual(expectedKeywordFieldCandidates);
expect(selectedTextFieldCandidates).toEqual([]);
expect(esClientFieldCapsMock).toHaveBeenCalledTimes(1);
});
it('returns field candidates and total hits for large-arrays mappings', async () => {
const esClientFieldCapsMock = jest.fn(() => fieldCapsLargeArraysMock);
const esClientMock = {
fieldCaps: esClientFieldCapsMock,
} as unknown as ElasticsearchClient;
const {
keywordFieldCandidates,
textFieldCandidates,
selectedKeywordFieldCandidates,
selectedTextFieldCandidates,
} = await fetchFieldCandidates({
esClient: esClientMock,
arguments: { ...paramsSearchQueryMock, textFieldCandidatesOverrides: [] },
});
expect(keywordFieldCandidates).toEqual(['items']);
expect(textFieldCandidates).toEqual([]);
expect(selectedKeywordFieldCandidates).toEqual(['items']);
expect(selectedTextFieldCandidates).toEqual([]);
expect(esClientFieldCapsMock).toHaveBeenCalledTimes(1);
});
});

View file

@ -0,0 +1,130 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { ElasticsearchClient } from '@kbn/core/server';
import { ES_FIELD_TYPES } from '@kbn/field-types';
import { containsECSIdentifierField, filterByECSFields } from '../ecs_fields';
import type { AiopsLogRateAnalysisSchema } from '../api/schema';
// Supported field names for text fields for log rate analysis.
// If we analyse all detected text fields, we might run into performance
// issues with the `categorize_text` aggregation. Until this is resolved, we
// rely on a predefined safe list of supported text fields.
export const TEXT_FIELD_SAFE_LIST = ['message', 'error.message'];
export const SUPPORTED_ES_FIELD_TYPES = [
ES_FIELD_TYPES.KEYWORD,
ES_FIELD_TYPES.IP,
ES_FIELD_TYPES.BOOLEAN,
];
export const SUPPORTED_ES_FIELD_TYPES_TEXT = [ES_FIELD_TYPES.TEXT, ES_FIELD_TYPES.MATCH_ONLY_TEXT];
// This override is meant to be used to force certain fields to be considered as
// text fields when both text and keyword types are available.
export interface FetchFieldCandidatesParamsArguments {
textFieldCandidatesOverrides?: string[];
}
export interface FetchFieldCandidatesParams {
esClient: ElasticsearchClient;
abortSignal?: AbortSignal;
arguments: AiopsLogRateAnalysisSchema & FetchFieldCandidatesParamsArguments;
}
export interface FetchFieldCandidatesResponse {
isECS: boolean;
keywordFieldCandidates: string[];
selectedKeywordFieldCandidates: string[];
textFieldCandidates: string[];
selectedTextFieldCandidates: string[];
}
export const fetchFieldCandidates = async ({
esClient,
abortSignal,
arguments: args,
}: FetchFieldCandidatesParams): Promise<FetchFieldCandidatesResponse> => {
const { textFieldCandidatesOverrides = [], ...params } = args;
// Get all supported fields
const respMapping = await esClient.fieldCaps(
{
fields: '*',
filters: '-metadata,-parent',
include_empty_fields: false,
index: params.index,
index_filter: {
range: {
[params.timeFieldName]: {
gte: params.deviationMin,
lte: params.deviationMax,
},
},
},
types: [...SUPPORTED_ES_FIELD_TYPES, ...SUPPORTED_ES_FIELD_TYPES_TEXT],
},
{ signal: abortSignal, maxRetries: 0 }
);
const allFieldNames: string[] = [];
const acceptableFields: Set<string> = new Set();
const acceptableTextFields: Set<string> = new Set();
Object.entries(respMapping.fields).forEach(([key, value]) => {
const fieldTypes = Object.keys(value) as ES_FIELD_TYPES[];
const isSupportedType = fieldTypes.some((type) => SUPPORTED_ES_FIELD_TYPES.includes(type));
const isAggregatable = fieldTypes.some((type) => value[type].aggregatable);
const isTextField = fieldTypes.some((type) => SUPPORTED_ES_FIELD_TYPES_TEXT.includes(type));
// Check if fieldName is something we can aggregate on
if (isSupportedType && isAggregatable) {
acceptableFields.add(key);
}
if (isTextField && TEXT_FIELD_SAFE_LIST.includes(key)) {
acceptableTextFields.add(key);
}
allFieldNames.push(key);
});
const textFieldCandidatesOverridesWithKeywordPostfix = textFieldCandidatesOverrides.map(
(d) => `${d}.keyword`
);
const keywordFieldCandidates: string[] = [...acceptableFields].filter(
(field) => !textFieldCandidatesOverridesWithKeywordPostfix.includes(field)
);
const textFieldCandidates: string[] = [...acceptableTextFields].filter((field) => {
const fieldName = field.replace(new RegExp(/\.text$/), '');
return (
(!keywordFieldCandidates.includes(fieldName) &&
!keywordFieldCandidates.includes(`${fieldName}.keyword`)) ||
textFieldCandidatesOverrides.includes(field)
);
});
const isECS = containsECSIdentifierField(keywordFieldCandidates);
return {
isECS,
// all keyword field candidates
keywordFieldCandidates: keywordFieldCandidates.sort(),
// preselection:
// - if we identify an ECS schema, filter by custom ECS safe list
// - if not, take the first 100 fields
selectedKeywordFieldCandidates: isECS
? filterByECSFields(keywordFieldCandidates).sort()
: keywordFieldCandidates.sort().slice(0, 100),
// text field candidates
textFieldCandidates: textFieldCandidates.sort(),
selectedTextFieldCandidates: textFieldCandidates.sort(),
};
};

View file

@ -16,313 +16,219 @@ import { fieldCapsLargeArraysMock } from './__mocks__/field_caps_large_arrays';
import { fetchIndexInfo } from './fetch_index_info';
describe('fetch_index_info', () => {
describe('fetchFieldCandidates', () => {
it('returns field candidates and total hits for "my" fields', async () => {
const esClientFieldCapsMock = jest.fn(() => ({
fields: {
// Should end up as a field candidate
myIpFieldName: { ip: { aggregatable: true } },
// Should end up as a field candidate
myKeywordFieldName: { keyword: { aggregatable: true } },
// Should not end up as a field candidate, it's a keyword but non-aggregatable
myKeywordFieldNameToBeIgnored: { keyword: { aggregatable: false } },
// Should not end up as a field candidate since fields of type number will not be considered
myNumericFieldName: { number: {} },
describe('fetchIndexInfo', () => {
it('returns field candidates and total hits for "my" fields', async () => {
const esClientFieldCapsMock = jest.fn(() => ({
fields: {
// Should end up as a field candidate
myIpFieldName: { ip: { aggregatable: true } },
// Should end up as a field candidate
myKeywordFieldName: { keyword: { aggregatable: true } },
// Should not end up as a field candidate, it's a keyword but non-aggregatable
myKeywordFieldNameToBeIgnored: { keyword: { aggregatable: false } },
// Should not end up as a field candidate since fields of type number will not be considered
myNumericFieldName: { number: {} },
},
}));
const esClientSearchMock = jest.fn((req: estypes.SearchRequest): estypes.SearchResponse => {
return {
hits: {
hits: [],
total: { value: 5000000 },
},
}));
const esClientSearchMock = jest.fn((req: estypes.SearchRequest): estypes.SearchResponse => {
return {
hits: {
hits: [],
total: { value: 5000000 },
},
} as unknown as estypes.SearchResponse;
});
const esClientMock = {
fieldCaps: esClientFieldCapsMock,
search: esClientSearchMock,
} as unknown as ElasticsearchClient;
const { baselineTotalDocCount, deviationTotalDocCount, fieldCandidates } =
await fetchIndexInfo({ esClient: esClientMock, arguments: paramsSearchQueryMock });
expect(fieldCandidates).toEqual(['myIpFieldName', 'myKeywordFieldName']);
expect(baselineTotalDocCount).toEqual(5000000);
expect(deviationTotalDocCount).toEqual(5000000);
expect(esClientFieldCapsMock).toHaveBeenCalledTimes(1);
expect(esClientSearchMock).toHaveBeenCalledTimes(2);
} as unknown as estypes.SearchResponse;
});
it('returns field candidates and total hits for pgBench mappings', async () => {
const esClientFieldCapsMock = jest.fn(() => fieldCapsPgBenchMock);
const esClientSearchMock = jest.fn((req: estypes.SearchRequest): estypes.SearchResponse => {
return {
hits: {
hits: [],
total: { value: 5000000 },
},
} as unknown as estypes.SearchResponse;
const esClientMock = {
fieldCaps: esClientFieldCapsMock,
search: esClientSearchMock,
} as unknown as ElasticsearchClient;
const { baselineTotalDocCount, deviationTotalDocCount, keywordFieldCandidates } =
await fetchIndexInfo({
esClient: esClientMock,
arguments: { ...paramsSearchQueryMock, textFieldCandidatesOverrides: [] },
});
const esClientMock = {
fieldCaps: esClientFieldCapsMock,
search: esClientSearchMock,
} as unknown as ElasticsearchClient;
expect(keywordFieldCandidates).toEqual(['myIpFieldName', 'myKeywordFieldName']);
expect(baselineTotalDocCount).toEqual(5000000);
expect(deviationTotalDocCount).toEqual(5000000);
expect(esClientFieldCapsMock).toHaveBeenCalledTimes(1);
expect(esClientSearchMock).toHaveBeenCalledTimes(2);
});
const {
baselineTotalDocCount,
deviationTotalDocCount,
fieldCandidates,
textFieldCandidates,
} = await fetchIndexInfo({ esClient: esClientMock, arguments: paramsSearchQueryMock });
expect(fieldCandidates).toEqual([
'_metadata.elastic_apm_trace_id',
'_metadata.elastic_apm_transaction_id',
'_metadata.message_template',
'_metadata.metadata_event_dataset',
'_metadata.user_id',
'agent.ephemeral_id',
'agent.hostname',
'agent.id',
'agent.name',
'agent.type',
'agent.version',
'client.geo.city_name',
'client.geo.continent_name',
'client.geo.country_iso_code',
'client.geo.country_name',
'client.geo.region_iso_code',
'client.geo.region_name',
'client.ip',
'cloud.account.id',
'cloud.availability_zone',
'cloud.instance.id',
'cloud.instance.name',
'cloud.machine.type',
'cloud.project.id',
'cloud.provider',
'cloud.service.name',
'container.id',
'container.image.name',
'container.labels.annotation_io_kubernetes_container_hash',
'container.labels.annotation_io_kubernetes_container_restartCount',
'container.labels.annotation_io_kubernetes_container_terminationMessagePath',
'container.labels.annotation_io_kubernetes_container_terminationMessagePolicy',
'container.labels.annotation_io_kubernetes_pod_terminationGracePeriod',
'container.labels.io_kubernetes_container_logpath',
'container.labels.io_kubernetes_container_name',
'container.labels.io_kubernetes_docker_type',
'container.labels.io_kubernetes_pod_name',
'container.labels.io_kubernetes_pod_namespace',
'container.labels.io_kubernetes_pod_uid',
'container.labels.io_kubernetes_sandbox_id',
'container.name',
'container.runtime',
'data_stream.dataset',
'data_stream.namespace',
'data_stream.type',
'details',
'ecs.version',
'elasticapm_labels.span.id',
'elasticapm_labels.trace.id',
'elasticapm_labels.transaction.id',
'elasticapm_span_id',
'elasticapm_trace_id',
'elasticapm_transaction_id',
'event.category',
'event.dataset',
'event.kind',
'event.module',
'event.timezone',
'event.type',
'fileset.name',
'host.architecture',
'host.containerized',
'host.hostname',
'host.ip',
'host.mac',
'host.name',
'host.os.codename',
'host.os.family',
'host.os.kernel',
'host.os.name',
'host.os.platform',
'host.os.type',
'host.os.version',
'hostname',
'input.type',
'kubernetes.container.name',
'kubernetes.labels.app',
'kubernetes.labels.pod-template-hash',
'kubernetes.namespace',
'kubernetes.namespace_labels.kubernetes_io/metadata_name',
'kubernetes.namespace_uid',
'kubernetes.node.hostname',
'kubernetes.node.labels.addon_gke_io/node-local-dns-ds-ready',
'kubernetes.node.labels.beta_kubernetes_io/arch',
'kubernetes.node.labels.beta_kubernetes_io/instance-type',
'kubernetes.node.labels.beta_kubernetes_io/os',
'kubernetes.node.labels.cloud_google_com/gke-boot-disk',
'kubernetes.node.labels.cloud_google_com/gke-container-runtime',
'kubernetes.node.labels.cloud_google_com/gke-nodepool',
'kubernetes.node.labels.cloud_google_com/gke-os-distribution',
'kubernetes.node.labels.cloud_google_com/machine-family',
'kubernetes.node.labels.failure-domain_beta_kubernetes_io/region',
'kubernetes.node.labels.failure-domain_beta_kubernetes_io/zone',
'kubernetes.node.labels.kubernetes_io/arch',
'kubernetes.node.labels.kubernetes_io/hostname',
'kubernetes.node.labels.kubernetes_io/os',
'kubernetes.node.labels.node_kubernetes_io/instance-type',
'kubernetes.node.labels.node_type',
'kubernetes.node.labels.topology_kubernetes_io/region',
'kubernetes.node.labels.topology_kubernetes_io/zone',
'kubernetes.node.name',
'kubernetes.node.uid',
'kubernetes.pod.ip',
'kubernetes.pod.name',
'kubernetes.pod.uid',
'kubernetes.replicaset.name',
'labels.userId',
'log.file.path',
'log.flags',
'log.level',
'log.logger',
'log.origin.file.name',
'log.origin.function',
'log.original',
'name',
'postgresql.log.database',
'postgresql.log.query',
'postgresql.log.query_step',
'postgresql.log.timestamp',
'process.executable',
'process.name',
'process.thread.name',
'related.user',
'req.headers.accept',
'req.headers.accept-encoding',
'req.headers.cache-control',
'req.headers.connection',
'req.headers.content-length',
'req.headers.content-type',
'req.headers.cookie',
'req.headers.host',
'req.headers.origin',
'req.headers.pragma',
'req.headers.referer',
'req.headers.traceparent',
'req.headers.tracestate',
'req.headers.user-agent',
'req.headers.x-real-ip',
'req.method',
'req.remoteAddress',
'req.url',
'service.name',
'service.type',
'span.id',
'stack',
'stream',
'trace.id',
'transaction.id',
'type',
'user.name',
]);
expect(textFieldCandidates).toEqual(['error.message', 'message']);
expect(baselineTotalDocCount).toEqual(5000000);
expect(deviationTotalDocCount).toEqual(5000000);
expect(esClientFieldCapsMock).toHaveBeenCalledTimes(1);
expect(esClientSearchMock).toHaveBeenCalledTimes(2);
it('returns field candidates and total hits for pgBench mappings', async () => {
const esClientFieldCapsMock = jest.fn(() => fieldCapsPgBenchMock);
const esClientSearchMock = jest.fn((req: estypes.SearchRequest): estypes.SearchResponse => {
return {
hits: {
hits: [],
total: { value: 5000000 },
},
} as unknown as estypes.SearchResponse;
});
it('returns field candidates and total hits for ecommerce mappings', async () => {
const esClientFieldCapsMock = jest.fn(() => fieldCapsEcommerceMock);
const esClientSearchMock = jest.fn((req: estypes.SearchRequest): estypes.SearchResponse => {
return {
hits: {
hits: [],
total: { value: 5000000 },
},
} as unknown as estypes.SearchResponse;
});
const esClientMock = {
fieldCaps: esClientFieldCapsMock,
search: esClientSearchMock,
} as unknown as ElasticsearchClient;
const esClientMock = {
fieldCaps: esClientFieldCapsMock,
search: esClientSearchMock,
} as unknown as ElasticsearchClient;
const {
baselineTotalDocCount,
deviationTotalDocCount,
fieldCandidates,
textFieldCandidates,
} = await fetchIndexInfo({ esClient: esClientMock, arguments: paramsSearchQueryMock });
expect(fieldCandidates).toEqual([
'category.keyword',
'currency',
'customer_first_name.keyword',
'customer_full_name.keyword',
'customer_gender',
'customer_id',
'customer_last_name.keyword',
'customer_phone',
'day_of_week',
'email',
'event.dataset',
'geoip.city_name',
'geoip.continent_name',
'geoip.country_iso_code',
'geoip.region_name',
'manufacturer.keyword',
'order_id',
'products._id.keyword',
'products.category.keyword',
'products.manufacturer.keyword',
'products.product_name.keyword',
'products.sku',
'sku',
'type',
'user',
]);
expect(textFieldCandidates).toEqual([]);
expect(baselineTotalDocCount).toEqual(5000000);
expect(deviationTotalDocCount).toEqual(5000000);
expect(esClientFieldCapsMock).toHaveBeenCalledTimes(1);
expect(esClientSearchMock).toHaveBeenCalledTimes(2);
const {
baselineTotalDocCount,
deviationTotalDocCount,
keywordFieldCandidates,
textFieldCandidates,
} = await fetchIndexInfo({
esClient: esClientMock,
arguments: { ...paramsSearchQueryMock, textFieldCandidatesOverrides: [] },
});
it('returns field candidates and total hits for large-arrays mappings', async () => {
const esClientFieldCapsMock = jest.fn(() => fieldCapsLargeArraysMock);
const esClientSearchMock = jest.fn((req: estypes.SearchRequest): estypes.SearchResponse => {
return {
hits: {
hits: [],
total: { value: 5000000 },
},
} as unknown as estypes.SearchResponse;
});
expect(keywordFieldCandidates).toEqual([
'agent.name',
'agent.type',
'agent.version',
'client.geo.city_name',
'client.geo.continent_name',
'client.geo.country_name',
'client.geo.region_name',
'client.ip',
'cloud.account.id',
'cloud.availability_zone',
'cloud.instance.id',
'cloud.machine.type',
'cloud.project.id',
'cloud.provider',
'cloud.service.name',
'container.id',
'container.image.name',
'container.name',
'container.runtime',
'data_stream.dataset',
'data_stream.namespace',
'data_stream.type',
'ecs.version',
'event.category',
'event.dataset',
'event.kind',
'event.module',
'event.timezone',
'event.type',
'host.architecture',
'host.ip',
'host.name',
'host.os.family',
'host.os.kernel',
'host.os.name',
'host.os.platform',
'host.os.version',
'log.file.path',
'log.level',
'log.logger',
'log.origin.file.name',
'log.origin.function',
'process.name',
'service.name',
'service.type',
'user.name',
]);
expect(textFieldCandidates).toEqual(['error.message', 'message']);
expect(baselineTotalDocCount).toEqual(5000000);
expect(deviationTotalDocCount).toEqual(5000000);
expect(esClientFieldCapsMock).toHaveBeenCalledTimes(1);
expect(esClientSearchMock).toHaveBeenCalledTimes(2);
});
const esClientMock = {
fieldCaps: esClientFieldCapsMock,
search: esClientSearchMock,
} as unknown as ElasticsearchClient;
const {
baselineTotalDocCount,
deviationTotalDocCount,
fieldCandidates,
textFieldCandidates,
} = await fetchIndexInfo({ esClient: esClientMock, arguments: paramsSearchQueryMock });
expect(fieldCandidates).toEqual(['items']);
expect(textFieldCandidates).toEqual([]);
expect(baselineTotalDocCount).toEqual(5000000);
expect(deviationTotalDocCount).toEqual(5000000);
expect(esClientFieldCapsMock).toHaveBeenCalledTimes(1);
expect(esClientSearchMock).toHaveBeenCalledTimes(2);
it('returns field candidates and total hits for ecommerce mappings', async () => {
const esClientFieldCapsMock = jest.fn(() => fieldCapsEcommerceMock);
const esClientSearchMock = jest.fn((req: estypes.SearchRequest): estypes.SearchResponse => {
return {
hits: {
hits: [],
total: { value: 5000000 },
},
} as unknown as estypes.SearchResponse;
});
const esClientMock = {
fieldCaps: esClientFieldCapsMock,
search: esClientSearchMock,
} as unknown as ElasticsearchClient;
const {
baselineTotalDocCount,
deviationTotalDocCount,
keywordFieldCandidates,
textFieldCandidates,
} = await fetchIndexInfo({
esClient: esClientMock,
arguments: { ...paramsSearchQueryMock, textFieldCandidatesOverrides: [] },
});
expect(keywordFieldCandidates).toEqual([
'category.keyword',
'currency',
'customer_first_name.keyword',
'customer_full_name.keyword',
'customer_gender',
'customer_id',
'customer_last_name.keyword',
'customer_phone',
'day_of_week',
'email',
'event.dataset',
'geoip.city_name',
'geoip.continent_name',
'geoip.country_iso_code',
'geoip.region_name',
'manufacturer.keyword',
'order_id',
'products._id.keyword',
'products.category.keyword',
'products.manufacturer.keyword',
'products.product_name.keyword',
'products.sku',
'sku',
'type',
'user',
]);
expect(textFieldCandidates).toEqual([]);
expect(baselineTotalDocCount).toEqual(5000000);
expect(deviationTotalDocCount).toEqual(5000000);
expect(esClientFieldCapsMock).toHaveBeenCalledTimes(1);
expect(esClientSearchMock).toHaveBeenCalledTimes(2);
});
it('returns field candidates and total hits for large-arrays mappings', async () => {
const esClientFieldCapsMock = jest.fn(() => fieldCapsLargeArraysMock);
const esClientSearchMock = jest.fn((req: estypes.SearchRequest): estypes.SearchResponse => {
return {
hits: {
hits: [],
total: { value: 5000000 },
},
} as unknown as estypes.SearchResponse;
});
const esClientMock = {
fieldCaps: esClientFieldCapsMock,
search: esClientSearchMock,
} as unknown as ElasticsearchClient;
const {
baselineTotalDocCount,
deviationTotalDocCount,
keywordFieldCandidates,
textFieldCandidates,
} = await fetchIndexInfo({
esClient: esClientMock,
arguments: { ...paramsSearchQueryMock, textFieldCandidatesOverrides: [] },
});
expect(keywordFieldCandidates).toEqual(['items']);
expect(textFieldCandidates).toEqual([]);
expect(baselineTotalDocCount).toEqual(5000000);
expect(deviationTotalDocCount).toEqual(5000000);
expect(esClientFieldCapsMock).toHaveBeenCalledTimes(1);
expect(esClientSearchMock).toHaveBeenCalledTimes(2);
});
});

View file

@ -7,32 +7,29 @@
import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { ES_FIELD_TYPES } from '@kbn/field-types';
import type { ElasticsearchClient } from '@kbn/core/server';
import type { AiopsLogRateAnalysisSchema } from '../api/schema';
import {
fetchFieldCandidates,
type FetchFieldCandidatesParams,
type FetchFieldCandidatesParamsArguments,
} from './fetch_field_candidates';
import { getTotalDocCountRequest } from './get_total_doc_count_request';
// TODO Consolidate with duplicate `fetchPValues` in
// `x-pack/plugins/observability_solution/apm/server/routes/correlations/queries/fetch_duration_field_candidates.ts`
// Supported field names for text fields for log rate analysis.
// If we analyse all detected text fields, we might run into performance
// issues with the `categorize_text` aggregation. Until this is resolved, we
// rely on a predefined white list of supported text fields.
const TEXT_FIELD_WHITE_LIST = ['message', 'error.message'];
export interface FetchIndexInfoParamsArguments {
skipFieldCandidates?: boolean;
}
const SUPPORTED_ES_FIELD_TYPES = [
ES_FIELD_TYPES.KEYWORD,
ES_FIELD_TYPES.IP,
ES_FIELD_TYPES.BOOLEAN,
];
const SUPPORTED_ES_FIELD_TYPES_TEXT = [ES_FIELD_TYPES.TEXT, ES_FIELD_TYPES.MATCH_ONLY_TEXT];
interface IndexInfo {
fieldCandidates: string[];
export interface FetchIndexInfoParams extends FetchFieldCandidatesParams {
arguments: AiopsLogRateAnalysisSchema &
FetchFieldCandidatesParamsArguments &
FetchIndexInfoParamsArguments;
}
export interface FetchIndexInfoResponse {
keywordFieldCandidates: string[];
textFieldCandidates: string[];
baselineTotalDocCount: number;
deviationTotalDocCount: number;
@ -43,60 +40,15 @@ export const fetchIndexInfo = async ({
esClient,
abortSignal,
arguments: args,
}: {
esClient: ElasticsearchClient;
abortSignal?: AbortSignal;
arguments: AiopsLogRateAnalysisSchema & {
textFieldCandidatesOverrides?: string[];
};
}): Promise<IndexInfo> => {
const { textFieldCandidatesOverrides = [], ...params } = args;
const { index } = params;
// Get all supported fields
const respMapping = await esClient.fieldCaps(
{
fields: '*',
filters: '-metadata',
include_empty_fields: false,
index,
index_filter: {
range: {
[params.timeFieldName]: {
gte: params.deviationMin,
lte: params.deviationMax,
},
},
},
types: [...SUPPORTED_ES_FIELD_TYPES, ...SUPPORTED_ES_FIELD_TYPES_TEXT],
},
{ signal: abortSignal, maxRetries: 0 }
);
}: FetchIndexInfoParams): Promise<FetchIndexInfoResponse> => {
const { skipFieldCandidates = false, ...fetchFieldCandidatesArguments } = args;
const { textFieldCandidatesOverrides = [], ...params } = fetchFieldCandidatesArguments;
const allFieldNames: string[] = [];
// There's a bit of logic involved here because we want to fetch the data
// in parallel but the call to `fetchFieldCandidates` is optional.
const acceptableFields: Set<string> = new Set();
const acceptableTextFields: Set<string> = new Set();
Object.entries(respMapping.fields).forEach(([key, value]) => {
const fieldTypes = Object.keys(value) as ES_FIELD_TYPES[];
const isSupportedType = fieldTypes.some((type) => SUPPORTED_ES_FIELD_TYPES.includes(type));
const isAggregatable = fieldTypes.some((type) => value[type].aggregatable);
const isTextField = fieldTypes.some((type) => SUPPORTED_ES_FIELD_TYPES_TEXT.includes(type));
// Check if fieldName is something we can aggregate on
if (isSupportedType && isAggregatable) {
acceptableFields.add(key);
}
if (isTextField && TEXT_FIELD_WHITE_LIST.includes(key)) {
acceptableTextFields.add(key);
}
allFieldNames.push(key);
});
// Get the total doc count for the baseline time range
const respBaselineTotalDocCount = await esClient.search(
// #1 First we define the promises that would fetch the data.
const baselineTotalDocCountPromise = esClient.search(
getTotalDocCountRequest({ ...params, start: params.baselineMin, end: params.baselineMax }),
{
signal: abortSignal,
@ -104,38 +56,48 @@ export const fetchIndexInfo = async ({
}
);
// Get the total doc count for the deviation time range
const respDeviationTotalDocCount = await esClient.search(
getTotalDocCountRequest({ ...params, start: params.deviationMin, end: params.deviationMax }),
const deviationTotalDocCountPromise = esClient.search(
getTotalDocCountRequest({
...params,
start: params.deviationMin,
end: params.deviationMax,
}),
{
signal: abortSignal,
maxRetries: 0,
}
);
const textFieldCandidatesOverridesWithKeywordPostfix = textFieldCandidatesOverrides.map(
(d) => `${d}.keyword`
);
const fieldCandidates: string[] = [...acceptableFields].filter(
(field) => !textFieldCandidatesOverridesWithKeywordPostfix.includes(field)
);
const textFieldCandidates: string[] = [...acceptableTextFields].filter((field) => {
const fieldName = field.replace(new RegExp(/\.text$/), '');
return (
(!fieldCandidates.includes(fieldName) && !fieldCandidates.includes(`${fieldName}.keyword`)) ||
textFieldCandidatesOverrides.includes(field)
);
const fetchFieldCandidatesPromise = fetchFieldCandidates({
esClient,
abortSignal,
arguments: fetchFieldCandidatesArguments,
});
// #2 Then we build an array of these promises. To be able to handle the
// responses properly we build a tuple based on the types of the promises.
const promises: [
typeof baselineTotalDocCountPromise,
typeof deviationTotalDocCountPromise,
typeof fetchFieldCandidatesPromise | undefined
] = [
baselineTotalDocCountPromise,
deviationTotalDocCountPromise,
!skipFieldCandidates ? fetchFieldCandidatesPromise : undefined,
];
// #3 Finally, we await the promises and return the results.
const [respBaselineTotalDocCount, respDeviationTotalDocCount, fieldCandidates] =
await Promise.all(promises);
const baselineTotalDocCount = (respBaselineTotalDocCount.hits.total as estypes.SearchTotalHits)
.value;
const deviationTotalDocCount = (respDeviationTotalDocCount.hits.total as estypes.SearchTotalHits)
.value;
return {
fieldCandidates: fieldCandidates.sort(),
textFieldCandidates: textFieldCandidates.sort(),
keywordFieldCandidates: fieldCandidates?.selectedKeywordFieldCandidates.sort() ?? [],
textFieldCandidates: fieldCandidates?.textFieldCandidates.sort() ?? [],
baselineTotalDocCount,
deviationTotalDocCount,
zeroDocsFallback: baselineTotalDocCount === 0 || deviationTotalDocCount === 0,

View file

@ -18,7 +18,7 @@ export const getTermsQuery = ({ fieldName, fieldValue }: FieldValuePair) => {
};
interface QueryParams {
params: AiopsLogRateAnalysisSchema<'2'>;
params: AiopsLogRateAnalysisSchema<'3'>;
termFilters?: FieldValuePair[];
filter?: estypes.QueryDslQueryContainer;
skipRangeQuery?: boolean;

View file

@ -0,0 +1,83 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { httpServiceMock } from '@kbn/core/public/mocks';
import type { FetchFieldCandidatesResponse } from '../queries/fetch_field_candidates';
import { fetchFieldCandidates } from './log_rate_analysis_field_candidates_slice';
const mockHttp = httpServiceMock.createStartContract();
describe('fetchFieldCandidates', () => {
it('dispatches field candidates', async () => {
const mockDispatch = jest.fn();
const mockGetState = jest.fn();
const mockResponse: FetchFieldCandidatesResponse = {
isECS: false,
keywordFieldCandidates: ['keyword-field', 'another-keyword-field'],
selectedKeywordFieldCandidates: ['keyword-field'],
textFieldCandidates: ['text-field', 'another-text-field', 'yet-another-text-field'],
selectedTextFieldCandidates: ['text-field'],
};
mockHttp.post.mockResolvedValue(mockResponse);
const startParams = {
http: mockHttp,
endpoint: '/internal/aiops/log_rate_analysis',
apiVersion: '3',
abortCtrl: { current: new AbortController() },
body: {
start: 0,
end: 0,
searchQuery: JSON.stringify({ match_all: {} }),
timeFieldName: '@timestamp',
index: 'myIndex',
grouping: true,
flushFix: true,
baselineMin: 0,
baselineMax: 0,
deviationMin: 0,
deviationMax: 0,
sampleProbability: 1,
},
headers: {},
};
const action = fetchFieldCandidates(startParams);
await action(mockDispatch, mockGetState, undefined);
// Expected to be called 3 times including the pending and fulfilled actions.
expect(mockDispatch).toHaveBeenCalledTimes(3);
expect(mockDispatch).toHaveBeenNthCalledWith(2, {
payload: {
fieldSelectionMessage:
'2 out of 5 fields were preselected for the analysis. Use the "Fields" dropdown to adjust the selection.',
fieldFilterSkippedItems: [
'another-keyword-field',
'another-text-field',
'yet-another-text-field',
],
fieldFilterUniqueItems: [
'another-keyword-field',
'another-text-field',
'keyword-field',
'text-field',
'yet-another-text-field',
],
keywordFieldCandidates: ['keyword-field', 'another-keyword-field'],
selectedKeywordFieldCandidates: ['keyword-field'],
selectedTextFieldCandidates: ['text-field'],
textFieldCandidates: ['text-field', 'another-text-field', 'yet-another-text-field'],
},
type: 'log_rate_analysis_field_candidates/setAllFieldCandidates',
});
});
});

View file

@ -0,0 +1,160 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { PayloadAction } from '@reduxjs/toolkit';
import { createAsyncThunk, createSlice } from '@reduxjs/toolkit';
import { i18n } from '@kbn/i18n';
import type { HttpSetup, HttpFetchOptions } from '@kbn/core/public';
import { AIOPS_API_ENDPOINT } from '@kbn/aiops-common/constants';
import type { AiopsLogRateAnalysisSchema } from '../api/schema';
import type { FetchFieldCandidatesResponse } from '../queries/fetch_field_candidates';
const ecsIdentifiedMessage = i18n.translate(
'xpack.aiops.logRateAnalysis.fieldCandidates.ecsIdentifiedMessage',
{
defaultMessage: 'The source documents were identified as ECS compliant.',
}
);
const fieldsDropdownHintMessage = i18n.translate(
'xpack.aiops.logRateAnalysis.fieldCandidates.fieldsDropdownHintMessage',
{
defaultMessage: 'Use the "Fields" dropdown to adjust the selection.',
}
);
const getFieldSelectionMessage = (
isECS: boolean,
allItemsCount: number,
selectedItemsCount: number
): string | undefined => {
if (allItemsCount <= selectedItemsCount || selectedItemsCount < 2) return;
const ecsMessage = isECS ? `${ecsIdentifiedMessage} ` : '';
const fieldsSelectedMessage = i18n.translate(
'xpack.aiops.logRateAnalysis.fieldCandidates.fieldsSelectedMessage',
{
defaultMessage:
'{selectedItemsCount} out of {allItemsCount} fields were preselected for the analysis.',
values: { selectedItemsCount, allItemsCount },
}
);
return `${ecsMessage}${fieldsSelectedMessage} ${fieldsDropdownHintMessage}`;
};
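For illustration, the messages this helper produces for a few inputs; the first call mirrors the fixture in the unit test earlier in this diff, the other values are hypothetical (the helper is module-private, so this is a sketch rather than importable code):

```ts
// 5 candidates total, 2 preselected, non-ECS index (same as the unit test fixture).
getFieldSelectionMessage(false, 5, 2);
// => '2 out of 5 fields were preselected for the analysis. Use the "Fields" dropdown to adjust the selection.'

// Hypothetical ECS index: the ECS sentence is prepended.
getFieldSelectionMessage(true, 10, 4);
// => 'The source documents were identified as ECS compliant. 4 out of 10 fields were preselected for the analysis. Use the "Fields" dropdown to adjust the selection.'

// Nothing was filtered out (or fewer than 2 fields selected): no message.
getFieldSelectionMessage(false, 3, 3);
// => undefined
```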
export interface FetchFieldCandidatesParams {
http: HttpSetup;
endpoint: string;
apiVersion?: string;
abortCtrl: React.MutableRefObject<AbortController>;
body?: AiopsLogRateAnalysisSchema;
headers?: HttpFetchOptions['headers'];
}
/**
* Async thunk to fetch field candidates.
*/
export const fetchFieldCandidates = createAsyncThunk(
'log_rate_analysis_field_candidates/fetch_field_candidates',
async (options: FetchFieldCandidatesParams, thunkApi) => {
const { http, abortCtrl, body, headers } = options;
// Get field candidates so we're able to populate the field selection dropdown.
const fieldCandidatesResp = await http.post<FetchFieldCandidatesResponse>(
AIOPS_API_ENDPOINT.LOG_RATE_ANALYSIS_FIELD_CANDIDATES,
{
signal: abortCtrl.current.signal,
version: '1',
headers,
...(body && Object.keys(body).length > 0 ? { body: JSON.stringify(body) } : {}),
}
);
const {
isECS,
keywordFieldCandidates,
textFieldCandidates,
selectedKeywordFieldCandidates,
selectedTextFieldCandidates,
} = fieldCandidatesResp;
const fieldFilterUniqueItems = [...keywordFieldCandidates, ...textFieldCandidates].sort();
const fieldFilterUniqueSelectedItems = [
...selectedKeywordFieldCandidates,
...selectedTextFieldCandidates,
];
const fieldFilterSkippedItems = fieldFilterUniqueItems.filter(
(d) => !fieldFilterUniqueSelectedItems.includes(d)
);
thunkApi.dispatch(
setAllFieldCandidates({
fieldSelectionMessage: getFieldSelectionMessage(
isECS,
fieldFilterUniqueItems.length,
fieldFilterUniqueSelectedItems.length
),
fieldFilterUniqueItems,
fieldFilterSkippedItems,
keywordFieldCandidates,
textFieldCandidates,
selectedKeywordFieldCandidates,
selectedTextFieldCandidates,
})
);
}
);
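To make the derived filter lists concrete, this is what the thunk computes for the response used in the unit test earlier in this diff (sketch only):

```ts
// Given this response from the field candidates endpoint ...
const resp = {
  keywordFieldCandidates: ['keyword-field', 'another-keyword-field'],
  selectedKeywordFieldCandidates: ['keyword-field'],
  textFieldCandidates: ['text-field', 'another-text-field', 'yet-another-text-field'],
  selectedTextFieldCandidates: ['text-field'],
};

// ... the thunk derives:
// fieldFilterUniqueItems (all candidates, sorted):
//   ['another-keyword-field', 'another-text-field', 'keyword-field', 'text-field', 'yet-another-text-field']
// fieldFilterSkippedItems (unique items minus the preselected ones):
//   ['another-keyword-field', 'another-text-field', 'yet-another-text-field']
```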
export interface FieldCandidatesState {
isLoading: boolean;
fieldSelectionMessage?: string;
fieldFilterUniqueItems: string[];
fieldFilterSkippedItems: string[];
keywordFieldCandidates: string[];
textFieldCandidates: string[];
selectedKeywordFieldCandidates: string[];
selectedTextFieldCandidates: string[];
}
function getDefaultState(): FieldCandidatesState {
return {
isLoading: false,
fieldFilterUniqueItems: [],
fieldFilterSkippedItems: [],
keywordFieldCandidates: [],
textFieldCandidates: [],
selectedKeywordFieldCandidates: [],
selectedTextFieldCandidates: [],
};
}
export const logRateAnalysisFieldCandidatesSlice = createSlice({
name: 'log_rate_analysis_field_candidates',
initialState: getDefaultState(),
reducers: {
setAllFieldCandidates: (
state: FieldCandidatesState,
action: PayloadAction<Omit<FieldCandidatesState, 'isLoading'>>
) => {
return { ...state, ...action.payload };
},
},
extraReducers: (builder) => {
builder.addCase(fetchFieldCandidates.pending, (state) => {
state.isLoading = true;
});
builder.addCase(fetchFieldCandidates.fulfilled, (state) => {
state.isLoading = false;
});
},
});
// Action creators are generated for each case reducer function
export const { setAllFieldCandidates } = logRateAnalysisFieldCandidatesSlice.actions;

View file

@ -16,6 +16,7 @@ import { logRateAnalysisResultsSlice } from '../api/stream_reducer';
import { logRateAnalysisSlice } from './log_rate_analysis_slice';
import { logRateAnalysisTableRowSlice } from './log_rate_analysis_table_row_slice';
import { logRateAnalysisFieldCandidatesSlice } from './log_rate_analysis_field_candidates_slice';
import type { InitialAnalysisStart } from './log_rate_analysis_slice';
const getReduxStore = () =>
@ -23,6 +24,8 @@ const getReduxStore = () =>
reducer: {
// General page state
logRateAnalysis: logRateAnalysisSlice.reducer,
// Field candidates
logRateAnalysisFieldCandidates: logRateAnalysisFieldCandidatesSlice.reducer,
// Analysis results
logRateAnalysisResults: logRateAnalysisResultsSlice.reducer,
// Handles running the analysis
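With the reducer registered under `logRateAnalysisFieldCandidates`, components can read the new slice and trigger the fetch. A minimal sketch, assuming the typed `useAppSelector` hook used elsewhere in this diff and a matching dispatch reference (e.g. from a `useAppDispatch` hook, which this hunk does not show):

```ts
import { useAppSelector } from '@kbn/aiops-log-rate-analysis/state';
import { fetchFieldCandidates } from '@kbn/aiops-log-rate-analysis/state/log_rate_analysis_field_candidates_slice';

// Read the field candidates slice ...
const { isLoading, fieldFilterUniqueItems, fieldSelectionMessage } = useAppSelector(
  (s) => s.logRateAnalysisFieldCandidates
);

// ... and kick off fetching. `startParams` is the same object later passed to
// `startStream`; the thunk only uses its `http`, `abortCtrl`, `body` and `headers` fields.
dispatch(fetchFieldCandidates(startParams));
```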

View file

@ -101,6 +101,14 @@ export const ItemFilterPopover: FC<ItemFilterPopoverProps> = ({
);
}, [uniqueItemNames]);
// If the supplied list of initial skipped items changes, only update if
// the list hasn't been touched yet.
useEffect(() => {
if (!isTouched) {
setSkippedItems(initialSkippedItems);
}
}, [initialSkippedItems, isTouched]);
const selectedItemCount = uniqueItemNames.length - skippedItems.length;
return (

View file

@ -7,7 +7,7 @@
import type { FC } from 'react';
import React, { useEffect, useMemo, useRef, useState } from 'react';
import { isEqual, uniq } from 'lodash';
import { isEqual } from 'lodash';
import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import {
@ -40,11 +40,13 @@ import { FormattedMessage } from '@kbn/i18n-react';
import type { SignificantItem, SignificantItemGroup } from '@kbn/ml-agg-utils';
import { AIOPS_TELEMETRY_ID } from '@kbn/aiops-common/constants';
import type { AiopsLogRateAnalysisSchema } from '@kbn/aiops-log-rate-analysis/api/schema';
import type { AiopsLogRateAnalysisSchemaSignificantItem } from '@kbn/aiops-log-rate-analysis/api/schema_v2';
import type { AiopsLogRateAnalysisSchemaSignificantItem } from '@kbn/aiops-log-rate-analysis/api/schema_v3';
import {
setCurrentAnalysisType,
setCurrentAnalysisWindowParameters,
resetResults,
} from '@kbn/aiops-log-rate-analysis/api/stream_reducer';
import { fetchFieldCandidates } from '@kbn/aiops-log-rate-analysis/state/log_rate_analysis_field_candidates_slice';
import { useAiopsAppContext } from '../../hooks/use_aiops_app_context';
import { useDataSource } from '../../hooks/use_data_source';
@ -92,7 +94,7 @@ const resultsGroupedOffId = 'aiopsLogRateAnalysisGroupingOff';
const resultsGroupedOnId = 'aiopsLogRateAnalysisGroupingOn';
const fieldFilterHelpText = i18n.translate('xpack.aiops.logRateAnalysis.page.fieldFilterHelpText', {
defaultMessage:
'Deselect non-relevant fields to remove them from groups and click the Apply button to rerun the grouping. Use the search bar to filter the list, then select/deselect multiple fields with the actions below.',
'Deselect non-relevant fields to remove them from the analysis and click the Apply button to rerun the analysis. Use the search bar to filter the list, then select/deselect multiple fields with the actions below.',
});
const columnsFilterHelpText = i18n.translate(
'xpack.aiops.logRateAnalysis.page.columnsFilterHelpText',
@ -118,8 +120,8 @@ const columnSearchAriaLabel = i18n.translate('xpack.aiops.analysis.columnSelecto
const columnsButton = i18n.translate('xpack.aiops.logRateAnalysis.page.columnsFilterButtonLabel', {
defaultMessage: 'Columns',
});
const fieldsButton = i18n.translate('xpack.aiops.analysis.fieldFilterButtonLabel', {
defaultMessage: 'Filter fields',
const fieldsButton = i18n.translate('xpack.aiops.analysis.fieldsButtonLabel', {
defaultMessage: 'Fields',
});
/**
@ -172,7 +174,8 @@ export const LogRateAnalysisResults: FC<LogRateAnalysisResultsProps> = ({
} = useAppSelector((s) => s.logRateAnalysis);
const { isRunning, errors: streamErrors } = useAppSelector((s) => s.logRateAnalysisStream);
const data = useAppSelector((s) => s.logRateAnalysisResults);
const { currentAnalysisType, currentAnalysisWindowParameters } = data;
const fieldCandidates = useAppSelector((s) => s.logRateAnalysisFieldCandidates);
const { currentAnalysisWindowParameters } = data;
// Store the performance metric's start time using a ref
// to be able to track it across rerenders.
@ -180,8 +183,6 @@ export const LogRateAnalysisResults: FC<LogRateAnalysisResultsProps> = ({
const abortCtrl = useRef(new AbortController());
const [groupResults, setGroupResults] = useState<boolean>(false);
const [groupSkipFields, setGroupSkipFields] = useState<string[]>([]);
const [uniqueFieldNames, setUniqueFieldNames] = useState<string[]>([]);
const [overrides, setOverrides] = useState<AiopsLogRateAnalysisSchema['overrides'] | undefined>(
undefined
);
@ -201,15 +202,26 @@ export const LogRateAnalysisResults: FC<LogRateAnalysisResultsProps> = ({
dispatch(clearAllRowState());
};
const {
fieldFilterUniqueItems,
fieldFilterSkippedItems,
keywordFieldCandidates,
textFieldCandidates,
selectedKeywordFieldCandidates,
selectedTextFieldCandidates,
} = fieldCandidates;
const fieldFilterButtonDisabled =
isRunning || fieldCandidates.isLoading || fieldFilterUniqueItems.length === 0;
const onFieldsFilterChange = (skippedFields: string[]) => {
setGroupSkipFields(skippedFields);
dispatch(resetResults());
setOverrides({
loaded: 0,
remainingFieldCandidates: [],
significantItems: data.significantItems.filter(
(d) => !skippedFields.includes(d.fieldName)
) as AiopsLogRateAnalysisSchemaSignificantItem[],
regroupOnly: true,
remainingKeywordFieldCandidates: keywordFieldCandidates.filter(
(d) => !skippedFields.includes(d)
),
remainingTextFieldCandidates: textFieldCandidates.filter((d) => !skippedFields.includes(d)),
regroupOnly: false,
});
startHandler(true, false);
};
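For example, with the keyword candidates of the artificial-logs dataset used in the API tests below and `message` as the only text candidate, deselecting `version` in the popover results in roughly the following overrides (illustration only; the significant items filter is elided):

```ts
// keywordFieldCandidates: ['response_code', 'url', 'user', 'version']
// textFieldCandidates:    ['message']
onFieldsFilterChange(['version']);
// -> setOverrides({
//      loaded: 0,
//      significantItems: /* previous results minus items whose fieldName is 'version' */,
//      remainingKeywordFieldCandidates: ['response_code', 'url', 'user'],
//      remainingTextFieldCandidates: ['message'],
//      regroupOnly: false,
//    });
// ... followed by startHandler(true, false) to rerun the analysis with the reduced field set.
```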
@ -218,13 +230,6 @@ export const LogRateAnalysisResults: FC<LogRateAnalysisResultsProps> = ({
setSkippedColumns(columns);
};
const { significantItems } = data;
useEffect(
() => setUniqueFieldNames(uniq(significantItems.map((d) => d.fieldName)).sort()),
[significantItems]
);
function cancelHandler() {
abortCtrl.current.abort();
dispatch(cancelStream());
@ -232,16 +237,25 @@ export const LogRateAnalysisResults: FC<LogRateAnalysisResultsProps> = ({
useEffect(() => {
if (!isRunning) {
const { loaded, remainingFieldCandidates, groupsMissing } = data;
const {
loaded,
remainingKeywordFieldCandidates,
remainingTextFieldCandidates,
groupsMissing,
} = data;
if (
loaded < 1 &&
((Array.isArray(remainingFieldCandidates) && remainingFieldCandidates.length > 0) ||
((Array.isArray(remainingKeywordFieldCandidates) &&
remainingKeywordFieldCandidates.length > 0) ||
(Array.isArray(remainingTextFieldCandidates) &&
remainingTextFieldCandidates.length > 0) ||
groupsMissing)
) {
setOverrides({
loaded,
remainingFieldCandidates,
remainingKeywordFieldCandidates,
remainingTextFieldCandidates,
significantItems: data.significantItems as AiopsLogRateAnalysisSchemaSignificantItem[],
});
} else if (loaded > 0) {
@ -271,8 +285,11 @@ export const LogRateAnalysisResults: FC<LogRateAnalysisResultsProps> = ({
// significant items on analysis refresh.
function startHandler(continueAnalysis = false, resetGroupButton = true) {
if (!continueAnalysis) {
setOverrides(undefined);
setUniqueFieldNames([]);
dispatch(resetResults());
setOverrides({
remainingKeywordFieldCandidates: selectedKeywordFieldCandidates,
remainingTextFieldCandidates: selectedTextFieldCandidates,
});
}
// Reset grouping to false and clear all row selections when restarting the analysis.
@ -291,14 +308,14 @@ export const LogRateAnalysisResults: FC<LogRateAnalysisResultsProps> = ({
}
const startParams = useMemo(() => {
if (!chartWindowParameters) {
if (!chartWindowParameters || !earliest || !latest) {
return undefined;
}
return {
http,
endpoint: '/internal/aiops/log_rate_analysis',
apiVersion: '2',
apiVersion: '3',
abortCtrl,
body: {
start: earliest,
@ -342,6 +359,7 @@ export const LogRateAnalysisResults: FC<LogRateAnalysisResultsProps> = ({
useEffect(() => {
if (startParams) {
dispatch(fetchFieldCandidates(startParams));
dispatch(setCurrentAnalysisType(analysisType));
dispatch(setCurrentAnalysisWindowParameters(chartWindowParameters));
dispatch(startStream(startParams));
@ -369,7 +387,7 @@ export const LogRateAnalysisResults: FC<LogRateAnalysisResultsProps> = ({
// Disable the grouping switch toggle only if no groups were found
// and the toggle wasn't enabled already.
const disabledGroupResultsSwitch = !foundGroups && !groupResults && groupSkipFields.length === 0;
const disabledGroupResultsSwitch = !foundGroups && !groupResults;
const toggleButtons = [
{
@ -421,13 +439,15 @@ export const LogRateAnalysisResults: FC<LogRateAnalysisResultsProps> = ({
<EuiFlexItem grow={false}>
<FieldFilterPopover
dataTestSubj="aiopsFieldFilterButton"
disabled={!groupResults || isRunning}
disabledApplyButton={isRunning}
disabled={fieldFilterButtonDisabled}
disabledApplyButton={fieldFilterButtonDisabled}
disabledApplyTooltipContent={disabledFieldFilterApplyButtonTooltipContent}
helpText={fieldFilterHelpText}
itemSearchAriaLabel={fieldsButton}
popoverButtonTitle={fieldsButton}
uniqueItemNames={uniqueFieldNames}
selectedItemLimit={1}
uniqueItemNames={fieldFilterUniqueItems}
initialSkippedItems={fieldFilterSkippedItems}
onChange={onFieldsFilterChange}
/>
</EuiFlexItem>
@ -451,13 +471,9 @@ export const LogRateAnalysisResults: FC<LogRateAnalysisResultsProps> = ({
/>
</EuiFlexItem>
</ProgressControls>
{showLogRateAnalysisResultsTable && currentAnalysisType !== undefined && (
<>
<EuiSpacer size="s" />
<LogRateAnalysisTypeCallOut analysisType={currentAnalysisType} />
<EuiSpacer size="xs" />
</>
)}
<LogRateAnalysisTypeCallOut />
{errors.length > 0 ? (
<>
<EuiSpacer size="xs" />

View file

@ -7,24 +7,27 @@
import React, { type FC } from 'react';
import { EuiCallOut, EuiText } from '@elastic/eui';
import { EuiCallOut, EuiSpacer, EuiText } from '@elastic/eui';
import { LOG_RATE_ANALYSIS_TYPE, type LogRateAnalysisType } from '@kbn/aiops-log-rate-analysis';
import { LOG_RATE_ANALYSIS_TYPE } from '@kbn/aiops-log-rate-analysis';
import { useAppSelector } from '@kbn/aiops-log-rate-analysis/state';
import { i18n } from '@kbn/i18n';
interface LogRateAnalysisTypeCallOutProps {
analysisType: LogRateAnalysisType;
}
export const LogRateAnalysisTypeCallOut: FC<LogRateAnalysisTypeCallOutProps> = ({
analysisType,
}) => {
export const LogRateAnalysisTypeCallOut: FC = () => {
const showCallout = useAppSelector((s) => s.logRateAnalysisResults.significantItems.length > 0);
const zeroDocsFallback = useAppSelector((s) => s.logRateAnalysisResults.zeroDocsFallback);
const analysisType = useAppSelector((s) => s.logRateAnalysisResults.currentAnalysisType);
const fieldSelectionMessage = useAppSelector(
(s) => s.logRateAnalysisFieldCandidates.fieldSelectionMessage
);
let callOutTitle: string;
let callOutText: string;
if (!showCallout) {
return null;
}
if (!zeroDocsFallback && analysisType === LOG_RATE_ANALYSIS_TYPE.SPIKE) {
callOutTitle = i18n.translate('xpack.aiops.analysis.analysisTypeSpikeCallOutTitle', {
defaultMessage: 'Analysis type: Log rate spike',
@ -62,13 +65,20 @@ export const LogRateAnalysisTypeCallOut: FC<LogRateAnalysisTypeCallOutProps> = (
}
return (
<EuiCallOut
title={<span data-test-subj="aiopsAnalysisTypeCalloutTitle">{callOutTitle}</span>}
color="primary"
iconType="pin"
size="s"
>
<EuiText size="s">{callOutText}</EuiText>
</EuiCallOut>
<>
<EuiSpacer size="s" />
<EuiCallOut
title={<span data-test-subj="aiopsAnalysisTypeCalloutTitle">{callOutTitle}</span>}
color="primary"
iconType="pin"
size="s"
>
<EuiText size="s">
{callOutText}
{fieldSelectionMessage && ` ${fieldSelectionMessage}`}
</EuiText>
</EuiCallOut>
<EuiSpacer size="xs" />
</>
);
};

View file

@ -25,6 +25,7 @@ import type {
AiopsPluginSetupDeps,
AiopsPluginStartDeps,
} from './types';
import { defineRoute as defineLogRateAnalysisFieldCandidatesRoute } from './routes/log_rate_analysis_field_candidates/define_route';
import { defineRoute as defineLogRateAnalysisRoute } from './routes/log_rate_analysis/define_route';
import { defineRoute as defineCategorizationFieldValidationRoute } from './routes/categorization_field_validation/define_route';
import { registerCasesPersistableState } from './register_cases';
@ -63,6 +64,7 @@ export class AiopsPlugin
// Register server side APIs
void core.getStartServices().then(([coreStart, depsStart]) => {
defineLogRateAnalysisFieldCandidatesRoute(router, aiopsLicense, coreStart, this.usageCounter);
defineLogRateAnalysisRoute(router, aiopsLicense, this.logger, coreStart, this.usageCounter);
defineCategorizationFieldValidationRoute(router, aiopsLicense, this.usageCounter);
});

View file

@ -11,7 +11,10 @@ import {
updateLoadingState,
setZeroDocsFallback,
} from '@kbn/aiops-log-rate-analysis/api/stream_reducer';
import type { AiopsLogRateAnalysisApiVersion as ApiVersion } from '@kbn/aiops-log-rate-analysis/api/schema';
import type {
AiopsLogRateAnalysisSchema,
AiopsLogRateAnalysisApiVersion as ApiVersion,
} from '@kbn/aiops-log-rate-analysis/api/schema';
import { isRequestAbortedError } from '@kbn/aiops-common/is_request_aborted_error';
import { fetchIndexInfo } from '@kbn/aiops-log-rate-analysis/queries/fetch_index_info';
@ -30,89 +33,110 @@ export const indexInfoHandlerFactory =
requestBody,
responseStream,
stateHandler,
version,
} = options;
const fieldCandidates: string[] = [];
let fieldCandidatesCount = fieldCandidates.length;
const keywordFieldCandidates: string[] = [];
let keywordFieldCandidatesCount = keywordFieldCandidates.length;
const textFieldCandidates: string[] = [];
let textFieldCandidatesCount = textFieldCandidates.length;
let zeroDocsFallback = false;
if (!requestBody.overrides?.remainingFieldCandidates) {
logDebugMessage('Fetch index information.');
responseStream.push(
updateLoadingState({
ccsWarning: false,
loaded: stateHandler.loaded(),
loadingState: i18n.translate(
'xpack.aiops.logRateAnalysis.loadingState.loadingIndexInformation',
{
defaultMessage: 'Loading index information.',
}
),
})
logDebugMessage('Fetch index information.');
responseStream.push(
updateLoadingState({
ccsWarning: false,
loaded: stateHandler.loaded(),
loadingState: i18n.translate(
'xpack.aiops.logRateAnalysis.loadingState.loadingIndexInformation',
{
defaultMessage: 'Loading index information.',
}
),
})
);
let skipFieldCandidates = false;
if (version === '2') {
skipFieldCandidates = Array.isArray(
(requestBody as AiopsLogRateAnalysisSchema<'2'>).overrides?.remainingFieldCandidates
);
try {
const indexInfo = await fetchIndexInfo({
esClient,
abortSignal,
arguments: {
...requestBody,
textFieldCandidatesOverrides: ['message', 'error.message'],
},
});
logDebugMessage(`Baseline document count: ${indexInfo.baselineTotalDocCount}`);
logDebugMessage(`Deviation document count: ${indexInfo.deviationTotalDocCount}`);
fieldCandidates.push(...indexInfo.fieldCandidates);
fieldCandidatesCount = fieldCandidates.length;
textFieldCandidates.push(...indexInfo.textFieldCandidates);
textFieldCandidatesCount = textFieldCandidates.length;
zeroDocsFallback = indexInfo.zeroDocsFallback;
} catch (e) {
if (!isRequestAbortedError(e)) {
logger.error(`Failed to fetch index information, got: \n${e.toString()}`);
responseStream.pushError(`Failed to fetch index information.`);
}
responseStream.end();
return;
}
stateHandler.loaded(LOADED_FIELD_CANDIDATES, false);
responseStream.pushPingWithTimeout();
responseStream.push(
updateLoadingState({
ccsWarning: false,
loaded: stateHandler.loaded(),
loadingState: i18n.translate(
'xpack.aiops.logRateAnalysis.loadingState.identifiedFieldCandidates',
{
defaultMessage:
'Identified {fieldCandidatesCount, plural, one {# field candidate} other {# field candidates}}.',
values: {
fieldCandidatesCount: fieldCandidatesCount + textFieldCandidatesCount,
},
}
),
})
);
responseStream.push(setZeroDocsFallback(zeroDocsFallback));
if (fieldCandidatesCount === 0) {
responseStream.endWithUpdatedLoadingState();
} else if (stateHandler.shouldStop()) {
logDebugMessage('shouldStop after fetching field candidates.');
responseStream.end();
return;
}
} else if (version === '3') {
skipFieldCandidates =
Array.isArray(
(requestBody as AiopsLogRateAnalysisSchema<'3'>).overrides
?.remainingKeywordFieldCandidates
) ||
Array.isArray(
(requestBody as AiopsLogRateAnalysisSchema<'3'>).overrides?.remainingTextFieldCandidates
);
}
return { fieldCandidates, textFieldCandidates, zeroDocsFallback };
try {
const indexInfo = await fetchIndexInfo({
esClient,
abortSignal,
arguments: {
...requestBody,
textFieldCandidatesOverrides: ['message', 'error.message'],
skipFieldCandidates,
},
});
logDebugMessage(`Baseline document count: ${indexInfo.baselineTotalDocCount}`);
logDebugMessage(`Deviation document count: ${indexInfo.deviationTotalDocCount}`);
keywordFieldCandidates.push(...indexInfo.keywordFieldCandidates);
keywordFieldCandidatesCount = keywordFieldCandidates.length;
textFieldCandidates.push(...indexInfo.textFieldCandidates);
textFieldCandidatesCount = textFieldCandidates.length;
zeroDocsFallback = indexInfo.zeroDocsFallback;
} catch (e) {
if (!isRequestAbortedError(e)) {
logger.error(`Failed to fetch index information, got: \n${e.toString()}`);
responseStream.pushError(`Failed to fetch index information.`);
}
responseStream.end();
return;
}
stateHandler.loaded(LOADED_FIELD_CANDIDATES, false);
responseStream.pushPingWithTimeout();
responseStream.push(
updateLoadingState({
ccsWarning: false,
loaded: stateHandler.loaded(),
loadingState: i18n.translate(
'xpack.aiops.logRateAnalysis.loadingState.identifiedFieldCandidates',
{
defaultMessage:
'Identified {fieldCandidatesCount, plural, one {# field candidate} other {# field candidates}}.',
values: {
fieldCandidatesCount: keywordFieldCandidatesCount + textFieldCandidatesCount,
},
}
),
})
);
responseStream.push(setZeroDocsFallback(zeroDocsFallback));
if (
!skipFieldCandidates &&
keywordFieldCandidatesCount === 0 &&
textFieldCandidatesCount === 0
) {
responseStream.endWithUpdatedLoadingState();
} else if (stateHandler.shouldStop()) {
logDebugMessage('shouldStop after fetching field candidates.');
responseStream.end();
return;
}
return { keywordFieldCandidates, textFieldCandidates, zeroDocsFallback };
};
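Put differently, a v3 request that already carries candidate lists in its overrides makes this handler skip the discovery step. A sketch of such an overrides fragment, using field names from the artificial-logs test data further below:

```ts
import type { AiopsLogRateAnalysisSchema } from '@kbn/aiops-log-rate-analysis/api/schema';

// Any array (even an empty one) in these override fields marks the request as a
// continuation, so `skipFieldCandidates` becomes true and fetchIndexInfo only
// resolves the baseline and deviation doc counts.
const overrides: AiopsLogRateAnalysisSchema<'3'>['overrides'] = {
  loaded: 0,
  remainingKeywordFieldCandidates: ['response_code', 'url', 'user', 'version'],
  remainingTextFieldCandidates: ['message'],
};
```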

View file

@ -44,15 +44,16 @@ export const significantItemsHandlerFactory =
requestBody,
responseStream,
stateHandler,
version,
}: ResponseStreamFetchOptions<T>) =>
async ({
fieldCandidates,
keywordFieldCandidates,
textFieldCandidates,
}: {
fieldCandidates: string[];
keywordFieldCandidates: string[];
textFieldCandidates: string[];
}) => {
let fieldCandidatesCount = fieldCandidates.length;
let keywordFieldCandidatesCount = keywordFieldCandidates.length;
const textFieldCandidatesCount = textFieldCandidates.length;
// This will store the combined count of detected significant log patterns and keywords
@ -61,7 +62,7 @@ export const significantItemsHandlerFactory =
const significantCategories: SignificantItem[] = [];
significantCategories.push(
...((requestBody as AiopsLogRateAnalysisSchema<'2'>).overrides?.significantItems?.filter(
...((requestBody as AiopsLogRateAnalysisSchema<'3'>).overrides?.significantItems?.filter(
(d) => d.type === SIGNIFICANT_ITEM_TYPE.LOG_PATTERN
) ?? [])
);
@ -69,30 +70,63 @@ export const significantItemsHandlerFactory =
const significantTerms: SignificantItem[] = [];
significantTerms.push(
...((requestBody as AiopsLogRateAnalysisSchema<'2'>).overrides?.significantItems?.filter(
...((requestBody as AiopsLogRateAnalysisSchema<'3'>).overrides?.significantItems?.filter(
(d) => d.type === SIGNIFICANT_ITEM_TYPE.KEYWORD
) ?? [])
);
let remainingFieldCandidates: string[];
let loadingStepSizePValues = PROGRESS_STEP_P_VALUES;
let remainingKeywordFieldCandidates: string[];
let remainingTextFieldCandidates: string[];
let loadingStepSizePValues: number;
if (requestBody.overrides?.remainingFieldCandidates) {
fieldCandidates.push(...requestBody.overrides?.remainingFieldCandidates);
remainingFieldCandidates = requestBody.overrides?.remainingFieldCandidates;
fieldCandidatesCount = fieldCandidates.length;
if (requestBody.overrides?.loaded) {
loadingStepSizePValues =
LOADED_FIELD_CANDIDATES +
PROGRESS_STEP_P_VALUES -
(requestBody.overrides?.loaded ?? PROGRESS_STEP_P_VALUES);
LOADED_FIELD_CANDIDATES + PROGRESS_STEP_P_VALUES - requestBody.overrides?.loaded;
} else {
remainingFieldCandidates = fieldCandidates;
loadingStepSizePValues = LOADED_FIELD_CANDIDATES;
}
if (version === '2') {
const overridesRemainingFieldCandidates = (requestBody as AiopsLogRateAnalysisSchema<'2'>)
.overrides?.remainingFieldCandidates;
if (Array.isArray(overridesRemainingFieldCandidates)) {
keywordFieldCandidates.push(...overridesRemainingFieldCandidates);
remainingKeywordFieldCandidates = overridesRemainingFieldCandidates;
keywordFieldCandidatesCount = keywordFieldCandidates.length;
} else {
remainingKeywordFieldCandidates = keywordFieldCandidates;
}
remainingTextFieldCandidates = textFieldCandidates;
} else if (version === '3') {
const overridesRemainingKeywordFieldCandidates = (
requestBody as AiopsLogRateAnalysisSchema<'3'>
).overrides?.remainingKeywordFieldCandidates;
if (Array.isArray(overridesRemainingKeywordFieldCandidates)) {
keywordFieldCandidates.push(...overridesRemainingKeywordFieldCandidates);
remainingKeywordFieldCandidates = overridesRemainingKeywordFieldCandidates;
keywordFieldCandidatesCount = keywordFieldCandidates.length;
} else {
remainingKeywordFieldCandidates = keywordFieldCandidates;
}
const overridesRemainingTextFieldCandidates = (requestBody as AiopsLogRateAnalysisSchema<'3'>)
.overrides?.remainingTextFieldCandidates;
if (Array.isArray(overridesRemainingTextFieldCandidates)) {
textFieldCandidates.push(...overridesRemainingTextFieldCandidates);
remainingTextFieldCandidates = overridesRemainingTextFieldCandidates;
} else {
remainingTextFieldCandidates = textFieldCandidates;
}
}
logDebugMessage('Fetch p-values.');
const loadingStep =
(1 / (fieldCandidatesCount + textFieldCandidatesCount)) * loadingStepSizePValues;
(1 / (keywordFieldCandidatesCount + textFieldCandidatesCount)) * loadingStepSizePValues;
const pValuesQueue = queue(async function (payload: QueueFieldCandidate) {
let queueItemLoadingStep = 0;
@ -124,7 +158,9 @@ export const significantItemsHandlerFactory =
return;
}
remainingFieldCandidates = remainingFieldCandidates.filter((d) => !fieldNames.includes(d));
remainingKeywordFieldCandidates = remainingKeywordFieldCandidates.filter(
(d) => !fieldNames.includes(d)
);
if (pValues.length > 0) {
significantTerms.push(...pValues);
@ -135,17 +171,33 @@ export const significantItemsHandlerFactory =
const { textFieldCandidates: fieldNames } = payload;
queueItemLoadingStep = loadingStep * fieldNames.length;
const significantCategoriesForField = await fetchSignificantCategories({
esClient,
logger,
emitError: responseStream.pushError,
abortSignal,
arguments: {
...requestBody,
fieldNames,
sampleProbability: stateHandler.sampleProbability(),
},
});
let significantCategoriesForField: Awaited<ReturnType<typeof fetchSignificantCategories>>;
try {
significantCategoriesForField = await fetchSignificantCategories({
esClient,
logger,
emitError: responseStream.pushError,
abortSignal,
arguments: {
...requestBody,
fieldNames,
sampleProbability: stateHandler.sampleProbability(),
},
});
} catch (e) {
if (!isRequestAbortedError(e)) {
logger.error(
`Failed to fetch p-values for ${fieldNames.join()}, got: \n${e.toString()}`
);
responseStream.pushError(`Failed to fetch p-values for ${fieldNames.join()}.`);
}
return;
}
remainingTextFieldCandidates = remainingTextFieldCandidates.filter(
(d) => !fieldNames.includes(d)
);
if (significantCategoriesForField.length > 0) {
significantCategories.push(...significantCategoriesForField);
@ -170,7 +222,8 @@ export const significantItemsHandlerFactory =
},
}
),
remainingFieldCandidates,
remainingKeywordFieldCandidates,
remainingTextFieldCandidates,
})
);
}, MAX_CONCURRENT_QUERIES);
@ -182,7 +235,9 @@ export const significantItemsHandlerFactory =
pValuesQueue.push(
[
...chunk(textFieldCandidates, QUEUE_CHUNKING_SIZE).map((d) => ({ textFieldCandidates: d })),
...chunk(fieldCandidates, QUEUE_CHUNKING_SIZE).map((d) => ({ keywordFieldCandidates: d })),
...chunk(keywordFieldCandidates, QUEUE_CHUNKING_SIZE).map((d) => ({
keywordFieldCandidates: d,
})),
],
(err) => {
if (err) {

View file

@ -39,23 +39,33 @@ export const topItemsHandlerFactory =
requestBody,
responseStream,
stateHandler,
version,
}: ResponseStreamFetchOptions<T>) =>
async ({
fieldCandidates,
keywordFieldCandidates,
textFieldCandidates,
}: {
fieldCandidates: string[];
keywordFieldCandidates: string[];
textFieldCandidates: string[];
}) => {
let fieldCandidatesCount = fieldCandidates.length;
let keywordFieldCandidatesCount = keywordFieldCandidates.length;
// This will store the combined count of detected log patterns and keywords
let fieldValuePairsCount = 0;
if (version === '3') {
const overridesRemainingTextFieldCandidates = (requestBody as AiopsLogRateAnalysisSchema<'3'>)
.overrides?.remainingTextFieldCandidates;
if (Array.isArray(overridesRemainingTextFieldCandidates)) {
textFieldCandidates.push(...overridesRemainingTextFieldCandidates);
}
}
const topCategories: SignificantItem[] = [];
topCategories.push(
...((requestBody as AiopsLogRateAnalysisSchema<'2'>).overrides?.significantItems?.filter(
...(requestBody.overrides?.significantItems?.filter(
(d) => d.type === SIGNIFICANT_ITEM_TYPE.LOG_PATTERN
) ?? [])
);
@ -82,32 +92,51 @@ export const topItemsHandlerFactory =
const topTerms: SignificantItem[] = [];
topTerms.push(
...((requestBody as AiopsLogRateAnalysisSchema<'2'>).overrides?.significantItems?.filter(
...((requestBody as AiopsLogRateAnalysisSchema<'3'>).overrides?.significantItems?.filter(
(d) => d.type === SIGNIFICANT_ITEM_TYPE.KEYWORD
) ?? [])
);
const fieldsToSample = new Set<string>();
let remainingFieldCandidates: string[];
let remainingKeywordFieldCandidates: string[];
let loadingStepSizeTopTerms = PROGRESS_STEP_P_VALUES;
if (requestBody.overrides?.remainingFieldCandidates) {
fieldCandidates.push(...requestBody.overrides?.remainingFieldCandidates);
remainingFieldCandidates = requestBody.overrides?.remainingFieldCandidates;
fieldCandidatesCount = fieldCandidates.length;
loadingStepSizeTopTerms =
LOADED_FIELD_CANDIDATES +
PROGRESS_STEP_P_VALUES -
(requestBody.overrides?.loaded ?? PROGRESS_STEP_P_VALUES);
} else {
remainingFieldCandidates = fieldCandidates;
if (version === '2') {
const overridesRemainingFieldCandidates = (requestBody as AiopsLogRateAnalysisSchema<'2'>)
.overrides?.remainingFieldCandidates;
if (Array.isArray(overridesRemainingFieldCandidates)) {
keywordFieldCandidates.push(...overridesRemainingFieldCandidates);
remainingKeywordFieldCandidates = overridesRemainingFieldCandidates;
keywordFieldCandidatesCount = keywordFieldCandidates.length;
loadingStepSizeTopTerms =
LOADED_FIELD_CANDIDATES +
PROGRESS_STEP_P_VALUES -
(requestBody.overrides?.loaded ?? PROGRESS_STEP_P_VALUES);
} else {
remainingKeywordFieldCandidates = keywordFieldCandidates;
}
} else if (version === '3') {
const overridesRemainingKeywordFieldCandidates = (
requestBody as AiopsLogRateAnalysisSchema<'3'>
).overrides?.remainingKeywordFieldCandidates;
if (Array.isArray(overridesRemainingKeywordFieldCandidates)) {
keywordFieldCandidates.push(...overridesRemainingKeywordFieldCandidates);
remainingKeywordFieldCandidates = overridesRemainingKeywordFieldCandidates;
keywordFieldCandidatesCount = keywordFieldCandidates.length;
loadingStepSizeTopTerms =
LOADED_FIELD_CANDIDATES +
PROGRESS_STEP_P_VALUES -
(requestBody.overrides?.loaded ?? PROGRESS_STEP_P_VALUES);
} else {
remainingKeywordFieldCandidates = keywordFieldCandidates;
}
}
logDebugMessage('Fetch p-values.');
logDebugMessage('Fetch top items.');
const topTermsQueue = queue(async function (fieldCandidate: string) {
stateHandler.loaded((1 / fieldCandidatesCount) * loadingStepSizeTopTerms, false);
stateHandler.loaded((1 / keywordFieldCandidatesCount) * loadingStepSizeTopTerms, false);
let fetchedTopTerms: Awaited<ReturnType<typeof fetchTopTerms>>;
@ -129,14 +158,12 @@ export const topItemsHandlerFactory =
return;
}
remainingFieldCandidates = remainingFieldCandidates.filter((d) => d !== fieldCandidate);
remainingKeywordFieldCandidates = remainingKeywordFieldCandidates.filter(
(d) => d !== fieldCandidate
);
if (fetchedTopTerms.length > 0) {
fetchedTopTerms.forEach((d) => {
fieldsToSample.add(d.fieldName);
});
topTerms.push(...fetchedTopTerms);
responseStream.push(addSignificantItems(fetchedTopTerms));
}
@ -154,12 +181,12 @@ export const topItemsHandlerFactory =
},
}
),
remainingFieldCandidates,
remainingKeywordFieldCandidates,
})
);
}, MAX_CONCURRENT_QUERIES);
topTermsQueue.push(fieldCandidates, (err) => {
topTermsQueue.push(keywordFieldCandidates, (err) => {
if (err) {
logger.error(`Failed to fetch top items, got: \n${err.toString()}`);
responseStream.pushError(`Failed to fetch top items.`);

View file

@ -10,6 +10,7 @@ import type { Logger } from '@kbn/logging';
import type { DataRequestHandlerContext } from '@kbn/data-plugin/server';
import type { UsageCounter } from '@kbn/usage-collection-plugin/server';
import { aiopsLogRateAnalysisSchemaV2 } from '@kbn/aiops-log-rate-analysis/api/schema_v2';
import { aiopsLogRateAnalysisSchemaV3 } from '@kbn/aiops-log-rate-analysis/api/schema_v3';
import { AIOPS_API_ENDPOINT } from '@kbn/aiops-common/constants';
import type { AiopsLicense } from '../../types';
@ -18,7 +19,7 @@ import { routeHandlerFactory } from './route_handler_factory';
/**
* `defineRoute` is called in the root `plugin.ts` to set up the API route
* for log pattern analysis. Its purpose is to take care of the route setup
* for log rate analysis. Its purpose is to take care of the route setup
* and versioning only. `routeHandlerFactory` is used to take care of
* the actual route logic.
*/
@ -44,5 +45,16 @@ export const defineRoute = (
},
},
routeHandlerFactory('2', license, logger, coreStart, usageCounter)
)
.addVersion(
{
version: '3',
validate: {
request: {
body: aiopsLogRateAnalysisSchemaV3,
},
},
},
routeHandlerFactory('3', license, logger, coreStart, usageCounter)
);
};
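Callers pick the route version via the versioned-router header. A minimal sketch in the style of the API integration tests further below; `requestBody` stands for a valid `AiopsLogRateAnalysisSchema<'3'>` payload:

```ts
import { ELASTIC_HTTP_VERSION_HEADER } from '@kbn/core-http-common';

// Exercise the bumped route version explicitly; the constant resolves to the
// 'elastic-api-version' header understood by Kibana's versioned router.
const resp = await supertest
  .post('/internal/aiops/log_rate_analysis')
  .set('kbn-xsrf', 'kibana')
  .set(ELASTIC_HTTP_VERSION_HEADER, '3')
  .send(requestBody)
  .expect(200);
```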

View file

@ -0,0 +1,46 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { CoreStart, IRouter } from '@kbn/core/server';
import type { DataRequestHandlerContext } from '@kbn/data-plugin/server';
import type { UsageCounter } from '@kbn/usage-collection-plugin/server';
import { aiopsLogRateAnalysisSchemaV3 } from '@kbn/aiops-log-rate-analysis/api/schema_v3';
import { AIOPS_API_ENDPOINT } from '@kbn/aiops-common/constants';
import type { AiopsLicense } from '../../types';
import { routeHandlerFactory } from './route_handler_factory';
/**
* `defineRoute` is called in the root `plugin.ts` to set up the API route
* for field candidates. Its purpose is to take care of the route setup
* and versioning only. `routeHandlerFactory` is used to take care of
* the actual route logic.
*/
export const defineRoute = (
router: IRouter<DataRequestHandlerContext>,
license: AiopsLicense,
coreStart: CoreStart,
usageCounter?: UsageCounter
) => {
router.versioned
.post({
path: AIOPS_API_ENDPOINT.LOG_RATE_ANALYSIS_FIELD_CANDIDATES,
access: 'internal',
})
.addVersion(
{
version: '1',
validate: {
request: {
body: aiopsLogRateAnalysisSchemaV3,
},
},
},
routeHandlerFactory('1', license, coreStart, usageCounter)
);
};

View file

@ -0,0 +1,92 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type {
CoreStart,
KibanaRequest,
RequestHandlerContext,
RequestHandler,
KibanaResponseFactory,
} from '@kbn/core/server';
import { createExecutionContext } from '@kbn/ml-route-utils';
import type { UsageCounter } from '@kbn/usage-collection-plugin/server';
import { AIOPS_TELEMETRY_ID, AIOPS_PLUGIN_ID } from '@kbn/aiops-common/constants';
import type {
AiopsLogRateAnalysisSchema,
AiopsLogRateAnalysisApiVersion as ApiVersion,
} from '@kbn/aiops-log-rate-analysis/api/schema';
import { fetchFieldCandidates } from '@kbn/aiops-log-rate-analysis/queries/fetch_field_candidates';
import { AIOPS_API_ENDPOINT } from '@kbn/aiops-common/constants';
import { TEXT_FIELD_SAFE_LIST } from '@kbn/aiops-log-rate-analysis/queries/fetch_field_candidates';
import { trackAIOpsRouteUsage } from '../../lib/track_route_usage';
import type { AiopsLicense } from '../../types';
/**
* The fetch field candidates route handler returns fields suitable for log rate analysis.
*/
export function routeHandlerFactory<T extends ApiVersion>(
version: '1',
license: AiopsLicense,
coreStart: CoreStart,
usageCounter?: UsageCounter
): RequestHandler<unknown, unknown, AiopsLogRateAnalysisSchema<T>> {
return async (
context: RequestHandlerContext,
request: KibanaRequest<unknown, unknown, AiopsLogRateAnalysisSchema<T>>,
response: KibanaResponseFactory
) => {
const { body, events, headers } = request;
trackAIOpsRouteUsage(
`POST ${AIOPS_API_ENDPOINT.LOG_RATE_ANALYSIS_FIELD_CANDIDATES}`,
headers[AIOPS_TELEMETRY_ID.AIOPS_ANALYSIS_RUN_ORIGIN],
usageCounter
);
if (!license.isActivePlatinumLicense) {
return response.forbidden();
}
const esClient = (await context.core).elasticsearch.client.asCurrentUser;
const executionContext = createExecutionContext(coreStart, AIOPS_PLUGIN_ID, request.route.path);
return await coreStart.executionContext.withContext(executionContext, async () => {
const controller = new AbortController();
const abortSignal = controller.signal;
events.aborted$.subscribe(() => {
controller.abort();
});
events.completed$.subscribe(() => {
controller.abort();
});
const textFieldCandidatesOverrides = TEXT_FIELD_SAFE_LIST;
try {
const fieldCandidates = await fetchFieldCandidates({
esClient,
abortSignal,
arguments: {
...body,
textFieldCandidatesOverrides,
},
});
return response.ok({ body: fieldCandidates });
} catch (e) {
return response.customError({
statusCode: 500,
body: {
message: 'Unable to fetch field candidates.',
},
});
}
});
};
}

View file

@ -14,5 +14,6 @@ export default function ({ loadTestFile }: FtrProviderContext) {
loadTestFile(require.resolve('./log_rate_analysis_full_analysis'));
loadTestFile(require.resolve('./log_rate_analysis_groups_only'));
loadTestFile(require.resolve('./log_rate_analysis_no_index'));
loadTestFile(require.resolve('./log_rate_analysis_field_candidates'));
});
}

View file

@ -0,0 +1,68 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import expect from '@kbn/expect';
import type { AiopsLogRateAnalysisSchema } from '@kbn/aiops-log-rate-analysis/api/schema';
import { ELASTIC_HTTP_VERSION_HEADER } from '@kbn/core-http-common';
import type { FetchFieldCandidatesResponse } from '@kbn/aiops-log-rate-analysis/queries/fetch_field_candidates';
import type { FtrProviderContext } from '../../ftr_provider_context';
import { getLogRateAnalysisTestData } from './test_data';
export default ({ getService }: FtrProviderContext) => {
const aiops = getService('aiops');
const supertest = getService('supertest');
const esArchiver = getService('esArchiver');
describe('POST /internal/aiops/log_rate_analysis/field_candidates', () => {
getLogRateAnalysisTestData<'3'>().forEach((testData) => {
describe(`with ${testData.testName}`, () => {
before(async () => {
if (testData.esArchive) {
await esArchiver.loadIfNeeded(testData.esArchive);
} else if (testData.dataGenerator) {
await aiops.logRateAnalysisDataGenerator.generateData(testData.dataGenerator);
}
});
after(async () => {
if (testData.esArchive) {
await esArchiver.unload(testData.esArchive);
} else if (testData.dataGenerator) {
await aiops.logRateAnalysisDataGenerator.removeGeneratedData(testData.dataGenerator);
}
});
async function assertFieldCandidates(data: FetchFieldCandidatesResponse) {
expect(data).to.eql(
testData.expected.fieldCandidates,
`Expected fieldCandidates to be ${JSON.stringify(
testData.expected.fieldCandidates
)}, got ${JSON.stringify(data)}`
);
}
async function requestFieldCandidates(body: AiopsLogRateAnalysisSchema<'3'>) {
const resp = await supertest
.post(`/internal/aiops/log_rate_analysis/field_candidates`)
.set('kbn-xsrf', 'kibana')
.set(ELASTIC_HTTP_VERSION_HEADER, '1')
.send(body)
.expect(200);
await assertFieldCandidates(resp.body);
}
it('should return field candidates', async () => {
await requestFieldCandidates(testData.requestBody);
});
});
});
});
};

View file

@ -11,7 +11,7 @@ import { format as formatUrl } from 'url';
import expect from '@kbn/expect';
import type { AiopsLogRateAnalysisSchema } from '@kbn/aiops-log-rate-analysis/api/schema';
import type { AiopsLogRateAnalysisSchemaSignificantItem } from '@kbn/aiops-log-rate-analysis/api/schema_v2';
import type { AiopsLogRateAnalysisSchemaSignificantItem } from '@kbn/aiops-log-rate-analysis/api/schema_v3';
import { ELASTIC_HTTP_VERSION_HEADER } from '@kbn/core-http-common';
import type { FtrProviderContext } from '../../ftr_provider_context';
@ -37,13 +37,24 @@ export default ({ getService }: FtrProviderContext) => {
getLogRateAnalysisTestData<typeof apiVersion>().forEach((testData) => {
let overrides: AiopsLogRateAnalysisSchema<typeof apiVersion>['overrides'] = {};
overrides = {
loaded: 0,
remainingFieldCandidates: [],
significantItems: testData.expected
.significantItems as AiopsLogRateAnalysisSchemaSignificantItem[],
regroupOnly: true,
} as AiopsLogRateAnalysisSchema<typeof apiVersion>['overrides'];
if (apiVersion === '2') {
overrides = {
loaded: 0,
remainingFieldCandidates: [],
significantItems: testData.expected
.significantItems as AiopsLogRateAnalysisSchemaSignificantItem[],
regroupOnly: true,
} as AiopsLogRateAnalysisSchema<typeof apiVersion>['overrides'];
} else if (apiVersion === '3') {
overrides = {
loaded: 0,
remainingKeywordFieldCandidates: [],
remainingTextFieldCandidates: [],
significantItems: testData.expected
.significantItems as AiopsLogRateAnalysisSchemaSignificantItem[],
regroupOnly: true,
} as AiopsLogRateAnalysisSchema<typeof apiVersion>['overrides'];
}
describe(`with v${apiVersion} - ${testData.testName}`, () => {
before(async () => {

View file

@ -25,7 +25,49 @@ import {
import type { TestData } from './types';
export const API_VERSIONS: ApiVersion[] = ['2'];
export const API_VERSIONS: ApiVersion[] = ['2', '3'];
export const API_VERSIONS_FIELD_CANDIDATES: ApiVersion[] = ['3'];
const expectedEcommerceFieldCandidates = [
'category.keyword',
'currency',
'customer_first_name.keyword',
'customer_full_name.keyword',
'customer_gender',
'customer_id',
'customer_last_name.keyword',
'customer_phone',
'day_of_week',
'email',
'geoip.city_name',
'geoip.continent_name',
'geoip.country_iso_code',
'geoip.region_name',
'manufacturer.keyword',
'order_id',
'products._id.keyword',
'products.category.keyword',
'products.manufacturer.keyword',
'products.product_name.keyword',
'products.sku',
'sku',
'type',
'user',
];
const expectedArtificialLogsFieldCandidates = {
isECS: false,
keywordFieldCandidates: ['response_code', 'url', 'user', 'version'],
selectedKeywordFieldCandidates: ['response_code', 'url', 'user', 'version'],
selectedTextFieldCandidates: [],
textFieldCandidates: [],
};
const expectedArtificialLogsFieldCandidatesWithTextfield = {
...expectedArtificialLogsFieldCandidates,
selectedTextFieldCandidates: ['message'],
textFieldCandidates: ['message'],
};
export const getLogRateAnalysisTestData = <T extends ApiVersion>(): Array<TestData<T>> => [
{
@ -76,6 +118,13 @@ export const getLogRateAnalysisTestData = <T extends ApiVersion>(): Array<TestDa
],
groups: [],
histogramLength: 20,
fieldCandidates: {
isECS: false,
keywordFieldCandidates: expectedEcommerceFieldCandidates,
selectedKeywordFieldCandidates: expectedEcommerceFieldCandidates,
selectedTextFieldCandidates: [],
textFieldCandidates: [],
},
},
},
{
@ -99,6 +148,7 @@ export const getLogRateAnalysisTestData = <T extends ApiVersion>(): Array<TestDa
significantItems: artificialLogSignificantTerms,
groups: artificialLogsSignificantItemGroups,
histogramLength: 20,
fieldCandidates: expectedArtificialLogsFieldCandidates,
},
},
{
@ -122,6 +172,7 @@ export const getLogRateAnalysisTestData = <T extends ApiVersion>(): Array<TestDa
significantItems: topTerms,
groups: topTermsGroups,
histogramLength: 20,
fieldCandidates: expectedArtificialLogsFieldCandidates,
},
},
{
@ -145,6 +196,7 @@ export const getLogRateAnalysisTestData = <T extends ApiVersion>(): Array<TestDa
significantItems: topTerms,
groups: topTermsGroups,
histogramLength: 20,
fieldCandidates: expectedArtificialLogsFieldCandidates,
},
},
{
@ -168,6 +220,7 @@ export const getLogRateAnalysisTestData = <T extends ApiVersion>(): Array<TestDa
significantItems: [...artificialLogSignificantTerms, ...artificialLogSignificantLogPatterns],
groups: artificialLogsSignificantItemGroupsTextfield,
histogramLength: 20,
fieldCandidates: expectedArtificialLogsFieldCandidatesWithTextfield,
},
},
{
@ -191,6 +244,7 @@ export const getLogRateAnalysisTestData = <T extends ApiVersion>(): Array<TestDa
significantItems: artificialLogSignificantTerms,
groups: artificialLogsSignificantItemGroups,
histogramLength: 20,
fieldCandidates: expectedArtificialLogsFieldCandidates,
},
},
{
@ -214,6 +268,7 @@ export const getLogRateAnalysisTestData = <T extends ApiVersion>(): Array<TestDa
significantItems: [...artificialLogSignificantTerms, ...artificialLogSignificantLogPatterns],
groups: artificialLogsSignificantItemGroupsTextfield,
histogramLength: 20,
fieldCandidates: expectedArtificialLogsFieldCandidatesWithTextfield,
},
},
{
@ -237,6 +292,13 @@ export const getLogRateAnalysisTestData = <T extends ApiVersion>(): Array<TestDa
groups: frequentItemSetsLargeArraysGroups,
significantItems: frequentItemSetsLargeArraysSignificantItems,
histogramLength: 1,
fieldCandidates: {
isECS: false,
keywordFieldCandidates: ['items'],
selectedKeywordFieldCandidates: ['items'],
selectedTextFieldCandidates: [],
textFieldCandidates: [],
},
},
},
];

View file

@ -10,6 +10,7 @@ import type {
AiopsLogRateAnalysisApiVersion as ApiVersion,
} from '@kbn/aiops-log-rate-analysis/api/schema';
import type { SignificantItem, SignificantItemGroup } from '@kbn/ml-agg-utils';
import type { FetchFieldCandidatesResponse } from '@kbn/aiops-log-rate-analysis/queries/fetch_field_candidates';
import type { LogRateAnalysisDataGenerator } from '../../../functional/services/aiops/log_rate_analysis_data_generator';
@ -24,5 +25,6 @@ export interface TestData<T extends ApiVersion> {
significantItems: SignificantItem[];
groups: SignificantItemGroup[];
histogramLength: number;
fieldCandidates: FetchFieldCandidatesResponse;
};
}

View file

@ -17,7 +17,7 @@ import type {
import type { FtrProviderContext } from '../../ftr_provider_context';
const API_VERSIONS: ApiVersion[] = ['2'];
const API_VERSIONS: ApiVersion[] = ['3'];
export default ({ getService }: FtrProviderContext) => {
const supertest = getService('supertest');

View file

@ -71,10 +71,7 @@ export const getArtificialLogDataViewTestData = ({
}
function getFieldSelectorPopover() {
if (zeroDocsFallback) {
return [...(textField ? ['message'] : []), 'response_code', 'url', 'user', 'version'];
}
return [...(textField ? ['message'] : []), 'response_code', 'url', 'user'];
return [...(textField ? ['message'] : []), 'response_code', 'url', 'user', 'version'];
}
function getSuiteTitle() {

View file

@ -10,7 +10,7 @@ import { LOG_RATE_ANALYSIS_TYPE } from '@kbn/aiops-log-rate-analysis';
import type { TestData } from '../../types';
export const farequoteDataViewTestDataWithQuery: TestData = {
suiteTitle: 'farequote with spike',
suiteTitle: 'farequote with spike with query',
analysisType: LOG_RATE_ANALYSIS_TYPE.SPIKE,
autoRun: false,
dataGenerator: 'farequote_with_spike',
@ -21,7 +21,7 @@ export const farequoteDataViewTestDataWithQuery: TestData = {
chartClickCoordinates: [0, 0],
columnSelectorSearch: 'p-value',
fieldSelectorSearch: 'airline',
fieldSelectorApplyAvailable: false,
fieldSelectorApplyAvailable: true,
query: 'NOT airline:("SWR" OR "ACA" OR "AWE" OR "BAW" OR "JAL" OR "JBU" OR "JZA" OR "KLM")',
expected: {
totalDocCountFormatted: '48,799',
@ -54,7 +54,7 @@ export const farequoteDataViewTestDataWithQuery: TestData = {
'Log rate change',
'Actions',
],
fieldSelectorPopover: ['airline', 'custom_field.keyword'],
fieldSelectorPopover: ['@version.keyword', 'airline', 'custom_field.keyword', 'type.keyword'],
globalState: {
refreshInterval: { pause: true, value: 60000 },
time: { from: '2016-02-07T00:00:00.000Z', to: '2016-02-11T23:59:54.000Z' },

View file

@ -83,15 +83,21 @@ export const kibanaLogsDataViewTestData: TestData = {
fieldSelectorPopover: [
'agent.keyword',
'clientip',
'event.dataset',
'extension.keyword',
'geo.dest',
'geo.src',
'geo.srcdest',
'host.keyword',
'index.keyword',
'ip',
'machine.os.keyword',
'message',
'referer',
'request',
'response.keyword',
'tags.keyword',
'url',
],
prompt: 'change-point',
},