[APM] Correlations: Update field candidates request. (#186182)

## Summary

Fixes #185875.

Since we created the correlations feature for APM, some new options were
added to the `_field_caps` API which allow us to improve the way we
retrieve field candidates for the analysis.

Previously we used 2 queries to get field candidates: We fetched all
fields via `_field_caps`, then searched for a random sample of 1000 docs
to identify fields with values. Additional code would then filter the
supported fields.

Now we can use additional `_field_caps` options to get rid of the random
docs request and simplify some of the filtering code.

- `filters: '-metadata,-parent'` will exclude metadata and object
fields,
- `include_empty_fields: false` will include populated fields only,
- `index_filter: ...` allows us to provide a range filter with `start/end`
to limit the scope of indices,
- `types: ...` allows us to get only fields of the type supported by the
analysis (keyword, boolean, ip).

### Checklist

- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios
- [x] This was checked for breaking API changes and was [labeled
appropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)
This commit is contained in:
Walter Rafelsberger 2024-07-02 17:26:12 +02:00 committed by GitHub
parent bd9a2ad358
commit 8fc3d3ca1b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 79 additions and 70 deletions

View file

@ -67,8 +67,6 @@ export const FIELD_PREFIX_TO_ADD_AS_CANDIDATE = ['cloud.', 'labels.', 'user_agen
/**
* Other constants
*/
export const POPULATED_DOC_COUNT_SAMPLE_SIZE = 1000;
export const PERCENTILES_STEP = 2;
export const TERMS_SIZE = 20;
export const SIGNIFICANT_FRACTION = 3;

View file

@ -0,0 +1,60 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { ProcessorEvent } from '@kbn/observability-plugin/common';
import type { APMEventClient } from '../../../lib/helpers/create_es_client/create_apm_event_client';
import { fetchDurationFieldCandidates } from './fetch_duration_field_candidates';
// Builds a `_field_caps` entry describing a searchable, aggregatable,
// non-metadata field of the given Elasticsearch type.
const createFieldCaps = (type: string) => ({
  [type]: { type, metadata_field: false, searchable: true, aggregatable: true },
});

// Canned `_field_caps` response used by the stubbed APM event client.
const mockResponse = {
  indices: ['.ds-traces-apm-default-2024.06.17-000001'],
  fields: {
    // Fields expected to be returned as candidates.
    'keep.this.field': createFieldCaps('keyword'),
    'source.ip': createFieldCaps('ip'),
    // 'observer.'-prefixed fields should be dropped (via FIELD_PREFIX_TO_EXCLUDE_AS_CANDIDATE).
    'observer.version': createFieldCaps('keyword'),
    'observer.hostname': createFieldCaps('keyword'),
    // Sample fields that should be dropped by name (via FIELDS_TO_EXCLUDE_AS_CANDIDATE).
    'agent.name': createFieldCaps('keyword'),
    'parent.id': createFieldCaps('keyword'),
  },
};
// Minimal stand-in for the APM event client: only `fieldCaps` is stubbed,
// and it always resolves with the canned response above.
const mockApmEventClient = {
  fieldCaps: () => Promise.resolve(mockResponse),
} as unknown as APMEventClient;
describe('fetchDurationFieldCandidates', () => {
  it('returns duration field candidates', async () => {
    // Of the mocked fields, only these two are expected to be returned.
    const expectedCandidates = ['keep.this.field', 'source.ip'];

    const result = await fetchDurationFieldCandidates({
      apmEventClient: mockApmEventClient,
      eventType: ProcessorEvent.transaction,
      start: 0,
      end: 1,
      environment: 'ENVIRONMENT_ALL',
      query: { match_all: {} },
      kuery: '',
    });

    expect(result).toStrictEqual({ fieldCandidates: expectedCandidates });
  });
});

View file

@ -8,15 +8,12 @@
import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { ES_FIELD_TYPES } from '@kbn/field-types';
import { ProcessorEvent } from '@kbn/observability-plugin/common';
import { rangeQuery } from '@kbn/observability-plugin/server';
import type { CommonCorrelationsQueryParams } from '../../../../common/correlations/types';
import {
FIELD_PREFIX_TO_EXCLUDE_AS_CANDIDATE,
FIELDS_TO_ADD_AS_CANDIDATE,
FIELDS_TO_EXCLUDE_AS_CANDIDATE,
POPULATED_DOC_COUNT_SAMPLE_SIZE,
} from '../../../../common/correlations/constants';
import { hasPrefixToInclude } from '../../../../common/correlations/utils';
import { getCommonCorrelationsQuery } from './get_common_correlations_query';
import { APMEventClient } from '../../../lib/helpers/create_es_client/create_apm_event_client';
const SUPPORTED_ES_FIELD_TYPES = [
@ -25,13 +22,6 @@ const SUPPORTED_ES_FIELD_TYPES = [
ES_FIELD_TYPES.BOOLEAN,
];
/**
 * Tells whether a field must be left out of the field candidates.
 *
 * A field is excluded when its exact name is listed in
 * FIELDS_TO_EXCLUDE_AS_CANDIDATE or when its name starts with one of the
 * prefixes in FIELD_PREFIX_TO_EXCLUDE_AS_CANDIDATE.
 */
export const shouldBeExcluded = (fieldName: string) => {
  // Exact-name match short-circuits the prefix scan.
  if (FIELDS_TO_EXCLUDE_AS_CANDIDATE.has(fieldName)) {
    return true;
  }
  return FIELD_PREFIX_TO_EXCLUDE_AS_CANDIDATE.some((excludedPrefix) =>
    fieldName.startsWith(excludedPrefix)
  );
};
/**
 * Shape of the value resolved by `fetchDurationFieldCandidates`.
 */
export interface DurationFieldCandidatesResponse {
// Names of the fields selected as candidates.
fieldCandidates: string[];
}
@ -39,73 +29,34 @@ export interface DurationFieldCandidatesResponse {
export async function fetchDurationFieldCandidates({
apmEventClient,
eventType,
query,
start,
end,
environment,
kuery,
}: CommonCorrelationsQueryParams & {
query: estypes.QueryDslQueryContainer;
apmEventClient: APMEventClient;
eventType: ProcessorEvent.transaction | ProcessorEvent.span;
}): Promise<DurationFieldCandidatesResponse> {
// Get all supported fields
const [respMapping, respRandomDoc] = await Promise.all([
apmEventClient.fieldCaps('get_field_caps', {
apm: {
events: [eventType],
},
fields: '*',
}),
apmEventClient.search('get_random_doc_for_field_candidate', {
apm: {
events: [eventType],
},
body: {
track_total_hits: false,
fields: ['*'],
_source: false,
query: getCommonCorrelationsQuery({
start,
end,
environment,
kuery,
query,
}),
size: POPULATED_DOC_COUNT_SAMPLE_SIZE,
},
}),
]);
const finalFieldCandidates = new Set(FIELDS_TO_ADD_AS_CANDIDATE);
const acceptableFields: Set<string> = new Set();
Object.entries(respMapping.fields).forEach(([key, value]) => {
const fieldTypes = Object.keys(value) as ES_FIELD_TYPES[];
const isSupportedType = fieldTypes.some((type) => SUPPORTED_ES_FIELD_TYPES.includes(type));
// Definitely include if field name matches any of the wild card
if (hasPrefixToInclude(key) && isSupportedType) {
finalFieldCandidates.add(key);
}
// Check if fieldName is something we can aggregate on
if (isSupportedType) {
acceptableFields.add(key);
}
});
const sampledDocs = respRandomDoc.hits.hits.map((d) => d.fields ?? {});
// Get all field names for each returned doc and flatten it
// to a list of unique field names used across all docs
// and filter by list of acceptable fields and some APM specific unique fields.
[...new Set(sampledDocs.map(Object.keys).flat(1))].forEach((field) => {
if (acceptableFields.has(field) && !shouldBeExcluded(field)) {
finalFieldCandidates.add(field);
}
const respMapping = await apmEventClient.fieldCaps('get_field_caps', {
apm: {
events: [eventType],
},
fields: '*',
// We exclude metadata and parent fields as they are not useful for correlations.
// There's an issue in ES (https://github.com/elastic/elasticsearch/issues/109797)
// that describes why we need to add -parent in addition to the types option.
filters: '-metadata,-parent',
include_empty_fields: false,
index_filter: rangeQuery(start, end)[0],
types: SUPPORTED_ES_FIELD_TYPES,
});
return {
fieldCandidates: [...finalFieldCandidates],
fieldCandidates: Object.keys(respMapping.fields).filter((fieldName: string) => {
return (
!FIELDS_TO_EXCLUDE_AS_CANDIDATE.has(fieldName) &&
!FIELD_PREFIX_TO_EXCLUDE_AS_CANDIDATE.some((prefix) => fieldName.startsWith(prefix))
);
}),
};
}