[8.x] [EEM] Replace hashed ID with human readable ID (#193652) (#196902)

# Backport

This will backport the following commits from `main` to `8.x`:
- [[EEM] Replace hashed ID with human readable ID
(#193652)](https://github.com/elastic/kibana/pull/193652)

<!--- Backport version: 9.4.3 -->

### Questions ?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Milton
Hultgren","email":"milton.hultgren@elastic.co"},"sourceCommit":{"committedDate":"2024-10-18T14:42:38Z","message":"[EEM]
Replace hashed ID with human readable ID (#193652)\n\nThis PR turns the
`entity.id` field format from a hashed value to a\r\nhuman readable
string of the **values** found in the identity fields,\r\nsuch as
`my_host-my_cloud_zone` for the identity fields
`[host.name,\r\ncloud.availability_zone]`.\r\nThe order of the values is
based on the order in the identity
fields\r\nlist.\r\n\r\n---------\r\n\r\nCo-authored-by: kibanamachine
<42973632+kibanamachine@users.noreply.github.com>","sha":"ae2c6ad321f2b4318d4114c1309b4420861bcd29","branchLabelMapping":{"^v9.0.0$":"main","^v8.17.0$":"8.x","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:skip","v9.0.0","backport:prev-minor","ci:project-deploy-observability","Feature:EEM"],"title":"[EEM]
Replace hashed ID with human readable
ID","number":193652,"url":"https://github.com/elastic/kibana/pull/193652","mergeCommit":{"message":"[EEM]
Replace hashed ID with human readable ID (#193652)\n\nThis PR turns the
`entity.id` field format from a hashed value to a\r\nhuman readable
string of the **values** found in the identity fields,\r\nsuch as
`my_host-my_cloud_zone` for the identity fields
`[host.name,\r\ncloud.availability_zone]`.\r\nThe order of the values is
based on the order in the identity
fields\r\nlist.\r\n\r\n---------\r\n\r\nCo-authored-by: kibanamachine
<42973632+kibanamachine@users.noreply.github.com>","sha":"ae2c6ad321f2b4318d4114c1309b4420861bcd29"}},"sourceBranch":"main","suggestedTargetBranches":[],"targetPullRequestStates":[{"branch":"main","label":"v9.0.0","branchLabelMappingKey":"^v9.0.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/193652","number":193652,"mergeCommit":{"message":"[EEM]
Replace hashed ID with human readable ID (#193652)\n\nThis PR turns the
`entity.id` field format from a hashed value to a\r\nhuman readable
string of the **values** found in the identity fields,\r\nsuch as
`my_host-my_cloud_zone` for the identity fields
`[host.name,\r\ncloud.availability_zone]`.\r\nThe order of the values is
based on the order in the identity
fields\r\nlist.\r\n\r\n---------\r\n\r\nCo-authored-by: kibanamachine
<42973632+kibanamachine@users.noreply.github.com>","sha":"ae2c6ad321f2b4318d4114c1309b4420861bcd29"}}]}]
BACKPORT-->

Co-authored-by: Milton Hultgren <milton.hultgren@elastic.co>
This commit is contained in:
Kibana Machine 2024-10-19 03:25:33 +11:00 committed by GitHub
parent a6216c5582
commit ef9f373008
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 59 additions and 178 deletions

View file

@ -91,7 +91,7 @@ describe('checking migration metadata changes on all registered SO types', () =>
"endpoint:unified-user-artifact-manifest": "71c7fcb52c658b21ea2800a6b6a76972ae1c776e",
"endpoint:user-artifact-manifest": "1c3533161811a58772e30cdc77bac4631da3ef2b",
"enterprise_search_telemetry": "9ac912e1417fc8681e0cd383775382117c9e3d3d",
"entity-definition": "e3811fd5fbb878d170067c0d6897a2e63010af36",
"entity-definition": "1c6bff35c423d5dc5650bc806cf2899e4706a0bc",
"entity-discovery-api-key": "c267a65c69171d1804362155c1378365f5acef88",
"entity-engine-status": "8cb7dcb13f5e2ea8f2e08dd4af72c110e2051120",
"epm-packages": "8042d4a1522f6c4e6f5486e791b3ffe3a22f88fd",

View file

@ -145,7 +145,7 @@ export type MetadataField = z.infer<typeof metadataSchema>;
export const identityFieldsSchema = z
.object({
field: z.string(),
optional: z.boolean(),
optional: z.literal(false),
})
.or(z.string().transform((value) => ({ field: value, optional: false })));

View file

@ -7,14 +7,11 @@ Entity definitions are a core concept of the entity model. They define the way t
> [!NOTE]
> Entity definitions are based on transform and as such a subset of the configuration is tightly coupled to transform settings. While we provide defaults for these settings, one can still update properties such as `frequency`, `sync.time.delay` and `sync.time.field` (see [transform documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/put-transform.html)).
When creating a definition (see [entity definition schema](https://github.com/elastic/kibana/blob/main/x-pack/packages/kbn-entities-schema/src/schema/entity_definition.ts#L21)), entity manager will create two transforms to collect entities based on the configured [identityFields](https://github.com/elastic/kibana/blob/main/x-pack/packages/kbn-entities-schema/src/schema/entity_definition.ts#L29):
- the history transform creates a snapshot of entities over time, reading documents from the configured source indices and grouping them by the identity fields and a date histogram. For a given entity the transform creates at most one document per interval (configured by the `history.settings.interval` setting), with its associated metrics and metadata fields aggregated over that interval. While metrics support [multiple aggregations](https://github.com/elastic/kibana/blob/main/x-pack/packages/kbn-entities-schema/src/schema/common.ts#L13), metadata use a `terms` aggregation (to be expanded by https://github.com/elastic/elastic-entity-model/issues/130). To limit the amount of data processed when created, history transform accepts a `history.settings.lookbackPeriod` that defaults to 1h.
- the summary transform creates one document per entity, reading documents from the history transform output indices. Each entity document gets overwritten over time, updating metadata and metrics with the following rules: metrics get the value of the most recent history document while metadata are aggregated over a computed period that attempts to limit the amount of data it looks at.
When creating a definition (see [entity definition schema](https://github.com/elastic/kibana/blob/main/x-pack/packages/kbn-entities-schema/src/schema/entity_definition.ts#L21)), entity manager will create a transform to collect entities based on the configured [identityFields](https://github.com/elastic/kibana/blob/main/x-pack/packages/kbn-entities-schema/src/schema/entity_definition.ts#L29).
The transform creates one document per entity, reading documents from the configured source indices and grouping them by the identity fields. Each entity document gets overwritten each time the transform runs.
The definition allows defining an optional backfill transform. This works on the principle that transforms only capture an immutable snapshot of the data at the time they execute. If data is ingested with delay and falls in a bucket that was already covered by a previous [transform checkpoint](https://www.elastic.co/guide/en/elasticsearch/reference/current/transform-checkpoints.html), the data will never be transformed in the output. Ideally one would sync the transform on the [event.ingested field](https://www.elastic.co/guide/en/elasticsearch/reference/current/transform-checkpoints.html#sync-field-ingest-timestamp) to work with delayed data, when that is not possible or desirable the backfill transform can be a fallback. Backfill transform will output its data to the same history indice, because transform uses deterministic ids for the generated document, it will not create duplicate but instead upsert documents from the initial history transform pass.
To enable the backfill transform set a value to `history.settings.backfillSyncDelay` higher than the `history.settings.syncDelay`. The backfill lookback and frequency can also be configured.
History and summary transforms will output their data to indices where history writes to time-based (monthly) indices (`.entities.v1.history.<definition-id>.<yyyy-MM-dd>`) and summary writes to a unique indice (`.entities.v1.latest.<definition-id>`). For convenience we create type-based aliases on top on these indices, where the type is extracted from the `entityDefinition.type` property. For a definition of `type: service`, the data can be read through the `entities-service-history` and `entities-service-latest` aliases.
The transform outputs its data to a unique index (`.entities.v1.latest.<definition-id>`).
For convenience we create type-based aliases on top of these indices, where the type is extracted from the `entityDefinition.type` property. For a definition of `type: service`, the data can be read through the `entities-service-history` and `entities-service-latest` aliases.
#### Iterating on a definition
@ -48,7 +45,7 @@ Let's look at the most basic example, one that only discovers entities.
```
This definition will look inside the `logs-*` index pattern for documents that contain the field `host.name` and group them based on that value to create the entities. It will run the discovery every 2 minutes.
The documents will be of type "host" so they can be queried via `entities-host-history` or `entities-host-latest`. Beyond the basic `entity` fields, each entity document will also contain all the identify fields at the root of the document, this it is easy to find your hosts by filtering by `host.name`. Note that it is not necessary to add the `identifyFields` as metadata as these will be automatically collected in the output documents, and that it is possible to set `identityFields` as optional.
The documents will be of type "host" so they can be queried via `entities-host-history` or `entities-host-latest`. Beyond the basic `entity` fields, each entity document will also contain all the identity fields at the root of the document, so it is easy to find your hosts by filtering by `host.name`. Note that it is not necessary to add the `identityFields` as metadata as these will be automatically collected in the output documents.
An entity document for this definition will look like below.
@ -213,8 +210,7 @@ __service_from_logs definition__
"indexPatterns": ["logs-*"],
/** the field/combination of fields identifying an entity **/
"identityFields": [
"service.name", // == { "field": "service.name", "optional": false }
{ "field": "service.environment", "optional": true }
"service.name",
],
"displayNameTemplate": "{{service.name}}{{#service.environment}}:{{.}}{{/service.environment}}", // <a href="https://mustache.github.io/">mustache</a> template
/**

View file

@ -16,8 +16,8 @@ export const builtInEntityDefinition = entityDefinitionSchema.parse({
latest: {
timestampField: '@timestamp',
},
identityFields: ['log.logger', { field: 'event.category', optional: true }],
displayNameTemplate: '{{log.logger}}{{#event.category}}:{{.}}{{/event.category}}',
identityFields: ['log.logger'],
displayNameTemplate: '{{log.logger}}',
metadata: ['tags', 'host.name', 'host.os.name', { source: '_index', destination: 'sourceIndex' }],
metrics: [],
});

View file

@ -20,8 +20,8 @@ export const rawEntityDefinition = {
syncDelay: '10s',
},
},
identityFields: ['log.logger', { field: 'event.category', optional: true }],
displayNameTemplate: '{{log.logger}}{{#event.category}}:{{.}}{{/event.category}}',
identityFields: ['log.logger'],
displayNameTemplate: '{{log.logger}}',
metadata: ['tags', 'host.name', 'host.os.name', { source: '_index', destination: 'sourceIndex' }],
metrics: [
{

View file

@ -37,52 +37,13 @@ Array [
"field": "entity.identityFields",
"value": Array [
"log.logger",
"event.category",
],
},
},
Object {
"script": Object {
"description": "Generated the entity.id field",
"source": "// This function will recursively collect all the values of a HashMap of HashMaps
Collection collectValues(HashMap subject) {
Collection values = new ArrayList();
// Iterate through the values
for(Object value: subject.values()) {
// If the value is a HashMap, recurse
if (value instanceof HashMap) {
values.addAll(collectValues((HashMap) value));
} else {
values.add(String.valueOf(value));
}
}
return values;
}
// Create the string builder
StringBuilder entityId = new StringBuilder();
if (ctx[\\"entity\\"][\\"identity\\"] != null) {
// Get the values as a collection
Collection values = collectValues(ctx[\\"entity\\"][\\"identity\\"]);
// Convert to a list and sort
List sortedValues = new ArrayList(values);
Collections.sort(sortedValues);
// Create comma delimited string
for(String instanceValue: sortedValues) {
entityId.append(instanceValue);
entityId.append(\\":\\");
}
// Assign the entity.id
ctx[\\"entity\\"][\\"id\\"] = entityId.length() > 0 ? entityId.substring(0, entityId.length() - 1) : \\"unknown\\";
}",
},
},
Object {
"fingerprint": Object {
"fields": Array [
"entity.id",
],
"method": "MurmurHash3",
"target_field": "entity.id",
"set": Object {
"field": "entity.id",
"value": "{{{entity.identity.log.logger}}}",
},
},
Object {
@ -123,13 +84,6 @@ if (ctx.entity?.metadata?.sourceIndex?.data != null) {
"value": "{{entity.identity.log.logger}}",
},
},
Object {
"set": Object {
"field": "event.category",
"if": "ctx.entity?.identity?.event?.category != null",
"value": "{{entity.identity.event.category}}",
},
},
Object {
"remove": Object {
"field": "entity.identity",
@ -139,7 +93,7 @@ if (ctx.entity?.metadata?.sourceIndex?.data != null) {
Object {
"set": Object {
"field": "entity.displayName",
"value": "{{log.logger}}{{#event.category}}:{{.}}{{/event.category}}",
"value": "{{log.logger}}",
},
},
Object {
@ -188,52 +142,13 @@ Array [
"field": "entity.identityFields",
"value": Array [
"log.logger",
"event.category",
],
},
},
Object {
"script": Object {
"description": "Generated the entity.id field",
"source": "// This function will recursively collect all the values of a HashMap of HashMaps
Collection collectValues(HashMap subject) {
Collection values = new ArrayList();
// Iterate through the values
for(Object value: subject.values()) {
// If the value is a HashMap, recurse
if (value instanceof HashMap) {
values.addAll(collectValues((HashMap) value));
} else {
values.add(String.valueOf(value));
}
}
return values;
}
// Create the string builder
StringBuilder entityId = new StringBuilder();
if (ctx[\\"entity\\"][\\"identity\\"] != null) {
// Get the values as a collection
Collection values = collectValues(ctx[\\"entity\\"][\\"identity\\"]);
// Convert to a list and sort
List sortedValues = new ArrayList(values);
Collections.sort(sortedValues);
// Create comma delimited string
for(String instanceValue: sortedValues) {
entityId.append(instanceValue);
entityId.append(\\":\\");
}
// Assign the entity.id
ctx[\\"entity\\"][\\"id\\"] = entityId.length() > 0 ? entityId.substring(0, entityId.length() - 1) : \\"unknown\\";
}",
},
},
Object {
"fingerprint": Object {
"fields": Array [
"entity.id",
],
"method": "MurmurHash3",
"target_field": "entity.id",
"set": Object {
"field": "entity.id",
"value": "{{{entity.identity.log.logger}}}",
},
},
Object {
@ -274,13 +189,6 @@ if (ctx.entity?.metadata?.sourceIndex?.data != null) {
"value": "{{entity.identity.log.logger}}",
},
},
Object {
"set": Object {
"field": "event.category",
"if": "ctx.entity?.identity?.event?.category != null",
"value": "{{entity.identity.event.category}}",
},
},
Object {
"remove": Object {
"field": "entity.identity",
@ -290,7 +198,7 @@ if (ctx.entity?.metadata?.sourceIndex?.data != null) {
Object {
"set": Object {
"field": "entity.displayName",
"value": "{{log.logger}}{{#event.category}}:{{.}}{{/event.category}}",
"value": "{{log.logger}}",
},
},
Object {

View file

@ -140,52 +140,13 @@ export function generateLatestProcessors(definition: EntityDefinition) {
},
},
{
script: {
description: 'Generated the entity.id field',
source: cleanScript(`
// This function will recursively collect all the values of a HashMap of HashMaps
Collection collectValues(HashMap subject) {
Collection values = new ArrayList();
// Iterate through the values
for(Object value: subject.values()) {
// If the value is a HashMap, recurse
if (value instanceof HashMap) {
values.addAll(collectValues((HashMap) value));
} else {
values.add(String.valueOf(value));
}
}
return values;
}
// Create the string builder
StringBuilder entityId = new StringBuilder();
if (ctx["entity"]["identity"] != null) {
// Get the values as a collection
Collection values = collectValues(ctx["entity"]["identity"]);
// Convert to a list and sort
List sortedValues = new ArrayList(values);
Collections.sort(sortedValues);
// Create comma delimited string
for(String instanceValue: sortedValues) {
entityId.append(instanceValue);
entityId.append(":");
}
// Assign the entity.id
ctx["entity"]["id"] = entityId.length() > 0 ? entityId.substring(0, entityId.length() - 1) : "unknown";
}
`),
},
},
{
fingerprint: {
fields: ['entity.id'],
target_field: 'entity.id',
method: 'MurmurHash3',
set: {
field: 'entity.id',
value: definition.identityFields
.map((identityField) => identityField.field)
.sort()
.map((identityField) => `{{{entity.identity.${identityField}}}}`)
.join('-'),
},
},
...(definition.staticFields != null

View file

@ -139,16 +139,9 @@ Object {
},
},
"group_by": Object {
"entity.identity.event.category": Object {
"terms": Object {
"field": "event.category",
"missing_bucket": true,
},
},
"entity.identity.log.logger": Object {
"terms": Object {
"field": "log.logger",
"missing_bucket": false,
},
},
},

View file

@ -32,13 +32,9 @@ export function generateLatestTransform(
filter.push(getElasticsearchQueryOrThrow(definition.filter));
}
if (definition.identityFields.some(({ optional }) => !optional)) {
definition.identityFields
.filter(({ optional }) => !optional)
.forEach(({ field }) => {
filter.push({ exists: { field } });
});
}
definition.identityFields.forEach(({ field }) => {
filter.push({ exists: { field } });
});
filter.push({
range: {
@ -108,7 +104,7 @@ const generateTransformPutRequest = ({
(acc, id) => ({
...acc,
[`entity.identity.${id.field}`]: {
terms: { field: id.field, missing_bucket: id.optional },
terms: { field: id.field },
},
}),
{}

View file

@ -5,7 +5,11 @@
* 2.0.
*/
import { SavedObjectModelDataBackfillFn } from '@kbn/core-saved-objects-server';
import {
SavedObjectModelDataBackfillFn,
SavedObjectModelTransformationDoc,
SavedObjectModelUnsafeTransformFn,
} from '@kbn/core-saved-objects-server';
import { SavedObject, SavedObjectsType } from '@kbn/core/server';
import { EntityDefinition } from '@kbn/entities-schema';
import {
@ -35,6 +39,20 @@ export const backfillInstalledComponents: SavedObjectModelDataBackfillFn<
return savedObject;
};
/**
 * Saved object transform that drops every identity field not explicitly
 * marked as required, i.e. keeps only entries whose `optional` flag is
 * strictly `false`.
 *
 * The incoming saved object is updated in place and handed back as the
 * transformed document.
 *
 * @param savedObject the stored entity definition to rewrite
 * @returns an object whose `document` property is the rewritten saved object
 */
const removeOptionalIdentityFields: SavedObjectModelUnsafeTransformFn<
  EntityDefinition,
  EntityDefinition
> = (savedObject) => {
  const { attributes } = savedObject;

  // Caveat: only the identity fields are touched here; a displayNameTemplate
  // that still references a removed field may break.
  const requiredIdentityFields = attributes.identityFields.filter(
    ({ optional }) => optional === false
  );
  attributes.identityFields = requiredIdentityFields;

  const document = savedObject as SavedObjectModelTransformationDoc<EntityDefinition>;
  return { document };
};
export const entityDefinition: SavedObjectsType = {
name: SO_ENTITY_DEFINITION_TYPE,
hidden: false,
@ -97,5 +115,13 @@ export const entityDefinition: SavedObjectsType = {
},
],
},
'4': {
changes: [
{
type: 'unsafe_transform',
transformFn: removeOptionalIdentityFields,
},
],
},
},
};

View file

@ -169,6 +169,7 @@ export default function ({ getService }: FtrProviderContext) {
const parsedSample = entityLatestSchema.safeParse(sample.hits.hits[0]._source);
expect(parsedSample.success).to.be(true);
expect(parsedSample.data?.entity.id).to.be('admin-console');
});
it('should delete entities data when specified', async () => {