[8.6] [Security Solution][Alerts] improves performance of new terms multi fields (#145167) (#145697)

# Backport This will backport the following commits from `main` to `8.6`: - [[Security Solution][Alerts] improves performance of new terms multi fields (#145167)](https://github.com/elastic/kibana/pull/145167)  ### Questions ? Please refer to the [Backport tool documentation](https://github.com/sqren/backport)  Co-authored-by: Vitalii Dmyterko <92328789+vitaliidm@users.noreply.github.com>
2025-04-24 09:48:58 -04:00 · 2022-11-18 05:28:38 -05:00 · 2022-11-18 05:28:38 -05:00 · d9d8b15a90
commit d9d8b15a90
parent 423d5a58ed
6 changed files with 1800 additions and 21 deletions
--- a/x-pack/plugins/security_solution/server/lib/detection_engine/rule_types/new_terms/README.md
+++ b/x-pack/plugins/security_solution/server/lib/detection_engine/rule_types/new_terms/README.md
@ -27,4 +27,7 @@ The new terms rule type reuses the singleSearchAfter function which implements t
 ## Limitations and future enhancements

 - Value list exceptions are not supported at the moment. Commit ead04ce removes an experimental method I tried for evaluating value list exceptions.
- Runtime field supports only 100 emitted values. So for large arrays or combination of values greater than 100, results may not be exhaustive. This applies only to new terms with multiple fields
+- A runtime field supports only 100 emitted values, so for large arrays or combinations of values greater than 100, results may not be exhaustive. This applies only to new terms with multiple fields.
+  The following edge cases are possible:
+    - false negatives (an alert is not generated) when too many values are emitted for a document in the rule run window and the actual new values are therefore not evaluated.
+    - false positives (a wrong alert is generated) when too many values are emitted for a historical document and some old terms are therefore not evaluated against the values in new documents.
--- a/x-pack/plugins/security_solution/server/lib/detection_engine/rule_types/new_terms/create_new_terms_alert_type.ts
+++ b/x-pack/plugins/security_solution/server/lib/detection_engine/rule_types/new_terms/create_new_terms_alert_type.ts
@ -193,6 +193,11 @@ export const createNewTermsAlertType = (
        }
        const bucketsForField = searchResultWithAggs.aggregations.new_terms.buckets;
        const includeValues = transformBucketsToValues(params.newTermsFields, bucketsForField);
+        const newTermsRuntimeMappings = getNewTermsRuntimeMappings(
+          params.newTermsFields,
+          bucketsForField
+        );
+
        // PHASE 2: Take the page of results from Phase 1 and determine if each term exists in the history window.
        // The aggregation filters out buckets for terms that exist prior to `tuple.from`, so the buckets in the
        // response correspond to each new term.
@ -209,7 +214,7 @@ export const createNewTermsAlertType = (
          }),
          runtimeMappings: {
            ...runtimeMappings,
-            ...getNewTermsRuntimeMappings(params.newTermsFields),
+            ...newTermsRuntimeMappings,
          },
          searchAfterSortIds: undefined,
          index: inputIndex,
@ -255,7 +260,7 @@ export const createNewTermsAlertType = (
            }),
            runtimeMappings: {
              ...runtimeMappings,
-              ...getNewTermsRuntimeMappings(params.newTermsFields),
+              ...newTermsRuntimeMappings,
            },
            searchAfterSortIds: undefined,
            index: inputIndex,
--- a/x-pack/plugins/security_solution/server/lib/detection_engine/rule_types/new_terms/utils.test.ts
+++ b/x-pack/plugins/security_solution/server/lib/detection_engine/rule_types/new_terms/utils.test.ts
@ -12,6 +12,7 @@ import {
  getAggregationField,
  decodeMatchedValues,
  getNewTermsRuntimeMappings,
+  createFieldValuesMap,
  AGG_FIELD_NAME,
 } from './utils';

@ -190,22 +191,185 @@ describe('new terms utils', () => {

  describe('getNewTermsRuntimeMappings', () => {
    it('should not return runtime field if new terms fields is empty', () => {
-      expect(getNewTermsRuntimeMappings([])).toBeUndefined();
+      expect(getNewTermsRuntimeMappings([], [])).toBeUndefined();
    });
    it('should not return runtime field if new terms fields has only one field', () => {
-      expect(getNewTermsRuntimeMappings(['host.name'])).toBeUndefined();
+      expect(getNewTermsRuntimeMappings(['host.name'], [])).toBeUndefined();
    });

    it('should return runtime field if new terms fields has more than one field', () => {
-      const runtimeMappings = getNewTermsRuntimeMappings(['host.name', 'host.ip']);
+      const runtimeMappings = getNewTermsRuntimeMappings(
+        ['source.host', 'source.ip'],
+        [
+          {
+            key: {
+              'source.host': 'host-0',
+              'source.ip': '127.0.0.1',
+            },
+            doc_count: 1,
+          },
+          {
+            key: {
+              'source.host': 'host-1',
+              'source.ip': '127.0.0.1',
+            },
+            doc_count: 1,
+          },
+        ]
+      );

      expect(runtimeMappings?.[AGG_FIELD_NAME]).toMatchObject({
        type: 'keyword',
        script: {
-          params: { fields: ['host.name', 'host.ip'] },
+          params: {
+            fields: ['source.host', 'source.ip'],
+            values: {
+              'source.host': {
+                'host-0': true,
+                'host-1': true,
+              },
+              'source.ip': {
+                '127.0.0.1': true,
+              },
+            },
+          },
          source: expect.any(String),
        },
      });
    });
  });
 });
+
+describe('createFieldValuesMap', () => {
+  it('should return undefined if new terms fields has only one field', () => {
+    expect(
+      createFieldValuesMap(
+        ['host.name'],
+        [
+          {
+            key: {
+              'source.host': 'host-0',
+            },
+            doc_count: 1,
+          },
+          {
+            key: {
+              'source.host': 'host-1',
+            },
+            doc_count: 3,
+          },
+        ]
+      )
+    ).toBeUndefined();
+  });
+
+  it('should return values map if new terms fields has more than one field', () => {
+    expect(
+      createFieldValuesMap(
+        ['source.host', 'source.ip'],
+        [
+          {
+            key: {
+              'source.host': 'host-0',
+              'source.ip': '127.0.0.1',
+            },
+            doc_count: 1,
+          },
+          {
+            key: {
+              'source.host': 'host-1',
+              'source.ip': '127.0.0.1',
+            },
+            doc_count: 1,
+          },
+        ]
+      )
+    ).toEqual({
+      'source.host': {
+        'host-0': true,
+        'host-1': true,
+      },
+      'source.ip': {
+        '127.0.0.1': true,
+      },
+    });
+  });
+
+  it('should not put value in map if it is null', () => {
+    expect(
+      createFieldValuesMap(
+        ['source.host', 'source.ip'],
+        [
+          {
+            key: {
+              'source.host': 'host-1',
+              'source.ip': null,
+            },
+            doc_count: 1,
+          },
+        ]
+      )
+    ).toEqual({
+      'source.host': {
+        'host-1': true,
+      },
+      'source.ip': {},
+    });
+  });
+
+  it('should put value in map if it is a number', () => {
+    expect(
+      createFieldValuesMap(
+        ['source.host', 'source.id'],
+        [
+          {
+            key: {
+              'source.host': 'host-1',
+              'source.id': 100,
+            },
+            doc_count: 1,
+          },
+        ]
+      )
+    ).toEqual({
+      'source.host': {
+        'host-1': true,
+      },
+      'source.id': {
+        '100': true,
+      },
+    });
+  });
+
+  it('should put value in map if it is a boolean', () => {
+    expect(
+      createFieldValuesMap(
+        ['source.host', 'user.enabled'],
+        [
+          {
+            key: {
+              'source.host': 'host-1',
+              'user.enabled': true,
+            },
+            doc_count: 1,
+          },
+          {
+            key: {
+              'source.host': 'host-1',
+              'user.enabled': false,
+            },
+            doc_count: 1,
+          },
+        ]
+      )
+    ).toEqual({
+      'source.host': {
+        'host-1': true,
+      },
+      'user.enabled': {
+        true: true,
+        false: true,
+      },
+    });
+  });
+});
--- a/x-pack/plugins/security_solution/server/lib/detection_engine/rule_types/new_terms/utils.ts
+++ b/x-pack/plugins/security_solution/server/lib/detection_engine/rule_types/new_terms/utils.ts
@ -80,19 +80,55 @@ export const transformBucketsToValues = (
    );
 };

+/**
+ * Transforms the new terms fields and the values found in the composite buckets into an object of the form
+ * { [new_terms_field]: { [value1]: true, [value2]: true } }
+ * This gives a constant-time lookup of whether a value is present among the new terms.
+ * The object is passed to the Painless script used in the runtime field.
+ */
+export const createFieldValuesMap = (
+  newTermsFields: string[],
+  buckets: estypes.AggregationsCompositeBucket[]
+) => {
+  if (newTermsFields.length === 1) {
+    return undefined;
+  }
+
+  const valuesMap = newTermsFields.reduce<Record<string, Record<string, boolean>>>(
+    (acc, field) => ({ ...acc, [field]: {} }),
+    {}
+  );
+
+  buckets
+    .map((bucket) => bucket.key)
+    .forEach((bucket) => {
+      Object.entries(bucket).forEach(([key, value]) => {
+        if (value == null) {
+          return;
+        }
+        const strValue = typeof value !== 'string' ? value.toString() : value;
+        valuesMap[key][strValue] = true;
+      });
+    });
+
+  return valuesMap;
+};
+
 export const getNewTermsRuntimeMappings = (
-  newTermsFields: string[]
+  newTermsFields: string[],
+  buckets: estypes.AggregationsCompositeBucket[]
 ): undefined | { [AGG_FIELD_NAME]: estypes.MappingRuntimeField } => {
  // if new terms include only one field we don't use runtime mappings and don't stitch field buckets together
  if (newTermsFields.length <= 1) {
    return undefined;
  }

+  const values = createFieldValuesMap(newTermsFields, buckets);
  return {
    [AGG_FIELD_NAME]: {
      type: 'keyword',
      script: {
-        params: { fields: newTermsFields },
+        params: { fields: newTermsFields, values },
        source: `
          def stack = new Stack();
          // ES has limit in 100 values for runtime field, after this query will fail
@ -110,9 +146,14 @@ export const getNewTermsRuntimeMappings = (
                emit(line);
                emitLimit = emitLimit - 1;
              } else {
-                for (field in doc[params['fields'][index]]) {
+                def fieldName = params['fields'][index];
+                for (field in doc[fieldName]) {
+                    def fieldStr = String.valueOf(field);
+                    if (!params['values'][fieldName].containsKey(fieldStr)) {
+                      continue;
+                    }
                    def delimiter = index === 0 ? '' : '${DELIMITER}';
-                    def nextLine = line + delimiter + String.valueOf(field).encodeBase64();
+                    def nextLine = line + delimiter + fieldStr.encodeBase64();
          
                    stack.add([index + 1, nextLine])
                }
--- a/x-pack/test/detection_engine_api_integration/security_and_spaces/rule_execution_logic/mocks/new_terms.ts
+++ b/x-pack/test/detection_engine_api_integration/security_and_spaces/rule_execution_logic/mocks/new_terms.ts
--- a/x-pack/test/detection_engine_api_integration/security_and_spaces/rule_execution_logic/new_terms.ts
+++ b/x-pack/test/detection_engine_api_integration/security_and_spaces/rule_execution_logic/new_terms.ts
@ -28,6 +28,8 @@ import { FtrProviderContext } from '../../common/ftr_provider_context';
 import { previewRuleWithExceptionEntries } from '../../utils/preview_rule_with_exception_entries';
 import { deleteAllExceptions } from '../../../lists_api_integration/utils';

+import { largeArraysBuckets } from './mocks/new_terms';
+
 const removeRandomValuedProperties = (alert: DetectionAlert | undefined) => {
  if (!alert) {
    return undefined;
@ -608,12 +610,45 @@ export default ({ getService }: FtrProviderContext) => {
          query: { match: { id: 'first_doc' } },
          index: 'new_terms',
          fields: [AGG_FIELD_NAME],
-          runtimeMappings: getNewTermsRuntimeMappings(['host.name', 'host.ip']),
+          runtimeMappings: getNewTermsRuntimeMappings(
+            ['host.name', 'host.ip'],
+            [
+              {
+                key: {
+                  'host.name': 'host-0',
+                  'host.ip': '127.0.0.1',
+                },
+                doc_count: 1,
+              },
+            ]
+          ),
        });

        expect(hits.hits[0].fields?.[AGG_FIELD_NAME]).to.eql(expectedEncodedValues);
      });

+      it('should not return runtime field created from 2 single values if its value is not in buckets', async () => {
+        const { hits } = await performSearchQuery({
+          es,
+          query: { match: { id: 'first_doc' } },
+          index: 'new_terms',
+          fields: [AGG_FIELD_NAME],
+          runtimeMappings: getNewTermsRuntimeMappings(
+            ['host.name', 'host.ip'],
+            [
+              {
+                key: {
+                  'host.name': 'host-0',
+                },
+                doc_count: 1,
+              },
+            ]
+          ),
+        });
+
+        expect(hits.hits[0].fields?.[AGG_FIELD_NAME]).to.be(undefined);
+      });
+
      it('should return runtime field created from 2 single values, including number value', async () => {
        // encoded base64 values of "user-0" and  0 joined with underscore
        const expectedEncodedValues = ['dXNlci0w_MA=='];
@ -622,7 +657,18 @@ export default ({ getService }: FtrProviderContext) => {
          query: { match: { id: 'first_doc' } },
          index: 'new_terms',
          fields: [AGG_FIELD_NAME],
-          runtimeMappings: getNewTermsRuntimeMappings(['user.name', 'user.id']),
+          runtimeMappings: getNewTermsRuntimeMappings(
+            ['user.name', 'user.id'],
+            [
+              {
+                key: {
+                  'user.name': 'user-0',
+                  'user.id': 0,
+                },
+                doc_count: 1,
+              },
+            ]
+          ),
        });

        expect(hits.hits[0].fields?.[AGG_FIELD_NAME]).to.eql(expectedEncodedValues);
@ -636,7 +682,18 @@ export default ({ getService }: FtrProviderContext) => {
          query: { match: { id: 'first_doc' } },
          index: 'new_terms',
          fields: [AGG_FIELD_NAME],
-          runtimeMappings: getNewTermsRuntimeMappings(['user.name', 'user.enabled']),
+          runtimeMappings: getNewTermsRuntimeMappings(
+            ['user.name', 'user.enabled'],
+            [
+              {
+                key: {
+                  'user.name': 'user-0',
+                  'user.enabled': true,
+                },
+                doc_count: 1,
+              },
+            ]
+          ),
        });

        expect(hits.hits[0].fields?.[AGG_FIELD_NAME]).to.eql(expectedEncodedValues);
@ -650,7 +707,19 @@ export default ({ getService }: FtrProviderContext) => {
          query: { match: { id: 'first_doc' } },
          index: 'new_terms',
          fields: [AGG_FIELD_NAME],
-          runtimeMappings: getNewTermsRuntimeMappings(['host.name', 'host.ip', 'user.name']),
+          runtimeMappings: getNewTermsRuntimeMappings(
+            ['host.name', 'host.ip', 'user.name'],
+            [
+              {
+                key: {
+                  'host.name': 'host-0',
+                  'host.ip': '127.0.0.1',
+                  'user.name': 'user-0',
+                },
+                doc_count: 1,
+              },
+            ]
+          ),
        });

        expect(hits.hits[0].fields?.[AGG_FIELD_NAME]).to.eql(expectedEncodedValues);
@ -672,7 +741,53 @@ export default ({ getService }: FtrProviderContext) => {
          query: { match: { id: 'doc_with_source_ip_as_array' } },
          index: 'new_terms',
          fields: [AGG_FIELD_NAME],
-          runtimeMappings: getNewTermsRuntimeMappings(['source.ip', 'tags']),
+          runtimeMappings: getNewTermsRuntimeMappings(
+            ['source.ip', 'tags'],
+            [
+              {
+                key: {
+                  tags: 'tag-new-1',
+                  'source.ip': '192.168.1.1',
+                },
+                doc_count: 1,
+              },
+              {
+                key: {
+                  tags: 'tag-2',
+                  'source.ip': '192.168.1.1',
+                },
+                doc_count: 1,
+              },
+              {
+                key: {
+                  tags: 'tag-new-3',
+                  'source.ip': '192.168.1.1',
+                },
+                doc_count: 1,
+              },
+              {
+                key: {
+                  tags: 'tag-new-1',
+                  'source.ip': '192.168.1.2',
+                },
+                doc_count: 1,
+              },
+              {
+                key: {
+                  tags: 'tag-2',
+                  'source.ip': '192.168.1.2',
+                },
+                doc_count: 1,
+              },
+              {
+                key: {
+                  tags: 'tag-new-3',
+                  'source.ip': '192.168.1.2',
+                },
+                doc_count: 1,
+              },
+            ]
+          ),
        });

        expect(hits.hits[0].fields?.[AGG_FIELD_NAME]).to.eql(expectedEncodedValues);
@ -687,7 +802,25 @@ export default ({ getService }: FtrProviderContext) => {
          query: { match: { id: 'doc_with_duplicated_tags' } },
          index: 'new_terms',
          fields: [AGG_FIELD_NAME],
-          runtimeMappings: getNewTermsRuntimeMappings(['host.name', 'tags']),
+          runtimeMappings: getNewTermsRuntimeMappings(
+            ['host.name', 'tags'],
+            [
+              {
+                key: {
+                  tags: 'tag-1',
+                  'host.name': 'host-0',
+                },
+                doc_count: 1,
+              },
+              {
+                key: {
+                  tags: 'tag-2',
+                  'host.name': 'host-0',
+                },
+                doc_count: 1,
+              },
+            ]
+          ),
        });

        expect(hits.hits[0].fields?.[AGG_FIELD_NAME]).to.eql(expectedEncodedValues);
@ -699,7 +832,18 @@ export default ({ getService }: FtrProviderContext) => {
          query: { match: { id: 'doc_with_null_field' } },
          index: 'new_terms',
          fields: [AGG_FIELD_NAME, 'possibly_null_field', 'host.name'],
-          runtimeMappings: getNewTermsRuntimeMappings(['host.name', 'possibly_null_field']),
+          runtimeMappings: getNewTermsRuntimeMappings(
+            ['host.name', 'possibly_null_field'],
+            [
+              {
+                key: {
+                  'host.name': 'host-0',
+                  possibly_null_field: null,
+                },
+                doc_count: 1,
+              },
+            ]
+          ),
        });

        expect(hits.hits.length).to.be(1);
@ -708,13 +852,23 @@ export default ({ getService }: FtrProviderContext) => {
        expect(hits.hits[0].fields?.['host.name']).to.eql(['host-0']);
      });

-      it('should not return runtime field if one of fields is not defined', async () => {
+      it('should not return runtime field if one of fields is not defined in a document', async () => {
        const { hits } = await performSearchQuery({
          es,
          query: { match: { id: 'doc_without_large_arrays' } },
          index: 'new_terms',
          fields: [AGG_FIELD_NAME],
-          runtimeMappings: getNewTermsRuntimeMappings(['host.name', 'large_array_5']),
+          runtimeMappings: getNewTermsRuntimeMappings(
+            ['host.name', 'large_array_5'],
+            [
+              {
+                key: {
+                  'host.name': 'host-0',
+                },
+                doc_count: 1,
+              },
+            ]
+          ),
        });

        expect(hits.hits.length).to.be(1);
@ -729,7 +883,10 @@ export default ({ getService }: FtrProviderContext) => {
          query: { match: { id: 'first_doc' } },
          index: 'new_terms',
          fields: [AGG_FIELD_NAME],
-          runtimeMappings: getNewTermsRuntimeMappings(['large_array_20', 'large_array_10']),
+          runtimeMappings: getNewTermsRuntimeMappings(
+            ['large_array_20', 'large_array_10'],
+            largeArraysBuckets
+          ),
        });

        // runtime field should have 100 values, as large_array_20 and large_array_10