[8.11] [ML] AIOps: Functional/API integration tests for text field support for log rate analysis (#168177) (#168516)

# Backport

This will backport the following commits from `main` to `8.11`:
- [[ML] AIOps: Functional/API integration tests for text field support
for log rate analysis
(#168177)](https://github.com/elastic/kibana/pull/168177)

<!--- Backport version: 8.9.7 -->

### Questions?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Walter
Rafelsberger","email":"walter.rafelsberger@elastic.co"},"sourceCommit":{"committedDate":"2023-10-10T17:24:08Z","message":"[ML]
AIOps: Functional/API integration tests for text field support for log
rate analysis (#168177)\n\nThis updates the artificial dataset generator
for log rate analysis to\r\nallow to create variants including text
fields.\r\nThe artificial dataset is now used for 4 variants of
functional and API\r\nintegration tests: Testing spike and dip with both
with and without a\r\ntext field.\r\n\r\nThe new tests surfaced some
issues that were fixed as part of this PR:\r\n\r\n- Getting the counts
of log patterns in combination with individual\r\nsignificant terms
ended up with to granular groups. This PR adds\r\nadditional queries to
get counts for log patterns in combination with\r\nitem sets already
derived from significant terms.\r\n- The `support` value is returned by
the frequent item sets agg and is\r\nused as a threshold whether to
include an item set for grouping. This\r\nwas missing from significant
log patterns and is fixed by this PR.\r\n- Adds a check to not get
frequent item sets for log patterns if there\r\nare no significant
terms.\r\n- The way we fetched log patterns using a time filter that
spans the\r\nwhole of the baseline start to the deviation end caused
problems with\r\nanalysing dips. This PR updates those queries to only
fetch the actual\r\nbaseline and deviation time range.\r\n- The
integration tests caught an issue where we'd still fetch
the\r\nhistogram for log patterns even if we'd request grouping
information\r\nonly.","sha":"9259f4836e12ab5ddd2220f1523d68e98944cad8","branchLabelMapping":{"^v8.12.0$":"main","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["bug",":ml","release_note:skip","Feature:ML/AIOps","v8.11.0","v8.12.0"],"number":168177,"url":"https://github.com/elastic/kibana/pull/168177","mergeCommit":{"message":"[ML]
AIOps: Functional/API integration tests for text field support for log
rate analysis (#168177)\n\nThis updates the artificial dataset generator
for log rate analysis to\r\nallow to create variants including text
fields.\r\nThe artificial dataset is now used for 4 variants of
functional and API\r\nintegration tests: Testing spike and dip with both
with and without a\r\ntext field.\r\n\r\nThe new tests surfaced some
issues that were fixed as part of this PR:\r\n\r\n- Getting the counts
of log patterns in combination with individual\r\nsignificant terms
ended up with to granular groups. This PR adds\r\nadditional queries to
get counts for log patterns in combination with\r\nitem sets already
derived from significant terms.\r\n- The `support` value is returned by
the frequent item sets agg and is\r\nused as a threshold whether to
include an item set for grouping. This\r\nwas missing from significant
log patterns and is fixed by this PR.\r\n- Adds a check to not get
frequent item sets for log patterns if there\r\nare no significant
terms.\r\n- The way we fetched log patterns using a time filter that
spans the\r\nwhole of the baseline start to the deviation end caused
problems with\r\nanalysing dips. This PR updates those queries to only
fetch the actual\r\nbaseline and deviation time range.\r\n- The
integration tests caught an issue where we'd still fetch
the\r\nhistogram for log patterns even if we'd request grouping
information\r\nonly.","sha":"9259f4836e12ab5ddd2220f1523d68e98944cad8"}},"sourceBranch":"main","suggestedTargetBranches":["8.11"],"targetPullRequestStates":[{"branch":"8.11","label":"v8.11.0","labelRegex":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"},{"branch":"main","label":"v8.12.0","labelRegex":"^v8.12.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/168177","number":168177,"mergeCommit":{"message":"[ML]
AIOps: Functional/API integration tests for text field support for log
rate analysis (#168177)\n\nThis updates the artificial dataset generator
for log rate analysis to\r\nallow to create variants including text
fields.\r\nThe artificial dataset is now used for 4 variants of
functional and API\r\nintegration tests: Testing spike and dip with both
with and without a\r\ntext field.\r\n\r\nThe new tests surfaced some
issues that were fixed as part of this PR:\r\n\r\n- Getting the counts
of log patterns in combination with individual\r\nsignificant terms
ended up with to granular groups. This PR adds\r\nadditional queries to
get counts for log patterns in combination with\r\nitem sets already
derived from significant terms.\r\n- The `support` value is returned by
the frequent item sets agg and is\r\nused as a threshold whether to
include an item set for grouping. This\r\nwas missing from significant
log patterns and is fixed by this PR.\r\n- Adds a check to not get
frequent item sets for log patterns if there\r\nare no significant
terms.\r\n- The way we fetched log patterns using a time filter that
spans the\r\nwhole of the baseline start to the deviation end caused
problems with\r\nanalysing dips. This PR updates those queries to only
fetch the actual\r\nbaseline and deviation time range.\r\n- The
integration tests caught an issue where we'd still fetch
the\r\nhistogram for log patterns even if we'd request grouping
information\r\nonly.","sha":"9259f4836e12ab5ddd2220f1523d68e98944cad8"}}]}]
BACKPORT-->
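The `support` threshold fix is small but easy to miss in the diffs below, so here is a minimal TypeScript sketch of the idea. The `ItemSet` shape mirrors the interface introduced in this PR's types change; the function is an illustration under those assumptions, not the exact implementation.

```ts
// Mirrors the `ItemSet` interface introduced in this PR (see the types diff below).
interface ItemSet {
  set: Record<string, string>;
  size: number;
  maxPValue: number;
  doc_count: number;
  support: number;
  total_doc_count: number;
}

// Matches LOG_RATE_ANALYSIS_SETTINGS.FREQUENT_ITEMS_SETS_MINIMUM_SUPPORT below.
const MINIMUM_SUPPORT = 0.001;

// Replicates how the `frequent_item_sets` aggregation calculates support:
// the number of documents containing the item set divided by the total
// number of documents. Item sets at or below the threshold are excluded from
// grouping, which is the behavior this PR adds for significant log patterns.
function applySupportThreshold(itemSets: ItemSet[]): ItemSet[] {
  return itemSets
    .map((itemSet) => ({
      ...itemSet,
      support:
        itemSet.total_doc_count > 0
          ? itemSet.doc_count / itemSet.total_doc_count
          : itemSet.support,
    }))
    .filter((d) => d.doc_count > 0 && d.support > MINIMUM_SUPPORT);
}
```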

Co-authored-by: Walter Rafelsberger <walter.rafelsberger@elastic.co>
Committed by Kibana Machine on 2023-10-10 14:41:13 -04:00 via GitHub. Commit: `a88e7d2dc8` (parent: `abb04cd107`).
25 changed files with 603 additions and 142 deletions

@@ -5,9 +5,9 @@
* 2.0.
*/
import type { ItemsetResult } from '../../types';
import type { ItemSet } from '../../types';
export const filteredFrequentItemSets: ItemsetResult[] = [
export const filteredFrequentItemSets: ItemSet[] = [
{
set: { response_code: '500', url: 'home.php' },
size: 2,

@@ -0,0 +1,129 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { SignificantTermGroup } from '@kbn/ml-agg-utils';
export const finalSignificantTermGroupsTextfield: SignificantTermGroup[] = [
{
docCount: 636,
group: [
{
docCount: 792,
duplicate: 2,
fieldName: 'url',
fieldValue: 'home.php',
key: 'url:home.php',
pValue: 0.00974308761016614,
type: 'keyword',
},
{
docCount: 636,
duplicate: 2,
fieldName: 'user',
fieldValue: 'Peter',
key: 'user:Peter',
pValue: 0.00974308761016614,
type: 'keyword',
},
],
id: '2091742187',
pValue: 0.00974308761016614,
},
{
docCount: 634,
group: [
{
docCount: 1266,
duplicate: 2,
fieldName: 'response_code',
fieldValue: '500',
key: 'response_code:500',
pValue: 0.012783309213417932,
type: 'keyword',
},
{
docCount: 792,
duplicate: 2,
fieldName: 'url',
fieldValue: 'home.php',
key: 'url:home.php',
pValue: 0.00974308761016614,
type: 'keyword',
},
{
docCount: 634,
duplicate: 2,
fieldName: 'message',
fieldValue: 'an unexpected error occured',
key: 'an unexpected error occured',
pValue: 0.00974308761016614,
type: 'log_pattern',
},
],
id: '1528268618',
pValue: 0.00974308761016614,
},
{
docCount: 632,
group: [
{
docCount: 1266,
duplicate: 2,
fieldName: 'response_code',
fieldValue: '500',
key: 'response_code:500',
pValue: 0.012783309213417932,
type: 'keyword',
},
{
docCount: 790,
duplicate: 2,
fieldName: 'url',
fieldValue: 'login.php',
key: 'url:login.php',
pValue: 0.012783309213417932,
type: 'keyword',
},
{
docCount: 632,
duplicate: 2,
fieldName: 'message',
fieldValue: 'an unexpected error occured',
key: 'an unexpected error occured',
pValue: 0.012783309213417932,
type: 'log_pattern',
},
],
id: '2619569380',
pValue: 0.012783309213417932,
},
{
docCount: 632,
group: [
{
docCount: 790,
duplicate: 2,
fieldName: 'url',
fieldValue: 'login.php',
key: 'url:login.php',
pValue: 0.012783309213417932,
type: 'keyword',
},
{
docCount: 632,
duplicate: 2,
fieldName: 'user',
fieldValue: 'Peter',
key: 'user:Peter',
pValue: 0.012783309213417932,
type: 'keyword',
},
],
id: '1937394803',
pValue: 0.012783309213417932,
},
];

@@ -5,9 +5,9 @@
* 2.0.
*/
import type { ItemsetResult } from '../../types';
import type { ItemSet } from '../../types';
export const frequentItemSets: ItemsetResult[] = [
export const frequentItemSets: ItemSet[] = [
{
set: { response_code: '500', url: 'home.php' },
size: 2,

@@ -0,0 +1,24 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { SignificantTerm } from '@kbn/ml-agg-utils';
export const significantLogPatterns: SignificantTerm[] = [
{
bg_count: 0,
doc_count: 1266,
fieldName: 'message',
fieldValue: 'an unexpected error occured',
key: 'an unexpected error occured',
normalizedScore: 0,
pValue: 0.000001,
score: -13.815510557964274,
total_bg_count: 1975,
total_doc_count: 4669,
type: 'log_pattern',
},
];

@@ -5,10 +5,16 @@
* 2.0.
*/
/**
* The p-value threshold to be used for statistically significant items.
*/
export const LOG_RATE_ANALYSIS_P_VALUE_THRESHOLD = 0.02;
export const LOG_RATE_ANALYSIS_SETTINGS = {
/**
* The p-value threshold to be used for statistically significant items.
*/
P_VALUE_THRESHOLD: 0.02,
/**
* The minimum support value to be used for the frequent item sets aggregation.
*/
FREQUENT_ITEMS_SETS_MINIMUM_SUPPORT: 0.001,
} as const;
/**
* For the technical preview of Log Rate Analysis we use a hard coded seed.

@@ -14,7 +14,7 @@ export interface SignificantTermDuplicateGroup {
export type FieldValuePairCounts = Record<string, Record<string, number>>;
export interface ItemsetResult {
export interface ItemSet {
set: Record<FieldValuePair['fieldName'], FieldValuePair['fieldValue']>;
size: number;
maxPValue: number;
@@ -23,6 +23,12 @@ export interface ItemsetResult {
total_doc_count: number;
}
export interface FetchFrequentItemSetsResponse {
fields: string[];
itemSets: ItemSet[];
totalDocCount: number;
}
interface SimpleHierarchicalTreeNodeSet extends FieldValuePair {
key: string;
type: SignificantTermType;

@@ -506,7 +506,7 @@ export const defineLogRateAnalysisRoute = (
);
try {
const { fields, df } = await fetchFrequentItemSets(
const { fields, itemSets } = await fetchFrequentItemSets(
client,
request.body.index,
JSON.parse(request.body.searchQuery) as estypes.QueryDslQueryContainer,
@@ -520,23 +520,26 @@ export const defineLogRateAnalysisRoute = (
abortSignal
);
if (significantCategories.length > 0) {
const { fields: significantCategoriesFields, df: significantCategoriesDf } =
await fetchTerms2CategoriesCounts(
client,
request.body,
JSON.parse(request.body.searchQuery) as estypes.QueryDslQueryContainer,
significantTerms,
significantCategories,
request.body.deviationMin,
request.body.deviationMax,
logger,
pushError,
abortSignal
);
if (significantCategories.length > 0 && significantTerms.length > 0) {
const {
fields: significantCategoriesFields,
itemSets: significantCategoriesItemSets,
} = await fetchTerms2CategoriesCounts(
client,
request.body,
JSON.parse(request.body.searchQuery) as estypes.QueryDslQueryContainer,
significantTerms,
itemSets,
significantCategories,
request.body.deviationMin,
request.body.deviationMax,
logger,
pushError,
abortSignal
);
fields.push(...significantCategoriesFields);
df.push(...significantCategoriesDf);
itemSets.push(...significantCategoriesItemSets);
}
if (shouldStop) {
@@ -545,9 +548,9 @@ export const defineLogRateAnalysisRoute = (
return;
}
if (fields.length > 0 && df.length > 0) {
if (fields.length > 0 && itemSets.length > 0) {
const significantTermGroups = getSignificantTermGroups(
df,
itemSets,
[...significantTerms, ...significantCategories],
fields
);
@@ -757,7 +760,11 @@ export const defineLogRateAnalysisRoute = (
}
// histograms for text field patterns
if (overallTimeSeries !== undefined && significantCategories.length > 0) {
if (
overallTimeSeries !== undefined &&
significantCategories.length > 0 &&
!request.body.overrides?.regroupOnly
) {
const significantCategoriesHistogramQueries = significantCategories.map((d) => {
const histogramQuery = getHistogramQuery(request.body);
const categoryQuery = getCategoryQuery(d.fieldName, [

@@ -33,11 +33,14 @@ export const getCategoryRequest = (
fieldName: string,
from: number | undefined,
to: number | undefined,
filter: estypes.QueryDslQueryContainer,
{ wrap }: RandomSamplerWrapper
): estypes.SearchRequest => {
const { index, timeFieldName } = params;
const query = getQueryWithParams({
params,
termFilters: undefined,
filter,
});
const { params: request } = createCategoryRequest(
index,
@@ -63,6 +66,7 @@ export const fetchCategories = async (
fieldNames: string[],
from: number | undefined,
to: number | undefined,
filter: estypes.QueryDslQueryContainer,
logger: Logger,
// The default value of 1 means no sampling will be used
sampleProbability: number = 1,
@@ -78,7 +82,7 @@
const settledPromises = await Promise.allSettled(
fieldNames.map((fieldName) => {
const request = getCategoryRequest(params, fieldName, from, to, randomSamplerWrapper);
const request = getCategoryRequest(params, fieldName, from, to, filter, randomSamplerWrapper);
return esClient.search(request, {
signal: abortSignal,
maxRetries: 0,

@@ -15,8 +15,12 @@ import type { Logger } from '@kbn/logging';
import { type SignificantTerm } from '@kbn/ml-agg-utils';
import { createRandomSamplerWrapper } from '@kbn/ml-random-sampler-utils';
import { RANDOM_SAMPLER_SEED } from '../../../common/constants';
import type { SignificantTermDuplicateGroup, ItemsetResult } from '../../../common/types';
import { RANDOM_SAMPLER_SEED, LOG_RATE_ANALYSIS_SETTINGS } from '../../../common/constants';
import type {
SignificantTermDuplicateGroup,
ItemSet,
FetchFrequentItemSetsResponse,
} from '../../../common/types';
interface FrequentItemSetsAggregation extends estypes.AggregationsSamplerAggregation {
fi: {
@@ -74,7 +78,7 @@ export async function fetchFrequentItemSets(
sampleProbability: number = 1,
emitError: (m: string) => void,
abortSignal?: AbortSignal
) {
): Promise<FetchFrequentItemSetsResponse> {
// Sort significant terms by ascending p-value, necessary to apply the field limit correctly.
const sortedSignificantTerms = significantTerms.slice().sort((a, b) => {
return (a.pValue ?? 0) - (b.pValue ?? 0);
@@ -103,7 +107,7 @@
frequent_item_sets: {
minimum_set_size: 2,
size: 200,
minimum_support: 0.001,
minimum_support: LOG_RATE_ANALYSIS_SETTINGS.FREQUENT_ITEMS_SETS_MINIMUM_SUPPORT,
fields: getFrequentItemSetsAggFields(sortedSignificantTerms),
},
},
@@ -138,7 +142,7 @@
emitError(`Failed to fetch frequent_item_sets.`);
return {
fields: [],
df: [],
itemSets: [],
totalDocCount: 0,
};
}
@@ -158,10 +162,10 @@
const fiss = frequentItemSets.fi.buckets;
fiss.length = maximum;
const results: ItemsetResult[] = [];
const results: ItemSet[] = [];
fiss.forEach((fis) => {
const result: ItemsetResult = {
const result: ItemSet = {
set: {},
size: 0,
maxPValue: 0,
@@ -203,7 +207,7 @@
return {
fields: uniqueFields,
df: results,
itemSets: results,
totalDocCount: totalDocCountFi,
};
}
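For context on the hunk above, which replaces the hard coded `minimum_support: 0.001` with the shared `LOG_RATE_ANALYSIS_SETTINGS` constant: below is a sketch of the `frequent_item_sets` aggregation body this value feeds into. The field list is illustrative only; the real code derives it from the significant terms sorted by ascending p-value via `getFrequentItemSetsAggFields`.

```ts
// Sketch of the frequent item sets aggregation body used above. The field
// names are examples from the artificial dataset, not the dynamically
// derived list returned by getFrequentItemSetsAggFields.
const aggs = {
  fi: {
    frequent_item_sets: {
      minimum_set_size: 2,
      size: 200,
      // LOG_RATE_ANALYSIS_SETTINGS.FREQUENT_ITEMS_SETS_MINIMUM_SUPPORT
      minimum_support: 0.001,
      fields: [{ field: 'response_code' }, { field: 'url' }, { field: 'user' }],
    },
  },
};
```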

@@ -14,7 +14,7 @@ import { type SignificantTerm, SIGNIFICANT_TERM_TYPE } from '@kbn/ml-agg-utils';
import type { Category } from '../../../common/api/log_categorization/types';
import type { AiopsLogRateAnalysisSchema } from '../../../common/api/log_rate_analysis';
import { LOG_RATE_ANALYSIS_P_VALUE_THRESHOLD } from '../../../common/constants';
import { LOG_RATE_ANALYSIS_SETTINGS } from '../../../common/constants';
import { fetchCategories } from './fetch_categories';
import { fetchCategoryCounts } from './fetch_category_counts';
@@ -42,16 +42,39 @@
emitError: (m: string) => void,
abortSignal?: AbortSignal
) => {
// To make sure we have the same categories for both baseline and deviation,
// we do an initial query that spans across baseline start and deviation end.
// We could update this to query the exact baseline AND deviation range, but
// wanted to avoid the refactor here and it should be good enough for a start.
// Filter that includes docs from both the baseline and deviation time range.
const baselineOrDeviationFilter = {
bool: {
should: [
{
range: {
[params.timeFieldName]: {
gte: params.baselineMin,
lte: params.baselineMax,
format: 'epoch_millis',
},
},
},
{
range: {
[params.timeFieldName]: {
gte: params.deviationMin,
lte: params.deviationMax,
format: 'epoch_millis',
},
},
},
],
},
};
const categoriesOverall = await fetchCategories(
esClient,
params,
fieldNames,
params.baselineMin,
params.deviationMax,
undefined,
undefined,
baselineOrDeviationFilter,
logger,
sampleProbability,
emitError,
@@ -117,7 +140,7 @@
const pValue = criticalTableLookup(chiSquared, 1);
const score = Math.log(pValue);
if (pValue <= LOG_RATE_ANALYSIS_P_VALUE_THRESHOLD && observed > expected) {
if (pValue <= LOG_RATE_ANALYSIS_SETTINGS.P_VALUE_THRESHOLD && observed > expected) {
significantCategories.push({
key,
fieldName,
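The hunk above builds the baseline-or-deviation filter inline. As a self-contained sketch of the same idea, factored into a helper (the parameter names are assumed for illustration; the bounds are epoch milliseconds as in the PR):

```ts
import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';

// Matches docs in either the baseline or the deviation time range, instead
// of one wide range from baseline start to deviation end. Querying the wide
// range was what caused problems when analyzing dips.
function getBaselineOrDeviationFilter(
  timeFieldName: string,
  baselineMin: number,
  baselineMax: number,
  deviationMin: number,
  deviationMax: number
): estypes.QueryDslQueryContainer {
  return {
    bool: {
      should: [
        {
          range: {
            [timeFieldName]: { gte: baselineMin, lte: baselineMax, format: 'epoch_millis' },
          },
        },
        {
          range: {
            [timeFieldName]: { gte: deviationMin, lte: deviationMax, format: 'epoch_millis' },
          },
        },
      ],
    },
  };
}
```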

@@ -15,10 +15,7 @@ import {
type RandomSamplerWrapper,
} from '@kbn/ml-random-sampler-utils';
import {
LOG_RATE_ANALYSIS_P_VALUE_THRESHOLD,
RANDOM_SAMPLER_SEED,
} from '../../../common/constants';
import { LOG_RATE_ANALYSIS_SETTINGS, RANDOM_SAMPLER_SEED } from '../../../common/constants';
import type { AiopsLogRateAnalysisSchema } from '../../../common/api/log_rate_analysis';
import { isRequestAbortedError } from '../../lib/is_request_aborted_error';
@@ -168,7 +165,7 @@ export const fetchSignificantTermPValues = async (
for (const bucket of overallResult.buckets) {
const pValue = Math.exp(-bucket.score);
if (typeof pValue === 'number' && pValue < LOG_RATE_ANALYSIS_P_VALUE_THRESHOLD) {
if (typeof pValue === 'number' && pValue < LOG_RATE_ANALYSIS_SETTINGS.P_VALUE_THRESHOLD) {
result.push({
key: `${fieldName}:${String(bucket.key)}`,
type: SIGNIFICANT_TERM_TYPE.KEYWORD,

@@ -11,13 +11,14 @@ import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import type { ElasticsearchClient } from '@kbn/core-elasticsearch-server';
import type { Logger } from '@kbn/logging';
import { type SignificantTerm } from '@kbn/ml-agg-utils';
import type { FieldValuePair, SignificantTerm } from '@kbn/ml-agg-utils';
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
import type { AiopsLogRateAnalysisSchema } from '../../../common/api/log_rate_analysis';
import type { ItemsetResult } from '../../../common/types';
import type { FetchFrequentItemSetsResponse, ItemSet } from '../../../common/types';
import { getCategoryQuery } from '../../../common/api/log_categorization/get_category_query';
import type { Category } from '../../../common/api/log_categorization/types';
import { LOG_RATE_ANALYSIS_SETTINGS } from '../../../common/constants';
import { isRequestAbortedError } from '../../lib/is_request_aborted_error';
@@ -26,9 +27,9 @@ import { getQueryWithParams } from './get_query_with_params';
const isMsearchResponseItem = (arg: unknown): arg is estypes.MsearchMultiSearchItem =>
isPopulatedObject(arg, ['hits']);
export const getTerm2CategoryCountRequest = (
const getTerm2CategoryCountRequest = (
params: AiopsLogRateAnalysisSchema,
significantTerm: SignificantTerm,
fieldValuePairs: FieldValuePair[],
categoryFieldName: string,
category: Category,
from: number | undefined,
@@ -41,7 +42,9 @@ export const getTerm2CategoryCountRequest = (
const categoryQuery = getCategoryQuery(categoryFieldName, [category]);
if (Array.isArray(query.bool?.filter)) {
query.bool?.filter?.push({ term: { [significantTerm.fieldName]: significantTerm.fieldValue } });
for (const { fieldName, fieldValue } of fieldValuePairs) {
query.bool?.filter?.push({ term: { [fieldName]: fieldValue } });
}
query.bool?.filter?.push(categoryQuery);
query.bool?.filter?.push({
range: {
@@ -66,28 +69,29 @@ export async function fetchTerms2CategoriesCounts(
params: AiopsLogRateAnalysisSchema,
searchQuery: estypes.QueryDslQueryContainer,
significantTerms: SignificantTerm[],
itemSets: ItemSet[],
significantCategories: SignificantTerm[],
from: number,
to: number,
logger: Logger,
emitError: (m: string) => void,
abortSignal?: AbortSignal
) {
): Promise<FetchFrequentItemSetsResponse> {
const searches: Array<
| estypes.MsearchMultisearchBody
| {
index: string;
}
> = [];
const results: ItemsetResult[] = [];
const results: ItemSet[] = [];
significantTerms.forEach((term) => {
significantCategories.forEach((category) => {
significantCategories.forEach((category) => {
significantTerms.forEach((term) => {
searches.push({ index: params.index });
searches.push(
getTerm2CategoryCountRequest(
params,
term,
[{ fieldName: term.fieldName, fieldValue: term.fieldValue }],
category.fieldName,
{ key: `${category.key}`, count: category.doc_count, examples: [] },
from,
@@ -102,8 +106,36 @@
size: 2,
maxPValue: Math.max(term.pValue ?? 1, category.pValue ?? 1),
doc_count: 0,
support: 1,
total_doc_count: 0,
support: 0,
total_doc_count: Math.max(term.total_doc_count, category.total_doc_count),
});
});
itemSets.forEach((itemSet) => {
searches.push({ index: params.index });
searches.push(
getTerm2CategoryCountRequest(
params,
Object.entries(itemSet.set).map(([fieldName, fieldValue]) => ({
fieldName,
fieldValue,
})),
category.fieldName,
{ key: `${category.key}`, count: category.doc_count, examples: [] },
from,
to
) as estypes.MsearchMultisearchBody
);
results.push({
set: {
...itemSet.set,
[category.fieldName]: category.fieldValue,
},
size: Object.keys(itemSet.set).length + 1,
maxPValue: Math.max(itemSet.maxPValue ?? 1, category.pValue ?? 1),
doc_count: 0,
support: 0,
total_doc_count: Math.max(itemSet.total_doc_count, category.total_doc_count),
});
});
});
@@ -127,7 +159,7 @@
}
return {
fields: [],
df: [],
itemSets: [],
totalDocCount: 0,
};
}
@@ -136,15 +168,25 @@
return {
fields: uniq(significantCategories.map((c) => c.fieldName)),
df: results
itemSets: results
.map((result, i) => {
const resp = mSearchResponses[i];
if (isMsearchResponseItem(resp)) {
result.doc_count = (resp.hits.total as estypes.SearchTotalHits).value ?? 0;
if (result.total_doc_count > 0) {
// Replicates how the `frequent_item_sets` aggregation calculates
// the support value by dividing the number of documents containing
// the item set by the total number of documents.
result.support = result.doc_count / result.total_doc_count;
}
}
return result;
})
.filter((d) => d.doc_count > 0),
.filter(
(d) =>
d.doc_count > 0 &&
d.support > LOG_RATE_ANALYSIS_SETTINGS.FREQUENT_ITEMS_SETS_MINIMUM_SUPPORT
),
totalDocCount: 0,
};
}

@@ -20,10 +20,12 @@ export const getTermsQuery = ({ fieldName, fieldValue }: FieldValuePair) => {
interface QueryParams {
params: AiopsLogRateAnalysisSchema;
termFilters?: FieldValuePair[];
filter?: estypes.QueryDslQueryContainer;
}
export const getQueryWithParams = ({
params,
termFilters,
filter,
}: QueryParams): estypes.QueryDslQueryContainer => {
const searchQuery = JSON.parse(params.searchQuery) as estypes.QueryDslQueryContainer;
return {
@@ -32,6 +34,7 @@ export const getQueryWithParams = ({
searchQuery,
...getFilters(params),
...(Array.isArray(termFilters) ? termFilters.map(getTermsQuery) : []),
...(filter ? [filter] : []),
] as estypes.QueryDslQueryContainer[],
},
};

@@ -15,10 +15,10 @@ import { getSimpleHierarchicalTree } from './get_simple_hierarchical_tree';
import { getSimpleHierarchicalTreeLeaves } from './get_simple_hierarchical_tree_leaves';
import { getMissingSignificantTerms } from './get_missing_significant_terms';
import { transformSignificantTermToGroup } from './transform_significant_term_to_group';
import type { ItemsetResult } from '../../../common/types';
import type { ItemSet } from '../../../common/types';
export function getSignificantTermGroups(
itemsets: ItemsetResult[],
itemsets: ItemSet[],
significantTerms: SignificantTerm[],
fields: string[]
): SignificantTermGroup[] {

@@ -7,7 +7,7 @@
import type { SignificantTerm } from '@kbn/ml-agg-utils';
import type { ItemsetResult, SimpleHierarchicalTreeNode } from '../../../common/types';
import type { ItemSet, SimpleHierarchicalTreeNode } from '../../../common/types';
import { getValueCounts } from './get_value_counts';
import { getValuesDescending } from './get_values_descending';
@@ -54,7 +54,7 @@
parentLabel: string,
field: string,
value: string,
iss: ItemsetResult[],
iss: ItemSet[],
collapseRedundant: boolean,
displayOther: boolean
) {
@@ -178,18 +178,18 @@
* By default (fields==None), the field search order is dependent on the highest count itemsets.
*/
export function getSimpleHierarchicalTree(
df: ItemsetResult[],
itemSets: ItemSet[],
collapseRedundant: boolean,
displayOther: boolean,
significantTerms: SignificantTerm[],
fields: string[] = []
) {
const totalDocCount = Math.max(...df.map((d) => d.total_doc_count));
const totalDocCount = Math.max(...itemSets.map((d) => d.total_doc_count));
const newRoot = NewNodeFactory('');
for (const field of fields) {
for (const value of getValuesDescending(df, field)) {
for (const value of getValuesDescending(itemSets, field)) {
dfDepthFirstSearch(
significantTerms,
fields,
@@ -198,7 +198,7 @@ export function getSimpleHierarchicalTree(
'',
field,
value,
df,
itemSets,
collapseRedundant,
displayOther
);

@@ -5,9 +5,9 @@
* 2.0.
*/
import type { ItemsetResult } from '../../../common/types';
import type { ItemSet } from '../../../common/types';
export function getValueCounts(df: ItemsetResult[], field: string) {
export function getValueCounts(df: ItemSet[], field: string) {
return df.reduce<Record<string, number>>((p, c) => {
if (c.set[field] === undefined) {
return p;

@@ -5,11 +5,11 @@
* 2.0.
*/
import type { ItemsetResult } from '../../../common/types';
import type { ItemSet } from '../../../common/types';
import { getValueCounts } from './get_value_counts';
export function getValuesDescending(df: ItemsetResult[], field: string): string[] {
export function getValuesDescending(df: ItemSet[], field: string): string[] {
const valueCounts = getValueCounts(df, field);
const keys = Object.keys(valueCounts);

@@ -63,11 +63,17 @@ export default ({ getService }: FtrProviderContext) => {
const addSignificantTermsActions = data.filter(
(d) => d.type === testData.expected.significantTermFilter
);
expect(addSignificantTermsActions.length).to.be(0);
expect(addSignificantTermsActions.length).to.eql(
0,
`Expected significant terms actions to be 0, got ${addSignificantTermsActions.length}`
);
const histogramActions = data.filter((d) => d.type === testData.expected.histogramFilter);
// for each significant term we should get a histogram
expect(histogramActions.length).to.be(0);
expect(histogramActions.length).to.eql(
0,
`Expected histogram actions to be 0, got ${histogramActions.length}`
);
const groupActions = data.filter((d) => d.type === testData.expected.groupFilter);
const groups = groupActions.flatMap((d) => d.payload);
@@ -188,21 +194,26 @@ export default ({ getService }: FtrProviderContext) => {
}
// If streaming works correctly we should receive more than one chunk.
expect(chunkCounter).to.be.greaterThan(1);
expect(chunkCounter).to.be.greaterThan(
1,
`Expected 'chunkCounter' to be greater than 1, got ${chunkCounter} with the following data: ${JSON.stringify(
data
)}.`
);
await assertAnalysisResult(data);
}
}
it('should return group only in chunks with streaming with compression with flushFix', async () => {
it('should return group only in chunks with streaming with compression with flushFix', async () => {
await requestWithStreaming({ ...testData.requestBody, overrides });
});
it('should return group only in chunks with streaming with compression without flushFix', async () => {
it('should return group only in chunks with streaming with compression without flushFix', async () => {
await requestWithStreaming({ ...testData.requestBody, overrides, flushFix: false });
});
it('should return group only in chunks with streaming without compression with flushFix', async () => {
it('should return group only in chunks with streaming without compression with flushFix', async () => {
await requestWithStreaming({
...testData.requestBody,
overrides,
@@ -210,7 +221,7 @@ export default ({ getService }: FtrProviderContext) => {
});
});
it('should return group only in chunks with streaming without compression without flushFix', async () => {
it('should return group only in chunks with streaming without compression without flushFix', async () => {
await requestWithStreaming({
...testData.requestBody,
overrides,

@@ -9,7 +9,9 @@
// This makes sure that, should the assertions for the integration tests need to be updated,
// the jest unit tests also use mocks that are not outdated.
import { significantTerms as artificialLogSignificantTerms } from '@kbn/aiops-plugin/common/__mocks__/artificial_logs/significant_terms';
import { significantLogPatterns as artificialLogSignificantLogPatterns } from '@kbn/aiops-plugin/common/__mocks__/artificial_logs/significant_log_patterns';
import { finalSignificantTermGroups as artificialLogsSignificantTermGroups } from '@kbn/aiops-plugin/common/__mocks__/artificial_logs/final_significant_term_groups';
import { finalSignificantTermGroupsTextfield as artificialLogsSignificantTermGroupsTextfield } from '@kbn/aiops-plugin/common/__mocks__/artificial_logs/final_significant_term_groups_textfield';
import type { TestData } from './types';
@@ -74,14 +76,14 @@ export const logRateAnalysisTestData: TestData[] = [
},
},
{
testName: 'artificial_logs_with_spike',
dataGenerator: 'artificial_logs_with_spike',
testName: 'artificial_logs_with_spike_notextfield',
dataGenerator: 'artificial_logs_with_spike_notextfield',
requestBody: {
start: 1668760018793,
end: 1668931954793,
searchQuery: '{"match_all":{}}',
timeFieldName: '@timestamp',
index: 'artificial_logs_with_spike',
index: 'artificial_logs_with_spike_notextfield',
baselineMin: 1668769200000,
baselineMax: 1668837600000,
deviationMin: 1668855600000,
@@ -105,4 +107,100 @@
histogramLength: 20,
},
},
{
testName: 'artificial_logs_with_spike_textfield',
dataGenerator: 'artificial_logs_with_spike_textfield',
requestBody: {
start: 1668760018793,
end: 1668931954793,
searchQuery: '{"match_all":{}}',
timeFieldName: '@timestamp',
index: 'artificial_logs_with_spike_textfield',
baselineMin: 1668769200000,
baselineMax: 1668837600000,
deviationMin: 1668855600000,
deviationMax: 1668924000000,
grouping: true,
},
expected: {
chunksLength: 30,
chunksLengthGroupOnly: 11,
actionsLength: 29,
actionsLengthGroupOnly: 10,
noIndexChunksLength: 4,
noIndexActionsLength: 3,
significantTermFilter: 'add_significant_terms',
groupFilter: 'add_significant_terms_group',
groupHistogramFilter: 'add_significant_terms_group_histogram',
histogramFilter: 'add_significant_terms_histogram',
errorFilter: 'add_error',
significantTerms: [...artificialLogSignificantTerms, ...artificialLogSignificantLogPatterns],
groups: artificialLogsSignificantTermGroupsTextfield,
histogramLength: 20,
},
},
{
testName: 'artificial_logs_with_dip_notextfield',
dataGenerator: 'artificial_logs_with_dip_notextfield',
requestBody: {
start: 1668760018793,
end: 1668931954793,
searchQuery: '{"match_all":{}}',
timeFieldName: '@timestamp',
index: 'artificial_logs_with_dip_notextfield',
baselineMin: 1668855600000,
baselineMax: 1668924000000,
deviationMin: 1668769200000,
deviationMax: 1668837600000,
grouping: true,
},
expected: {
chunksLength: 27,
chunksLengthGroupOnly: 11,
actionsLength: 26,
actionsLengthGroupOnly: 10,
noIndexChunksLength: 4,
noIndexActionsLength: 3,
significantTermFilter: 'add_significant_terms',
groupFilter: 'add_significant_terms_group',
groupHistogramFilter: 'add_significant_terms_group_histogram',
histogramFilter: 'add_significant_terms_histogram',
errorFilter: 'add_error',
significantTerms: artificialLogSignificantTerms,
groups: artificialLogsSignificantTermGroups,
histogramLength: 20,
},
},
{
testName: 'artificial_logs_with_dip_textfield',
dataGenerator: 'artificial_logs_with_dip_textfield',
requestBody: {
start: 1668760018793,
end: 1668931954793,
searchQuery: '{"match_all":{}}',
timeFieldName: '@timestamp',
index: 'artificial_logs_with_dip_textfield',
baselineMin: 1668855600000,
baselineMax: 1668924000000,
deviationMin: 1668769200000,
deviationMax: 1668837600000,
grouping: true,
},
expected: {
chunksLength: 30,
chunksLengthGroupOnly: 11,
actionsLength: 29,
actionsLengthGroupOnly: 10,
noIndexChunksLength: 4,
noIndexActionsLength: 3,
significantTermFilter: 'add_significant_terms',
groupFilter: 'add_significant_terms_group',
groupHistogramFilter: 'add_significant_terms_group_histogram',
histogramFilter: 'add_significant_terms_histogram',
errorFilter: 'add_error',
significantTerms: [...artificialLogSignificantTerms, ...artificialLogSignificantLogPatterns],
groups: artificialLogsSignificantTermGroupsTextfield,
histogramLength: 20,
},
},
];

@@ -8,10 +8,12 @@
import type { AiopsApiLogRateAnalysis } from '@kbn/aiops-plugin/common/api';
import type { SignificantTerm, SignificantTermGroup } from '@kbn/ml-agg-utils';
import type { LogRateAnalysisDataGenerator } from '../../../functional/services/aiops/log_rate_analysis_data_generator';
export interface TestData {
testName: string;
esArchive?: string;
dataGenerator?: string;
dataGenerator?: LogRateAnalysisDataGenerator;
requestBody: AiopsApiLogRateAnalysis['body'];
expected: {
chunksLength: number;

@@ -11,7 +11,7 @@ import expect from '@kbn/expect';
import type { FtrProviderContext } from '../../ftr_provider_context';
import { isTestDataExpectedWithSampleProbability, type TestData } from './types';
import { logRateAnalysisTestData } from './test_data';
import { logRateAnalysisTestData } from './log_rate_analysis_test_data';
export default function ({ getPageObjects, getService }: FtrProviderContext) {
const PageObjects = getPageObjects(['common', 'console', 'header', 'home', 'security']);

@@ -176,12 +176,19 @@ const DAY_MS = 86400000;
const DEVIATION_TS = REFERENCE_TS - DAY_MS * 2;
const BASELINE_TS = DEVIATION_TS - DAY_MS * 1;
const getArtificialLogDataViewTestData = (analysisType: LogRateAnalysisType): TestData => ({
suiteTitle: `artificial logs with ${analysisType}`,
const getArtificialLogDataViewTestData = (
analysisType: LogRateAnalysisType,
textField: boolean
): TestData => ({
suiteTitle: `artificial logs with ${analysisType} and ${
textField ? 'text field' : 'no text field'
}`,
analysisType,
dataGenerator: `artificial_logs_with_${analysisType}`,
dataGenerator: `artificial_logs_with_${analysisType}_${textField ? 'textfield' : 'notextfield'}`,
isSavedSearch: false,
sourceIndexOrSavedSearch: `artificial_logs_with_${analysisType}`,
sourceIndexOrSavedSearch: `artificial_logs_with_${analysisType}_${
textField ? 'textfield' : 'notextfield'
}`,
brushBaselineTargetTimestamp: BASELINE_TS + DAY_MS / 2,
brushDeviationTargetTimestamp: DEVIATION_TS + DAY_MS / 2,
brushIntervalFactor: 10,
@@ -191,14 +198,24 @@ const getArtificialLogDataViewTestData = (analysisType: LogRateAnalysisType): Te
expected: {
totalDocCountFormatted: '8,400',
analysisGroupsTable: [
{
group: 'response_code: 500url: home.php',
docCount: '792',
},
{
group: 'url: login.phpresponse_code: 500',
docCount: '790',
},
textField
? {
group: 'message: an unexpected error occuredurl: home.phpresponse_code: 500',
docCount: '634',
}
: {
group: 'response_code: 500url: home.php',
docCount: '792',
},
textField
? {
group: 'message: an unexpected error occuredurl: login.phpresponse_code: 500',
docCount: '632',
}
: {
group: 'url: login.phpresponse_code: 500',
docCount: '790',
},
{
docCount: '636',
group: 'user: Peterurl: home.php',
@@ -208,11 +225,40 @@ const getArtificialLogDataViewTestData = (analysisType: LogRateAnalysisType): Te
group: 'user: Peterurl: login.php',
},
],
filteredAnalysisGroupsTable: [
{ group: '* url: home.phpresponse_code: 500', docCount: '792' },
{ group: '* url: login.phpresponse_code: 500', docCount: '790' },
],
filteredAnalysisGroupsTable: textField
? [
{
group: '* url: home.phpmessage: an unexpected error occuredresponse_code: 500',
docCount: '634',
},
{
group: '* url: login.phpmessage: an unexpected error occuredresponse_code: 500',
docCount: '632',
},
]
: [
{ group: '* url: home.phpresponse_code: 500', docCount: '792' },
{ group: '* url: login.phpresponse_code: 500', docCount: '790' },
],
analysisTable: [
...(textField
? [
{
fieldName: 'message',
fieldValue: 'an unexpected error occured',
logRate: 'Chart type:bar chart',
pValue: '0.00000100',
impact: 'Medium',
},
{
fieldName: 'response_code',
fieldValue: '500',
logRate: 'Chart type:bar chart',
pValue: '3.61e-12',
impact: 'High',
},
]
: []),
{
fieldName: 'url',
fieldValue: 'home.php',
@@ -220,15 +266,19 @@ const getArtificialLogDataViewTestData = (analysisType: LogRateAnalysisType): Te
logRate: 'Chart type:bar chart',
pValue: '0.00974',
},
{
fieldName: 'user',
fieldValue: 'Peter',
impact: 'High',
logRate: 'Chart type:bar chart',
pValue: '2.63e-21',
},
...(textField
? []
: [
{
fieldName: 'user',
fieldValue: 'Peter',
impact: 'High',
logRate: 'Chart type:bar chart',
pValue: '2.63e-21',
},
]),
],
fieldSelectorPopover: ['response_code', 'url', 'user'],
fieldSelectorPopover: [...(textField ? ['message'] : []), 'response_code', 'url', 'user'],
},
});
@@ -236,6 +286,8 @@ export const logRateAnalysisTestData: TestData[] = [
kibanaLogsDataViewTestData,
farequoteDataViewTestData,
farequoteDataViewTestDataWithQuery,
getArtificialLogDataViewTestData(LOG_RATE_ANALYSIS_TYPE.SPIKE),
getArtificialLogDataViewTestData(LOG_RATE_ANALYSIS_TYPE.DIP),
getArtificialLogDataViewTestData(LOG_RATE_ANALYSIS_TYPE.SPIKE, false),
getArtificialLogDataViewTestData(LOG_RATE_ANALYSIS_TYPE.SPIKE, true),
getArtificialLogDataViewTestData(LOG_RATE_ANALYSIS_TYPE.DIP, false),
getArtificialLogDataViewTestData(LOG_RATE_ANALYSIS_TYPE.DIP, true),
];

@@ -8,6 +8,8 @@
import type { LogRateAnalysisType } from '@kbn/aiops-utils';
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
import { LogRateAnalysisDataGenerator } from '../../services/aiops/log_rate_analysis_data_generator';
interface TestDataTableActionLogPatternAnalysis {
type: 'LogPatternAnalysis';
tableRowId: string;
@@ -46,7 +48,7 @@ interface TestDataExpectedWithoutSampleProbability {
export interface TestData {
suiteTitle: string;
analysisType: LogRateAnalysisType;
dataGenerator: string;
dataGenerator: LogRateAnalysisDataGenerator;
isSavedSearch?: boolean;
sourceIndexOrSavedSearch: string;
rowsPerPage?: 10 | 25 | 50;

@@ -6,7 +6,7 @@
*/
import { FtrProviderContext } from '../../../ftr_provider_context';
import { farequoteDataViewTestDataWithQuery } from '../../aiops/test_data';
import { farequoteDataViewTestDataWithQuery } from '../../aiops/log_rate_analysis_test_data';
import { TestData } from '../../aiops/types';
export default function ({ getService, getPageObjects }: FtrProviderContext) {

@@ -11,6 +11,17 @@ import { LOG_RATE_ANALYSIS_TYPE } from '@kbn/aiops-utils';
import { FtrProviderContext } from '../../ftr_provider_context';
const LOG_RATE_ANALYSYS_DATA_GENERATOR = {
KIBANA_SAMPLE_DATA_LOGS: 'kibana_sample_data_logs',
FAREQUOTE_WITH_SPIKE: 'farequote_with_spike',
ARTIFICIAL_LOGS_WITH_SPIKE_NOTEXTFIELD: 'artificial_logs_with_spike_notextfield',
ARTIFICIAL_LOGS_WITH_SPIKE_TEXTFIELD: 'artificial_logs_with_spike_textfield',
ARTIFICIAL_LOGS_WITH_DIP_NOTEXTFIELD: 'artificial_logs_with_dip_notextfield',
ARTIFICIAL_LOGS_WITH_DIP_TEXTFIELD: 'artificial_logs_with_dip_textfield',
} as const;
export type LogRateAnalysisDataGenerator =
typeof LOG_RATE_ANALYSYS_DATA_GENERATOR[keyof typeof LOG_RATE_ANALYSYS_DATA_GENERATOR];
export interface GeneratedDoc {
user: string;
response_code: string;
@@ -18,6 +29,7 @@ export interface GeneratedDoc {
version: string;
'@timestamp': number;
should_ignore_this_field: string;
message?: string;
}
const REFERENCE_TS = 1669018354793;
@@ -26,7 +38,16 @@ const DAY_MS = 86400000;
const DEVIATION_TS = REFERENCE_TS - DAY_MS * 2;
const BASELINE_TS = DEVIATION_TS - DAY_MS * 1;
function getArtificialLogsWithDeviation(index: string, deviationType: string) {
function getMessage(timestamp: number, user: string, url: string, responseCode: string) {
const date = new Date(timestamp);
return `${user} [${date.toLocaleString('en-US')}] "GET /${url} HTTP/1.1" ${responseCode}`;
}
function getArtificialLogsWithDeviation(
index: string,
deviationType: string,
includeTextField = false
) {
const bulkBody: estypes.BulkRequest<GeneratedDoc, GeneratedDoc>['body'] = [];
const action = { index: { _index: index } };
let tsOffset = 0;
@@ -47,15 +68,20 @@ function getArtificialLogsWithDeviation(index: string, deviationType: string) {
tsOffset = 0;
[...Array(100)].forEach(() => {
tsOffset += Math.round(DAY_MS / 100);
const timestamp = ts + tsOffset;
const doc: GeneratedDoc = {
user,
response_code: responseCode,
url,
version: 'v1.0.0',
'@timestamp': ts + tsOffset,
'@timestamp': timestamp,
should_ignore_this_field: 'should_ignore_this_field',
};
if (includeTextField) {
doc.message = getMessage(timestamp, user, url, responseCode);
}
bulkBody.push(action);
bulkBody.push(doc);
});
@@ -77,17 +103,24 @@ function getArtificialLogsWithDeviation(index: string, deviationType: string) {
tsOffset = 0;
[...Array(docsPerUrl1[url])].forEach(() => {
tsOffset += Math.round(DAY_MS / docsPerUrl1[url]);
bulkBody.push(action);
bulkBody.push({
const timestamp =
(deviationType === LOG_RATE_ANALYSIS_TYPE.SPIKE ? DEVIATION_TS : BASELINE_TS) + tsOffset;
const doc: GeneratedDoc = {
user: 'Peter',
response_code: responseCode,
url,
version: 'v1.0.0',
'@timestamp':
(deviationType === LOG_RATE_ANALYSIS_TYPE.SPIKE ? DEVIATION_TS : BASELINE_TS) +
tsOffset,
'@timestamp': timestamp,
should_ignore_this_field: 'should_ignore_this_field',
});
};
if (includeTextField) {
doc.message = getMessage(timestamp, 'Peter', url, responseCode);
}
bulkBody.push(action);
bulkBody.push(doc);
});
});
});
@@ -102,17 +135,24 @@ function getArtificialLogsWithDeviation(index: string, deviationType: string) {
tsOffset = 0;
[...Array(docsPerUrl2[url] + userIndex)].forEach(() => {
tsOffset += Math.round(DAY_MS / docsPerUrl2[url]);
bulkBody.push(action);
bulkBody.push({
const timestamp =
(deviationType === LOG_RATE_ANALYSIS_TYPE.SPIKE ? DEVIATION_TS : BASELINE_TS) + tsOffset;
const doc: GeneratedDoc = {
user,
response_code: '500',
url,
version: 'v1.0.0',
'@timestamp':
(deviationType === LOG_RATE_ANALYSIS_TYPE.SPIKE ? DEVIATION_TS : BASELINE_TS) +
tsOffset,
'@timestamp': timestamp,
should_ignore_this_field: 'should_ignore_this_field',
});
};
if (includeTextField) {
doc.message = 'an unexpected error occured';
}
bulkBody.push(action);
bulkBody.push(doc);
});
});
});
@@ -126,7 +166,7 @@ export function LogRateAnalysisDataGeneratorProvider({ getService }: FtrProvider
const log = getService('log');
return new (class DataGenerator {
public async generateData(dataGenerator: string) {
public async generateData(dataGenerator: LogRateAnalysisDataGenerator) {
switch (dataGenerator) {
case 'kibana_sample_data_logs':
// will be added via UI
@@ -164,12 +204,19 @@ export function LogRateAnalysisDataGeneratorProvider({ getService }: FtrProvider
});
break;
case 'artificial_logs_with_spike':
case 'artificial_logs_with_dip':
case 'artificial_logs_with_spike_notextfield':
case 'artificial_logs_with_spike_textfield':
case 'artificial_logs_with_dip_notextfield':
case 'artificial_logs_with_dip_textfield':
try {
await es.indices.delete({
const indexExists = await es.indices.exists({
index: dataGenerator,
});
if (indexExists) {
await es.indices.delete({
index: dataGenerator,
});
}
} catch (e) {
log.info(`Could not delete index '${dataGenerator}' in before() callback`);
}
@@ -185,16 +232,18 @@ export function LogRateAnalysisDataGeneratorProvider({ getService }: FtrProvider
version: { type: 'keyword' },
'@timestamp': { type: 'date' },
should_ignore_this_field: { type: 'keyword', doc_values: false, index: false },
message: { type: 'text' },
},
},
});
const dataGeneratorOptions = dataGenerator.split('_');
const deviationType = dataGeneratorOptions[3] ?? LOG_RATE_ANALYSIS_TYPE.SPIKE;
const textField = dataGeneratorOptions[4] === 'textfield';
await es.bulk({
refresh: 'wait_for',
body: getArtificialLogsWithDeviation(
dataGenerator,
dataGenerator.split('_').pop() ?? LOG_RATE_ANALYSIS_TYPE.SPIKE
),
body: getArtificialLogsWithDeviation(dataGenerator, deviationType, textField),
});
break;
@@ -203,7 +252,7 @@ export function LogRateAnalysisDataGeneratorProvider({ getService }: FtrProvider
}
}
public async removeGeneratedData(dataGenerator: string) {
public async removeGeneratedData(dataGenerator: LogRateAnalysisDataGenerator) {
switch (dataGenerator) {
case 'kibana_sample_data_logs':
// do not remove
@@ -213,8 +262,10 @@ export function LogRateAnalysisDataGeneratorProvider({ getService }: FtrProvider
await esArchiver.unload('x-pack/test/functional/es_archives/ml/farequote');
break;
case 'artificial_logs_with_spike':
case 'artificial_logs_with_dip':
case 'artificial_logs_with_spike_notextfield':
case 'artificial_logs_with_spike_textfield':
case 'artificial_logs_with_dip_notextfield':
case 'artificial_logs_with_dip_textfield':
try {
await es.indices.delete({
index: dataGenerator,