[ML] Switch from normal sampling to random sampler for Index data visualizer table (#144646)

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
2025-04-23 17:28:26 -04:00 · 2022-11-16 08:36:55 -06:00 · 2022-11-16 08:36:55 -06:00 · 22d0fa742d
commit 22d0fa742d
parent 8a6e91b23f
39 changed files with 1330 additions and 656 deletions
--- a/src/plugins/discover/public/application/main/components/field_stats_table/field_stats_table.tsx
+++ b/src/plugins/discover/public/application/main/components/field_stats_table/field_stats_table.tsx
@ -24,6 +24,24 @@ import { useDiscoverServices } from '../../../../hooks/use_discover_services';
 import { FIELD_STATISTICS_LOADED } from './constants';
 import type { GetStateReturn } from '../../services/discover_state';
 import { AvailableFields$, DataRefetch$, DataTotalHits$ } from '../../hooks/use_saved_search';
+export interface RandomSamplingOption { // Sampling settings tagged with mode 'random_sampling'.
+  mode: 'random_sampling'; // discriminant of the SamplingOption union
+  seed: string;
+  probability: number; // NOTE(review): presumably a fraction in (0, 1] — confirm against the consumer
+}
+
+export interface NormalSamplingOption { // Sampling settings tagged with mode 'normal_sampling'.
+  mode: 'normal_sampling'; // discriminant of the SamplingOption union
+  seed: string;
+  shardSize: number; // NOTE(review): presumably a per-shard sample size — confirm against the consumer
+}
+
+export interface NoSamplingOption { // Settings tagged with mode 'no_sampling'; carries only a seed.
+  mode: 'no_sampling'; // discriminant of the SamplingOption union
+  seed: string;
+}
+
+export type SamplingOption = RandomSamplingOption | NormalSamplingOption | NoSamplingOption; // union discriminated by `mode`

 export interface DataVisualizerGridEmbeddableInput extends EmbeddableInput {
  dataView: DataView;
@ -39,6 +57,7 @@ export interface DataVisualizerGridEmbeddableInput extends EmbeddableInput {
  sessionId?: string;
  fieldsToFetch?: string[];
  totalDocuments?: number;
+  samplingOption?: SamplingOption;
 }
 export interface DataVisualizerGridEmbeddableOutput extends EmbeddableOutput {
  showDistributions?: boolean;
@ -163,6 +182,11 @@ export const FieldStatisticsTable = (props: FieldStatisticsTableProps) => {
        totalDocuments: savedSearchDataTotalHits$
          ? savedSearchDataTotalHits$.getValue()?.result
          : undefined,
+        samplingOption: {
+          mode: 'normal_sampling',
+          shardSize: 5000,
+          seed: searchSessionId,
+        } as NormalSamplingOption,
      });
      embeddable.reload();
    }
--- a/x-pack/packages/ml/is_populated_object/src/is_populated_object.ts
+++ b/x-pack/packages/ml/is_populated_object/src/is_populated_object.ts
@ -22,10 +22,10 @@
 *      Otherwise you'd just satisfy TS requirements but might still
 *      run into runtime issues.
 */
-export const isPopulatedObject = <U extends string = string>(
+export const isPopulatedObject = <U extends string = string, T extends unknown = unknown>(
  arg: unknown,
  requiredAttributes: U[] = []
-): arg is Record<U, unknown> => {
+): arg is Record<U, T> => {
  return (
    typeof arg === 'object' &&
    arg !== null &&
--- a/x-pack/plugins/data_visualizer/common/types/field_request_config.ts
+++ b/x-pack/plugins/data_visualizer/common/types/field_request_config.ts
@ -64,9 +64,7 @@ export interface FieldVisStats {
  max?: number;
  median?: number;
  min?: number;
-  topValues?: Array<{ key: number | string; doc_count: number }>;
-  topValuesSampleSize?: number;
-  topValuesSamplerShardSize?: number;
+  topValues?: Array<{ key: number | string; doc_count: number; percent: number }>;
  examples?: Array<string | GeoPointExample | object>;
  timeRangeEarliest?: number;
  timeRangeLatest?: number;
--- a/x-pack/plugins/data_visualizer/common/types/field_stats.ts
+++ b/x-pack/plugins/data_visualizer/common/types/field_stats.ts
@ -11,6 +11,25 @@ import { IKibanaSearchResponse } from '@kbn/data-plugin/common';
 import { isPopulatedObject } from '@kbn/ml-is-populated-object';
 import { TimeBucketsInterval } from '../services/time_buckets';

+export interface RandomSamplingOption { // Sampling settings tagged with mode 'random_sampling'.
+  mode: 'random_sampling'; // discriminant checked by isRandomSamplingOption
+  seed: string;
+  probability: number; // isNoSamplingOption treats a value of exactly 1 as "no sampling"
+}
+
+export interface NormalSamplingOption { // Sampling settings tagged with mode 'normal_sampling'.
+  mode: 'normal_sampling'; // discriminant checked by isNormalSamplingOption
+  seed: string;
+  shardSize: number; // NOTE(review): presumably a per-shard sample size — confirm against the consumer
+}
+
+export interface NoSamplingOption { // Settings tagged with mode 'no_sampling'; carries only a seed.
+  mode: 'no_sampling'; // discriminant checked by isNoSamplingOption
+  seed: string;
+}
+
+export type SamplingOption = RandomSamplingOption | NormalSamplingOption | NoSamplingOption; // union discriminated by `mode`
+
 export interface FieldData {
  fieldName: string;
  existsInDocs: boolean;
@ -54,7 +73,7 @@ export const isIKibanaSearchResponse = (arg: unknown): arg is IKibanaSearchRespo

 export interface NumericFieldStats {
  fieldName: string;
-  count: number;
+  count?: number;
  min: number;
  max: number;
  avg: number;
@ -86,7 +105,8 @@ export interface BooleanFieldStats {
  count: number;
  trueCount: number;
  falseCount: number;
-  [key: string]: number | string;
+  topValues: Bucket[];
+  topValuesSampleSize: number;
 }

 export interface DocumentCountStats {
@ -186,6 +206,9 @@ export interface FieldStatsCommonRequestParams {
  intervalMs?: number;
  query: estypes.QueryDslQueryContainer;
  maxExamples?: number;
+  samplingProbability: number | null;
+  browserSessionSeed: number;
+  samplingOption: SamplingOption;
 }

 export interface OverallStatsSearchStrategyParams {
@ -202,6 +225,8 @@ export interface OverallStatsSearchStrategyParams {
  aggregatableFields: string[];
  nonAggregatableFields: string[];
  fieldsToFetch?: string[];
+  browserSessionSeed: number;
+  samplingOption: SamplingOption;
 }

 export interface FieldStatsSearchStrategyReturnBase {
@ -238,3 +263,20 @@ export interface Field {
 export interface Aggs {
  [key: string]: estypes.AggregationsAggregationContainer;
 }
+
+export const EMBEDDABLE_SAMPLER_OPTION = { // Sampler mode identifiers; values equal the `mode` tags of RandomSamplingOption / NormalSamplingOption.
+  RANDOM: 'random_sampling',
+  NORMAL: 'normal_sampling',
+} as const; // const assertion keeps the literal value types so the derived union below is not widened to `string`
+export type FieldStatsEmbeddableSamplerOption = // resolves to 'random_sampling' | 'normal_sampling'
+  typeof EMBEDDABLE_SAMPLER_OPTION[keyof typeof EMBEDDABLE_SAMPLER_OPTION];
+
+export function isRandomSamplingOption(arg: SamplingOption): arg is RandomSamplingOption { // type guard: narrows on the `mode` tag
+  return arg.mode === 'random_sampling';
+}
+export function isNormalSamplingOption(arg: SamplingOption): arg is NormalSamplingOption { // type guard: narrows on the `mode` tag
+  return arg.mode === 'normal_sampling';
+}
+export function isNoSamplingOption(arg: SamplingOption): arg is NoSamplingOption { // true for mode 'no_sampling' and for random sampling with probability exactly 1
+  return arg.mode === 'no_sampling' || (arg.mode === 'random_sampling' && arg.probability === 1); // NOTE(review): the second case narrows a RandomSamplingOption to NoSamplingOption, wider than the declared predicate — confirm callers rely only on `seed`
+}
--- a/x-pack/plugins/data_visualizer/public/application/common/components/document_count_content/document_count_content.tsx
+++ b/x-pack/plugins/data_visualizer/public/application/common/components/document_count_content/document_count_content.tsx
@ -20,7 +20,7 @@ import {
  EuiFormRow,
 } from '@elastic/eui';
 import { i18n } from '@kbn/i18n';
-import { sortedIndex } from 'lodash';
+import { debounce, sortedIndex } from 'lodash';
 import { FormattedMessage } from '@kbn/i18n-react';
 import { isDefined } from '../../util/is_defined';
 import type { DocumentCountChartPoint } from './document_count_chart';
@ -64,6 +64,24 @@ export const DocumentCountContent: FC<Props> = ({
    setShowSamplingOptionsPopover(false);
  }, [setShowSamplingOptionsPopover]);

+  // eslint-disable-next-line react-hooks/exhaustive-deps
+  const updateSamplingProbability = useCallback( // debounced handler: snaps a raw slider value to a preset and forwards it to setSamplingProbability
+    debounce((newProbability: number) => {
+      if (setSamplingProbability) { // the setter is optional; without it the call is a no-op
+        const idx = sortedIndex(RANDOM_SAMPLER_PROBABILITIES, newProbability); // insertion index; assumes the preset list is sorted ascending — TODO confirm
+        const closestPrev = RANDOM_SAMPLER_PROBABILITIES[idx - 1]; // NOTE(review): undefined when idx is 0; the NaN comparison below then selects closestNext
+        const closestNext = RANDOM_SAMPLER_PROBABILITIES[idx]; // NOTE(review): undefined when the value exceeds every preset, which would forward NaN — confirm the slider max prevents this
+        const closestProbability = // pick the nearer neighbouring preset; ties resolve to closestNext
+          Math.abs(closestPrev - newProbability) < Math.abs(closestNext - newProbability)
+            ? closestPrev
+            : closestNext;
+
+        setSamplingProbability(closestProbability / 100); // divides by 100 before calling the setter (presets appear to be percentages — confirm)
+      }
+    }, 100), // debounce wait of 100 ms
+    [setSamplingProbability]
+  );
+
  const calloutInfoMessage = useMemo(() => {
    switch (randomSamplerPreference) {
      case RANDOM_SAMPLER_OPTION.OFF:
@ -125,7 +143,7 @@ export const DocumentCountContent: FC<Props> = ({
    <>
      <EuiFlexGroup alignItems="center" gutterSize="xs">
        <TotalCountHeader totalCount={totalCount} approximate={approximate} loading={loading} />
-        <EuiFlexItem grow={false}>
+        <EuiFlexItem grow={false} style={{ marginLeft: 'auto' }}>
          <EuiPopover
            data-test-subj="dvRandomSamplerOptionsPopover"
            id="dataVisualizerSamplingOptions"
@ -199,21 +217,7 @@ export const DocumentCountContent: FC<Props> = ({
                        value: d,
                        label: d === 0.001 || d >= 5 ? `${d}%` : '',
                      }))}
-                      onChange={(e) => {
-                        const newProbability = Number(e.currentTarget.value);
-                        const idx = sortedIndex(RANDOM_SAMPLER_PROBABILITIES, newProbability);
-                        const closestPrev = RANDOM_SAMPLER_PROBABILITIES[idx - 1];
-                        const closestNext = RANDOM_SAMPLER_PROBABILITIES[idx];
-                        const closestProbability =
-                          Math.abs(closestPrev - newProbability) <
-                          Math.abs(closestNext - newProbability)
-                            ? closestPrev
-                            : closestNext;
-
-                        if (setSamplingProbability) {
-                          setSamplingProbability(closestProbability / 100);
-                        }
-                      }}
+                      onChange={(e) => updateSamplingProbability(Number(e.currentTarget.value))}
                      step={RANDOM_SAMPLER_STEP}
                      data-test-subj="dvRandomSamplerProbabilityRange"
                    />
--- a/x-pack/plugins/data_visualizer/public/application/common/components/fields_stats_grid/fields_stats_grid.tsx
+++ b/x-pack/plugins/data_visualizer/public/application/common/components/fields_stats_grid/fields_stats_grid.tsx
@ -112,6 +112,7 @@ export const FieldsStatsGrid: FC<Props> = ({ results }) => {
        pageState={dataVisualizerListState}
        updatePageState={setDataVisualizerListState}
        getItemIdToExpandedRowMap={getItemIdToExpandedRowMap}
+        overallStatsRunning={false}
      />
    </div>
  );
--- a/x-pack/plugins/data_visualizer/public/application/common/components/stats_table/components/field_data_expanded_row/boolean_content.tsx
+++ b/x-pack/plugins/data_visualizer/public/application/common/components/stats_table/components/field_data_expanded_row/boolean_content.tsx
@ -10,7 +10,6 @@ import { EuiSpacer } from '@elastic/eui';
 import { Axis, BarSeries, Chart, Settings, ScaleType } from '@elastic/charts';

 import { FormattedMessage } from '@kbn/i18n-react';
-import { i18n } from '@kbn/i18n';
 import { TopValues } from '../../../top_values';
 import type { FieldDataRowProps } from '../../types/field_data_row';
 import { ExpandedRowFieldHeader } from '../expanded_row_field_header';
@ -45,32 +44,13 @@ export const BooleanContent: FC<FieldDataRowProps> = ({ config, onAddFilter }) =
  const theme = useDataVizChartTheme();
  if (!formattedPercentages) return null;

-  const { trueCount, falseCount, count } = formattedPercentages;
-  const stats = {
-    ...config.stats,
-    topValues: [
-      {
-        key: i18n.translate(
-          'xpack.dataVisualizer.dataGrid.fieldExpandedRow.booleanContent.trueCountLabel',
-          { defaultMessage: 'true' }
-        ),
-        doc_count: trueCount ?? 0,
-      },
-      {
-        key: i18n.translate(
-          'xpack.dataVisualizer.dataGrid.fieldExpandedRow.booleanContent.falseCountLabel',
-          { defaultMessage: 'false' }
-        ),
-        doc_count: falseCount ?? 0,
-      },
-    ],
-  };
+  const { count } = formattedPercentages;
  return (
    <ExpandedRowContent dataTestSubj={'dataVisualizerBooleanContent'}>
      <DocumentStatsTable config={config} />

      <TopValues
-        stats={stats}
+        stats={config.stats}
        fieldFormat={fieldFormat}
        barColor="success"
        onAddFilter={onAddFilter}
--- a/x-pack/plugins/data_visualizer/public/application/common/components/stats_table/components/field_data_expanded_row/choropleth_map.tsx
+++ b/x-pack/plugins/data_visualizer/public/application/common/components/stats_table/components/field_data_expanded_row/choropleth_map.tsx
@ -6,7 +6,7 @@
 */

 import React, { FC, useMemo } from 'react';
-import { EuiSpacer, EuiText, htmlIdGenerator } from '@elastic/eui';
+import { EuiText, htmlIdGenerator } from '@elastic/eui';
 import { i18n } from '@kbn/i18n';
 import { FormattedMessage } from '@kbn/i18n-react';
 import {
@ -18,6 +18,8 @@ import {
  VectorLayerDescriptor,
 } from '@kbn/maps-plugin/common';
 import { EMSTermJoinConfig } from '@kbn/maps-plugin/public';
+import { ES_FIELD_TYPES, KBN_FIELD_TYPES } from '@kbn/field-types';
+import { useDataVisualizerKibana } from '../../../../../kibana_context';
 import { EmbeddedMapComponent } from '../../../embedded_map';
 import { FieldVisStats } from '../../../../../../../common/types';
 import { ExpandedRowPanel } from './expanded_row_panel';
@ -97,13 +99,59 @@ interface Props {
 }

 export const ChoroplethMap: FC<Props> = ({ stats, suggestion }) => {
-  const { fieldName, isTopValuesSampled, topValues, topValuesSamplerShardSize } = stats!;
+  const {
+    services: {
+      data: { fieldFormats },
+    },
+  } = useDataVisualizerKibana();
+
+  const { fieldName, isTopValuesSampled, topValues, sampleCount } = stats!;

  const layerList: VectorLayerDescriptor[] = useMemo(
    () => [getChoroplethTopValuesLayer(fieldName || '', topValues || [], suggestion)],
    [suggestion, fieldName, topValues]
  );

+  if (!stats) return null;
+
+  const totalDocuments = stats.totalDocuments ?? sampleCount ?? 0;
+
+  const countsElement = totalDocuments ? (
+    <EuiText color="subdued" size="xs">
+      {isTopValuesSampled ? (
+        <FormattedMessage
+          id="xpack.dataVisualizer.dataGrid.fieldExpandedRow.choroplethMapTopValues.calculatedFromSampleRecordsLabel"
+          defaultMessage="Calculated from {sampledDocumentsFormatted} sample {sampledDocuments, plural, one {record} other {records}}."
+          values={{
+            sampledDocuments: sampleCount,
+            sampledDocumentsFormatted: (
+              <strong>
+                {fieldFormats
+                  .getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER])
+                  .convert(sampleCount)}
+              </strong>
+            ),
+          }}
+        />
+      ) : (
+        <FormattedMessage
+          id="xpack.dataVisualizer.dataGrid.fieldExpandedRow.choroplethMapTopValues.calculatedFromTotalRecordsLabel"
+          defaultMessage="Calculated from {totalDocumentsFormatted} {totalDocuments, plural, one {record} other {records}}."
+          values={{
+            totalDocuments,
+            totalDocumentsFormatted: (
+              <strong>
+                {fieldFormats
+                  .getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER])
+                  .convert(totalDocuments ?? 0)}
+              </strong>
+            ),
+          }}
+        />
+      )}
+    </EuiText>
+  ) : null;
+
  return (
    <ExpandedRowPanel
      dataTestSubj={'fileDataVisualizerChoroplethMapTopValues'}
@ -114,20 +162,7 @@ export const ChoroplethMap: FC<Props> = ({ stats, suggestion }) => {
        <EmbeddedMapComponent layerList={layerList} />
      </div>

-      {isTopValuesSampled === true && (
-        <div>
-          <EuiSpacer size={'s'} />
-          <EuiText size="xs" textAlign={'center'}>
-            <FormattedMessage
-              id="xpack.dataVisualizer.dataGrid.fieldExpandedRow.choroplethMapTopValues.calculatedFromSampleDescription"
-              defaultMessage="Calculated from sample of {topValuesSamplerShardSize} documents per shard"
-              values={{
-                topValuesSamplerShardSize,
-              }}
-            />
-          </EuiText>
-        </div>
-      )}
+      {countsElement}
    </ExpandedRowPanel>
  );
 };
--- a/x-pack/plugins/data_visualizer/public/application/common/components/stats_table/components/field_data_expanded_row/document_stats.tsx
+++ b/x-pack/plugins/data_visualizer/public/application/common/components/stats_table/components/field_data_expanded_row/document_stats.tsx
@ -10,7 +10,7 @@ import React, { FC, ReactNode } from 'react';
 import { i18n } from '@kbn/i18n';
 import { EuiBasicTable, HorizontalAlignment, LEFT_ALIGNMENT, RIGHT_ALIGNMENT } from '@elastic/eui';
 import { ExpandedRowFieldHeader } from '../expanded_row_field_header';
-import { FieldDataRowProps } from '../../types';
+import { FieldDataRowProps, isIndexBasedFieldVisConfig } from '../../types';
 import { roundToDecimalPlace } from '../../../utils';
 import { ExpandedRowPanel } from './expanded_row_panel';

@ -46,6 +46,13 @@ export const DocumentStatsTable: FC<FieldDataRowProps> = ({ config }) => {
  )
    return null;
  const { cardinality, count, sampleCount } = config.stats;
+
+  const valueCount =
+    count ?? (isIndexBasedFieldVisConfig(config) && config.existsInDocs === true ? undefined : 0);
+  const docsPercent =
+    valueCount !== undefined && sampleCount !== undefined
+      ? roundToDecimalPlace((valueCount / sampleCount) * 100)
+      : undefined;
  const metaTableItems = [
    {
      function: 'count',
@ -57,16 +64,20 @@ export const DocumentStatsTable: FC<FieldDataRowProps> = ({ config }) => {
      ),
      value: count,
    },
-    {
-      function: 'percentage',
-      display: (
-        <FormattedMessage
-          id="xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.percentageLabel"
-          defaultMessage="percentage"
-        />
-      ),
-      value: `${roundToDecimalPlace((count / sampleCount) * 100)}%`,
-    },
+    ...(docsPercent !== undefined
+      ? [
+          {
+            function: 'percentage',
+            display: (
+              <FormattedMessage
+                id="xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.percentageLabel"
+                defaultMessage="percentage"
+              />
+            ),
+            value: `${docsPercent}%`,
+          },
+        ]
+      : []),
    {
      function: 'distinctValues',
      display: (
--- a/x-pack/plugins/data_visualizer/public/application/common/components/stats_table/components/field_data_row/document_stats.tsx
+++ b/x-pack/plugins/data_visualizer/public/application/common/components/stats_table/components/field_data_row/document_stats.tsx
@ -8,32 +8,46 @@
 import { EuiIcon, EuiText } from '@elastic/eui';

 import React from 'react';
+import { ES_FIELD_TYPES, KBN_FIELD_TYPES } from '@kbn/field-types';
+import { useDataVisualizerKibana } from '../../../../../kibana_context';
+import { isIndexBasedFieldVisConfig } from '../../../../../../../common/types/field_vis_config';
 import type { FieldDataRowProps } from '../../types/field_data_row';
 import { roundToDecimalPlace } from '../../../utils';
-import { isIndexBasedFieldVisConfig } from '../../types';

 interface Props extends FieldDataRowProps {
   showIcon?: boolean;
+  totalCount?: number; // fallback denominator for the percentage when stats.sampleCount is undefined
 }
-export const DocumentStat = ({ config, showIcon }: Props) => {
+export const DocumentStat = ({ config, showIcon, totalCount }: Props) => {
  const { stats } = config;
+  const {
+    services: {
+      data: { fieldFormats },
+    },
+  } = useDataVisualizerKibana();
+
  if (stats === undefined) return null;
+
  const { count, sampleCount } = stats;
+  const total = sampleCount ?? totalCount;

  // If field exists is docs but we don't have count stats then don't show
  // Otherwise if field doesn't appear in docs at all, show 0%
-  const docsCount =
+  const valueCount =
    count ?? (isIndexBasedFieldVisConfig(config) && config.existsInDocs === true ? undefined : 0);
  const docsPercent =
-    docsCount !== undefined && sampleCount !== undefined
-      ? roundToDecimalPlace((docsCount / sampleCount) * 100)
-      : 0;
+    valueCount !== undefined && total !== undefined
+      ? `(${roundToDecimalPlace((valueCount / total) * 100)}%)`
+      : null;

-  return docsCount !== undefined ? (
+  return valueCount !== undefined ? (
    <>
      {showIcon ? <EuiIcon type="document" size={'m'} className={'columnHeader__icon'} /> : null}
      <EuiText size={'xs'}>
-        {docsCount} ({docsPercent}%)
+        {fieldFormats
+          .getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER])
+          .convert(valueCount)}{' '}
+        {docsPercent}
      </EuiText>
    </>
  ) : null;
--- a/x-pack/plugins/data_visualizer/public/application/common/components/stats_table/data_visualizer_stats_table.tsx
+++ b/x-pack/plugins/data_visualizer/public/application/common/components/stats_table/data_visualizer_stats_table.tsx
@ -60,6 +60,8 @@ interface DataVisualizerTableProps<T> {
  /** Callback to receive any updates when table or page state is changed **/
  onChange?: (update: Partial<DataVisualizerTableState>) => void;
  loading?: boolean;
+  totalCount?: number;
+  overallStatsRunning: boolean;
 }

 export const DataVisualizerTable = <T extends DataVisualizerTableItem>({
@ -71,6 +73,8 @@ export const DataVisualizerTable = <T extends DataVisualizerTableItem>({
  showPreviewByDefault,
  onChange,
  loading,
+  totalCount,
+  overallStatsRunning,
 }: DataVisualizerTableProps<T>) => {
  const { euiTheme } = useEuiTheme();

@ -217,12 +221,40 @@ export const DataVisualizerTable = <T extends DataVisualizerTableItem>({
      },
      {
        field: 'docCount',
-        name: i18n.translate('xpack.dataVisualizer.dataGrid.documentsCountColumnName', {
-          defaultMessage: 'Documents (%)',
-        }),
-        render: (value: number | undefined, item: DataVisualizerTableItem) => (
-          <DocumentStat config={item} showIcon={dimensions.showIcon} />
+        name: (
+          <div className={'columnHeader__title'}>
+            {i18n.translate('xpack.dataVisualizer.dataGrid.documentsCountColumnName', {
+              defaultMessage: 'Documents (%)',
+            })}
+            {
+              <EuiToolTip
+                content={i18n.translate(
+                  'xpack.dataVisualizer.dataGrid.documentsCountColumnTooltip',
+                  {
+                    defaultMessage:
+                      'Document count found is based on a smaller set of sampled records.',
+                  }
+                )}
+              >
+                <EuiIcon type="questionInCircle" />
+              </EuiToolTip>
+            }
+          </div>
        ),
+
+        render: (value: number | undefined, item: DataVisualizerTableItem) => {
+          if (overallStatsRunning) {
+            return (
+              <EuiText textAlign="center">
+                <EuiLoadingSpinner size="s" />
+              </EuiText>
+            );
+          }
+
+          return (
+            <DocumentStat config={item} showIcon={dimensions.showIcon} totalCount={totalCount} />
+          );
+        },
        sortable: (item: DataVisualizerTableItem) => item?.stats?.count,
        align: LEFT_ALIGNMENT as HorizontalAlignment,
        'data-test-subj': 'dataVisualizerTableColumnDocumentsCount',
@ -233,9 +265,19 @@ export const DataVisualizerTable = <T extends DataVisualizerTableItem>({
        name: i18n.translate('xpack.dataVisualizer.dataGrid.distinctValuesColumnName', {
          defaultMessage: 'Distinct values',
        }),
-        render: (_: undefined, item: DataVisualizerTableItem) => (
-          <DistinctValues cardinality={item?.stats?.cardinality} showIcon={dimensions.showIcon} />
-        ),
+        render: (_: undefined, item: DataVisualizerTableItem) => {
+          if (overallStatsRunning) {
+            return (
+              <EuiText textAlign="center">
+                <EuiLoadingSpinner size="s" />
+              </EuiText>
+            );
+          }
+
+          return (
+            <DistinctValues cardinality={item?.stats?.cardinality} showIcon={dimensions.showIcon} />
+          );
+        },
        sortable: (item: DataVisualizerTableItem) => item?.stats?.cardinality,
        align: LEFT_ALIGNMENT as HorizontalAlignment,
        'data-test-subj': 'dataVisualizerTableColumnDistinctValues',
@ -333,6 +375,7 @@ export const DataVisualizerTable = <T extends DataVisualizerTableItem>({
    extendedColumns,
    dimensions.breakPoint,
    toggleExpandAll,
+    overallStatsRunning,
  ]);

  const itemIdToExpandedRowMap = useMemo(() => {
--- a/x-pack/plugins/data_visualizer/public/application/common/components/top_values/top_values.tsx
+++ b/x-pack/plugins/data_visualizer/public/application/common/components/top_values/top_values.tsx
@ -36,8 +36,7 @@ interface Props {
  onAddFilter?: (field: DataViewField | string, value: string, type: '+' | '-') => void;
 }

-function getPercentLabel(docCount: number, topValuesSampleSize: number): string {
-  const percent = (100 * docCount) / topValuesSampleSize;
+function getPercentLabel(percent: number): string {
  if (percent >= 0.1) {
    return `${roundToDecimalPlace(percent, 1)}%`;
  } else {
@ -47,76 +46,54 @@ function getPercentLabel(docCount: number, topValuesSampleSize: number): string

 export const TopValues: FC<Props> = ({ stats, fieldFormat, barColor, compressed, onAddFilter }) => {
  const {
-    services: { data },
+    services: {
+      data: { fieldFormats },
+    },
  } = useDataVisualizerKibana();

-  const { fieldFormats } = data;
-
  if (stats === undefined || !stats.topValues) return null;
-  const {
-    topValues,
-    topValuesSampleSize,
-    count,
-    isTopValuesSampled,
-    fieldName,
-    sampleCount,
-    topValuesSamplerShardSize,
-  } = stats;
+  const { topValues, fieldName, sampleCount } = stats;

-  const totalDocuments = stats.totalDocuments;
+  const totalDocuments = stats.totalDocuments ?? sampleCount ?? 0;
+  const topValuesOtherCountPercent =
+    1 - (topValues ? topValues.reduce((acc, bucket) => acc + bucket.percent, 0) : 0);
+  const topValuesOtherCount = Math.floor(topValuesOtherCountPercent * (sampleCount ?? 0));

-  const progressBarMax = isTopValuesSampled === true ? topValuesSampleSize : count;
-
-  const topValuesOtherCount =
-    (progressBarMax ?? 0) -
-    (topValues ? topValues.map((value) => value.doc_count).reduce((v, acc) => acc + v, 0) : 0);
-
-  const countsElement =
-    totalDocuments !== undefined ? (
-      <EuiText color="subdued" size="xs">
-        {isTopValuesSampled ? (
-          <FormattedMessage
-            id="xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromSampleRecordsLabel"
-            defaultMessage="Calculated from {sampledDocumentsFormatted} sample {sampledDocuments, plural, one {record} other {records}}."
-            values={{
-              sampledDocuments: sampleCount,
-              sampledDocumentsFormatted: (
-                <strong>
-                  {fieldFormats
-                    .getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER])
-                    .convert(sampleCount)}
-                </strong>
-              ),
-            }}
-          />
-        ) : (
-          <FormattedMessage
-            id="xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromTotalRecordsLabel"
-            defaultMessage="Calculated from {totalDocumentsFormatted} {totalDocuments, plural, one {record} other {records}}."
-            values={{
-              totalDocuments,
-              totalDocumentsFormatted: (
-                <strong>
-                  {fieldFormats
-                    .getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER])
-                    .convert(totalDocuments ?? 0)}
-                </strong>
-              ),
-            }}
-          />
-        )}
-      </EuiText>
-    ) : (
-      <EuiText size="xs" textAlign={'center'}>
+  const countsElement = (
+    <EuiText color="subdued" size="xs">
+      {totalDocuments > (sampleCount ?? 0) ? (
        <FormattedMessage
-          id="xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromSampleDescription"
-          defaultMessage="Calculated from sample of {topValuesSamplerShardSize} documents per shard"
+          id="xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromSampleRecordsLabel"
+          defaultMessage="Calculated from {sampledDocumentsFormatted} sample {sampledDocuments, plural, one {record} other {records}}."
          values={{
-            topValuesSamplerShardSize,
+            sampledDocuments: sampleCount,
+            sampledDocumentsFormatted: (
+              <strong>
+                {fieldFormats
+                  .getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER])
+                  .convert(sampleCount)}
+              </strong>
+            ),
          }}
        />
-      </EuiText>
-    );
+      ) : (
+        <FormattedMessage
+          id="xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromTotalRecordsLabel"
+          defaultMessage="Calculated from {totalDocumentsFormatted} {totalDocuments, plural, one {record} other {records}}."
+          values={{
+            totalDocuments,
+            totalDocumentsFormatted: (
+              <strong>
+                {fieldFormats
+                  .getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER])
+                  .convert(totalDocuments ?? 0)}
+              </strong>
+            ),
+          }}
+        />
+      )}
+    </EuiText>
+  );

  return (
    <ExpandedRowPanel
@ -139,15 +116,15 @@ export const TopValues: FC<Props> = ({ stats, fieldFormat, barColor, compressed,
              <EuiFlexGroup gutterSize="xs" alignItems="center" key={value.key}>
                <EuiFlexItem data-test-subj="dataVisualizerFieldDataTopValueBar">
                  <EuiProgress
-                    value={value.doc_count}
-                    max={progressBarMax}
+                    value={value.percent}
+                    max={1}
                    color={barColor}
                    size="xs"
                    label={kibanaFieldFormat(value.key, fieldFormat)}
                    className={classNames('eui-textTruncate', 'topValuesValueLabelContainer')}
                    valueText={`${value.doc_count}${
-                      progressBarMax !== undefined
-                        ? ` (${getPercentLabel(value.doc_count, progressBarMax)})`
+                      totalDocuments !== undefined
+                        ? ` (${getPercentLabel(value.percent * 100)})`
                        : ''
                    }`}
                  />
@ -222,7 +199,7 @@ export const TopValues: FC<Props> = ({ stats, fieldFormat, barColor, compressed,
            <EuiFlexItem data-test-subj="dataVisualizerFieldDataTopValueBar">
              <EuiProgress
                value={topValuesOtherCount}
-                max={progressBarMax}
+                max={totalDocuments}
                color={barColor}
                size="xs"
                label={
@ -233,8 +210,8 @@ export const TopValues: FC<Props> = ({ stats, fieldFormat, barColor, compressed,
                }
                className={classNames('eui-textTruncate', 'topValuesValueLabelContainer')}
                valueText={`${topValuesOtherCount}${
-                  progressBarMax !== undefined
-                    ? ` (${getPercentLabel(topValuesOtherCount, progressBarMax)})`
+                  totalDocuments !== undefined
+                    ? ` (${getPercentLabel(topValuesOtherCountPercent * 100)})`
                    : ''
                }`}
              />
@ -249,12 +226,10 @@ export const TopValues: FC<Props> = ({ stats, fieldFormat, barColor, compressed,
          </EuiFlexGroup>
        ) : null}

-        {isTopValuesSampled === true && (
-          <Fragment>
-            <EuiSpacer size="xs" />
-            {countsElement}
-          </Fragment>
-        )}
+        <Fragment>
+          <EuiSpacer size="xs" />
+          {countsElement}
+        </Fragment>
      </div>
    </ExpandedRowPanel>
  );
--- a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/components/index_data_visualizer_view/index_data_visualizer_view.tsx
+++ b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/components/index_data_visualizer_view/index_data_visualizer_view.tsx
@ -551,8 +551,10 @@ export const IndexDataVisualizerView: FC<IndexDataVisualizerViewProps> = (dataVi
                  getItemIdToExpandedRowMap={getItemIdToExpandedRowMap}
                  extendedColumns={extendedColumns}
                  loading={progress < 100}
+                  overallStatsRunning={overallStatsProgress.isRunning}
                  showPreviewByDefault={dataVisualizerListState.showDistributions ?? true}
                  onChange={setDataVisualizerListState}
+                  totalCount={overallStats.totalCount}
                />
              </EuiPanel>
            </EuiFlexItem>
--- a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/components/search_panel/search_panel.tsx
+++ b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/components/search_panel/search_panel.tsx
@ -11,8 +11,8 @@ import { i18n } from '@kbn/i18n';
 import { Query, Filter } from '@kbn/es-query';
 import type { TimeRange } from '@kbn/es-query';
 import { DataView, DataViewField } from '@kbn/data-views-plugin/public';
+import { css } from '@emotion/react';
 import { isDefined } from '../../../common/util/is_defined';
-import { ShardSizeFilter } from './shard_size_select';
 import { DataVisualizerFieldNamesFilter } from './field_name_filter';
 import { DataVisualizerFieldTypeFilter } from './field_type_filter';
 import { SupportedFieldType } from '../../../../../common/types';
@ -147,12 +147,15 @@ export const SearchPanel: FC<Props> = ({
        />
      </EuiFlexItem>

-      <EuiFlexItem grow={2} className={'dvSearchPanel__controls'}>
-        <ShardSizeFilter
-          samplerShardSize={samplerShardSize}
-          setSamplerShardSize={setSamplerShardSize}
-        />
-
+      <EuiFlexItem
+        grow={2}
+        className={'dvSearchPanel__controls'}
+        css={css`
+          margin-left: 0px !important;
+          padding-left: 0px !important;
+          padding-right: 0px !important;
+        `}
+      >
        <DataVisualizerFieldNamesFilter
          overallStats={overallStats}
          setVisibleFieldNames={setVisibleFieldNames}
--- a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/components/search_panel/shard_size_select.tsx
+++ b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/components/search_panel/shard_size_select.tsx
@ -1,66 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License
- * 2.0; you may not use this file except in compliance with the Elastic License
- * 2.0.
- */
-
-import { EuiFlexGroup, EuiFlexItem, EuiIconTip, EuiSuperSelect } from '@elastic/eui';
-import { i18n } from '@kbn/i18n';
-import React, { FC } from 'react';
-import { FormattedMessage } from '@kbn/i18n-react';
-
-interface Props {
-  samplerShardSize: number;
-  setSamplerShardSize(s: number): void;
-}
-
-const searchSizeOptions = [1000, 5000, 10000, 100000, -1].map((v) => {
-  return {
-    value: String(v),
-    inputDisplay:
-      v > 0 ? (
-        <span data-test-subj={`dataVisualizerShardSizeOption ${v}`}>
-          <FormattedMessage
-            id="xpack.dataVisualizer.searchPanel.sampleSizeOptionLabel"
-            defaultMessage="Sample size (per shard): {wrappedValue}"
-            values={{ wrappedValue: <b>{v}</b> }}
-          />
-        </span>
-      ) : (
-        <span data-test-subj={`dataVisualizerShardSizeOption all`}>
-          <FormattedMessage
-            id="xpack.dataVisualizer.searchPanel.allOptionLabel"
-            defaultMessage="Search all"
-          />
-        </span>
-      ),
-  };
-});
-
-export const ShardSizeFilter: FC<Props> = ({ samplerShardSize, setSamplerShardSize }) => {
-  return (
-    <EuiFlexGroup alignItems="center" gutterSize="s" responsive={false}>
-      <EuiFlexItem grow={false} style={{ width: 310 }}>
-        <EuiSuperSelect
-          options={searchSizeOptions}
-          valueOfSelected={String(samplerShardSize)}
-          onChange={(value) => setSamplerShardSize(+value)}
-          aria-label={i18n.translate('xpack.dataVisualizer.searchPanel.sampleSizeAriaLabel', {
-            defaultMessage: 'Select number of documents to sample',
-          })}
-          data-test-subj="dataVisualizerShardSizeSelect"
-        />
-      </EuiFlexItem>
-      <EuiFlexItem grow={false}>
-        <EuiIconTip
-          content={i18n.translate('xpack.dataVisualizer.searchPanel.queryBarPlaceholder', {
-            defaultMessage:
-              'Selecting a smaller sample size will reduce query run times and the load on the cluster.',
-          })}
-          position="right"
-        />
-      </EuiFlexItem>
-    </EuiFlexGroup>
-  );
-};
--- a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/embeddables/grid_embeddable/grid_embeddable.tsx
+++ b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/embeddables/grid_embeddable/grid_embeddable.tsx
@ -24,6 +24,7 @@ import { KibanaContextProvider, KibanaThemeProvider } from '@kbn/kibana-react-pl
 import type { Query } from '@kbn/es-query';
 import { DataView, DataViewField } from '@kbn/data-views-plugin/public';
 import { SavedSearch } from '@kbn/discover-plugin/public';
+import { SamplingOption } from '../../../../../common/types/field_stats';
 import { DATA_VISUALIZER_GRID_EMBEDDABLE_TYPE } from './constants';
 import { EmbeddableLoading } from './embeddable_loading_fallback';
 import { DataVisualizerStartDependencies } from '../../../../plugin';
@ -34,7 +35,7 @@ import {
 import { FieldVisConfig } from '../../../common/components/stats_table/types';
 import { getDefaultDataVisualizerListState } from '../../components/index_data_visualizer_view/index_data_visualizer_view';
 import type { DataVisualizerTableState, SavedSearchSavedObject } from '../../../../../common/types';
-import { DataVisualizerIndexBasedAppState } from '../../types/index_data_visualizer_state';
+import type { DataVisualizerIndexBasedAppState } from '../../types/index_data_visualizer_state';
 import { IndexBasedDataVisualizerExpandedRow } from '../../../common/components/expanded_row/index_based_expanded_row';
 import { useDataVisualizerGridData } from '../../hooks/use_data_visualizer_grid_data';

@ -55,6 +56,7 @@ export interface DataVisualizerGridInput {
  sessionId?: string;
  fieldsToFetch?: string[];
  totalDocuments?: number;
+  samplingOption?: SamplingOption;
 }
 export type DataVisualizerGridEmbeddableInput = EmbeddableInput & DataVisualizerGridInput;
 export type DataVisualizerGridEmbeddableOutput = EmbeddableOutput;
@ -83,8 +85,15 @@ export const EmbeddableWrapper = ({
    [dataVisualizerListState, onOutputChange]
  );

-  const { configs, searchQueryLanguage, searchString, extendedColumns, progress, setLastRefresh } =
-    useDataVisualizerGridData(input, dataVisualizerListState);
+  const {
+    configs,
+    searchQueryLanguage,
+    searchString,
+    extendedColumns,
+    progress,
+    overallStatsProgress,
+    setLastRefresh,
+  } = useDataVisualizerGridData(input, dataVisualizerListState);

  useEffect(() => {
    setLastRefresh(Date.now());
@ -143,6 +152,7 @@ export const EmbeddableWrapper = ({
      showPreviewByDefault={input?.showPreviewByDefault}
      onChange={onOutputChange}
      loading={progress < 100}
+      overallStatsRunning={overallStatsProgress.isRunning}
    />
  );
 };
--- a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/hooks/use_data_visualizer_grid_data.ts
+++ b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/hooks/use_data_visualizer_grid_data.ts
@ -5,22 +5,23 @@
 * 2.0.
 */

-import { Required } from 'utility-types';
+import type { Required } from 'utility-types';
 import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
 import { merge } from 'rxjs';
-import { EuiTableActionsColumnType } from '@elastic/eui/src/components/basic_table/table_types';
+import type { EuiTableActionsColumnType } from '@elastic/eui';
 import { i18n } from '@kbn/i18n';
 import { DataViewField, KBN_FIELD_TYPES, UI_SETTINGS } from '@kbn/data-plugin/common';
 import seedrandom from 'seedrandom';
-import { RandomSamplerOption } from '../constants/random_sampler';
-import { DataVisualizerIndexBasedAppState } from '../types/index_data_visualizer_state';
+import type { SamplingOption } from '@kbn/discover-plugin/public/application/main/components/field_stats_table/field_stats_table';
+import type { RandomSamplerOption } from '../constants/random_sampler';
+import type { DataVisualizerIndexBasedAppState } from '../types/index_data_visualizer_state';
 import { useDataVisualizerKibana } from '../../kibana_context';
 import { getEsQueryFromSavedSearch } from '../utils/saved_search_utils';
-import { MetricFieldsStats } from '../../common/components/stats_table/components/field_count_stats';
+import type { MetricFieldsStats } from '../../common/components/stats_table/components/field_count_stats';
 import { useTimefilter } from './use_time_filter';
 import { dataVisualizerRefresh$ } from '../services/timefilter_refresh_service';
 import { TimeBuckets } from '../../../../common/services/time_buckets';
-import { FieldVisConfig } from '../../common/components/stats_table/types';
+import type { FieldVisConfig } from '../../common/components/stats_table/types';
 import {
  SUPPORTED_FIELD_TYPES,
  NON_AGGREGATABLE_FIELD_TYPES,
@ -29,13 +30,13 @@ import {
 import type { FieldRequestConfig, SupportedFieldType } from '../../../../common/types';
 import { kbnTypeToJobType } from '../../common/util/field_types_utils';
 import { getActions } from '../../common/components/field_data_row/action_menu';
-import { DataVisualizerGridInput } from '../embeddables/grid_embeddable/grid_embeddable';
+import type { DataVisualizerGridInput } from '../embeddables/grid_embeddable/grid_embeddable';
 import { getDefaultPageState } from '../components/index_data_visualizer_view/index_data_visualizer_view';
 import { useFieldStatsSearchStrategy } from './use_field_stats';
 import { useOverallStats } from './use_overall_stats';
-import { OverallStatsSearchStrategyParams } from '../../../../common/types/field_stats';
-import { Dictionary } from '../../common/util/url_state';
-import { AggregatableField, NonAggregatableField } from '../types/overall_stats';
+import type { OverallStatsSearchStrategyParams } from '../../../../common/types/field_stats';
+import type { Dictionary } from '../../common/util/url_state';
+import type { AggregatableField, NonAggregatableField } from '../types/overall_stats';

 const defaults = getDefaultPageState();

@ -43,6 +44,11 @@ function isDisplayField(fieldName: string): boolean {
  return !OMIT_FIELDS.includes(fieldName);
 }

+const DEFAULT_SAMPLING_OPTION: SamplingOption = {
+  mode: 'random_sampling',
+  seed: '', // placeholder: the stats request replaces it with the browser session seed
+  probability: 0, // placeholder; NOTE(review): presumably overwritten from the document count stats before use - confirm
+};
 export const useDataVisualizerGridData = (
  input: DataVisualizerGridInput,
  dataVisualizerListState: Required<DataVisualizerIndexBasedAppState>,
@ -76,6 +82,7 @@ export const useDataVisualizerGridData = (
    currentFilters,
    visibleFieldNames,
    fieldsToFetch,
+    samplingOption,
  } = useMemo(
    () => ({
      currentSavedSearch: input?.savedSearch,
@ -84,6 +91,8 @@ export const useDataVisualizerGridData = (
      visibleFieldNames: input?.visibleFieldNames ?? [],
      currentFilters: input?.filters,
      fieldsToFetch: input?.fieldsToFetch,
+      /** By default, use random sampling **/
+      samplingOption: input?.samplingOption ?? DEFAULT_SAMPLING_OPTION,
    }),
    [input]
  );
@ -203,6 +212,7 @@ export const useDataVisualizerGridData = (
          }
        }
      });
+
      return {
        earliest,
        latest,
@ -217,6 +227,8 @@ export const useDataVisualizerGridData = (
        aggregatableFields,
        nonAggregatableFields,
        fieldsToFetch,
+        browserSessionSeed,
+        samplingOption: { ...samplingOption, seed: browserSessionSeed.toString() },
      };
    },
    // eslint-disable-next-line react-hooks/exhaustive-deps
@ -226,17 +238,19 @@ export const useDataVisualizerGridData = (
      currentDataView.id,
      // eslint-disable-next-line react-hooks/exhaustive-deps
      JSON.stringify(searchQuery),
+      // eslint-disable-next-line react-hooks/exhaustive-deps
+      JSON.stringify(samplingOption),
      samplerShardSize,
      searchSessionId,
      lastRefresh,
      fieldsToFetch,
+      browserSessionSeed,
    ]
  );

  const { overallStats, progress: overallStatsProgress } = useOverallStats(
    fieldStatsRequest,
    lastRefresh,
-    browserSessionSeed,
    dataVisualizerListState.probability
  );

@ -269,10 +283,20 @@ export const useDataVisualizerGridData = (
    return { metricConfigs: existMetricFields, nonMetricConfigs: existNonMetricFields };
  }, [metricConfigs, nonMetricConfigs, overallStatsProgress.loaded]);

+  const probability = useMemo(
+    () =>
+      // If random sampler probability is already manually selected, or is available from the URL
+      // use that instead of using the probability calculated from the doc count
+      (dataVisualizerListState.probability === null
+        ? overallStats?.documentCountStats?.probability
+        : dataVisualizerListState.probability) ?? 1,
+    [dataVisualizerListState.probability, overallStats?.documentCountStats?.probability]
+  );
  const strategyResponse = useFieldStatsSearchStrategy(
    fieldStatsRequest,
    configsWithoutStats,
-    dataVisualizerListState
+    dataVisualizerListState,
+    probability
  );

  const combinedProgress = useMemo(
--- a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/hooks/use_field_stats.ts
+++ b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/hooks/use_field_stats.ts
@ -65,7 +65,8 @@ const createBatchedRequests = (fields: Field[], maxBatchSize = 10) => {
 export function useFieldStatsSearchStrategy(
  searchStrategyParams: OverallStatsSearchStrategyParams | undefined,
  fieldStatsParams: FieldStatsParams | undefined,
-  dataVisualizerListState: DataVisualizerIndexBasedAppState
+  dataVisualizerListState: DataVisualizerIndexBasedAppState,
+  samplingProbability: number | null
 ): FieldStatsSearchStrategyReturnBase {
  const {
    services: {
@ -168,6 +169,9 @@ export function useFieldStatsSearchStrategy(
        },
      },
      maxExamples: MAX_EXAMPLES_DEFAULT,
+      samplingProbability,
+      browserSessionSeed: searchStrategyParams.browserSessionSeed,
+      samplingOption: searchStrategyParams.samplingOption,
    };
    const searchOptions: ISearchOptions = {
      abortSignal: abortCtrl.current.signal,
@ -295,6 +299,7 @@ export function useFieldStatsSearchStrategy(
    dataVisualizerListState.pageIndex,
    dataVisualizerListState.sortDirection,
    dataVisualizerListState.sortField,
+    samplingProbability,
  ]);

  const cancelFetch = useCallback(() => {
--- a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/hooks/use_overall_stats.ts
+++ b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/hooks/use_overall_stats.ts
@ -30,14 +30,14 @@ import {
 import type { OverallStats } from '../types/overall_stats';
 import { getDefaultPageState } from '../components/index_data_visualizer_view/index_data_visualizer_view';
 import { extractErrorProperties } from '../utils/error_utils';
-import type {
+import {
  DataStatsFetchProgress,
+  isRandomSamplingOption,
  OverallStatsSearchStrategyParams,
 } from '../../../../common/types/field_stats';
 import { getDocumentCountStats } from '../search_strategy/requests/get_document_stats';
 import { getInitialProgress, getReducer } from '../progress_utils';
 import { MAX_CONCURRENT_REQUESTS } from '../constants/index_data_visualizer_viewer';
-import { DocumentCountStats } from '../../../../common/types/field_stats';

 /**
 * Helper function to run forkJoin
@ -92,7 +92,6 @@ function displayError(toastNotifications: ToastsStart, index: string, err: any)
 export function useOverallStats<TParams extends OverallStatsSearchStrategyParams>(
  searchStrategyParams: TParams | undefined,
  lastRefresh: number,
-  browserSessionSeed: number,
  probability?: number | null
 ): {
  progress: DataStatsFetchProgress;
@ -114,167 +113,163 @@ export function useOverallStats<TParams extends OverallStatsSearchStrategyParams
  const abortCtrl = useRef(new AbortController());
  const searchSubscription$ = useRef<Subscription>();

-  const startFetch = useCallback(() => {
-    searchSubscription$.current?.unsubscribe();
-    abortCtrl.current.abort();
-    abortCtrl.current = new AbortController();
+  const startFetch = useCallback(async () => {
+    try {
+      searchSubscription$.current?.unsubscribe();
+      abortCtrl.current.abort();
+      abortCtrl.current = new AbortController();

-    if (!searchStrategyParams || lastRefresh === 0) return;
+      if (!searchStrategyParams || lastRefresh === 0) return;

-    setFetchState({
-      ...getInitialProgress(),
-      error: undefined,
-    });
+      setFetchState({
+        ...getInitialProgress(),
+        isRunning: true,
+        error: undefined,
+      });

-    const {
-      aggregatableFields,
-      nonAggregatableFields,
-      index,
-      searchQuery,
-      timeFieldName,
-      earliest,
-      latest,
-      runtimeFieldMap,
-      samplerShardSize,
-    } = searchStrategyParams;
+      const {
+        aggregatableFields,
+        nonAggregatableFields,
+        index,
+        searchQuery,
+        timeFieldName,
+        earliest,
+        latest,
+        runtimeFieldMap,
+        samplingOption,
+      } = searchStrategyParams;

-    const searchOptions: ISearchOptions = {
-      abortSignal: abortCtrl.current.signal,
-      sessionId: searchStrategyParams?.sessionId,
-    };
+      const searchOptions: ISearchOptions = {
+        abortSignal: abortCtrl.current.signal,
+        sessionId: searchStrategyParams?.sessionId,
+      };

-    const nonAggregatableFieldsObs = nonAggregatableFields.map((fieldName: string) =>
-      data.search
-        .search<IKibanaSearchRequest, IKibanaSearchResponse>(
-          {
-            params: checkNonAggregatableFieldExistsRequest(
-              index,
-              searchQuery,
-              fieldName,
-              timeFieldName,
-              earliest,
-              latest,
-              runtimeFieldMap
-            ),
-          },
-          searchOptions
-        )
-        .pipe(
-          map((resp) => {
-            return {
-              ...resp,
-              rawResponse: { ...resp.rawResponse, fieldName },
-            } as IKibanaSearchResponse;
-          })
-        )
-    );
+      const documentCountStats = await getDocumentCountStats(
+        data.search,
+        searchStrategyParams,
+        searchOptions,
+        samplingOption.seed,
+        probability
+      );

-    // Have to divide into smaller requests to avoid 413 payload too large
-    const aggregatableFieldsChunks = chunk(aggregatableFields, 30);
-
-    const aggregatableOverallStatsObs = aggregatableFieldsChunks.map((aggregatableFieldsChunk) =>
-      data.search
-        .search(
-          {
-            params: checkAggregatableFieldsExistRequest(
-              index,
-              searchQuery,
-              aggregatableFieldsChunk,
-              samplerShardSize,
-              timeFieldName,
-              earliest,
-              latest,
-              undefined,
-              runtimeFieldMap
-            ),
-          },
-          searchOptions
-        )
-        .pipe(
-          map((resp) => {
-            return {
-              ...resp,
-              aggregatableFields: aggregatableFieldsChunk,
-            } as AggregatableFieldOverallStats;
-          })
-        )
-    );
-
-    const sub = rateLimitingForkJoin<
-      | DocumentCountStats
-      | AggregatableFieldOverallStats
-      | NonAggregatableFieldOverallStats
-      | undefined
-    >(
-      [
-        from(
-          getDocumentCountStats(
-            data.search,
-            searchStrategyParams,
-            searchOptions,
-            browserSessionSeed,
-            probability
+      const nonAggregatableFieldsObs = nonAggregatableFields.map((fieldName: string) =>
+        data.search
+          .search<IKibanaSearchRequest, IKibanaSearchResponse>(
+            {
+              params: checkNonAggregatableFieldExistsRequest(
+                index,
+                searchQuery,
+                fieldName,
+                timeFieldName,
+                earliest,
+                latest,
+                runtimeFieldMap
+              ),
+            },
+            searchOptions
          )
-        ),
-        ...aggregatableOverallStatsObs,
-        ...nonAggregatableFieldsObs,
-      ],
-      MAX_CONCURRENT_REQUESTS
-    );
+          .pipe(
+            map((resp) => {
+              return {
+                ...resp,
+                rawResponse: { ...resp.rawResponse, fieldName },
+              } as IKibanaSearchResponse;
+            })
+          )
+      );

-    searchSubscription$.current = sub.subscribe({
-      next: (value) => {
-        const aggregatableOverallStatsResp: AggregatableFieldOverallStats[] = [];
-        const nonAggregatableOverallStatsResp: NonAggregatableFieldOverallStats[] = [];
-        const documentCountStats = value[0] as DocumentCountStats;
+      // Have to divide into smaller requests to avoid 413 payload too large
+      const aggregatableFieldsChunks = chunk(aggregatableFields, 30);

-        value.forEach((resp, idx) => {
-          if (!resp || idx === 0) return;
-          if (isAggregatableFieldOverallStats(resp)) {
-            aggregatableOverallStatsResp.push(resp);
-          }
+      if (isRandomSamplingOption(samplingOption)) {
+        samplingOption.probability = documentCountStats.probability ?? 1;
+      }
+      const aggregatableOverallStatsObs = aggregatableFieldsChunks.map((aggregatableFieldsChunk) =>
+        data.search
+          .search(
+            {
+              params: checkAggregatableFieldsExistRequest(
+                index,
+                searchQuery,
+                aggregatableFieldsChunk,
+                samplingOption,
+                timeFieldName,
+                earliest,
+                latest,
+                undefined,
+                runtimeFieldMap
+              ),
+            },
+            searchOptions
+          )
+          .pipe(
+            map((resp) => {
+              return {
+                ...resp,
+                aggregatableFields: aggregatableFieldsChunk,
+              } as AggregatableFieldOverallStats;
+            })
+          )
+      );

-          if (isNonAggregatableFieldOverallStats(resp)) {
-            nonAggregatableOverallStatsResp.push(resp);
-          }
-        });
+      const sub = rateLimitingForkJoin<
+        AggregatableFieldOverallStats | NonAggregatableFieldOverallStats | undefined
+      >([...aggregatableOverallStatsObs, ...nonAggregatableFieldsObs], MAX_CONCURRENT_REQUESTS);

-        const totalCount = documentCountStats?.totalCount ?? 0;
+      searchSubscription$.current = sub.subscribe({
+        next: (value) => {
+          const aggregatableOverallStatsResp: AggregatableFieldOverallStats[] = [];
+          const nonAggregatableOverallStatsResp: NonAggregatableFieldOverallStats[] = [];

-        const aggregatableOverallStats = processAggregatableFieldsExistResponse(
-          aggregatableOverallStatsResp,
-          aggregatableFields,
-          samplerShardSize,
-          totalCount
-        );
+          value.forEach((resp, idx) => {
+            if (isAggregatableFieldOverallStats(resp)) {
+              aggregatableOverallStatsResp.push(resp);
+            }

-        const nonAggregatableOverallStats = processNonAggregatableFieldsExistResponse(
-          nonAggregatableOverallStatsResp,
-          nonAggregatableFields
-        );
+            if (isNonAggregatableFieldOverallStats(resp)) {
+              nonAggregatableOverallStatsResp.push(resp);
+            }
+          });

-        setOverallStats({
-          documentCountStats,
-          ...nonAggregatableOverallStats,
-          ...aggregatableOverallStats,
-          totalCount,
-        });
-      },
-      error: (error) => {
-        displayError(toasts, searchStrategyParams.index, extractErrorProperties(error));
-        setFetchState({
-          isRunning: false,
-          error,
-        });
-      },
-      complete: () => {
-        setFetchState({
-          loaded: 100,
-          isRunning: false,
-        });
-      },
-    });
-    // eslint-disable-next-line react-hooks/exhaustive-deps
+          const totalCount = documentCountStats?.totalCount ?? 0;
+
+          const aggregatableOverallStats = processAggregatableFieldsExistResponse(
+            aggregatableOverallStatsResp,
+            aggregatableFields
+          );
+
+          const nonAggregatableOverallStats = processNonAggregatableFieldsExistResponse(
+            nonAggregatableOverallStatsResp,
+            nonAggregatableFields
+          );
+
+          setOverallStats({
+            documentCountStats,
+            ...nonAggregatableOverallStats,
+            ...aggregatableOverallStats,
+            totalCount,
+          });
+        },
+        error: (error) => {
+          displayError(toasts, searchStrategyParams.index, extractErrorProperties(error));
+          setFetchState({
+            isRunning: false,
+            error,
+          });
+        },
+        complete: () => {
+          setFetchState({
+            loaded: 100,
+            isRunning: false,
+          });
+        },
+      });
+    } catch (error) {
+      // Cancelled requests raise `AbortError` and are ignored; other failures must also clear `isRunning`.
+      if (error.name === 'AbortError') return;
+      setFetchState({ isRunning: false, error });
+      displayError(toasts, searchStrategyParams!.index, extractErrorProperties(error));
+    }
  }, [data.search, searchStrategyParams, toasts, lastRefresh, probability]);

  const cancelFetch = useCallback(() => {
@ -286,8 +281,11 @@ export function useOverallStats<TParams extends OverallStatsSearchStrategyParams
  // auto-update
  useEffect(() => {
    startFetch();
+  }, [startFetch]);
+
+  useEffect(() => {
    return cancelFetch;
-  }, [startFetch, cancelFetch]);
+  }, [cancelFetch]);

  return useMemo(
    () => ({
--- a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/build_random_sampler_agg.ts
+++ b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/build_random_sampler_agg.ts
@ -0,0 +1,103 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
+import {
+  Aggs,
+  SamplingOption,
+  isNormalSamplingOption,
+  isRandomSamplingOption,
+} from '../../../../../common/types/field_stats';
+
+/**
+ * Wraps `aggs` under the `sample` key using the sampler described by `samplingOption`.
+ *
+ * Results always live under `sample`: a missing option, a non-positive shard size or a
+ * probability outside (0, 1] falls back to an unsampled `random_sampler` (probability 1).
+ *
+ * @param aggs aggregations to nest inside the sampler
+ * @param samplingOption sampling mode with its seed and shard size or probability
+ */
+export function buildAggregationWithSamplingOption(
+  aggs: Aggs,
+  samplingOption: SamplingOption
+): Record<string, estypes.AggregationsAggregationContainer> {
+  // Same guard as `buildSamplerAggregation`: a `sampler` needs a positive `shard_size`.
+  if (samplingOption && isNormalSamplingOption(samplingOption) && samplingOption.shardSize > 0) {
+    return {
+      sample: {
+        sampler: {
+          shard_size: samplingOption.shardSize,
+        },
+        aggs,
+      },
+    };
+  }
+
+  const seed = samplingOption?.seed;
+  // Same guard as `buildRandomSamplerAggregation`: only probabilities in (0, 1] are usable.
+  const probability =
+    samplingOption &&
+    isRandomSamplingOption(samplingOption) &&
+    samplingOption.probability > 0 &&
+    samplingOption.probability <= 1
+      ? samplingOption.probability
+      : 1;
+
+  return {
+    sample: {
+      aggs,
+      // @ts-expect-error AggregationsAggregationContainer needs to be updated with random_sampler
+      random_sampler: {
+        probability,
+        ...(seed ? { seed } : {}),
+      },
+    },
+  };
+}
+
+/**
+ * Nests `aggs` under `sample` inside a `random_sampler` aggregation. A `null` probability, or one
+ * outside (0, 1], returns `aggs` unwrapped; a falsy `seed` (including 0) is left out.
+ */
+export function buildRandomSamplerAggregation(
+  aggs: Aggs,
+  probability: number | null,
+  seed: number
+): Record<string, estypes.AggregationsAggregationContainer> {
+  if (probability === null || probability <= 0 || probability > 1) {
+    return aggs;
+  }
+
+  const seedParam = seed ? { seed } : {};
+  const randomSampler = { probability, ...seedParam };
+  return {
+    sample: {
+      aggs,
+      // @ts-expect-error AggregationsAggregationContainer needs to be updated with random_sampler
+      random_sampler: randomSampler,
+    },
+  };
+}
+
+/**
+ * Nests `aggs` under `sample` inside a `sampler` aggregation.
+ * A non-positive `shardSize` returns `aggs` unwrapped.
+ */
+export function buildSamplerAggregation(
+  aggs: Aggs,
+  shardSize: number
+): Record<string, estypes.AggregationsAggregationContainer> {
+  if (shardSize <= 0) {
+    return aggs;
+  }
+
+  const sampler = { shard_size: shardSize };
+  return {
+    sample: { aggs, sampler },
+  };
+}
--- a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_boolean_field_stats.ts
+++ b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_boolean_field_stats.ts
@ -14,9 +14,10 @@ import type {
  ISearchOptions,
  ISearchStart,
 } from '@kbn/data-plugin/public';
-import { buildSamplerAggregation, getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
 import { isPopulatedObject } from '@kbn/ml-is-populated-object';

+import { processTopValues } from './utils';
+import { buildAggregationWithSamplingOption } from './build_random_sampler_agg';
 import type {
  Field,
  BooleanFieldStats,
@ -30,7 +31,7 @@ export const getBooleanFieldsStatsRequest = (
  params: FieldStatsCommonRequestParams,
  fields: Field[]
 ) => {
-  const { index, query, runtimeFieldMap, samplerShardSize } = params;
+  const { index, query, runtimeFieldMap } = params;

  const size = 0;
  const aggs: Aggs = {};
@ -48,7 +49,7 @@ export const getBooleanFieldsStatsRequest = (
  });
  const searchBody = {
    query,
-    aggs: buildSamplerAggregation(aggs, samplerShardSize),
+    aggs: buildAggregationWithSamplingOption(aggs, params.samplingOption),
    ...(isPopulatedObject(runtimeFieldMap) ? { runtime_mappings: runtimeFieldMap } : {}),
  };

@ -65,7 +66,6 @@ export const fetchBooleanFieldsStats = (
  fields: Field[],
  options: ISearchOptions
 ): Observable<BooleanFieldStats[] | FieldStatsError> => {
-  const { samplerShardSize } = params;
  const request: estypes.SearchRequest = getBooleanFieldsStatsRequest(params, fields);
  return dataSearch
    .search<IKibanaSearchRequest, IKibanaSearchResponse>({ params: request }, options)
@ -80,15 +80,34 @@ export const fetchBooleanFieldsStats = (
        if (!isIKibanaSearchResponse(resp)) return resp;

        const aggregations = resp.rawResponse.aggregations;
-        const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
+        const aggsPath = ['sample'];
+        const sampleCount = get(aggregations, [...aggsPath, 'doc_count'], 0);

        const batchStats: BooleanFieldStats[] = fields.map((field, i) => {
          const safeFieldName = field.fieldName;
+          // Sampler agg will yield doc_count that's bigger than the actual # of sampled records
+          // because it uses the stored _doc_count if available
+          // https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-doc-count-field.html
+          // therefore we need to correct it by multiplying by the sampled probability
+          const count = get(
+            aggregations,
+            [...aggsPath, `${safeFieldName}_value_count`, 'doc_count'],
+            0
+          );
+
+          const fieldAgg = get(aggregations, [...aggsPath, `${safeFieldName}_values`], {});
+          const { topValuesSampleSize, topValues } = processTopValues(fieldAgg);
+
+          const multiplier =
+            count > sampleCount ? get(aggregations, [...aggsPath, 'probability'], 1) : 1;
+
          const stats: BooleanFieldStats = {
            fieldName: field.fieldName,
-            count: get(aggregations, [...aggsPath, `${safeFieldName}_value_count`, 'doc_count'], 0),
+            count: count * multiplier,
            trueCount: 0,
            falseCount: 0,
+            topValues,
+            topValuesSampleSize,
          };

          const valueBuckets: Array<{ [key: string]: number }> = get(
@ -97,7 +116,7 @@ export const fetchBooleanFieldsStats = (
            []
          );
          valueBuckets.forEach((bucket) => {
-            stats[`${bucket.key_as_string}Count`] = bucket.doc_count;
+            stats[`${bucket.key_as_string}Count` as 'trueCount' | 'falseCount'] = bucket.doc_count;
          });
          return stats;
        });
--- a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_date_field_stats.ts
+++ b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_date_field_stats.ts
@ -15,8 +15,8 @@ import type {
  ISearchOptions,
  ISearchStart,
 } from '@kbn/data-plugin/public';
-import { buildSamplerAggregation, getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
 import { isPopulatedObject } from '@kbn/ml-is-populated-object';
+import { buildAggregationWithSamplingOption } from './build_random_sampler_agg';
 import type { FieldStatsCommonRequestParams } from '../../../../../common/types/field_stats';
 import type { Field, DateFieldStats, Aggs } from '../../../../../common/types/field_stats';
 import { FieldStatsError, isIKibanaSearchResponse } from '../../../../../common/types/field_stats';
@ -26,7 +26,7 @@ export const getDateFieldsStatsRequest = (
  params: FieldStatsCommonRequestParams,
  fields: Field[]
 ) => {
-  const { index, query, runtimeFieldMap, samplerShardSize } = params;
+  const { index, query, runtimeFieldMap } = params;

  const size = 0;

@ -45,7 +45,7 @@ export const getDateFieldsStatsRequest = (

  const searchBody = {
    query,
-    aggs: buildSamplerAggregation(aggs, samplerShardSize),
+    aggs: buildAggregationWithSamplingOption(aggs, params.samplingOption),
    ...(isPopulatedObject(runtimeFieldMap) ? { runtime_mappings: runtimeFieldMap } : {}),
  };
  return {
@ -61,8 +61,6 @@ export const fetchDateFieldsStats = (
  fields: Field[],
  options: ISearchOptions
 ): Observable<DateFieldStats[] | FieldStatsError> => {
-  const { samplerShardSize } = params;
-
  const request: estypes.SearchRequest = getDateFieldsStatsRequest(params, fields);
  return dataSearch
    .search<IKibanaSearchRequest, IKibanaSearchResponse>({ params: request }, options)
@ -76,15 +74,10 @@ export const fetchDateFieldsStats = (
      map((resp) => {
        if (!isIKibanaSearchResponse(resp)) return resp;
        const aggregations = resp.rawResponse.aggregations;
-        const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
+        const aggsPath = ['sample'];

        const batchStats: DateFieldStats[] = fields.map((field, i) => {
          const safeFieldName = field.safeFieldName;
-          const docCount = get(
-            aggregations,
-            [...aggsPath, `${safeFieldName}_field_stats`, 'doc_count'],
-            0
-          );
          const fieldStatsResp = get(
            aggregations,
            [...aggsPath, `${safeFieldName}_field_stats`, 'actual_stats'],
@ -92,7 +85,6 @@ export const fetchDateFieldsStats = (
          );
          return {
            fieldName: field.fieldName,
-            count: docCount,
            earliest: get(fieldStatsResp, 'min', 0),
            latest: get(fieldStatsResp, 'max', 0),
          } as DateFieldStats;
--- a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_document_stats.ts
+++ b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_document_stats.ts
@ -19,6 +19,8 @@ import type {
 } from '../../../../../common/types/field_stats';

 const MINIMUM_RANDOM_SAMPLER_DOC_COUNT = 100000;
+const DEFAULT_INITIAL_RANDOM_SAMPLER_PROBABILITY = 0.000001;
+
 export const getDocumentCountStatsRequest = (params: OverallStatsSearchStrategyParams) => {
  const {
    index,
@ -69,11 +71,11 @@ export const getDocumentCountStats = async (
  search: DataPublicPluginStart['search'],
  params: OverallStatsSearchStrategyParams,
  searchOptions: ISearchOptions,
-  browserSessionSeed: number,
+  browserSessionSeed: string,
  probability?: number | null,
  minimumRandomSamplerDocCount?: number
 ): Promise<DocumentCountStats> => {
-  const seed = browserSessionSeed ?? Math.abs(seedrandom().int32());
+  const seed = browserSessionSeed ?? Math.abs(seedrandom().int32()).toString();

  const {
    index,
@ -83,10 +85,11 @@ export const getDocumentCountStats = async (
    runtimeFieldMap,
    searchQuery,
    intervalMs,
-    fieldsToFetch,
  } = params;

-  const result = { randomlySampled: false, took: 0, totalCount: 0 };
+  // Probability = 1 represents no sampling
+  const result = { randomlySampled: false, took: 0, totalCount: 0, probability: 1 };
+
  const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, searchQuery);

  const query = {
@ -109,7 +112,7 @@ export const getDocumentCountStats = async (
  // If probability is provided, use that
  // Else, make an initial query using very low p
  // so that we can calculate the next p value that's appropriate for the data set
-  const initialDefaultProbability = probability ?? 0.000001;
+  const initialDefaultProbability = probability ?? DEFAULT_INITIAL_RANDOM_SAMPLER_PROBABILITY;

  const getAggsWithRandomSampling = (p: number) => ({
    sampler: {
@ -121,16 +124,13 @@ export const getDocumentCountStats = async (
    },
  });

+  const hasTimeField = timeFieldName !== undefined && intervalMs !== undefined && intervalMs > 0;
+
  const getSearchParams = (aggregations: unknown, trackTotalHits = false) => ({
    index,
    body: {
      query,
-      ...(!fieldsToFetch &&
-      timeFieldName !== undefined &&
-      intervalMs !== undefined &&
-      intervalMs > 0
-        ? { aggs: aggregations }
-        : {}),
+      ...(hasTimeField ? { aggs: aggregations } : {}),
      ...(isPopulatedObject(runtimeFieldMap) ? { runtime_mappings: runtimeFieldMap } : {}),
    },
    track_total_hits: trackTotalHits,
@ -142,7 +142,7 @@ export const getDocumentCountStats = async (
        params: getSearchParams(
          getAggsWithRandomSampling(initialDefaultProbability),
          // Track total hits if time field is not defined
-          timeFieldName === undefined
+          !hasTimeField
        ),
      },
      searchOptions
@ -189,13 +189,10 @@ export const getDocumentCountStats = async (
    const newProbability =
      (initialDefaultProbability * numDocs) / (numSampled - 2 * Math.sqrt(numSampled));

-    // If the number of docs sampled is indicative of query with < 10 million docs
-    // proceed to make a vanilla aggregation without any sampling
-    if (
-      numSampled === 0 ||
-      newProbability === Infinity ||
-      numSampled / initialDefaultProbability < 1e7
-    ) {
+    // If the number of docs is < 3 million
+    // proceed to make a vanilla aggregation without any sampling (probability = 1)
+    // Minimum of 4 docs (3e6 * 0.000001 + 1) sampled gives us 90% confidence interval # docs is within
+    if (newProbability === Infinity || numSampled <= 4) {
      const vanillaAggResp = await search
        .search(
          {
@ -241,7 +238,7 @@ export const processDocumentCountStats = (
  body: estypes.SearchResponse | undefined,
  params: OverallStatsSearchStrategyParams,
  randomlySampled = false
-): DocumentCountStats | undefined => {
+): Omit<DocumentCountStats, 'probability'> | undefined => {
  if (!body) return undefined;

  let totalCount = 0;
--- a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_numeric_field_stats.ts
+++ b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_numeric_field_stats.ts
@ -16,30 +16,33 @@ import {
  ISearchOptions,
 } from '@kbn/data-plugin/common';
 import type { ISearchStart } from '@kbn/data-plugin/public';
-import { buildSamplerAggregation, getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
 import { isPopulatedObject } from '@kbn/ml-is-populated-object';
-import {
-  MAX_PERCENT,
-  PERCENTILE_SPACING,
-  SAMPLER_TOP_TERMS_SHARD_SIZE,
-  SAMPLER_TOP_TERMS_THRESHOLD,
-} from './constants';
-import type { Aggs, FieldStatsCommonRequestParams } from '../../../../../common/types/field_stats';
+import { processTopValues } from './utils';
+import { isDefined } from '../../../common/util/is_defined';
+import { buildAggregationWithSamplingOption } from './build_random_sampler_agg';
+import { MAX_PERCENT, PERCENTILE_SPACING, SAMPLER_TOP_TERMS_THRESHOLD } from './constants';
+import type {
+  Aggs,
+  Bucket,
+  FieldStatsCommonRequestParams,
+} from '../../../../../common/types/field_stats';
 import type {
  Field,
  NumericFieldStats,
-  Bucket,
  FieldStatsError,
 } from '../../../../../common/types/field_stats';
 import { processDistributionData } from '../../utils/process_distribution_data';
 import { extractErrorProperties } from '../../utils/error_utils';
-import { isIKibanaSearchResponse } from '../../../../../common/types/field_stats';
+import {
+  isIKibanaSearchResponse,
+  isNormalSamplingOption,
+} from '../../../../../common/types/field_stats';

 export const getNumericFieldsStatsRequest = (
  params: FieldStatsCommonRequestParams,
  fields: Field[]
 ) => {
-  const { index, query, runtimeFieldMap, samplerShardSize } = params;
+  const { index, query, runtimeFieldMap } = params;

  const size = 0;

@ -83,23 +86,12 @@ export const getNumericFieldsStatsRequest = (
      } as AggregationsTermsAggregation,
    };

-    // If cardinality >= SAMPLE_TOP_TERMS_THRESHOLD, run the top terms aggregation
-    // in a sampler aggregation, even if no sampling has been specified (samplerShardSize < 1).
-    if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) {
-      aggs[`${safeFieldName}_top`] = buildSamplerAggregation(
-        {
-          top,
-        },
-        0.05
-      );
-    } else {
-      aggs[`${safeFieldName}_top`] = top;
-    }
+    aggs[`${safeFieldName}_top`] = top;
  });

  const searchBody = {
    query,
-    aggs: buildSamplerAggregation(aggs, samplerShardSize),
+    aggs: buildAggregationWithSamplingOption(aggs, params.samplingOption),
    ...(isPopulatedObject(runtimeFieldMap) ? { runtime_mappings: runtimeFieldMap } : {}),
  };

@ -132,7 +124,7 @@ export const fetchNumericFieldsStats = (
        if (!isIKibanaSearchResponse(resp)) return resp;

        const aggregations = resp.rawResponse.aggregations;
-        const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
+        const aggsPath = ['sample'];

        const batchStats: NumericFieldStats[] = [];

@ -154,28 +146,23 @@ export const fetchNumericFieldsStats = (
            topAggsPath.push('top');
          }

-          const topValues: Bucket[] = get(aggregations, [...topAggsPath, 'buckets'], []);
+          const fieldAgg = get(aggregations, [...topAggsPath], {}) as { buckets: Bucket[] };
+          const { topValuesSampleSize, topValues } = processTopValues(fieldAgg);

          const stats: NumericFieldStats = {
            fieldName: field.fieldName,
-            count: docCount,
            min: get(fieldStatsResp, 'min', 0),
            max: get(fieldStatsResp, 'max', 0),
            avg: get(fieldStatsResp, 'avg', 0),
            isTopValuesSampled:
-              field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD || samplerShardSize > 0,
+              isNormalSamplingOption(params.samplingOption) ||
+              (isDefined(params.samplingProbability) && params.samplingProbability < 1),
            topValues,
-            topValuesSampleSize: topValues.reduce(
-              (acc, curr) => acc + curr.doc_count,
-              get(aggregations, [...topAggsPath, 'sum_other_doc_count'], 0)
-            ),
-            topValuesSamplerShardSize:
-              field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD
-                ? SAMPLER_TOP_TERMS_SHARD_SIZE
-                : samplerShardSize,
+            topValuesSampleSize,
+            topValuesSamplerShardSize: get(aggregations, ['sample', 'doc_count']),
          };

-          if (stats.count > 0) {
+          if (docCount > 0) {
            const percentiles = get(
              aggregations,
              [...aggsPath, `${safeFieldName}_percentiles`, 'values'],
--- a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_string_field_stats.ts
+++ b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/get_string_field_stats.ts
@ -15,12 +15,12 @@ import type {
  ISearchOptions,
  ISearchStart,
 } from '@kbn/data-plugin/public';
-import { buildSamplerAggregation, getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
 import { isPopulatedObject } from '@kbn/ml-is-populated-object';
-import { SAMPLER_TOP_TERMS_SHARD_SIZE, SAMPLER_TOP_TERMS_THRESHOLD } from './constants';
+import { processTopValues } from './utils';
+import { buildAggregationWithSamplingOption } from './build_random_sampler_agg';
+import { SAMPLER_TOP_TERMS_THRESHOLD } from './constants';
 import type {
  Aggs,
-  Bucket,
  Field,
  FieldStatsCommonRequestParams,
  StringFieldStats,
@ -32,7 +32,7 @@ export const getStringFieldStatsRequest = (
  params: FieldStatsCommonRequestParams,
  fields: Field[]
 ) => {
-  const { index, query, runtimeFieldMap, samplerShardSize } = params;
+  const { index, query, runtimeFieldMap } = params;

  const size = 0;

@ -49,25 +49,12 @@ export const getStringFieldStatsRequest = (
      } as AggregationsTermsAggregation,
    };

-    // If cardinality >= SAMPLE_TOP_TERMS_THRESHOLD, run the top terms aggregation
-    // in a sampler aggregation, even if no sampling has been specified (samplerShardSize < 1).
-    if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) {
-      aggs[`${safeFieldName}_top`] = {
-        sampler: {
-          shard_size: SAMPLER_TOP_TERMS_SHARD_SIZE,
-        },
-        aggs: {
-          top,
-        },
-      };
-    } else {
-      aggs[`${safeFieldName}_top`] = top;
-    }
+    aggs[`${safeFieldName}_top`] = top;
  });

  const searchBody = {
    query,
-    aggs: buildSamplerAggregation(aggs, samplerShardSize),
+    aggs: buildAggregationWithSamplingOption(aggs, params.samplingOption),
    ...(isPopulatedObject(runtimeFieldMap) ? { runtime_mappings: runtimeFieldMap } : {}),
  };

@ -99,7 +86,8 @@ export const fetchStringFieldsStats = (
      map((resp) => {
        if (!isIKibanaSearchResponse(resp)) return resp;
        const aggregations = resp.rawResponse.aggregations;
-        const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
+
+        const aggsPath = ['sample'];
        const batchStats: StringFieldStats[] = [];

        fields.forEach((field, i) => {
@ -110,21 +98,18 @@ export const fetchStringFieldsStats = (
            topAggsPath.push('top');
          }

-          const topValues: Bucket[] = get(aggregations, [...topAggsPath, 'buckets'], []);
+          const fieldAgg = get(aggregations, [...topAggsPath], {});

+          const { topValuesSampleSize, topValues } = processTopValues(
+            fieldAgg,
+            get(aggregations, ['sample', 'doc_count'])
+          );
          const stats = {
            fieldName: field.fieldName,
-            isTopValuesSampled:
-              field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD || samplerShardSize > 0,
+            isTopValuesSampled: true,
            topValues,
-            topValuesSampleSize: topValues.reduce(
-              (acc, curr) => acc + curr.doc_count,
-              get(aggregations, [...topAggsPath, 'sum_other_doc_count'], 0)
-            ),
-            topValuesSamplerShardSize:
-              field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD
-                ? SAMPLER_TOP_TERMS_SHARD_SIZE
-                : samplerShardSize,
+            topValuesSampleSize,
+            topValuesSamplerShardSize: get(aggregations, ['sample', 'doc_count']),
          };

          batchStats.push(stats);
--- a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/overall_stats.ts
+++ b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/overall_stats.ts
@ -10,21 +10,21 @@ import { get } from 'lodash';
 import { Query } from '@kbn/es-query';
 import { IKibanaSearchResponse } from '@kbn/data-plugin/common';
 import type { AggCardinality } from '@kbn/ml-agg-utils';
-import { buildSamplerAggregation, getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
 import { isPopulatedObject } from '@kbn/ml-is-populated-object';
+import { buildAggregationWithSamplingOption } from './build_random_sampler_agg';
 import {
  buildBaseFilterCriteria,
  getSafeAggregationName,
 } from '../../../../../common/utils/query_utils';
 import { getDatafeedAggregations } from '../../../../../common/utils/datafeed_utils';
 import { AggregatableField, NonAggregatableField } from '../../types/overall_stats';
-import { Aggs } from '../../../../../common/types/field_stats';
+import { Aggs, SamplingOption } from '../../../../../common/types/field_stats';

 export const checkAggregatableFieldsExistRequest = (
  dataViewTitle: string,
  query: Query['query'],
  aggregatableFields: string[],
-  samplerShardSize: number,
+  samplingOption: SamplingOption,
  timeFieldName: string | undefined,
  earliestMs?: number,
  latestMs?: number,
@ -73,7 +73,9 @@ export const checkAggregatableFieldsExistRequest = (
        filter: filterCriteria,
      },
    },
-    ...(isPopulatedObject(aggs) ? { aggs: buildSamplerAggregation(aggs, samplerShardSize) } : {}),
+    ...(isPopulatedObject(aggs)
+      ? { aggs: buildAggregationWithSamplingOption(aggs, samplingOption) }
+      : {}),
    ...(isPopulatedObject(combinedRuntimeMappings)
      ? { runtime_mappings: combinedRuntimeMappings }
      : {}),
@ -109,8 +111,6 @@ export function isNonAggregatableFieldOverallStats(
 export const processAggregatableFieldsExistResponse = (
  responses: AggregatableFieldOverallStats[] | undefined,
  aggregatableFields: string[],
-  samplerShardSize: number,
-  totalCount: number,
  datafeedConfig?: estypes.MlDatafeed
 ) => {
  const stats = {
@ -123,12 +123,17 @@ export const processAggregatableFieldsExistResponse = (
  responses.forEach(({ rawResponse: body, aggregatableFields: aggregatableFieldsChunk }) => {
    const aggregations = body.aggregations;

-    const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
-    const sampleCount =
-      samplerShardSize > 0 ? get(aggregations, ['sample', 'doc_count'], 0) : totalCount;
+    const aggsPath = ['sample'];
+    const sampleCount = aggregations.sample.doc_count;
    aggregatableFieldsChunk.forEach((field, i) => {
      const safeFieldName = getSafeAggregationName(field, i);
+      // Sampler agg will yield doc_count that's bigger than the actual # of sampled records
+      // because it uses the stored _doc_count if available
+      // https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-doc-count-field.html
+      // therefore we need to correct it by multiplying by the sampled probability
      const count = get(aggregations, [...aggsPath, `${safeFieldName}_count`, 'doc_count'], 0);
+      const multiplier =
+        count > sampleCount ? get(aggregations, [...aggsPath, 'probability'], 1) : 1;
      if (count > 0) {
        const cardinality = get(
          aggregations,
@ -140,7 +145,7 @@ export const processAggregatableFieldsExistResponse = (
          existsInDocs: true,
          stats: {
            sampleCount,
-            count,
+            count: count * multiplier,
            cardinality,
          },
        });
--- a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/utils.ts
+++ b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/search_strategy/requests/utils.ts
@ -0,0 +1,42 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { isPopulatedObject } from '@kbn/ml-is-populated-object';
+import { Bucket } from '../../../../../common/types/field_stats';
+
+/**
+ * Derives the top values and their sample size from a terms aggregation result, summing bucket
+ * counts with `sum_other_doc_count` so the total is right whether or not `_doc_count` is set.
+ * https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-doc-count-field.html
+ * @param aggResult - aggregation result; `buckets` and `sum_other_doc_count` are read if present
+ * @param sampledCount - optional doc count; when truthy, bucket doc_counts are rescaled to it
+ * @returns `topValuesSampleSize` (summed doc count) and `topValues` (buckets plus `percent`)
+ */
+export const processTopValues = (aggResult: object, sampledCount?: number) => {
+  const topValuesBuckets: Bucket[] = isPopulatedObject<'buckets', Bucket[]>(aggResult, ['buckets'])
+    ? aggResult.buckets
+    : [];
+  const sumOtherDocCount = isPopulatedObject<'sum_other_doc_count', number>(aggResult, [
+    'sum_other_doc_count',
+  ])
+    ? aggResult.sum_other_doc_count
+    : 0;
+  const valuesInTopBuckets =
+    topValuesBuckets.reduce((prev, bucket) => bucket.doc_count + prev, 0) || 0;
+  // We could use `aggregations.sample.sample_count.value` instead, but it does not always give a correct sum
+  // See Github issue #144625
+  const realNumberOfDocuments = valuesInTopBuckets + sumOtherDocCount;
+  // Guard the divisions below: an all-zero result would otherwise yield NaN fractions and counts.
+  const hasDocuments = realNumberOfDocuments > 0;
+  const scale = sampledCount && hasDocuments ? sampledCount / realNumberOfDocuments : undefined;
+  const topValues = topValuesBuckets.map((bucket) => ({
+    ...bucket,
+    doc_count: scale !== undefined ? Math.floor(bucket.doc_count * scale) : bucket.doc_count,
+    percent: hasDocuments ? bucket.doc_count / realNumberOfDocuments : 0,
+  }));
+  return { topValuesSampleSize: realNumberOfDocuments, topValues };
+};
--- a/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/types/index_data_visualizer_state.ts
+++ b/x-pack/plugins/data_visualizer/public/application/index_data_visualizer/types/index_data_visualizer_state.ts
@ -9,7 +9,6 @@ import type { Filter } from '@kbn/es-query';
 import type { Query } from '@kbn/data-plugin/common/query';
 import type { RandomSamplerOption } from '../constants/random_sampler';
 import type { SearchQueryLanguage } from './combined_query';
-
 export interface ListingPageUrlState {
  pageSize: number;
  pageIndex: number;
--- a/x-pack/plugins/ml/server/models/data_visualizer/data_visualizer.ts
+++ b/x-pack/plugins/ml/server/models/data_visualizer/data_visualizer.ts
@ -656,20 +656,7 @@ export class DataVisualizer {
        },
      };

-      // If cardinality >= SAMPLE_TOP_TERMS_THRESHOLD, run the top terms aggregation
-      // in a sampler aggregation, even if no sampling has been specified (samplerShardSize < 1).
-      if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) {
-        aggs[`${safeFieldName}_top`] = {
-          sampler: {
-            shard_size: SAMPLER_TOP_TERMS_SHARD_SIZE,
-          },
-          aggs: {
-            top,
-          },
-        };
-      } else {
-        aggs[`${safeFieldName}_top`] = top;
-      }
+      aggs[`${safeFieldName}_top`] = top;
    });

    const searchBody = {
@ -782,20 +769,7 @@ export class DataVisualizer {
        },
      };

-      // If cardinality >= SAMPLE_TOP_TERMS_THRESHOLD, run the top terms aggregation
-      // in a sampler aggregation, even if no sampling has been specified (samplerShardSize < 1).
-      if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) {
-        aggs[`${safeFieldName}_top`] = {
-          sampler: {
-            shard_size: SAMPLER_TOP_TERMS_SHARD_SIZE,
-          },
-          aggs: {
-            top,
-          },
-        };
-      } else {
-        aggs[`${safeFieldName}_top`] = top;
-      }
+      aggs[`${safeFieldName}_top`] = top;
    });

    const searchBody = {
--- a/x-pack/plugins/translations/translations/fr-FR.json
+++ b/x-pack/plugins/translations/translations/fr-FR.json
@ -9941,8 +9941,6 @@
    "xpack.dataVisualizer.dataGrid.field.metricDistributionChart.tooltipValueBetweenLabel": "{percent} % des documents ont des valeurs comprises entre {minValFormatted} et {maxValFormatted}",
    "xpack.dataVisualizer.dataGrid.field.metricDistributionChart.tooltipValueEqualLabel": "{percent} % des documents ont une valeur de {valFormatted}",
    "xpack.dataVisualizer.dataGrid.field.removeFilterAriaLabel": "Exclure le {fieldName} : \"{value}\"",
-    "xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromSampleDescription": "Calculé à partir d'un échantillon de {topValuesSamplerShardSize} documents par partition",
-    "xpack.dataVisualizer.dataGrid.fieldExpandedRow.choroplethMapTopValues.calculatedFromSampleDescription": "Calculé à partir d'un échantillon de {topValuesSamplerShardSize} documents par partition",
    "xpack.dataVisualizer.dataGrid.fieldExpandedRow.numberContent.displayingPercentilesLabel": "Affichage de {minPercent} - {maxPercent} centiles",
    "xpack.dataVisualizer.dataGrid.fieldText.fieldMayBePopulatedDescription": "Il peut être rempli, par exemple, à l'aide d'un paramètre {copyToParam} dans le mapping du document ou être réduit à partir du champ {sourceParam} après une indexation par l'utilisation des paramètres {includesParam} et {excludesParam}.",
    "xpack.dataVisualizer.dataGrid.fieldText.fieldNotPresentDescription": "Ce champ n'était pas présent dans le champ {sourceParam} des documents interrogés.",
@ -9980,7 +9978,6 @@
    "xpack.dataVisualizer.nameCollisionMsg": "\"{name}\" existe déjà, veuillez fournir un nom unique",
    "xpack.dataVisualizer.randomSamplerSettingsPopUp.probabilityLabel": "Probabilité utilisée : {samplingProbability} %",
    "xpack.dataVisualizer.searchPanel.ofFieldsTotal": "sur un total de {totalCount}",
-    "xpack.dataVisualizer.searchPanel.sampleSizeOptionLabel": "Taille de l'échantillon (par partition) : {wrappedValue}",
    "xpack.dataVisualizer.searchPanel.totalDocCountLabel": "Total des documents : {prepend}{strongTotalCount}",
    "xpack.dataVisualizer.searchPanel.totalDocCountNumber": "{totalCount, plural, other {#}}",
    "xpack.dataVisualizer.addCombinedFieldsLabel": "Ajouter un champ combiné",
@ -10013,8 +10010,6 @@
    "xpack.dataVisualizer.dataGrid.field.loadingLabel": "Chargement",
    "xpack.dataVisualizer.dataGrid.field.metricDistributionChart.seriesName": "distribution",
    "xpack.dataVisualizer.dataGrid.field.topValuesLabel": "Valeurs les plus élevées",
-    "xpack.dataVisualizer.dataGrid.fieldExpandedRow.booleanContent.falseCountLabel": "faux",
-    "xpack.dataVisualizer.dataGrid.fieldExpandedRow.booleanContent.trueCountLabel": "vrai",
    "xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.countLabel": "compte",
    "xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.distinctValueLabel": "valeurs distinctes",
    "xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.metaTableTitle": "Statistiques des documents",
@ -10259,13 +10254,10 @@
    "xpack.dataVisualizer.removeCombinedFieldsLabel": "Retirer le champ combiné",
    "xpack.dataVisualizer.samplingOptionsButton": "Options d’échantillonnage",
    "xpack.dataVisualizer.searchPanel.allFieldsLabel": "Tous les champs",
-    "xpack.dataVisualizer.searchPanel.allOptionLabel": "Tout rechercher",
    "xpack.dataVisualizer.searchPanel.invalidSyntax": "Syntaxe non valide",
    "xpack.dataVisualizer.searchPanel.numberFieldsLabel": "Champs de numéros",
-    "xpack.dataVisualizer.searchPanel.queryBarPlaceholder": "La sélection d'une taille d'échantillon plus petite réduira les temps d'exécution de la requête et la charge sur le cluster.",
    "xpack.dataVisualizer.searchPanel.queryBarPlaceholderText": "Rechercher… (par exemple, status:200 AND extension:\"PHP\")",
    "xpack.dataVisualizer.searchPanel.randomSamplerMessage": "Des valeurs approximatives sont indiquées dans le décompte de documents et le graphique, qui utilisent des agrégations par échantillonnage aléatoire.",
-    "xpack.dataVisualizer.searchPanel.sampleSizeAriaLabel": "Sélectionner le nombre de documents à échantillonner",
    "xpack.dataVisualizer.searchPanel.showEmptyFields": "Afficher les champs vides",
    "xpack.dataVisualizer.title": "Charger un fichier",
    "xpack.embeddableEnhanced.actions.panelNotifications.manyDrilldowns": "Le panneau comporte {count} recherches",
--- a/x-pack/plugins/translations/translations/ja-JP.json
+++ b/x-pack/plugins/translations/translations/ja-JP.json
@ -9928,8 +9928,6 @@
    "xpack.dataVisualizer.dataGrid.field.metricDistributionChart.tooltipValueBetweenLabel": "{percent}% のドキュメントに {minValFormatted} から {maxValFormatted} の間の値があります",
    "xpack.dataVisualizer.dataGrid.field.metricDistributionChart.tooltipValueEqualLabel": "{percent}% のドキュメントに {valFormatted} の値があります",
    "xpack.dataVisualizer.dataGrid.field.removeFilterAriaLabel": "{fieldName}の除外：\"{value}\"",
-    "xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromSampleDescription": "1 つのシャードにつき {topValuesSamplerShardSize} のドキュメントのサンプルで計算されています",
-    "xpack.dataVisualizer.dataGrid.fieldExpandedRow.choroplethMapTopValues.calculatedFromSampleDescription": "1 つのシャードにつき {topValuesSamplerShardSize} のドキュメントのサンプルで計算されています",
    "xpack.dataVisualizer.dataGrid.fieldExpandedRow.numberContent.displayingPercentilesLabel": "{minPercent} - {maxPercent} パーセンタイルを表示中",
    "xpack.dataVisualizer.dataGrid.fieldText.fieldMayBePopulatedDescription": "たとえば、ドキュメントマッピングで {copyToParam} パラメーターを使ったり、{includesParam} と {excludesParam} パラメーターを使用してインデックスした後に {sourceParam} フィールドから切り取ったりして入力される場合があります。",
    "xpack.dataVisualizer.dataGrid.fieldText.fieldNotPresentDescription": "このフィールドはクエリが実行されたドキュメントの {sourceParam} フィールドにありませんでした。",
@ -9966,7 +9964,6 @@
    "xpack.dataVisualizer.nameCollisionMsg": "「{name}」はすでに存在します。一意の名前を入力してください。",
    "xpack.dataVisualizer.randomSamplerSettingsPopUp.probabilityLabel": "使用された確率：{samplingProbability}%",
    "xpack.dataVisualizer.searchPanel.ofFieldsTotal": "合計 {totalCount}",
-    "xpack.dataVisualizer.searchPanel.sampleSizeOptionLabel": "サンプルサイズ（シャード単位）：{wrappedValue}",
    "xpack.dataVisualizer.searchPanel.totalDocCountLabel": "合計ドキュメント数：{prepend}{strongTotalCount}",
    "xpack.dataVisualizer.searchPanel.totalDocCountNumber": "{totalCount, plural, other  {#}}",
    "xpack.dataVisualizer.addCombinedFieldsLabel": "結合されたフィールドを追加",
@ -9999,8 +9996,6 @@
    "xpack.dataVisualizer.dataGrid.field.loadingLabel": "読み込み中",
    "xpack.dataVisualizer.dataGrid.field.metricDistributionChart.seriesName": "分布",
    "xpack.dataVisualizer.dataGrid.field.topValuesLabel": "トップの値",
-    "xpack.dataVisualizer.dataGrid.fieldExpandedRow.booleanContent.falseCountLabel": "false",
-    "xpack.dataVisualizer.dataGrid.fieldExpandedRow.booleanContent.trueCountLabel": "true",
    "xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.countLabel": "カウント",
    "xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.distinctValueLabel": "固有の値",
    "xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.metaTableTitle": "ドキュメント統計情報",
@ -10245,13 +10240,10 @@
    "xpack.dataVisualizer.removeCombinedFieldsLabel": "結合されたフィールドを削除",
    "xpack.dataVisualizer.samplingOptionsButton": "抽出オプション",
    "xpack.dataVisualizer.searchPanel.allFieldsLabel": "すべてのフィールド",
-    "xpack.dataVisualizer.searchPanel.allOptionLabel": "すべて検索",
    "xpack.dataVisualizer.searchPanel.invalidSyntax": "無効な構文",
    "xpack.dataVisualizer.searchPanel.numberFieldsLabel": "数値フィールド",
-    "xpack.dataVisualizer.searchPanel.queryBarPlaceholder": "小さいサンプルサイズを選択することで、クエリの実行時間を短縮しクラスターへの負荷を軽減できます。",
    "xpack.dataVisualizer.searchPanel.queryBarPlaceholderText": "検索…（例：status:200 AND extension:\"PHP\"）",
    "xpack.dataVisualizer.searchPanel.randomSamplerMessage": "近似値は、ランダムサンプラーアグリゲーションを使用する、合計ドキュメント数およびグラフに表示されます。",
-    "xpack.dataVisualizer.searchPanel.sampleSizeAriaLabel": "サンプリングするドキュメント数を選択してください",
    "xpack.dataVisualizer.searchPanel.showEmptyFields": "空のフィールドを表示",
    "xpack.dataVisualizer.title": "ファイルをアップロード",
    "xpack.embeddableEnhanced.actions.panelNotifications.manyDrilldowns": "パネルには{count}個のドリルダウンがあります",
--- a/x-pack/plugins/translations/translations/zh-CN.json
+++ b/x-pack/plugins/translations/translations/zh-CN.json
@ -9946,8 +9946,6 @@
    "xpack.dataVisualizer.dataGrid.field.metricDistributionChart.tooltipValueBetweenLabel": "{percent}% 的文档具有介于 {minValFormatted} 和 {maxValFormatted} 之间的值",
    "xpack.dataVisualizer.dataGrid.field.metricDistributionChart.tooltipValueEqualLabel": "{percent}% 的文档的值为 {valFormatted}",
    "xpack.dataVisualizer.dataGrid.field.removeFilterAriaLabel": "筛除 {fieldName}：“{value}”",
-    "xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromSampleDescription": "基于每个分片的 {topValuesSamplerShardSize} 文档样例计算",
-    "xpack.dataVisualizer.dataGrid.fieldExpandedRow.choroplethMapTopValues.calculatedFromSampleDescription": "基于每个分片的 {topValuesSamplerShardSize} 文档样例计算",
    "xpack.dataVisualizer.dataGrid.fieldExpandedRow.numberContent.displayingPercentilesLabel": "正在显示 {minPercent} - {maxPercent} 百分位数",
    "xpack.dataVisualizer.dataGrid.fieldText.fieldMayBePopulatedDescription": "例如，可以使用文档映射中的 {copyToParam} 参数进行填充，也可以在索引后通过使用 {includesParam} 和 {excludesParam} 参数从 {sourceParam} 字段中修剪。",
    "xpack.dataVisualizer.dataGrid.fieldText.fieldNotPresentDescription": "查询的文档的 {sourceParam} 字段中不存在此字段。",
@ -9985,7 +9983,6 @@
    "xpack.dataVisualizer.nameCollisionMsg": "“{name}”已存在，请提供唯一名称",
    "xpack.dataVisualizer.randomSamplerSettingsPopUp.probabilityLabel": "使用的概率：{samplingProbability}%",
    "xpack.dataVisualizer.searchPanel.ofFieldsTotal": "，共 {totalCount} 个",
-    "xpack.dataVisualizer.searchPanel.sampleSizeOptionLabel": "样本大小（每分片）：{wrappedValue}",
    "xpack.dataVisualizer.searchPanel.totalDocCountLabel": "文档总数：{prepend}{strongTotalCount}",
    "xpack.dataVisualizer.searchPanel.totalDocCountNumber": "{totalCount, plural, other {#}}",
    "xpack.dataVisualizer.addCombinedFieldsLabel": "添加组合字段",
@ -10018,8 +10015,6 @@
    "xpack.dataVisualizer.dataGrid.field.loadingLabel": "正在加载",
    "xpack.dataVisualizer.dataGrid.field.metricDistributionChart.seriesName": "分布",
    "xpack.dataVisualizer.dataGrid.field.topValuesLabel": "排名最前值",
-    "xpack.dataVisualizer.dataGrid.fieldExpandedRow.booleanContent.falseCountLabel": "false",
-    "xpack.dataVisualizer.dataGrid.fieldExpandedRow.booleanContent.trueCountLabel": "true",
    "xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.countLabel": "计数",
    "xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.distinctValueLabel": "不同值",
    "xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.metaTableTitle": "文档统计",
@ -10264,13 +10259,10 @@
    "xpack.dataVisualizer.removeCombinedFieldsLabel": "移除组合字段",
    "xpack.dataVisualizer.samplingOptionsButton": "采样选项",
    "xpack.dataVisualizer.searchPanel.allFieldsLabel": "所有字段",
-    "xpack.dataVisualizer.searchPanel.allOptionLabel": "搜索全部",
    "xpack.dataVisualizer.searchPanel.invalidSyntax": "语法无效",
    "xpack.dataVisualizer.searchPanel.numberFieldsLabel": "字段数目",
-    "xpack.dataVisualizer.searchPanel.queryBarPlaceholder": "选择较小的样例大小将减少查询运行时间和集群上的负载。",
    "xpack.dataVisualizer.searchPanel.queryBarPlaceholderText": "搜索……（例如，status:200 AND extension:\"PHP\"）",
    "xpack.dataVisualizer.searchPanel.randomSamplerMessage": "总文档计数和图表中将显示近似值，它们使用随机采样器聚合。",
-    "xpack.dataVisualizer.searchPanel.sampleSizeAriaLabel": "选择要采样的文档数目",
    "xpack.dataVisualizer.searchPanel.showEmptyFields": "显示空字段",
    "xpack.dataVisualizer.title": "上传文件",
    "xpack.embeddableEnhanced.actions.panelNotifications.manyDrilldowns": "面板有 {count} 个向下钻取",
--- a/x-pack/test/functional/apps/ml/data_visualizer/index_data_visualizer.ts
+++ b/x-pack/test/functional/apps/ml/data_visualizer/index_data_visualizer.ts
@ -14,7 +14,7 @@ import {
  farequoteKQLSearchTestData,
  farequoteLuceneSearchTestData,
  sampleLogTestData,
-} from './index_test_data';
+} from './index_test_data_random_sampler';

 export default function ({ getPageObject, getService }: FtrProviderContext) {
  const headerPage = getPageObject('header');
@ -62,7 +62,6 @@ export default function ({ getPageObject, getService }: FtrProviderContext) {
      }

      await ml.dataVisualizerTable.assertSearchPanelExist();
-      await ml.dataVisualizerTable.assertSampleSizeInputExists();
      await ml.dataVisualizerTable.assertFieldTypeInputExists();
      await ml.dataVisualizerTable.assertFieldNameInputExists();

@ -113,18 +112,6 @@ export default function ({ getPageObject, getService }: FtrProviderContext) {
        );
      }

-      await ml.testExecution.logTestStep(
-        `${testData.suiteTitle} sample size control changes non-metric fields`
-      );
-      for (const sampleSizeCase of testData.sampleSizeValidations) {
-        const { size, expected } = sampleSizeCase;
-        await ml.dataVisualizerTable.setSampleSizeInputValue(
-          size,
-          expected.field,
-          expected.docCountFormatted
-        );
-      }
-
      await ml.testExecution.logTestStep('sets and resets field type filter correctly');
      await ml.dataVisualizerTable.setFieldTypeFilter(
        testData.fieldTypeFilters,
--- a/x-pack/test/functional/apps/ml/data_visualizer/index_data_visualizer_data_view_management.ts
+++ b/x-pack/test/functional/apps/ml/data_visualizer/index_data_visualizer_data_view_management.ts
@ -63,7 +63,7 @@ export default function ({ getService }: FtrProviderContext) {
          aggregatable: true,
          loading: false,
          exampleCount: 11,
-          docCountFormatted: '5000 (100%)',
+          docCountFormatted: '86,274 (100%)',
          viewableInLens: true,
          hasActionMenu: true,
        },
@ -92,7 +92,7 @@ export default function ({ getService }: FtrProviderContext) {
          existsInDocs: true,
          aggregatable: true,
          loading: false,
-          docCountFormatted: '5000 (100%)',
+          docCountFormatted: '86,274 (100%)',
          statsMaxDecimalPlaces: 3,
          topValuesCount: 11,
          viewableInLens: true,
@ -153,7 +153,6 @@ export default function ({ getService }: FtrProviderContext) {
    }

    await ml.dataVisualizerTable.assertSearchPanelExist();
-    await ml.dataVisualizerTable.assertSampleSizeInputExists();
    await ml.dataVisualizerTable.assertFieldTypeInputExists();
    await ml.dataVisualizerTable.assertFieldNameInputExists();

--- a/x-pack/test/functional/apps/ml/data_visualizer/index_test_data.ts
+++ b/x-pack/test/functional/apps/ml/data_visualizer/index_test_data.ts
@ -15,8 +15,8 @@ export const farequoteDataViewTestData: TestData = {
  fieldNameFilters: ['airline', '@timestamp'],
  fieldTypeFilters: [ML_JOB_FIELD_TYPES.KEYWORD],
  sampleSizeValidations: [
-    { size: 1000, expected: { field: 'airline', docCountFormatted: '1000 (100%)' } },
-    { size: 5000, expected: { field: '@timestamp', docCountFormatted: '5000 (100%)' } },
+    { size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
+    { size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
  ],
  expected: {
    totalDocCountFormatted: '86,274',
@ -27,7 +27,7 @@ export const farequoteDataViewTestData: TestData = {
        existsInDocs: true,
        aggregatable: true,
        loading: false,
-        docCountFormatted: '5000 (100%)',
+        docCountFormatted: '5,000 (100%)',
        statsMaxDecimalPlaces: 3,
        topValuesCount: 11,
        viewableInLens: true,
@ -40,7 +40,7 @@ export const farequoteDataViewTestData: TestData = {
        existsInDocs: true,
        aggregatable: true,
        loading: false,
-        docCountFormatted: '5000 (100%)',
+        docCountFormatted: '5,000 (100%)',
        exampleCount: 2,
        viewableInLens: true,
      },
@ -61,7 +61,7 @@ export const farequoteDataViewTestData: TestData = {
        aggregatable: true,
        loading: false,
        exampleCount: 1,
-        docCountFormatted: '5000 (100%)',
+        docCountFormatted: '5,000 (100%)',
        viewableInLens: true,
      },
      {
@ -71,7 +71,7 @@ export const farequoteDataViewTestData: TestData = {
        aggregatable: true,
        loading: false,
        exampleCount: 11,
-        docCountFormatted: '5000 (100%)',
+        docCountFormatted: '5,000 (100%)',
        viewableInLens: true,
      },
      {
@ -91,7 +91,7 @@ export const farequoteDataViewTestData: TestData = {
        aggregatable: true,
        loading: false,
        exampleCount: 1,
-        docCountFormatted: '5000 (100%)',
+        docCountFormatted: '5,000 (100%)',
        viewableInLens: true,
      },
    ],
@ -112,8 +112,8 @@ export const farequoteKQLSearchTestData: TestData = {
  fieldNameFilters: ['@version'],
  fieldTypeFilters: [ML_JOB_FIELD_TYPES.DATE, ML_JOB_FIELD_TYPES.TEXT],
  sampleSizeValidations: [
-    { size: 1000, expected: { field: 'airline', docCountFormatted: '1000 (100%)' } },
-    { size: 5000, expected: { field: '@timestamp', docCountFormatted: '5000 (100%)' } },
+    { size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
+    { size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
  ],
  expected: {
    totalDocCountFormatted: '34,415',
@ -124,7 +124,7 @@ export const farequoteKQLSearchTestData: TestData = {
        existsInDocs: true,
        aggregatable: true,
        loading: false,
-        docCountFormatted: '5000 (100%)',
+        docCountFormatted: '5,000 (100%)',
        statsMaxDecimalPlaces: 3,
        topValuesCount: 11,
        viewableInLens: true,
@ -137,7 +137,7 @@ export const farequoteKQLSearchTestData: TestData = {
        existsInDocs: true,
        aggregatable: true,
        loading: false,
-        docCountFormatted: '5000 (100%)',
+        docCountFormatted: '5,000 (100%)',
        exampleCount: 2,
        viewableInLens: true,
      },
@ -158,7 +158,7 @@ export const farequoteKQLSearchTestData: TestData = {
        aggregatable: true,
        loading: false,
        exampleCount: 1,
-        docCountFormatted: '5000 (100%)',
+        docCountFormatted: '5,000 (100%)',
        viewableInLens: true,
      },
      {
@ -168,7 +168,7 @@ export const farequoteKQLSearchTestData: TestData = {
        aggregatable: true,
        loading: false,
        exampleCount: 5,
-        docCountFormatted: '5000 (100%)',
+        docCountFormatted: '5,000 (100%)',
        viewableInLens: true,
      },
      {
@ -188,7 +188,7 @@ export const farequoteKQLSearchTestData: TestData = {
        aggregatable: true,
        loading: false,
        exampleCount: 1,
-        docCountFormatted: '5000 (100%)',
+        docCountFormatted: '5,000 (100%)',
        viewableInLens: true,
      },
    ],
@ -209,8 +209,8 @@ export const farequoteKQLFiltersSearchTestData: TestData = {
  fieldNameFilters: ['@version'],
  fieldTypeFilters: [ML_JOB_FIELD_TYPES.DATE, ML_JOB_FIELD_TYPES.TEXT],
  sampleSizeValidations: [
-    { size: 1000, expected: { field: 'airline', docCountFormatted: '1000 (100%)' } },
-    { size: 5000, expected: { field: '@timestamp', docCountFormatted: '5000 (100%)' } },
+    { size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
+    { size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
  ],
  expected: {
    filters: [{ key: 'airline', value: 'ASA' }],
@ -222,7 +222,7 @@ export const farequoteKQLFiltersSearchTestData: TestData = {
        existsInDocs: true,
        aggregatable: true,
        loading: false,
-        docCountFormatted: '5000 (100%)',
+        docCountFormatted: '5,000 (100%)',
        statsMaxDecimalPlaces: 3,
        topValuesCount: 11,
        viewableInLens: true,
@ -235,7 +235,7 @@ export const farequoteKQLFiltersSearchTestData: TestData = {
        existsInDocs: true,
        aggregatable: true,
        loading: false,
-        docCountFormatted: '5000 (100%)',
+        docCountFormatted: '5,000 (100%)',
        exampleCount: 2,
        viewableInLens: true,
      },
@ -256,7 +256,7 @@ export const farequoteKQLFiltersSearchTestData: TestData = {
        aggregatable: true,
        loading: false,
        exampleCount: 1,
-        docCountFormatted: '5000 (100%)',
+        docCountFormatted: '5,000 (100%)',
        viewableInLens: true,
      },
      {
@ -267,7 +267,7 @@ export const farequoteKQLFiltersSearchTestData: TestData = {
        loading: false,
        exampleCount: 1,
        exampleContent: ['ASA'],
-        docCountFormatted: '5000 (100%)',
+        docCountFormatted: '5,000 (100%)',
        viewableInLens: true,
      },
      {
@ -287,7 +287,7 @@ export const farequoteKQLFiltersSearchTestData: TestData = {
        aggregatable: true,
        loading: false,
        exampleCount: 1,
-        docCountFormatted: '5000 (100%)',
+        docCountFormatted: '5,000 (100%)',
        viewableInLens: true,
      },
    ],
@ -308,8 +308,8 @@ export const farequoteLuceneSearchTestData: TestData = {
  fieldNameFilters: ['@version.keyword', 'type'],
  fieldTypeFilters: [ML_JOB_FIELD_TYPES.NUMBER],
  sampleSizeValidations: [
-    { size: 1000, expected: { field: 'airline', docCountFormatted: '1000 (100%)' } },
-    { size: 5000, expected: { field: '@timestamp', docCountFormatted: '5000 (100%)' } },
+    { size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
+    { size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
  ],
  expected: {
    totalDocCountFormatted: '34,416',
@ -320,7 +320,7 @@ export const farequoteLuceneSearchTestData: TestData = {
        existsInDocs: true,
        aggregatable: true,
        loading: false,
-        docCountFormatted: '5000 (100%)',
+        docCountFormatted: '5,000 (100%)',
        statsMaxDecimalPlaces: 3,
        topValuesCount: 11,
        viewableInLens: true,
@ -333,7 +333,7 @@ export const farequoteLuceneSearchTestData: TestData = {
        existsInDocs: true,
        aggregatable: true,
        loading: false,
-        docCountFormatted: '5000 (100%)',
+        docCountFormatted: '5,000 (100%)',
        exampleCount: 2,
        viewableInLens: true,
      },
@ -354,7 +354,7 @@ export const farequoteLuceneSearchTestData: TestData = {
        aggregatable: true,
        loading: false,
        exampleCount: 1,
-        docCountFormatted: '5000 (100%)',
+        docCountFormatted: '5,000 (100%)',
        viewableInLens: true,
      },
      {
@ -364,7 +364,7 @@ export const farequoteLuceneSearchTestData: TestData = {
        aggregatable: true,
        loading: false,
        exampleCount: 5,
-        docCountFormatted: '5000 (100%)',
+        docCountFormatted: '5,000 (100%)',
        viewableInLens: true,
      },
      {
@ -384,7 +384,7 @@ export const farequoteLuceneSearchTestData: TestData = {
        aggregatable: true,
        loading: false,
        exampleCount: 1,
-        docCountFormatted: '5000 (100%)',
+        docCountFormatted: '5,000 (100%)',
        viewableInLens: true,
      },
    ],
@ -405,8 +405,8 @@ export const farequoteLuceneFiltersSearchTestData: TestData = {
  fieldNameFilters: ['@version.keyword', 'type'],
  fieldTypeFilters: [ML_JOB_FIELD_TYPES.NUMBER],
  sampleSizeValidations: [
-    { size: 1000, expected: { field: 'airline', docCountFormatted: '1000 (100%)' } },
-    { size: 5000, expected: { field: '@timestamp', docCountFormatted: '5000 (100%)' } },
+    { size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
+    { size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
  ],
  expected: {
    filters: [{ key: 'airline', value: 'ASA' }],
@ -418,7 +418,7 @@ export const farequoteLuceneFiltersSearchTestData: TestData = {
        existsInDocs: true,
        aggregatable: true,
        loading: false,
-        docCountFormatted: '5000 (100%)',
+        docCountFormatted: '5,000 (100%)',
        statsMaxDecimalPlaces: 3,
        topValuesCount: 11,
        viewableInLens: true,
@ -431,7 +431,7 @@ export const farequoteLuceneFiltersSearchTestData: TestData = {
        existsInDocs: true,
        aggregatable: true,
        loading: false,
-        docCountFormatted: '5000 (100%)',
+        docCountFormatted: '5,000 (100%)',
        exampleCount: 2,
        viewableInLens: true,
      },
@ -452,7 +452,7 @@ export const farequoteLuceneFiltersSearchTestData: TestData = {
        aggregatable: true,
        loading: false,
        exampleCount: 1,
-        docCountFormatted: '5000 (100%)',
+        docCountFormatted: '5,000 (100%)',
        viewableInLens: true,
      },
      {
@ -463,7 +463,7 @@ export const farequoteLuceneFiltersSearchTestData: TestData = {
        loading: false,
        exampleCount: 1,
        exampleContent: ['ASA'],
-        docCountFormatted: '5000 (100%)',
+        docCountFormatted: '5,000 (100%)',
        viewableInLens: true,
      },
      {
@ -483,7 +483,7 @@ export const farequoteLuceneFiltersSearchTestData: TestData = {
        aggregatable: true,
        loading: false,
        exampleCount: 1,
-        docCountFormatted: '5000 (100%)',
+        docCountFormatted: '5,000 (100%)',
        viewableInLens: true,
      },
    ],
--- a/x-pack/test/functional/apps/ml/data_visualizer/index_test_data_random_sampler.ts
+++ b/x-pack/test/functional/apps/ml/data_visualizer/index_test_data_random_sampler.ts
@ -0,0 +1,535 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { ML_JOB_FIELD_TYPES } from '@kbn/ml-plugin/common/constants/field_types';
+import { TestData } from './types';
+
+export const farequoteDataViewTestData: TestData = {
+  suiteTitle: 'farequote index pattern',
+  isSavedSearch: false,
+  sourceIndexOrSavedSearch: 'ft_farequote',
+  fieldNameFilters: ['airline', '@timestamp'],
+  fieldTypeFilters: [ML_JOB_FIELD_TYPES.KEYWORD],
+  sampleSizeValidations: [
+    { size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
+    { size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
+  ],
+  expected: {
+    totalDocCountFormatted: '86,274',
+    metricFields: [
+      {
+        fieldName: 'responsetime',
+        type: ML_JOB_FIELD_TYPES.NUMBER,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        docCountFormatted: '86,274 (100%)',
+        statsMaxDecimalPlaces: 3,
+        topValuesCount: 11,
+        viewableInLens: true,
+      },
+    ],
+    nonMetricFields: [
+      {
+        fieldName: '@timestamp',
+        type: ML_JOB_FIELD_TYPES.DATE,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        docCountFormatted: '86,274 (100%)',
+        exampleCount: 2,
+        viewableInLens: true,
+      },
+      {
+        fieldName: '@version',
+        type: ML_JOB_FIELD_TYPES.TEXT,
+        existsInDocs: true,
+        aggregatable: false,
+        loading: false,
+        exampleCount: 1,
+        docCountFormatted: '',
+        viewableInLens: false,
+      },
+      {
+        fieldName: '@version.keyword',
+        type: ML_JOB_FIELD_TYPES.KEYWORD,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        exampleCount: 1,
+        docCountFormatted: '86,274 (100%)',
+        viewableInLens: true,
+      },
+      {
+        fieldName: 'airline',
+        type: ML_JOB_FIELD_TYPES.KEYWORD,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        exampleCount: 11,
+        docCountFormatted: '86,274 (100%)',
+        viewableInLens: true,
+      },
+      {
+        fieldName: 'type',
+        type: ML_JOB_FIELD_TYPES.TEXT,
+        existsInDocs: true,
+        aggregatable: false,
+        loading: false,
+        exampleCount: 1,
+        docCountFormatted: '',
+        viewableInLens: false,
+      },
+      {
+        fieldName: 'type.keyword',
+        type: ML_JOB_FIELD_TYPES.KEYWORD,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        exampleCount: 1,
+        docCountFormatted: '86,274 (100%)',
+        viewableInLens: true,
+      },
+    ],
+    emptyFields: ['sourcetype'],
+    visibleMetricFieldsCount: 1,
+    totalMetricFieldsCount: 1,
+    populatedFieldsCount: 7,
+    totalFieldsCount: 8,
+    fieldNameFiltersResultCount: 2,
+    fieldTypeFiltersResultCount: 3,
+  },
+};
+
+export const farequoteKQLSearchTestData: TestData = {
+  suiteTitle: 'KQL saved search',
+  isSavedSearch: true,
+  sourceIndexOrSavedSearch: 'ft_farequote_kuery',
+  fieldNameFilters: ['@version'],
+  fieldTypeFilters: [ML_JOB_FIELD_TYPES.DATE, ML_JOB_FIELD_TYPES.TEXT],
+  sampleSizeValidations: [
+    { size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
+    { size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
+  ],
+  expected: {
+    totalDocCountFormatted: '34,415',
+    metricFields: [
+      {
+        fieldName: 'responsetime',
+        type: ML_JOB_FIELD_TYPES.NUMBER,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        docCountFormatted: '34,415 (100%)',
+        statsMaxDecimalPlaces: 3,
+        topValuesCount: 11,
+        viewableInLens: true,
+      },
+    ],
+    nonMetricFields: [
+      {
+        fieldName: '@timestamp',
+        type: ML_JOB_FIELD_TYPES.DATE,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        docCountFormatted: '34,415 (100%)',
+        exampleCount: 2,
+        viewableInLens: true,
+      },
+      {
+        fieldName: '@version',
+        type: ML_JOB_FIELD_TYPES.TEXT,
+        existsInDocs: true,
+        aggregatable: false,
+        loading: false,
+        exampleCount: 1,
+        docCountFormatted: '',
+        viewableInLens: false,
+      },
+      {
+        fieldName: '@version.keyword',
+        type: ML_JOB_FIELD_TYPES.KEYWORD,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        exampleCount: 1,
+        docCountFormatted: '34,415 (100%)',
+        viewableInLens: true,
+      },
+      {
+        fieldName: 'airline',
+        type: ML_JOB_FIELD_TYPES.KEYWORD,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        exampleCount: 5,
+        docCountFormatted: '34,415 (100%)',
+        viewableInLens: true,
+      },
+      {
+        fieldName: 'type',
+        type: ML_JOB_FIELD_TYPES.TEXT,
+        existsInDocs: true,
+        aggregatable: false,
+        loading: false,
+        exampleCount: 1,
+        docCountFormatted: '',
+        viewableInLens: false,
+      },
+      {
+        fieldName: 'type.keyword',
+        type: ML_JOB_FIELD_TYPES.KEYWORD,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        exampleCount: 1,
+        docCountFormatted: '34,415 (100%)',
+        viewableInLens: true,
+      },
+    ],
+    emptyFields: ['sourcetype'],
+    visibleMetricFieldsCount: 1,
+    totalMetricFieldsCount: 1,
+    populatedFieldsCount: 7,
+    totalFieldsCount: 8,
+    fieldNameFiltersResultCount: 1,
+    fieldTypeFiltersResultCount: 3,
+  },
+};
+
+export const farequoteKQLFiltersSearchTestData: TestData = {
+  suiteTitle: 'KQL saved search and filters',
+  isSavedSearch: true,
+  sourceIndexOrSavedSearch: 'ft_farequote_filter_and_kuery',
+  fieldNameFilters: ['@version'],
+  fieldTypeFilters: [ML_JOB_FIELD_TYPES.DATE, ML_JOB_FIELD_TYPES.TEXT],
+  sampleSizeValidations: [
+    { size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
+    { size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
+  ],
+  expected: {
+    filters: [{ key: 'airline', value: 'ASA' }],
+    totalDocCountFormatted: '5,674',
+    metricFields: [
+      {
+        fieldName: 'responsetime',
+        type: ML_JOB_FIELD_TYPES.NUMBER,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        docCountFormatted: '5,674 (100%)',
+        statsMaxDecimalPlaces: 3,
+        topValuesCount: 11,
+        viewableInLens: true,
+      },
+    ],
+    nonMetricFields: [
+      {
+        fieldName: '@timestamp',
+        type: ML_JOB_FIELD_TYPES.DATE,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        docCountFormatted: '5,674 (100%)',
+        exampleCount: 2,
+        viewableInLens: true,
+      },
+      {
+        fieldName: '@version',
+        type: ML_JOB_FIELD_TYPES.TEXT,
+        existsInDocs: true,
+        aggregatable: false,
+        loading: false,
+        exampleCount: 1,
+        docCountFormatted: '',
+        viewableInLens: false,
+      },
+      {
+        fieldName: '@version.keyword',
+        type: ML_JOB_FIELD_TYPES.KEYWORD,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        exampleCount: 1,
+        docCountFormatted: '5,674 (100%)',
+        viewableInLens: true,
+      },
+      {
+        fieldName: 'airline',
+        type: ML_JOB_FIELD_TYPES.KEYWORD,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        exampleCount: 1,
+        exampleContent: ['ASA'],
+        docCountFormatted: '5,674 (100%)',
+        viewableInLens: true,
+      },
+      {
+        fieldName: 'type',
+        type: ML_JOB_FIELD_TYPES.TEXT,
+        existsInDocs: true,
+        aggregatable: false,
+        loading: false,
+        exampleCount: 1,
+        docCountFormatted: '',
+        viewableInLens: false,
+      },
+      {
+        fieldName: 'type.keyword',
+        type: ML_JOB_FIELD_TYPES.KEYWORD,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        exampleCount: 1,
+        docCountFormatted: '5,674 (100%)',
+        viewableInLens: true,
+      },
+    ],
+    emptyFields: ['sourcetype'],
+    visibleMetricFieldsCount: 1,
+    totalMetricFieldsCount: 1,
+    populatedFieldsCount: 7,
+    totalFieldsCount: 8,
+    fieldNameFiltersResultCount: 1,
+    fieldTypeFiltersResultCount: 3,
+  },
+};
+
+export const farequoteLuceneSearchTestData: TestData = {
+  suiteTitle: 'lucene saved search',
+  isSavedSearch: true,
+  sourceIndexOrSavedSearch: 'ft_farequote_lucene',
+  fieldNameFilters: ['@version.keyword', 'type'],
+  fieldTypeFilters: [ML_JOB_FIELD_TYPES.NUMBER],
+  sampleSizeValidations: [
+    { size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
+    { size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
+  ],
+  expected: {
+    totalDocCountFormatted: '34,416',
+    metricFields: [
+      {
+        fieldName: 'responsetime',
+        type: ML_JOB_FIELD_TYPES.NUMBER,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        docCountFormatted: '34,416 (100%)',
+        statsMaxDecimalPlaces: 3,
+        topValuesCount: 11,
+        viewableInLens: true,
+      },
+    ],
+    nonMetricFields: [
+      {
+        fieldName: '@timestamp',
+        type: ML_JOB_FIELD_TYPES.DATE,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        docCountFormatted: '34,416 (100%)',
+        exampleCount: 2,
+        viewableInLens: true,
+      },
+      {
+        fieldName: '@version',
+        type: ML_JOB_FIELD_TYPES.TEXT,
+        existsInDocs: true,
+        aggregatable: false,
+        loading: false,
+        exampleCount: 1,
+        docCountFormatted: '',
+        viewableInLens: false,
+      },
+      {
+        fieldName: '@version.keyword',
+        type: ML_JOB_FIELD_TYPES.KEYWORD,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        exampleCount: 1,
+        docCountFormatted: '34,416 (100%)',
+        viewableInLens: true,
+      },
+      {
+        fieldName: 'airline',
+        type: ML_JOB_FIELD_TYPES.KEYWORD,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        exampleCount: 5,
+        docCountFormatted: '34,416 (100%)',
+        viewableInLens: true,
+      },
+      {
+        fieldName: 'type',
+        type: ML_JOB_FIELD_TYPES.TEXT,
+        existsInDocs: true,
+        aggregatable: false,
+        loading: false,
+        exampleCount: 1,
+        docCountFormatted: '',
+        viewableInLens: false,
+      },
+      {
+        fieldName: 'type.keyword',
+        type: ML_JOB_FIELD_TYPES.KEYWORD,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        exampleCount: 1,
+        docCountFormatted: '34,416 (100%)',
+        viewableInLens: true,
+      },
+    ],
+    emptyFields: ['sourcetype'],
+    visibleMetricFieldsCount: 1,
+    totalMetricFieldsCount: 1,
+    populatedFieldsCount: 7,
+    totalFieldsCount: 8,
+    fieldNameFiltersResultCount: 2,
+    fieldTypeFiltersResultCount: 1,
+  },
+};
+
+export const farequoteLuceneFiltersSearchTestData: TestData = {
+  suiteTitle: 'lucene saved search and filter',
+  isSavedSearch: true,
+  sourceIndexOrSavedSearch: 'ft_farequote_filter_and_lucene',
+  fieldNameFilters: ['@version.keyword', 'type'],
+  fieldTypeFilters: [ML_JOB_FIELD_TYPES.NUMBER],
+  sampleSizeValidations: [
+    { size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
+    { size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
+  ],
+  expected: {
+    filters: [{ key: 'airline', value: 'ASA' }],
+    totalDocCountFormatted: '5,673',
+    metricFields: [
+      {
+        fieldName: 'responsetime',
+        type: ML_JOB_FIELD_TYPES.NUMBER,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        docCountFormatted: '5,673 (100%)',
+        statsMaxDecimalPlaces: 3,
+        topValuesCount: 11,
+        viewableInLens: true,
+      },
+    ],
+    nonMetricFields: [
+      {
+        fieldName: '@timestamp',
+        type: ML_JOB_FIELD_TYPES.DATE,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        docCountFormatted: '5,673 (100%)',
+        exampleCount: 2,
+        viewableInLens: true,
+      },
+      {
+        fieldName: '@version',
+        type: ML_JOB_FIELD_TYPES.TEXT,
+        existsInDocs: true,
+        aggregatable: false,
+        loading: false,
+        exampleCount: 1,
+        docCountFormatted: '',
+        viewableInLens: false,
+      },
+      {
+        fieldName: '@version.keyword',
+        type: ML_JOB_FIELD_TYPES.KEYWORD,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        exampleCount: 1,
+        docCountFormatted: '5,673 (100%)',
+        viewableInLens: true,
+      },
+      {
+        fieldName: 'airline',
+        type: ML_JOB_FIELD_TYPES.KEYWORD,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        exampleCount: 1,
+        exampleContent: ['ASA'],
+        docCountFormatted: '5,673 (100%)',
+        viewableInLens: true,
+      },
+      {
+        fieldName: 'type',
+        type: ML_JOB_FIELD_TYPES.TEXT,
+        existsInDocs: true,
+        aggregatable: false,
+        loading: false,
+        exampleCount: 1,
+        docCountFormatted: '',
+        viewableInLens: false,
+      },
+      {
+        fieldName: 'type.keyword',
+        type: ML_JOB_FIELD_TYPES.KEYWORD,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        exampleCount: 1,
+        docCountFormatted: '5,673 (100%)',
+        viewableInLens: true,
+      },
+    ],
+    emptyFields: ['sourcetype'],
+    visibleMetricFieldsCount: 1,
+    totalMetricFieldsCount: 1,
+    populatedFieldsCount: 7,
+    totalFieldsCount: 8,
+    fieldNameFiltersResultCount: 2,
+    fieldTypeFiltersResultCount: 1,
+  },
+};
+
+export const sampleLogTestData: TestData = {
+  suiteTitle: 'geo point field',
+  isSavedSearch: false,
+  sourceIndexOrSavedSearch: 'ft_module_sample_logs',
+  fieldNameFilters: ['geo.coordinates'],
+  fieldTypeFilters: [ML_JOB_FIELD_TYPES.GEO_POINT],
+  rowsPerPage: 50,
+  expected: {
+    totalDocCountFormatted: '408',
+    metricFields: [],
+    // only testing the geo_point fields
+    nonMetricFields: [
+      {
+        fieldName: 'geo.coordinates',
+        type: ML_JOB_FIELD_TYPES.GEO_POINT,
+        existsInDocs: true,
+        aggregatable: true,
+        loading: false,
+        docCountFormatted: '408 (100%)',
+        exampleCount: 10,
+        viewableInLens: false,
+      },
+    ],
+    emptyFields: [],
+    visibleMetricFieldsCount: 4,
+    totalMetricFieldsCount: 5,
+    populatedFieldsCount: 35,
+    totalFieldsCount: 36,
+    fieldNameFiltersResultCount: 1,
+    fieldTypeFiltersResultCount: 1,
+  },
+  sampleSizeValidations: [
+    { size: 1000, expected: { field: 'geo.coordinates', docCountFormatted: '408 (100%)' } },
+    { size: 5000, expected: { field: '@timestamp', docCountFormatted: '408 (100%)' } },
+  ],
+};
--- a/x-pack/test/functional/services/ml/data_visualizer_table.ts
+++ b/x-pack/test/functional/services/ml/data_visualizer_table.ts
@ -290,25 +290,6 @@ export function MachineLearningDataVisualizerTableProvider(
      await testSubjects.existOrFail('dataVisualizerFieldTypeSelect');
    }

-    public async assertSampleSizeInputExists() {
-      await testSubjects.existOrFail('dataVisualizerShardSizeSelect');
-    }
-
-    public async setSampleSizeInputValue(
-      sampleSize: number | 'all',
-      fieldName: string,
-      docCountFormatted: string
-    ) {
-      await this.assertSampleSizeInputExists();
-      await testSubjects.clickWhenNotDisabledWithoutRetry('dataVisualizerShardSizeSelect');
-      await testSubjects.existOrFail(`dataVisualizerShardSizeOption ${sampleSize}`);
-      await testSubjects.click(`dataVisualizerShardSizeOption ${sampleSize}`);
-
-      await retry.tryForTime(5000, async () => {
-        await this.assertFieldDocCount(fieldName, docCountFormatted);
-      });
-    }
-
    public async setFieldTypeFilter(fieldTypes: string[], expectedRowCount = 1) {
      await this.assertFieldTypeInputExists();
      await mlCommonUI.setMultiSelectFilter('dataVisualizerFieldTypeSelect', fieldTypes);
--- a/x-pack/test/screenshot_creation/apps/ml_docs/anomaly_detection/geographic_data.ts
+++ b/x-pack/test/screenshot_creation/apps/ml_docs/anomaly_detection/geographic_data.ts
@ -103,11 +103,6 @@ export default function ({ getPageObject, getService }: FtrProviderContext) {
      await ml.testExecution.logTestStep('set data visualizer options');
      await ml.dataVisualizerIndexBased.assertTimeRangeSelectorSectionExists();
      await ml.dataVisualizerIndexBased.clickUseFullDataButton('14,074');
-      await ml.dataVisualizerTable.setSampleSizeInputValue(
-        'all',
-        'geo.coordinates',
-        '14074 (100%)'
-      );
      await ml.dataVisualizerTable.setFieldTypeFilter([ML_JOB_FIELD_TYPES.GEO_POINT]);

      await ml.testExecution.logTestStep('set maps options and take screenshot');
--- a/x-pack/test/screenshot_creation/apps/ml_docs/anomaly_detection/mapping_anomalies.ts
+++ b/x-pack/test/screenshot_creation/apps/ml_docs/anomaly_detection/mapping_anomalies.ts
@ -66,11 +66,6 @@ export default function ({ getPageObject, getService }: FtrProviderContext) {
      await ml.testExecution.logTestStep('set data visualizer options');
      await ml.dataVisualizerIndexBased.assertTimeRangeSelectorSectionExists();
      await ml.dataVisualizerIndexBased.clickUseFullDataButton('14,074');
-      await ml.dataVisualizerTable.setSampleSizeInputValue(
-        'all',
-        'geo.coordinates',
-        '14074 (100%)'
-      );
      await ml.dataVisualizerTable.setFieldNameFilter(['geo.dest']);

      await ml.testExecution.logTestStep('set maps options and take screenshot');