[ML] Switch from normal sampling to random sampler for Index data visualizer table (#144646)

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
Quynh Nguyen (Quinn) 2022-11-16 08:36:55 -06:00 committed by GitHub
parent 8a6e91b23f
commit 22d0fa742d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
39 changed files with 1330 additions and 656 deletions

View file

@ -24,6 +24,24 @@ import { useDiscoverServices } from '../../../../hooks/use_discover_services';
import { FIELD_STATISTICS_LOADED } from './constants';
import type { GetStateReturn } from '../../services/discover_state';
import { AvailableFields$, DataRefetch$, DataTotalHits$ } from '../../hooks/use_saved_search';
/**
 * Sampling configuration variants passed to the data visualizer grid
 * embeddable through `samplingOption`. The `mode` literal is the
 * discriminant of the `SamplingOption` union.
 *
 * NOTE(review): an identically shaped set of declarations appears in the data
 * visualizer's common field-stats types — keep both copies in sync or share
 * a single definition.
 */

/** Random-sampling variant; `probability` is presumably the sampled fraction — TODO confirm valid range. */
export interface RandomSamplingOption {
mode: 'random_sampling';
// Seed carried as a string.
seed: string;
probability: number;
}
/** Shard-size based ("normal") sampling variant. */
export interface NormalSamplingOption {
mode: 'normal_sampling';
seed: string;
// Sample size; presumably applied per shard — confirm against the sampler aggregation usage.
shardSize: number;
}
/** Variant that disables sampling; it still carries a `seed` like the other variants. */
export interface NoSamplingOption {
mode: 'no_sampling';
seed: string;
}
/** Discriminated union of all supported sampling configurations. */
export type SamplingOption = RandomSamplingOption | NormalSamplingOption | NoSamplingOption;
export interface DataVisualizerGridEmbeddableInput extends EmbeddableInput {
dataView: DataView;
@ -39,6 +57,7 @@ export interface DataVisualizerGridEmbeddableInput extends EmbeddableInput {
sessionId?: string;
fieldsToFetch?: string[];
totalDocuments?: number;
samplingOption?: SamplingOption;
}
export interface DataVisualizerGridEmbeddableOutput extends EmbeddableOutput {
showDistributions?: boolean;
@ -163,6 +182,11 @@ export const FieldStatisticsTable = (props: FieldStatisticsTableProps) => {
totalDocuments: savedSearchDataTotalHits$
? savedSearchDataTotalHits$.getValue()?.result
: undefined,
samplingOption: {
mode: 'normal_sampling',
shardSize: 5000,
seed: searchSessionId,
} as NormalSamplingOption,
});
embeddable.reload();
}

View file

@ -22,10 +22,10 @@
* Otherwise you'd just satisfy TS requirements but might still
* run into runtime issues.
*/
export const isPopulatedObject = <U extends string = string>(
export const isPopulatedObject = <U extends string = string, T extends unknown = unknown>(
arg: unknown,
requiredAttributes: U[] = []
): arg is Record<U, unknown> => {
): arg is Record<U, T> => {
return (
typeof arg === 'object' &&
arg !== null &&

View file

@ -64,9 +64,7 @@ export interface FieldVisStats {
max?: number;
median?: number;
min?: number;
topValues?: Array<{ key: number | string; doc_count: number }>;
topValuesSampleSize?: number;
topValuesSamplerShardSize?: number;
topValues?: Array<{ key: number | string; doc_count: number; percent: number }>;
examples?: Array<string | GeoPointExample | object>;
timeRangeEarliest?: number;
timeRangeLatest?: number;

View file

@ -11,6 +11,25 @@ import { IKibanaSearchResponse } from '@kbn/data-plugin/common';
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
import { TimeBucketsInterval } from '../services/time_buckets';
/**
 * Sampling configuration variants used by the field-stats request params
 * (`samplingOption`). The `mode` literal is the discriminant of the
 * `SamplingOption` union.
 */

/**
 * Random-sampling variant.
 * A random option whose `probability` is exactly 1 is reported as "no sampling"
 * by `isNoSamplingOption`.
 */
export interface RandomSamplingOption {
mode: 'random_sampling';
// Seed carried as a string.
seed: string;
// Presumably the fraction of documents to sample — TODO confirm valid range.
probability: number;
}
/** Shard-size based ("normal") sampling variant. */
export interface NormalSamplingOption {
mode: 'normal_sampling';
seed: string;
// Sample size; presumably applied per shard — confirm against the sampler aggregation usage.
shardSize: number;
}
/** Variant that disables sampling; it still carries a `seed` like the other variants. */
export interface NoSamplingOption {
mode: 'no_sampling';
seed: string;
}
/** Discriminated union of all supported sampling configurations. */
export type SamplingOption = RandomSamplingOption | NormalSamplingOption | NoSamplingOption;
export interface FieldData {
fieldName: string;
existsInDocs: boolean;
@ -54,7 +73,7 @@ export const isIKibanaSearchResponse = (arg: unknown): arg is IKibanaSearchRespo
export interface NumericFieldStats {
fieldName: string;
count: number;
count?: number;
min: number;
max: number;
avg: number;
@ -86,7 +105,8 @@ export interface BooleanFieldStats {
count: number;
trueCount: number;
falseCount: number;
[key: string]: number | string;
topValues: Bucket[];
topValuesSampleSize: number;
}
export interface DocumentCountStats {
@ -186,6 +206,9 @@ export interface FieldStatsCommonRequestParams {
intervalMs?: number;
query: estypes.QueryDslQueryContainer;
maxExamples?: number;
samplingProbability: number | null;
browserSessionSeed: number;
samplingOption: SamplingOption;
}
export interface OverallStatsSearchStrategyParams {
@ -202,6 +225,8 @@ export interface OverallStatsSearchStrategyParams {
aggregatableFields: string[];
nonAggregatableFields: string[];
fieldsToFetch?: string[];
browserSessionSeed: number;
samplingOption: SamplingOption;
}
export interface FieldStatsSearchStrategyReturnBase {
@ -238,3 +263,20 @@ export interface Field {
export interface Aggs {
[key: string]: estypes.AggregationsAggregationContainer;
}
/**
 * Sampling modes selectable for the field statistics embeddable.
 *
 * Declared `as const` so each value keeps its string-literal type; without it
 * the values widen to `string` and the derived
 * `FieldStatsEmbeddableSamplerOption` type would accept any string.
 */
export const EMBEDDABLE_SAMPLER_OPTION = {
  RANDOM: 'random_sampling',
  NORMAL: 'normal_sampling',
} as const;
/** Union of the literal values of `EMBEDDABLE_SAMPLER_OPTION`. */
export type FieldStatsEmbeddableSamplerOption =
  (typeof EMBEDDABLE_SAMPLER_OPTION)[keyof typeof EMBEDDABLE_SAMPLER_OPTION];
/**
 * Type guard for the random-sampling variant of `SamplingOption`.
 *
 * @param arg sampling option to inspect
 * @returns true when `arg.mode` is `'random_sampling'`
 */
export function isRandomSamplingOption(arg: SamplingOption): arg is RandomSamplingOption {
  const { mode } = arg;
  return mode === 'random_sampling';
}
/**
 * Type guard for the shard-size ("normal") variant of `SamplingOption`.
 *
 * @param arg sampling option to inspect
 * @returns true when `arg.mode` is `'normal_sampling'`
 */
export function isNormalSamplingOption(arg: SamplingOption): arg is NormalSamplingOption {
  const usesNormalSampling = arg.mode === 'normal_sampling';
  return usesNormalSampling;
}
/**
 * Reports whether a sampling option effectively means "do not sample".
 *
 * That is the case for the explicit `'no_sampling'` mode and also for a
 * `'random_sampling'` option whose probability is exactly 1.
 *
 * NOTE(review): in the second case the value is narrowed to `NoSamplingOption`
 * although its runtime `mode` is still `'random_sampling'`; callers should not
 * rely on `mode` after this guard.
 *
 * @param arg sampling option to inspect
 * @returns true when no sampling should be applied
 */
export function isNoSamplingOption(arg: SamplingOption): arg is NoSamplingOption {
  if (arg.mode === 'no_sampling') {
    return true;
  }
  return arg.mode === 'random_sampling' && arg.probability === 1;
}

View file

@ -20,7 +20,7 @@ import {
EuiFormRow,
} from '@elastic/eui';
import { i18n } from '@kbn/i18n';
import { sortedIndex } from 'lodash';
import { debounce, sortedIndex } from 'lodash';
import { FormattedMessage } from '@kbn/i18n-react';
import { isDefined } from '../../util/is_defined';
import type { DocumentCountChartPoint } from './document_count_chart';
@ -64,6 +64,24 @@ export const DocumentCountContent: FC<Props> = ({
setShowSamplingOptionsPopover(false);
}, [setShowSamplingOptionsPopover]);
/**
 * Debounced (100 ms) handler for the probability range input.
 *
 * Snaps the raw value to the nearest entry of RANDOM_SAMPLER_PROBABILITIES
 * (ties go to the higher entry) and forwards it, divided by 100, to
 * `setSamplingProbability`. Does nothing when no setter was provided.
 *
 * A value below the first preset or above the last one has only one
 * neighbour; that neighbour is used, so NaN is never forwarded.
 */
// eslint-disable-next-line react-hooks/exhaustive-deps
const updateSamplingProbability = useCallback(
  debounce((newProbability: number) => {
    if (!setSamplingProbability) return;

    const idx = sortedIndex(RANDOM_SAMPLER_PROBABILITIES, newProbability);
    // Either neighbour is undefined when idx falls on an edge of the preset list.
    const closestPrev: number | undefined = RANDOM_SAMPLER_PROBABILITIES[idx - 1];
    const closestNext: number | undefined = RANDOM_SAMPLER_PROBABILITIES[idx];

    let closestProbability: number | undefined;
    if (closestPrev === undefined) {
      closestProbability = closestNext;
    } else if (closestNext === undefined) {
      closestProbability = closestPrev;
    } else {
      closestProbability =
        Math.abs(closestPrev - newProbability) < Math.abs(closestNext - newProbability)
          ? closestPrev
          : closestNext;
    }

    // Only reachable with an empty preset list; nothing sensible to set.
    if (closestProbability === undefined) return;

    setSamplingProbability(closestProbability / 100);
  }, 100),
  [setSamplingProbability]
);
const calloutInfoMessage = useMemo(() => {
switch (randomSamplerPreference) {
case RANDOM_SAMPLER_OPTION.OFF:
@ -125,7 +143,7 @@ export const DocumentCountContent: FC<Props> = ({
<>
<EuiFlexGroup alignItems="center" gutterSize="xs">
<TotalCountHeader totalCount={totalCount} approximate={approximate} loading={loading} />
<EuiFlexItem grow={false}>
<EuiFlexItem grow={false} style={{ marginLeft: 'auto' }}>
<EuiPopover
data-test-subj="dvRandomSamplerOptionsPopover"
id="dataVisualizerSamplingOptions"
@ -199,21 +217,7 @@ export const DocumentCountContent: FC<Props> = ({
value: d,
label: d === 0.001 || d >= 5 ? `${d}%` : '',
}))}
onChange={(e) => {
const newProbability = Number(e.currentTarget.value);
const idx = sortedIndex(RANDOM_SAMPLER_PROBABILITIES, newProbability);
const closestPrev = RANDOM_SAMPLER_PROBABILITIES[idx - 1];
const closestNext = RANDOM_SAMPLER_PROBABILITIES[idx];
const closestProbability =
Math.abs(closestPrev - newProbability) <
Math.abs(closestNext - newProbability)
? closestPrev
: closestNext;
if (setSamplingProbability) {
setSamplingProbability(closestProbability / 100);
}
}}
onChange={(e) => updateSamplingProbability(Number(e.currentTarget.value))}
step={RANDOM_SAMPLER_STEP}
data-test-subj="dvRandomSamplerProbabilityRange"
/>

View file

@ -112,6 +112,7 @@ export const FieldsStatsGrid: FC<Props> = ({ results }) => {
pageState={dataVisualizerListState}
updatePageState={setDataVisualizerListState}
getItemIdToExpandedRowMap={getItemIdToExpandedRowMap}
overallStatsRunning={false}
/>
</div>
);

View file

@ -10,7 +10,6 @@ import { EuiSpacer } from '@elastic/eui';
import { Axis, BarSeries, Chart, Settings, ScaleType } from '@elastic/charts';
import { FormattedMessage } from '@kbn/i18n-react';
import { i18n } from '@kbn/i18n';
import { TopValues } from '../../../top_values';
import type { FieldDataRowProps } from '../../types/field_data_row';
import { ExpandedRowFieldHeader } from '../expanded_row_field_header';
@ -45,32 +44,13 @@ export const BooleanContent: FC<FieldDataRowProps> = ({ config, onAddFilter }) =
const theme = useDataVizChartTheme();
if (!formattedPercentages) return null;
const { trueCount, falseCount, count } = formattedPercentages;
const stats = {
...config.stats,
topValues: [
{
key: i18n.translate(
'xpack.dataVisualizer.dataGrid.fieldExpandedRow.booleanContent.trueCountLabel',
{ defaultMessage: 'true' }
),
doc_count: trueCount ?? 0,
},
{
key: i18n.translate(
'xpack.dataVisualizer.dataGrid.fieldExpandedRow.booleanContent.falseCountLabel',
{ defaultMessage: 'false' }
),
doc_count: falseCount ?? 0,
},
],
};
const { count } = formattedPercentages;
return (
<ExpandedRowContent dataTestSubj={'dataVisualizerBooleanContent'}>
<DocumentStatsTable config={config} />
<TopValues
stats={stats}
stats={config.stats}
fieldFormat={fieldFormat}
barColor="success"
onAddFilter={onAddFilter}

View file

@ -6,7 +6,7 @@
*/
import React, { FC, useMemo } from 'react';
import { EuiSpacer, EuiText, htmlIdGenerator } from '@elastic/eui';
import { EuiText, htmlIdGenerator } from '@elastic/eui';
import { i18n } from '@kbn/i18n';
import { FormattedMessage } from '@kbn/i18n-react';
import {
@ -18,6 +18,8 @@ import {
VectorLayerDescriptor,
} from '@kbn/maps-plugin/common';
import { EMSTermJoinConfig } from '@kbn/maps-plugin/public';
import { ES_FIELD_TYPES, KBN_FIELD_TYPES } from '@kbn/field-types';
import { useDataVisualizerKibana } from '../../../../../kibana_context';
import { EmbeddedMapComponent } from '../../../embedded_map';
import { FieldVisStats } from '../../../../../../../common/types';
import { ExpandedRowPanel } from './expanded_row_panel';
@ -97,13 +99,59 @@ interface Props {
}
export const ChoroplethMap: FC<Props> = ({ stats, suggestion }) => {
const { fieldName, isTopValuesSampled, topValues, topValuesSamplerShardSize } = stats!;
const {
services: {
data: { fieldFormats },
},
} = useDataVisualizerKibana();
const { fieldName, isTopValuesSampled, topValues, sampleCount } = stats!;
const layerList: VectorLayerDescriptor[] = useMemo(
() => [getChoroplethTopValuesLayer(fieldName || '', topValues || [], suggestion)],
[suggestion, fieldName, topValues]
);
if (!stats) return null;
const totalDocuments = stats.totalDocuments ?? sampleCount ?? 0;
const countsElement = totalDocuments ? (
<EuiText color="subdued" size="xs">
{isTopValuesSampled ? (
<FormattedMessage
id="xpack.dataVisualizer.dataGrid.fieldExpandedRow.choroplethMapTopValues.calculatedFromSampleRecordsLabel"
defaultMessage="Calculated from {sampledDocumentsFormatted} sample {sampledDocuments, plural, one {record} other {records}}."
values={{
sampledDocuments: sampleCount,
sampledDocumentsFormatted: (
<strong>
{fieldFormats
.getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER])
.convert(sampleCount)}
</strong>
),
}}
/>
) : (
<FormattedMessage
id="xpack.dataVisualizer.dataGrid.fieldExpandedRow.choroplethMapTopValues.calculatedFromTotalRecordsLabel"
defaultMessage="Calculated from {totalDocumentsFormatted} {totalDocuments, plural, one {record} other {records}}."
values={{
totalDocuments,
totalDocumentsFormatted: (
<strong>
{fieldFormats
.getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER])
.convert(totalDocuments ?? 0)}
</strong>
),
}}
/>
)}
</EuiText>
) : null;
return (
<ExpandedRowPanel
dataTestSubj={'fileDataVisualizerChoroplethMapTopValues'}
@ -114,20 +162,7 @@ export const ChoroplethMap: FC<Props> = ({ stats, suggestion }) => {
<EmbeddedMapComponent layerList={layerList} />
</div>
{isTopValuesSampled === true && (
<div>
<EuiSpacer size={'s'} />
<EuiText size="xs" textAlign={'center'}>
<FormattedMessage
id="xpack.dataVisualizer.dataGrid.fieldExpandedRow.choroplethMapTopValues.calculatedFromSampleDescription"
defaultMessage="Calculated from sample of {topValuesSamplerShardSize} documents per shard"
values={{
topValuesSamplerShardSize,
}}
/>
</EuiText>
</div>
)}
{countsElement}
</ExpandedRowPanel>
);
};

View file

@ -10,7 +10,7 @@ import React, { FC, ReactNode } from 'react';
import { i18n } from '@kbn/i18n';
import { EuiBasicTable, HorizontalAlignment, LEFT_ALIGNMENT, RIGHT_ALIGNMENT } from '@elastic/eui';
import { ExpandedRowFieldHeader } from '../expanded_row_field_header';
import { FieldDataRowProps } from '../../types';
import { FieldDataRowProps, isIndexBasedFieldVisConfig } from '../../types';
import { roundToDecimalPlace } from '../../../utils';
import { ExpandedRowPanel } from './expanded_row_panel';
@ -46,6 +46,13 @@ export const DocumentStatsTable: FC<FieldDataRowProps> = ({ config }) => {
)
return null;
const { cardinality, count, sampleCount } = config.stats;
const valueCount =
count ?? (isIndexBasedFieldVisConfig(config) && config.existsInDocs === true ? undefined : 0);
const docsPercent =
valueCount !== undefined && sampleCount !== undefined
? roundToDecimalPlace((valueCount / sampleCount) * 100)
: undefined;
const metaTableItems = [
{
function: 'count',
@ -57,16 +64,20 @@ export const DocumentStatsTable: FC<FieldDataRowProps> = ({ config }) => {
),
value: count,
},
{
function: 'percentage',
display: (
<FormattedMessage
id="xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.percentageLabel"
defaultMessage="percentage"
/>
),
value: `${roundToDecimalPlace((count / sampleCount) * 100)}%`,
},
...(docsPercent !== undefined
? [
{
function: 'percentage',
display: (
<FormattedMessage
id="xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.percentageLabel"
defaultMessage="percentage"
/>
),
value: `${docsPercent}%`,
},
]
: []),
{
function: 'distinctValues',
display: (

View file

@ -8,32 +8,46 @@
import { EuiIcon, EuiText } from '@elastic/eui';
import React from 'react';
import { ES_FIELD_TYPES, KBN_FIELD_TYPES } from '@kbn/field-types';
import { useDataVisualizerKibana } from '../../../../../kibana_context';
import { isIndexBasedFieldVisConfig } from '../../../../../../../common/types/field_vis_config';
import type { FieldDataRowProps } from '../../types/field_data_row';
import { roundToDecimalPlace } from '../../../utils';
import { isIndexBasedFieldVisConfig } from '../../types';
interface Props extends FieldDataRowProps {
showIcon?: boolean;
totalCount?: number;
}
export const DocumentStat = ({ config, showIcon }: Props) => {
export const DocumentStat = ({ config, showIcon, totalCount }: Props) => {
const { stats } = config;
const {
services: {
data: { fieldFormats },
},
} = useDataVisualizerKibana();
if (stats === undefined) return null;
const { count, sampleCount } = stats;
const total = sampleCount ?? totalCount;
// If field exists in docs but we don't have count stats then don't show
// Otherwise if field doesn't appear in docs at all, show 0%
const docsCount =
const valueCount =
count ?? (isIndexBasedFieldVisConfig(config) && config.existsInDocs === true ? undefined : 0);
const docsPercent =
docsCount !== undefined && sampleCount !== undefined
? roundToDecimalPlace((docsCount / sampleCount) * 100)
: 0;
valueCount !== undefined && total !== undefined
? `(${roundToDecimalPlace((valueCount / total) * 100)}%)`
: null;
return docsCount !== undefined ? (
return valueCount !== undefined ? (
<>
{showIcon ? <EuiIcon type="document" size={'m'} className={'columnHeader__icon'} /> : null}
<EuiText size={'xs'}>
{docsCount} ({docsPercent}%)
{fieldFormats
.getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER])
.convert(valueCount)}{' '}
{docsPercent}
</EuiText>
</>
) : null;

View file

@ -60,6 +60,8 @@ interface DataVisualizerTableProps<T> {
/** Callback to receive any updates when table or page state is changed **/
onChange?: (update: Partial<DataVisualizerTableState>) => void;
loading?: boolean;
totalCount?: number;
overallStatsRunning: boolean;
}
export const DataVisualizerTable = <T extends DataVisualizerTableItem>({
@ -71,6 +73,8 @@ export const DataVisualizerTable = <T extends DataVisualizerTableItem>({
showPreviewByDefault,
onChange,
loading,
totalCount,
overallStatsRunning,
}: DataVisualizerTableProps<T>) => {
const { euiTheme } = useEuiTheme();
@ -217,12 +221,40 @@ export const DataVisualizerTable = <T extends DataVisualizerTableItem>({
},
{
field: 'docCount',
name: i18n.translate('xpack.dataVisualizer.dataGrid.documentsCountColumnName', {
defaultMessage: 'Documents (%)',
}),
render: (value: number | undefined, item: DataVisualizerTableItem) => (
<DocumentStat config={item} showIcon={dimensions.showIcon} />
name: (
<div className={'columnHeader__title'}>
{i18n.translate('xpack.dataVisualizer.dataGrid.documentsCountColumnName', {
defaultMessage: 'Documents (%)',
})}
{
<EuiToolTip
content={i18n.translate(
'xpack.dataVisualizer.dataGrid.documentsCountColumnTooltip',
{
defaultMessage:
'Document count found is based on a smaller set of sampled records.',
}
)}
>
<EuiIcon type="questionInCircle" />
</EuiToolTip>
}
</div>
),
render: (value: number | undefined, item: DataVisualizerTableItem) => {
if (overallStatsRunning) {
return (
<EuiText textAlign="center">
<EuiLoadingSpinner size="s" />
</EuiText>
);
}
return (
<DocumentStat config={item} showIcon={dimensions.showIcon} totalCount={totalCount} />
);
},
sortable: (item: DataVisualizerTableItem) => item?.stats?.count,
align: LEFT_ALIGNMENT as HorizontalAlignment,
'data-test-subj': 'dataVisualizerTableColumnDocumentsCount',
@ -233,9 +265,19 @@ export const DataVisualizerTable = <T extends DataVisualizerTableItem>({
name: i18n.translate('xpack.dataVisualizer.dataGrid.distinctValuesColumnName', {
defaultMessage: 'Distinct values',
}),
render: (_: undefined, item: DataVisualizerTableItem) => (
<DistinctValues cardinality={item?.stats?.cardinality} showIcon={dimensions.showIcon} />
),
render: (_: undefined, item: DataVisualizerTableItem) => {
if (overallStatsRunning) {
return (
<EuiText textAlign="center">
<EuiLoadingSpinner size="s" />
</EuiText>
);
}
return (
<DistinctValues cardinality={item?.stats?.cardinality} showIcon={dimensions.showIcon} />
);
},
sortable: (item: DataVisualizerTableItem) => item?.stats?.cardinality,
align: LEFT_ALIGNMENT as HorizontalAlignment,
'data-test-subj': 'dataVisualizerTableColumnDistinctValues',
@ -333,6 +375,7 @@ export const DataVisualizerTable = <T extends DataVisualizerTableItem>({
extendedColumns,
dimensions.breakPoint,
toggleExpandAll,
overallStatsRunning,
]);
const itemIdToExpandedRowMap = useMemo(() => {

View file

@ -36,8 +36,7 @@ interface Props {
onAddFilter?: (field: DataViewField | string, value: string, type: '+' | '-') => void;
}
function getPercentLabel(docCount: number, topValuesSampleSize: number): string {
const percent = (100 * docCount) / topValuesSampleSize;
function getPercentLabel(percent: number): string {
if (percent >= 0.1) {
return `${roundToDecimalPlace(percent, 1)}%`;
} else {
@ -47,76 +46,54 @@ function getPercentLabel(docCount: number, topValuesSampleSize: number): string
export const TopValues: FC<Props> = ({ stats, fieldFormat, barColor, compressed, onAddFilter }) => {
const {
services: { data },
services: {
data: { fieldFormats },
},
} = useDataVisualizerKibana();
const { fieldFormats } = data;
if (stats === undefined || !stats.topValues) return null;
const {
topValues,
topValuesSampleSize,
count,
isTopValuesSampled,
fieldName,
sampleCount,
topValuesSamplerShardSize,
} = stats;
const { topValues, fieldName, sampleCount } = stats;
const totalDocuments = stats.totalDocuments;
const totalDocuments = stats.totalDocuments ?? sampleCount ?? 0;
const topValuesOtherCountPercent =
1 - (topValues ? topValues.reduce((acc, bucket) => acc + bucket.percent, 0) : 0);
const topValuesOtherCount = Math.floor(topValuesOtherCountPercent * (sampleCount ?? 0));
const progressBarMax = isTopValuesSampled === true ? topValuesSampleSize : count;
const topValuesOtherCount =
(progressBarMax ?? 0) -
(topValues ? topValues.map((value) => value.doc_count).reduce((v, acc) => acc + v, 0) : 0);
const countsElement =
totalDocuments !== undefined ? (
<EuiText color="subdued" size="xs">
{isTopValuesSampled ? (
<FormattedMessage
id="xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromSampleRecordsLabel"
defaultMessage="Calculated from {sampledDocumentsFormatted} sample {sampledDocuments, plural, one {record} other {records}}."
values={{
sampledDocuments: sampleCount,
sampledDocumentsFormatted: (
<strong>
{fieldFormats
.getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER])
.convert(sampleCount)}
</strong>
),
}}
/>
) : (
<FormattedMessage
id="xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromTotalRecordsLabel"
defaultMessage="Calculated from {totalDocumentsFormatted} {totalDocuments, plural, one {record} other {records}}."
values={{
totalDocuments,
totalDocumentsFormatted: (
<strong>
{fieldFormats
.getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER])
.convert(totalDocuments ?? 0)}
</strong>
),
}}
/>
)}
</EuiText>
) : (
<EuiText size="xs" textAlign={'center'}>
const countsElement = (
<EuiText color="subdued" size="xs">
{totalDocuments > (sampleCount ?? 0) ? (
<FormattedMessage
id="xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromSampleDescription"
defaultMessage="Calculated from sample of {topValuesSamplerShardSize} documents per shard"
id="xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromSampleRecordsLabel"
defaultMessage="Calculated from {sampledDocumentsFormatted} sample {sampledDocuments, plural, one {record} other {records}}."
values={{
topValuesSamplerShardSize,
sampledDocuments: sampleCount,
sampledDocumentsFormatted: (
<strong>
{fieldFormats
.getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER])
.convert(sampleCount)}
</strong>
),
}}
/>
</EuiText>
);
) : (
<FormattedMessage
id="xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromTotalRecordsLabel"
defaultMessage="Calculated from {totalDocumentsFormatted} {totalDocuments, plural, one {record} other {records}}."
values={{
totalDocuments,
totalDocumentsFormatted: (
<strong>
{fieldFormats
.getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER])
.convert(totalDocuments ?? 0)}
</strong>
),
}}
/>
)}
</EuiText>
);
return (
<ExpandedRowPanel
@ -139,15 +116,15 @@ export const TopValues: FC<Props> = ({ stats, fieldFormat, barColor, compressed,
<EuiFlexGroup gutterSize="xs" alignItems="center" key={value.key}>
<EuiFlexItem data-test-subj="dataVisualizerFieldDataTopValueBar">
<EuiProgress
value={value.doc_count}
max={progressBarMax}
value={value.percent}
max={1}
color={barColor}
size="xs"
label={kibanaFieldFormat(value.key, fieldFormat)}
className={classNames('eui-textTruncate', 'topValuesValueLabelContainer')}
valueText={`${value.doc_count}${
progressBarMax !== undefined
? ` (${getPercentLabel(value.doc_count, progressBarMax)})`
totalDocuments !== undefined
? ` (${getPercentLabel(value.percent * 100)})`
: ''
}`}
/>
@ -222,7 +199,7 @@ export const TopValues: FC<Props> = ({ stats, fieldFormat, barColor, compressed,
<EuiFlexItem data-test-subj="dataVisualizerFieldDataTopValueBar">
<EuiProgress
value={topValuesOtherCount}
max={progressBarMax}
max={totalDocuments}
color={barColor}
size="xs"
label={
@ -233,8 +210,8 @@ export const TopValues: FC<Props> = ({ stats, fieldFormat, barColor, compressed,
}
className={classNames('eui-textTruncate', 'topValuesValueLabelContainer')}
valueText={`${topValuesOtherCount}${
progressBarMax !== undefined
? ` (${getPercentLabel(topValuesOtherCount, progressBarMax)})`
totalDocuments !== undefined
? ` (${getPercentLabel(topValuesOtherCountPercent * 100)})`
: ''
}`}
/>
@ -249,12 +226,10 @@ export const TopValues: FC<Props> = ({ stats, fieldFormat, barColor, compressed,
</EuiFlexGroup>
) : null}
{isTopValuesSampled === true && (
<Fragment>
<EuiSpacer size="xs" />
{countsElement}
</Fragment>
)}
<Fragment>
<EuiSpacer size="xs" />
{countsElement}
</Fragment>
</div>
</ExpandedRowPanel>
);

View file

@ -551,8 +551,10 @@ export const IndexDataVisualizerView: FC<IndexDataVisualizerViewProps> = (dataVi
getItemIdToExpandedRowMap={getItemIdToExpandedRowMap}
extendedColumns={extendedColumns}
loading={progress < 100}
overallStatsRunning={overallStatsProgress.isRunning}
showPreviewByDefault={dataVisualizerListState.showDistributions ?? true}
onChange={setDataVisualizerListState}
totalCount={overallStats.totalCount}
/>
</EuiPanel>
</EuiFlexItem>

View file

@ -11,8 +11,8 @@ import { i18n } from '@kbn/i18n';
import { Query, Filter } from '@kbn/es-query';
import type { TimeRange } from '@kbn/es-query';
import { DataView, DataViewField } from '@kbn/data-views-plugin/public';
import { css } from '@emotion/react';
import { isDefined } from '../../../common/util/is_defined';
import { ShardSizeFilter } from './shard_size_select';
import { DataVisualizerFieldNamesFilter } from './field_name_filter';
import { DataVisualizerFieldTypeFilter } from './field_type_filter';
import { SupportedFieldType } from '../../../../../common/types';
@ -147,12 +147,15 @@ export const SearchPanel: FC<Props> = ({
/>
</EuiFlexItem>
<EuiFlexItem grow={2} className={'dvSearchPanel__controls'}>
<ShardSizeFilter
samplerShardSize={samplerShardSize}
setSamplerShardSize={setSamplerShardSize}
/>
<EuiFlexItem
grow={2}
className={'dvSearchPanel__controls'}
css={css`
margin-left: 0px !important;
padding-left: 0px !important;
padding-right: 0px !important;
`}
>
<DataVisualizerFieldNamesFilter
overallStats={overallStats}
setVisibleFieldNames={setVisibleFieldNames}

View file

@ -1,66 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { EuiFlexGroup, EuiFlexItem, EuiIconTip, EuiSuperSelect } from '@elastic/eui';
import { i18n } from '@kbn/i18n';
import React, { FC } from 'react';
import { FormattedMessage } from '@kbn/i18n-react';
interface Props {
samplerShardSize: number;
setSamplerShardSize(s: number): void;
}
const searchSizeOptions = [1000, 5000, 10000, 100000, -1].map((v) => {
return {
value: String(v),
inputDisplay:
v > 0 ? (
<span data-test-subj={`dataVisualizerShardSizeOption ${v}`}>
<FormattedMessage
id="xpack.dataVisualizer.searchPanel.sampleSizeOptionLabel"
defaultMessage="Sample size (per shard): {wrappedValue}"
values={{ wrappedValue: <b>{v}</b> }}
/>
</span>
) : (
<span data-test-subj={`dataVisualizerShardSizeOption all`}>
<FormattedMessage
id="xpack.dataVisualizer.searchPanel.allOptionLabel"
defaultMessage="Search all"
/>
</span>
),
};
});
export const ShardSizeFilter: FC<Props> = ({ samplerShardSize, setSamplerShardSize }) => {
return (
<EuiFlexGroup alignItems="center" gutterSize="s" responsive={false}>
<EuiFlexItem grow={false} style={{ width: 310 }}>
<EuiSuperSelect
options={searchSizeOptions}
valueOfSelected={String(samplerShardSize)}
onChange={(value) => setSamplerShardSize(+value)}
aria-label={i18n.translate('xpack.dataVisualizer.searchPanel.sampleSizeAriaLabel', {
defaultMessage: 'Select number of documents to sample',
})}
data-test-subj="dataVisualizerShardSizeSelect"
/>
</EuiFlexItem>
<EuiFlexItem grow={false}>
<EuiIconTip
content={i18n.translate('xpack.dataVisualizer.searchPanel.queryBarPlaceholder', {
defaultMessage:
'Selecting a smaller sample size will reduce query run times and the load on the cluster.',
})}
position="right"
/>
</EuiFlexItem>
</EuiFlexGroup>
);
};

View file

@ -24,6 +24,7 @@ import { KibanaContextProvider, KibanaThemeProvider } from '@kbn/kibana-react-pl
import type { Query } from '@kbn/es-query';
import { DataView, DataViewField } from '@kbn/data-views-plugin/public';
import { SavedSearch } from '@kbn/discover-plugin/public';
import { SamplingOption } from '../../../../../common/types/field_stats';
import { DATA_VISUALIZER_GRID_EMBEDDABLE_TYPE } from './constants';
import { EmbeddableLoading } from './embeddable_loading_fallback';
import { DataVisualizerStartDependencies } from '../../../../plugin';
@ -34,7 +35,7 @@ import {
import { FieldVisConfig } from '../../../common/components/stats_table/types';
import { getDefaultDataVisualizerListState } from '../../components/index_data_visualizer_view/index_data_visualizer_view';
import type { DataVisualizerTableState, SavedSearchSavedObject } from '../../../../../common/types';
import { DataVisualizerIndexBasedAppState } from '../../types/index_data_visualizer_state';
import type { DataVisualizerIndexBasedAppState } from '../../types/index_data_visualizer_state';
import { IndexBasedDataVisualizerExpandedRow } from '../../../common/components/expanded_row/index_based_expanded_row';
import { useDataVisualizerGridData } from '../../hooks/use_data_visualizer_grid_data';
@ -55,6 +56,7 @@ export interface DataVisualizerGridInput {
sessionId?: string;
fieldsToFetch?: string[];
totalDocuments?: number;
samplingOption?: SamplingOption;
}
export type DataVisualizerGridEmbeddableInput = EmbeddableInput & DataVisualizerGridInput;
export type DataVisualizerGridEmbeddableOutput = EmbeddableOutput;
@ -83,8 +85,15 @@ export const EmbeddableWrapper = ({
[dataVisualizerListState, onOutputChange]
);
const { configs, searchQueryLanguage, searchString, extendedColumns, progress, setLastRefresh } =
useDataVisualizerGridData(input, dataVisualizerListState);
const {
configs,
searchQueryLanguage,
searchString,
extendedColumns,
progress,
overallStatsProgress,
setLastRefresh,
} = useDataVisualizerGridData(input, dataVisualizerListState);
useEffect(() => {
setLastRefresh(Date.now());
@ -143,6 +152,7 @@ export const EmbeddableWrapper = ({
showPreviewByDefault={input?.showPreviewByDefault}
onChange={onOutputChange}
loading={progress < 100}
overallStatsRunning={overallStatsProgress.isRunning}
/>
);
};

View file

@ -5,22 +5,23 @@
* 2.0.
*/
import { Required } from 'utility-types';
import type { Required } from 'utility-types';
import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
import { merge } from 'rxjs';
import { EuiTableActionsColumnType } from '@elastic/eui/src/components/basic_table/table_types';
import type { EuiTableActionsColumnType } from '@elastic/eui';
import { i18n } from '@kbn/i18n';
import { DataViewField, KBN_FIELD_TYPES, UI_SETTINGS } from '@kbn/data-plugin/common';
import seedrandom from 'seedrandom';
import { RandomSamplerOption } from '../constants/random_sampler';
import { DataVisualizerIndexBasedAppState } from '../types/index_data_visualizer_state';
import type { SamplingOption } from '@kbn/discover-plugin/public/application/main/components/field_stats_table/field_stats_table';
import type { RandomSamplerOption } from '../constants/random_sampler';
import type { DataVisualizerIndexBasedAppState } from '../types/index_data_visualizer_state';
import { useDataVisualizerKibana } from '../../kibana_context';
import { getEsQueryFromSavedSearch } from '../utils/saved_search_utils';
import { MetricFieldsStats } from '../../common/components/stats_table/components/field_count_stats';
import type { MetricFieldsStats } from '../../common/components/stats_table/components/field_count_stats';
import { useTimefilter } from './use_time_filter';
import { dataVisualizerRefresh$ } from '../services/timefilter_refresh_service';
import { TimeBuckets } from '../../../../common/services/time_buckets';
import { FieldVisConfig } from '../../common/components/stats_table/types';
import type { FieldVisConfig } from '../../common/components/stats_table/types';
import {
SUPPORTED_FIELD_TYPES,
NON_AGGREGATABLE_FIELD_TYPES,
@ -29,13 +30,13 @@ import {
import type { FieldRequestConfig, SupportedFieldType } from '../../../../common/types';
import { kbnTypeToJobType } from '../../common/util/field_types_utils';
import { getActions } from '../../common/components/field_data_row/action_menu';
import { DataVisualizerGridInput } from '../embeddables/grid_embeddable/grid_embeddable';
import type { DataVisualizerGridInput } from '../embeddables/grid_embeddable/grid_embeddable';
import { getDefaultPageState } from '../components/index_data_visualizer_view/index_data_visualizer_view';
import { useFieldStatsSearchStrategy } from './use_field_stats';
import { useOverallStats } from './use_overall_stats';
import { OverallStatsSearchStrategyParams } from '../../../../common/types/field_stats';
import { Dictionary } from '../../common/util/url_state';
import { AggregatableField, NonAggregatableField } from '../types/overall_stats';
import type { OverallStatsSearchStrategyParams } from '../../../../common/types/field_stats';
import type { Dictionary } from '../../common/util/url_state';
import type { AggregatableField, NonAggregatableField } from '../types/overall_stats';
const defaults = getDefaultPageState();
@ -43,6 +44,11 @@ function isDisplayField(fieldName: string): boolean {
return !OMIT_FIELDS.includes(fieldName);
}
/**
 * Sampling option used when the grid input does not supply its own `samplingOption`.
 *
 * The empty `seed` is only a placeholder: the field stats request assembled by
 * `useDataVisualizerGridData` copies this option and replaces `seed` with the
 * browser session seed.
 *
 * NOTE(review): `probability: 0` also looks like a placeholder — presumably it is
 * overwritten once the document count stats are known; confirm before relying on it.
 */
const DEFAULT_SAMPLING_OPTION: SamplingOption = {
mode: 'random_sampling',
seed: '',
probability: 0,
};
export const useDataVisualizerGridData = (
input: DataVisualizerGridInput,
dataVisualizerListState: Required<DataVisualizerIndexBasedAppState>,
@ -76,6 +82,7 @@ export const useDataVisualizerGridData = (
currentFilters,
visibleFieldNames,
fieldsToFetch,
samplingOption,
} = useMemo(
() => ({
currentSavedSearch: input?.savedSearch,
@ -84,6 +91,8 @@ export const useDataVisualizerGridData = (
visibleFieldNames: input?.visibleFieldNames ?? [],
currentFilters: input?.filters,
fieldsToFetch: input?.fieldsToFetch,
/** By default, use random sampling **/
samplingOption: input?.samplingOption ?? DEFAULT_SAMPLING_OPTION,
}),
[input]
);
@ -203,6 +212,7 @@ export const useDataVisualizerGridData = (
}
}
});
return {
earliest,
latest,
@ -217,6 +227,8 @@ export const useDataVisualizerGridData = (
aggregatableFields,
nonAggregatableFields,
fieldsToFetch,
browserSessionSeed,
samplingOption: { ...samplingOption, seed: browserSessionSeed.toString() },
};
},
// eslint-disable-next-line react-hooks/exhaustive-deps
@ -226,17 +238,19 @@ export const useDataVisualizerGridData = (
currentDataView.id,
// eslint-disable-next-line react-hooks/exhaustive-deps
JSON.stringify(searchQuery),
// eslint-disable-next-line react-hooks/exhaustive-deps
JSON.stringify(samplingOption),
samplerShardSize,
searchSessionId,
lastRefresh,
fieldsToFetch,
browserSessionSeed,
]
);
const { overallStats, progress: overallStatsProgress } = useOverallStats(
fieldStatsRequest,
lastRefresh,
browserSessionSeed,
dataVisualizerListState.probability
);
@ -269,10 +283,20 @@ export const useDataVisualizerGridData = (
return { metricConfigs: existMetricFields, nonMetricConfigs: existNonMetricFields };
}, [metricConfigs, nonMetricConfigs, overallStatsProgress.loaded]);
const probability = useMemo(
() =>
// If random sampler probability is already manually selected, or is available from the URL
// use that instead of using the probability calculated from the doc count
(dataVisualizerListState.probability === null
? overallStats?.documentCountStats?.probability
: dataVisualizerListState.probability) ?? 1,
[dataVisualizerListState.probability, overallStats?.documentCountStats?.probability]
);
const strategyResponse = useFieldStatsSearchStrategy(
fieldStatsRequest,
configsWithoutStats,
dataVisualizerListState
dataVisualizerListState,
probability
);
const combinedProgress = useMemo(

View file

@ -65,7 +65,8 @@ const createBatchedRequests = (fields: Field[], maxBatchSize = 10) => {
export function useFieldStatsSearchStrategy(
searchStrategyParams: OverallStatsSearchStrategyParams | undefined,
fieldStatsParams: FieldStatsParams | undefined,
dataVisualizerListState: DataVisualizerIndexBasedAppState
dataVisualizerListState: DataVisualizerIndexBasedAppState,
samplingProbability: number | null
): FieldStatsSearchStrategyReturnBase {
const {
services: {
@ -168,6 +169,9 @@ export function useFieldStatsSearchStrategy(
},
},
maxExamples: MAX_EXAMPLES_DEFAULT,
samplingProbability,
browserSessionSeed: searchStrategyParams.browserSessionSeed,
samplingOption: searchStrategyParams.samplingOption,
};
const searchOptions: ISearchOptions = {
abortSignal: abortCtrl.current.signal,
@ -295,6 +299,7 @@ export function useFieldStatsSearchStrategy(
dataVisualizerListState.pageIndex,
dataVisualizerListState.sortDirection,
dataVisualizerListState.sortField,
samplingProbability,
]);
const cancelFetch = useCallback(() => {

View file

@ -30,14 +30,14 @@ import {
import type { OverallStats } from '../types/overall_stats';
import { getDefaultPageState } from '../components/index_data_visualizer_view/index_data_visualizer_view';
import { extractErrorProperties } from '../utils/error_utils';
import type {
import {
DataStatsFetchProgress,
isRandomSamplingOption,
OverallStatsSearchStrategyParams,
} from '../../../../common/types/field_stats';
import { getDocumentCountStats } from '../search_strategy/requests/get_document_stats';
import { getInitialProgress, getReducer } from '../progress_utils';
import { MAX_CONCURRENT_REQUESTS } from '../constants/index_data_visualizer_viewer';
import { DocumentCountStats } from '../../../../common/types/field_stats';
/**
* Helper function to run forkJoin
@ -92,7 +92,6 @@ function displayError(toastNotifications: ToastsStart, index: string, err: any)
export function useOverallStats<TParams extends OverallStatsSearchStrategyParams>(
searchStrategyParams: TParams | undefined,
lastRefresh: number,
browserSessionSeed: number,
probability?: number | null
): {
progress: DataStatsFetchProgress;
@ -114,167 +113,163 @@ export function useOverallStats<TParams extends OverallStatsSearchStrategyParams
const abortCtrl = useRef(new AbortController());
const searchSubscription$ = useRef<Subscription>();
const startFetch = useCallback(() => {
searchSubscription$.current?.unsubscribe();
abortCtrl.current.abort();
abortCtrl.current = new AbortController();
const startFetch = useCallback(async () => {
try {
searchSubscription$.current?.unsubscribe();
abortCtrl.current.abort();
abortCtrl.current = new AbortController();
if (!searchStrategyParams || lastRefresh === 0) return;
if (!searchStrategyParams || lastRefresh === 0) return;
setFetchState({
...getInitialProgress(),
error: undefined,
});
setFetchState({
...getInitialProgress(),
isRunning: true,
error: undefined,
});
const {
aggregatableFields,
nonAggregatableFields,
index,
searchQuery,
timeFieldName,
earliest,
latest,
runtimeFieldMap,
samplerShardSize,
} = searchStrategyParams;
const {
aggregatableFields,
nonAggregatableFields,
index,
searchQuery,
timeFieldName,
earliest,
latest,
runtimeFieldMap,
samplingOption,
} = searchStrategyParams;
const searchOptions: ISearchOptions = {
abortSignal: abortCtrl.current.signal,
sessionId: searchStrategyParams?.sessionId,
};
const searchOptions: ISearchOptions = {
abortSignal: abortCtrl.current.signal,
sessionId: searchStrategyParams?.sessionId,
};
const nonAggregatableFieldsObs = nonAggregatableFields.map((fieldName: string) =>
data.search
.search<IKibanaSearchRequest, IKibanaSearchResponse>(
{
params: checkNonAggregatableFieldExistsRequest(
index,
searchQuery,
fieldName,
timeFieldName,
earliest,
latest,
runtimeFieldMap
),
},
searchOptions
)
.pipe(
map((resp) => {
return {
...resp,
rawResponse: { ...resp.rawResponse, fieldName },
} as IKibanaSearchResponse;
})
)
);
const documentCountStats = await getDocumentCountStats(
data.search,
searchStrategyParams,
searchOptions,
samplingOption.seed,
probability
);
// Have to divide into smaller requests to avoid 413 payload too large
const aggregatableFieldsChunks = chunk(aggregatableFields, 30);
const aggregatableOverallStatsObs = aggregatableFieldsChunks.map((aggregatableFieldsChunk) =>
data.search
.search(
{
params: checkAggregatableFieldsExistRequest(
index,
searchQuery,
aggregatableFieldsChunk,
samplerShardSize,
timeFieldName,
earliest,
latest,
undefined,
runtimeFieldMap
),
},
searchOptions
)
.pipe(
map((resp) => {
return {
...resp,
aggregatableFields: aggregatableFieldsChunk,
} as AggregatableFieldOverallStats;
})
)
);
const sub = rateLimitingForkJoin<
| DocumentCountStats
| AggregatableFieldOverallStats
| NonAggregatableFieldOverallStats
| undefined
>(
[
from(
getDocumentCountStats(
data.search,
searchStrategyParams,
searchOptions,
browserSessionSeed,
probability
const nonAggregatableFieldsObs = nonAggregatableFields.map((fieldName: string) =>
data.search
.search<IKibanaSearchRequest, IKibanaSearchResponse>(
{
params: checkNonAggregatableFieldExistsRequest(
index,
searchQuery,
fieldName,
timeFieldName,
earliest,
latest,
runtimeFieldMap
),
},
searchOptions
)
),
...aggregatableOverallStatsObs,
...nonAggregatableFieldsObs,
],
MAX_CONCURRENT_REQUESTS
);
.pipe(
map((resp) => {
return {
...resp,
rawResponse: { ...resp.rawResponse, fieldName },
} as IKibanaSearchResponse;
})
)
);
searchSubscription$.current = sub.subscribe({
next: (value) => {
const aggregatableOverallStatsResp: AggregatableFieldOverallStats[] = [];
const nonAggregatableOverallStatsResp: NonAggregatableFieldOverallStats[] = [];
const documentCountStats = value[0] as DocumentCountStats;
// Have to divide into smaller requests to avoid 413 payload too large
const aggregatableFieldsChunks = chunk(aggregatableFields, 30);
value.forEach((resp, idx) => {
if (!resp || idx === 0) return;
if (isAggregatableFieldOverallStats(resp)) {
aggregatableOverallStatsResp.push(resp);
}
if (isRandomSamplingOption(samplingOption)) {
samplingOption.probability = documentCountStats.probability ?? 1;
}
const aggregatableOverallStatsObs = aggregatableFieldsChunks.map((aggregatableFieldsChunk) =>
data.search
.search(
{
params: checkAggregatableFieldsExistRequest(
index,
searchQuery,
aggregatableFieldsChunk,
samplingOption,
timeFieldName,
earliest,
latest,
undefined,
runtimeFieldMap
),
},
searchOptions
)
.pipe(
map((resp) => {
return {
...resp,
aggregatableFields: aggregatableFieldsChunk,
} as AggregatableFieldOverallStats;
})
)
);
if (isNonAggregatableFieldOverallStats(resp)) {
nonAggregatableOverallStatsResp.push(resp);
}
});
const sub = rateLimitingForkJoin<
AggregatableFieldOverallStats | NonAggregatableFieldOverallStats | undefined
>([...aggregatableOverallStatsObs, ...nonAggregatableFieldsObs], MAX_CONCURRENT_REQUESTS);
const totalCount = documentCountStats?.totalCount ?? 0;
searchSubscription$.current = sub.subscribe({
next: (value) => {
const aggregatableOverallStatsResp: AggregatableFieldOverallStats[] = [];
const nonAggregatableOverallStatsResp: NonAggregatableFieldOverallStats[] = [];
const aggregatableOverallStats = processAggregatableFieldsExistResponse(
aggregatableOverallStatsResp,
aggregatableFields,
samplerShardSize,
totalCount
);
value.forEach((resp, idx) => {
if (isAggregatableFieldOverallStats(resp)) {
aggregatableOverallStatsResp.push(resp);
}
const nonAggregatableOverallStats = processNonAggregatableFieldsExistResponse(
nonAggregatableOverallStatsResp,
nonAggregatableFields
);
if (isNonAggregatableFieldOverallStats(resp)) {
nonAggregatableOverallStatsResp.push(resp);
}
});
setOverallStats({
documentCountStats,
...nonAggregatableOverallStats,
...aggregatableOverallStats,
totalCount,
});
},
error: (error) => {
displayError(toasts, searchStrategyParams.index, extractErrorProperties(error));
setFetchState({
isRunning: false,
error,
});
},
complete: () => {
setFetchState({
loaded: 100,
isRunning: false,
});
},
});
// eslint-disable-next-line react-hooks/exhaustive-deps
const totalCount = documentCountStats?.totalCount ?? 0;
const aggregatableOverallStats = processAggregatableFieldsExistResponse(
aggregatableOverallStatsResp,
aggregatableFields
);
const nonAggregatableOverallStats = processNonAggregatableFieldsExistResponse(
nonAggregatableOverallStatsResp,
nonAggregatableFields
);
setOverallStats({
documentCountStats,
...nonAggregatableOverallStats,
...aggregatableOverallStats,
totalCount,
});
},
error: (error) => {
displayError(toasts, searchStrategyParams.index, extractErrorProperties(error));
setFetchState({
isRunning: false,
error,
});
},
complete: () => {
setFetchState({
loaded: 100,
isRunning: false,
});
},
});
} catch (error) {
// An `AbortError` gets triggered when a user cancels a request by navigating away, we need to ignore these errors.
if (error.name !== 'AbortError') {
displayError(toasts, searchStrategyParams!.index, extractErrorProperties(error));
}
}
}, [data.search, searchStrategyParams, toasts, lastRefresh, probability]);
const cancelFetch = useCallback(() => {
@ -286,8 +281,11 @@ export function useOverallStats<TParams extends OverallStatsSearchStrategyParams
// auto-update
useEffect(() => {
startFetch();
}, [startFetch]);
useEffect(() => {
return cancelFetch;
}, [startFetch, cancelFetch]);
}, [cancelFetch]);
return useMemo(
() => ({

View file

@ -0,0 +1,103 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import {
Aggs,
SamplingOption,
isNormalSamplingOption,
isRandomSamplingOption,
} from '../../../../../common/types/field_stats';
/**
 * Wraps the supplied aggregations in a `sample` aggregation selected by the sampling option.
 *
 * - normal sampling: a `sampler` aggregation using the option's `shardSize`
 * - random sampling: a `random_sampler` aggregation using the option's `probability`
 * - anything else (no sampling, or a missing option): a `random_sampler` with probability 1
 *
 * The result is always nested under the `sample` key so that response handling can read
 * every result from the same `sample` path, whichever mode was requested.
 *
 * @param aggs - The aggregations to nest under the sampling aggregation.
 * @param samplingOption - The sampling mode and its parameters. A missing value is
 *   tolerated at runtime and treated like "no sampling".
 * @returns An aggregation container with a single `sample` entry wrapping `aggs`.
 */
export function buildAggregationWithSamplingOption(
  aggs: Aggs,
  samplingOption: SamplingOption
): Record<string, estypes.AggregationsAggregationContainer> {
  // Optional chaining keeps a missing option from throwing; an empty seed is omitted
  // from the request entirely.
  const seed = samplingOption?.seed;
  const seedParam = seed ? { seed } : {};

  if (samplingOption && isNormalSamplingOption(samplingOption)) {
    return {
      sample: {
        sampler: {
          shard_size: samplingOption.shardSize,
        },
        aggs,
      },
    };
  }

  if (samplingOption && isRandomSamplingOption(samplingOption)) {
    return {
      sample: {
        // @ts-expect-error AggregationsAggregationContainer needs to be updated with random_sampler
        random_sampler: {
          probability: samplingOption.probability,
          ...seedParam,
        },
        aggs,
      },
    };
  }

  // No sampling (or no option at all): use a random sampler with probability 1
  // so that all results are still returned under the 'sample' path.
  return {
    sample: {
      aggs,
      // @ts-expect-error AggregationsAggregationContainer needs to be updated with random_sampler
      random_sampler: {
        probability: 1,
        ...seedParam,
      },
    },
  };
}
/**
 * Nests the given aggregations inside a `random_sampler` aggregation named `sample`.
 *
 * When `probability` is `null`, not greater than 0, or greater than 1, no wrapping
 * takes place and `aggs` is handed back untouched. A falsy `seed` is left out of the
 * request.
 *
 * @param aggs - Aggregations to run against the sampled documents.
 * @param probability - Sampling probability, or `null` to skip sampling.
 * @param seed - Seed passed to the random sampler when truthy.
 * @returns Either `aggs` itself or a container holding a single `sample` aggregation.
 */
export function buildRandomSamplerAggregation(
  aggs: Aggs,
  probability: number | null,
  seed: number
): Record<string, estypes.AggregationsAggregationContainer> {
  if (probability === null || probability > 1 || probability <= 0) {
    return aggs;
  }

  const seedParam = seed ? { seed } : {};

  return {
    sample: {
      aggs,
      // @ts-expect-error AggregationsAggregationContainer needs to be updated with random_sampler
      random_sampler: { probability, ...seedParam },
    },
  };
}
/**
 * Nests the given aggregations inside a `sampler` aggregation named `sample`.
 *
 * A `shardSize` of zero or less means no sampling: `aggs` is returned as-is.
 *
 * @param aggs - Aggregations to run against the sampled documents.
 * @param shardSize - Value used for the sampler's `shard_size`.
 * @returns Either `aggs` itself or a container holding a single `sample` aggregation.
 */
export function buildSamplerAggregation(
  aggs: Aggs,
  shardSize: number
): Record<string, estypes.AggregationsAggregationContainer> {
  return shardSize <= 0
    ? aggs
    : {
        sample: {
          aggs,
          sampler: { shard_size: shardSize },
        },
      };
}

View file

@ -14,9 +14,10 @@ import type {
ISearchOptions,
ISearchStart,
} from '@kbn/data-plugin/public';
import { buildSamplerAggregation, getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
import { processTopValues } from './utils';
import { buildAggregationWithSamplingOption } from './build_random_sampler_agg';
import type {
Field,
BooleanFieldStats,
@ -30,7 +31,7 @@ export const getBooleanFieldsStatsRequest = (
params: FieldStatsCommonRequestParams,
fields: Field[]
) => {
const { index, query, runtimeFieldMap, samplerShardSize } = params;
const { index, query, runtimeFieldMap } = params;
const size = 0;
const aggs: Aggs = {};
@ -48,7 +49,7 @@ export const getBooleanFieldsStatsRequest = (
});
const searchBody = {
query,
aggs: buildSamplerAggregation(aggs, samplerShardSize),
aggs: buildAggregationWithSamplingOption(aggs, params.samplingOption),
...(isPopulatedObject(runtimeFieldMap) ? { runtime_mappings: runtimeFieldMap } : {}),
};
@ -65,7 +66,6 @@ export const fetchBooleanFieldsStats = (
fields: Field[],
options: ISearchOptions
): Observable<BooleanFieldStats[] | FieldStatsError> => {
const { samplerShardSize } = params;
const request: estypes.SearchRequest = getBooleanFieldsStatsRequest(params, fields);
return dataSearch
.search<IKibanaSearchRequest, IKibanaSearchResponse>({ params: request }, options)
@ -80,15 +80,34 @@ export const fetchBooleanFieldsStats = (
if (!isIKibanaSearchResponse(resp)) return resp;
const aggregations = resp.rawResponse.aggregations;
const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
const aggsPath = ['sample'];
const sampleCount = get(aggregations, [...aggsPath, 'doc_count'], 0);
const batchStats: BooleanFieldStats[] = fields.map((field, i) => {
const safeFieldName = field.fieldName;
// Sampler agg will yield doc_count that's bigger than the actual # of sampled records
// because it uses the stored _doc_count if available
// https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-doc-count-field.html
// therefore we need to correct it by multiplying by the sampled probability
const count = get(
aggregations,
[...aggsPath, `${safeFieldName}_value_count`, 'doc_count'],
0
);
const fieldAgg = get(aggregations, [...aggsPath, `${safeFieldName}_values`], {});
const { topValuesSampleSize, topValues } = processTopValues(fieldAgg);
const multiplier =
count > sampleCount ? get(aggregations, [...aggsPath, 'probability'], 1) : 1;
const stats: BooleanFieldStats = {
fieldName: field.fieldName,
count: get(aggregations, [...aggsPath, `${safeFieldName}_value_count`, 'doc_count'], 0),
count: count * multiplier,
trueCount: 0,
falseCount: 0,
topValues,
topValuesSampleSize,
};
const valueBuckets: Array<{ [key: string]: number }> = get(
@ -97,7 +116,7 @@ export const fetchBooleanFieldsStats = (
[]
);
valueBuckets.forEach((bucket) => {
stats[`${bucket.key_as_string}Count`] = bucket.doc_count;
stats[`${bucket.key_as_string}Count` as 'trueCount' | 'falseCount'] = bucket.doc_count;
});
return stats;
});

View file

@ -15,8 +15,8 @@ import type {
ISearchOptions,
ISearchStart,
} from '@kbn/data-plugin/public';
import { buildSamplerAggregation, getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
import { buildAggregationWithSamplingOption } from './build_random_sampler_agg';
import type { FieldStatsCommonRequestParams } from '../../../../../common/types/field_stats';
import type { Field, DateFieldStats, Aggs } from '../../../../../common/types/field_stats';
import { FieldStatsError, isIKibanaSearchResponse } from '../../../../../common/types/field_stats';
@ -26,7 +26,7 @@ export const getDateFieldsStatsRequest = (
params: FieldStatsCommonRequestParams,
fields: Field[]
) => {
const { index, query, runtimeFieldMap, samplerShardSize } = params;
const { index, query, runtimeFieldMap } = params;
const size = 0;
@ -45,7 +45,7 @@ export const getDateFieldsStatsRequest = (
const searchBody = {
query,
aggs: buildSamplerAggregation(aggs, samplerShardSize),
aggs: buildAggregationWithSamplingOption(aggs, params.samplingOption),
...(isPopulatedObject(runtimeFieldMap) ? { runtime_mappings: runtimeFieldMap } : {}),
};
return {
@ -61,8 +61,6 @@ export const fetchDateFieldsStats = (
fields: Field[],
options: ISearchOptions
): Observable<DateFieldStats[] | FieldStatsError> => {
const { samplerShardSize } = params;
const request: estypes.SearchRequest = getDateFieldsStatsRequest(params, fields);
return dataSearch
.search<IKibanaSearchRequest, IKibanaSearchResponse>({ params: request }, options)
@ -76,15 +74,10 @@ export const fetchDateFieldsStats = (
map((resp) => {
if (!isIKibanaSearchResponse(resp)) return resp;
const aggregations = resp.rawResponse.aggregations;
const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
const aggsPath = ['sample'];
const batchStats: DateFieldStats[] = fields.map((field, i) => {
const safeFieldName = field.safeFieldName;
const docCount = get(
aggregations,
[...aggsPath, `${safeFieldName}_field_stats`, 'doc_count'],
0
);
const fieldStatsResp = get(
aggregations,
[...aggsPath, `${safeFieldName}_field_stats`, 'actual_stats'],
@ -92,7 +85,6 @@ export const fetchDateFieldsStats = (
);
return {
fieldName: field.fieldName,
count: docCount,
earliest: get(fieldStatsResp, 'min', 0),
latest: get(fieldStatsResp, 'max', 0),
} as DateFieldStats;

View file

@ -19,6 +19,8 @@ import type {
} from '../../../../../common/types/field_stats';
const MINIMUM_RANDOM_SAMPLER_DOC_COUNT = 100000;
const DEFAULT_INITIAL_RANDOM_SAMPLER_PROBABILITY = 0.000001;
export const getDocumentCountStatsRequest = (params: OverallStatsSearchStrategyParams) => {
const {
index,
@ -69,11 +71,11 @@ export const getDocumentCountStats = async (
search: DataPublicPluginStart['search'],
params: OverallStatsSearchStrategyParams,
searchOptions: ISearchOptions,
browserSessionSeed: number,
browserSessionSeed: string,
probability?: number | null,
minimumRandomSamplerDocCount?: number
): Promise<DocumentCountStats> => {
const seed = browserSessionSeed ?? Math.abs(seedrandom().int32());
const seed = browserSessionSeed ?? Math.abs(seedrandom().int32()).toString();
const {
index,
@ -83,10 +85,11 @@ export const getDocumentCountStats = async (
runtimeFieldMap,
searchQuery,
intervalMs,
fieldsToFetch,
} = params;
const result = { randomlySampled: false, took: 0, totalCount: 0 };
// Probability = 1 represents no sampling
const result = { randomlySampled: false, took: 0, totalCount: 0, probability: 1 };
const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, searchQuery);
const query = {
@ -109,7 +112,7 @@ export const getDocumentCountStats = async (
// If probability is provided, use that
// Else, make an initial query using very low p
// so that we can calculate the next p value that's appropriate for the data set
const initialDefaultProbability = probability ?? 0.000001;
const initialDefaultProbability = probability ?? DEFAULT_INITIAL_RANDOM_SAMPLER_PROBABILITY;
const getAggsWithRandomSampling = (p: number) => ({
sampler: {
@ -121,16 +124,13 @@ export const getDocumentCountStats = async (
},
});
const hasTimeField = timeFieldName !== undefined && intervalMs !== undefined && intervalMs > 0;
const getSearchParams = (aggregations: unknown, trackTotalHits = false) => ({
index,
body: {
query,
...(!fieldsToFetch &&
timeFieldName !== undefined &&
intervalMs !== undefined &&
intervalMs > 0
? { aggs: aggregations }
: {}),
...(hasTimeField ? { aggs: aggregations } : {}),
...(isPopulatedObject(runtimeFieldMap) ? { runtime_mappings: runtimeFieldMap } : {}),
},
track_total_hits: trackTotalHits,
@ -142,7 +142,7 @@ export const getDocumentCountStats = async (
params: getSearchParams(
getAggsWithRandomSampling(initialDefaultProbability),
// Track total hits if time field is not defined
timeFieldName === undefined
!hasTimeField
),
},
searchOptions
@ -189,13 +189,10 @@ export const getDocumentCountStats = async (
const newProbability =
(initialDefaultProbability * numDocs) / (numSampled - 2 * Math.sqrt(numSampled));
// If the number of docs sampled is indicative of query with < 10 million docs
// proceed to make a vanilla aggregation without any sampling
if (
numSampled === 0 ||
newProbability === Infinity ||
numSampled / initialDefaultProbability < 1e7
) {
// If the number of docs is < 3 million
// proceed to make a vanilla aggregation without any sampling (probability = 1)
// Minimum of 4 docs (3e6 * 0.000001 + 1) sampled gives us 90% confidence interval # docs is within
if (newProbability === Infinity || numSampled <= 4) {
const vanillaAggResp = await search
.search(
{
@ -241,7 +238,7 @@ export const processDocumentCountStats = (
body: estypes.SearchResponse | undefined,
params: OverallStatsSearchStrategyParams,
randomlySampled = false
): DocumentCountStats | undefined => {
): Omit<DocumentCountStats, 'probability'> | undefined => {
if (!body) return undefined;
let totalCount = 0;

View file

@ -16,30 +16,33 @@ import {
ISearchOptions,
} from '@kbn/data-plugin/common';
import type { ISearchStart } from '@kbn/data-plugin/public';
import { buildSamplerAggregation, getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
import {
MAX_PERCENT,
PERCENTILE_SPACING,
SAMPLER_TOP_TERMS_SHARD_SIZE,
SAMPLER_TOP_TERMS_THRESHOLD,
} from './constants';
import type { Aggs, FieldStatsCommonRequestParams } from '../../../../../common/types/field_stats';
import { processTopValues } from './utils';
import { isDefined } from '../../../common/util/is_defined';
import { buildAggregationWithSamplingOption } from './build_random_sampler_agg';
import { MAX_PERCENT, PERCENTILE_SPACING, SAMPLER_TOP_TERMS_THRESHOLD } from './constants';
import type {
Aggs,
Bucket,
FieldStatsCommonRequestParams,
} from '../../../../../common/types/field_stats';
import type {
Field,
NumericFieldStats,
Bucket,
FieldStatsError,
} from '../../../../../common/types/field_stats';
import { processDistributionData } from '../../utils/process_distribution_data';
import { extractErrorProperties } from '../../utils/error_utils';
import { isIKibanaSearchResponse } from '../../../../../common/types/field_stats';
import {
isIKibanaSearchResponse,
isNormalSamplingOption,
} from '../../../../../common/types/field_stats';
export const getNumericFieldsStatsRequest = (
params: FieldStatsCommonRequestParams,
fields: Field[]
) => {
const { index, query, runtimeFieldMap, samplerShardSize } = params;
const { index, query, runtimeFieldMap } = params;
const size = 0;
@ -83,23 +86,12 @@ export const getNumericFieldsStatsRequest = (
} as AggregationsTermsAggregation,
};
// If cardinality >= SAMPLE_TOP_TERMS_THRESHOLD, run the top terms aggregation
// in a sampler aggregation, even if no sampling has been specified (samplerShardSize < 1).
if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) {
aggs[`${safeFieldName}_top`] = buildSamplerAggregation(
{
top,
},
0.05
);
} else {
aggs[`${safeFieldName}_top`] = top;
}
aggs[`${safeFieldName}_top`] = top;
});
const searchBody = {
query,
aggs: buildSamplerAggregation(aggs, samplerShardSize),
aggs: buildAggregationWithSamplingOption(aggs, params.samplingOption),
...(isPopulatedObject(runtimeFieldMap) ? { runtime_mappings: runtimeFieldMap } : {}),
};
@ -132,7 +124,7 @@ export const fetchNumericFieldsStats = (
if (!isIKibanaSearchResponse(resp)) return resp;
const aggregations = resp.rawResponse.aggregations;
const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
const aggsPath = ['sample'];
const batchStats: NumericFieldStats[] = [];
@ -154,28 +146,23 @@ export const fetchNumericFieldsStats = (
topAggsPath.push('top');
}
const topValues: Bucket[] = get(aggregations, [...topAggsPath, 'buckets'], []);
const fieldAgg = get(aggregations, [...topAggsPath], {}) as { buckets: Bucket[] };
const { topValuesSampleSize, topValues } = processTopValues(fieldAgg);
const stats: NumericFieldStats = {
fieldName: field.fieldName,
count: docCount,
min: get(fieldStatsResp, 'min', 0),
max: get(fieldStatsResp, 'max', 0),
avg: get(fieldStatsResp, 'avg', 0),
isTopValuesSampled:
field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD || samplerShardSize > 0,
isNormalSamplingOption(params.samplingOption) ||
(isDefined(params.samplingProbability) && params.samplingProbability < 1),
topValues,
topValuesSampleSize: topValues.reduce(
(acc, curr) => acc + curr.doc_count,
get(aggregations, [...topAggsPath, 'sum_other_doc_count'], 0)
),
topValuesSamplerShardSize:
field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD
? SAMPLER_TOP_TERMS_SHARD_SIZE
: samplerShardSize,
topValuesSampleSize,
topValuesSamplerShardSize: get(aggregations, ['sample', 'doc_count']),
};
if (stats.count > 0) {
if (docCount > 0) {
const percentiles = get(
aggregations,
[...aggsPath, `${safeFieldName}_percentiles`, 'values'],

View file

@ -15,12 +15,12 @@ import type {
ISearchOptions,
ISearchStart,
} from '@kbn/data-plugin/public';
import { buildSamplerAggregation, getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
import { SAMPLER_TOP_TERMS_SHARD_SIZE, SAMPLER_TOP_TERMS_THRESHOLD } from './constants';
import { processTopValues } from './utils';
import { buildAggregationWithSamplingOption } from './build_random_sampler_agg';
import { SAMPLER_TOP_TERMS_THRESHOLD } from './constants';
import type {
Aggs,
Bucket,
Field,
FieldStatsCommonRequestParams,
StringFieldStats,
@ -32,7 +32,7 @@ export const getStringFieldStatsRequest = (
params: FieldStatsCommonRequestParams,
fields: Field[]
) => {
const { index, query, runtimeFieldMap, samplerShardSize } = params;
const { index, query, runtimeFieldMap } = params;
const size = 0;
@ -49,25 +49,12 @@ export const getStringFieldStatsRequest = (
} as AggregationsTermsAggregation,
};
// If cardinality >= SAMPLE_TOP_TERMS_THRESHOLD, run the top terms aggregation
// in a sampler aggregation, even if no sampling has been specified (samplerShardSize < 1).
if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) {
aggs[`${safeFieldName}_top`] = {
sampler: {
shard_size: SAMPLER_TOP_TERMS_SHARD_SIZE,
},
aggs: {
top,
},
};
} else {
aggs[`${safeFieldName}_top`] = top;
}
aggs[`${safeFieldName}_top`] = top;
});
const searchBody = {
query,
aggs: buildSamplerAggregation(aggs, samplerShardSize),
aggs: buildAggregationWithSamplingOption(aggs, params.samplingOption),
...(isPopulatedObject(runtimeFieldMap) ? { runtime_mappings: runtimeFieldMap } : {}),
};
@ -99,7 +86,8 @@ export const fetchStringFieldsStats = (
map((resp) => {
if (!isIKibanaSearchResponse(resp)) return resp;
const aggregations = resp.rawResponse.aggregations;
const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
const aggsPath = ['sample'];
const batchStats: StringFieldStats[] = [];
fields.forEach((field, i) => {
@ -110,21 +98,18 @@ export const fetchStringFieldsStats = (
topAggsPath.push('top');
}
const topValues: Bucket[] = get(aggregations, [...topAggsPath, 'buckets'], []);
const fieldAgg = get(aggregations, [...topAggsPath], {});
const { topValuesSampleSize, topValues } = processTopValues(
fieldAgg,
get(aggregations, ['sample', 'doc_count'])
);
const stats = {
fieldName: field.fieldName,
isTopValuesSampled:
field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD || samplerShardSize > 0,
isTopValuesSampled: true,
topValues,
topValuesSampleSize: topValues.reduce(
(acc, curr) => acc + curr.doc_count,
get(aggregations, [...topAggsPath, 'sum_other_doc_count'], 0)
),
topValuesSamplerShardSize:
field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD
? SAMPLER_TOP_TERMS_SHARD_SIZE
: samplerShardSize,
topValuesSampleSize,
topValuesSamplerShardSize: get(aggregations, ['sample', 'doc_count']),
};
batchStats.push(stats);

View file

@ -10,21 +10,21 @@ import { get } from 'lodash';
import { Query } from '@kbn/es-query';
import { IKibanaSearchResponse } from '@kbn/data-plugin/common';
import type { AggCardinality } from '@kbn/ml-agg-utils';
import { buildSamplerAggregation, getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
import { buildAggregationWithSamplingOption } from './build_random_sampler_agg';
import {
buildBaseFilterCriteria,
getSafeAggregationName,
} from '../../../../../common/utils/query_utils';
import { getDatafeedAggregations } from '../../../../../common/utils/datafeed_utils';
import { AggregatableField, NonAggregatableField } from '../../types/overall_stats';
import { Aggs } from '../../../../../common/types/field_stats';
import { Aggs, SamplingOption } from '../../../../../common/types/field_stats';
export const checkAggregatableFieldsExistRequest = (
dataViewTitle: string,
query: Query['query'],
aggregatableFields: string[],
samplerShardSize: number,
samplingOption: SamplingOption,
timeFieldName: string | undefined,
earliestMs?: number,
latestMs?: number,
@ -73,7 +73,9 @@ export const checkAggregatableFieldsExistRequest = (
filter: filterCriteria,
},
},
...(isPopulatedObject(aggs) ? { aggs: buildSamplerAggregation(aggs, samplerShardSize) } : {}),
...(isPopulatedObject(aggs)
? { aggs: buildAggregationWithSamplingOption(aggs, samplingOption) }
: {}),
...(isPopulatedObject(combinedRuntimeMappings)
? { runtime_mappings: combinedRuntimeMappings }
: {}),
@ -109,8 +111,6 @@ export function isNonAggregatableFieldOverallStats(
export const processAggregatableFieldsExistResponse = (
responses: AggregatableFieldOverallStats[] | undefined,
aggregatableFields: string[],
samplerShardSize: number,
totalCount: number,
datafeedConfig?: estypes.MlDatafeed
) => {
const stats = {
@ -123,12 +123,17 @@ export const processAggregatableFieldsExistResponse = (
responses.forEach(({ rawResponse: body, aggregatableFields: aggregatableFieldsChunk }) => {
const aggregations = body.aggregations;
const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
const sampleCount =
samplerShardSize > 0 ? get(aggregations, ['sample', 'doc_count'], 0) : totalCount;
const aggsPath = ['sample'];
const sampleCount = aggregations.sample.doc_count;
aggregatableFieldsChunk.forEach((field, i) => {
const safeFieldName = getSafeAggregationName(field, i);
// Sampler agg will yield doc_count that's bigger than the actual # of sampled records
// because it uses the stored _doc_count if available
// https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-doc-count-field.html
// therefore we need to correct it by multiplying by the sampled probability
const count = get(aggregations, [...aggsPath, `${safeFieldName}_count`, 'doc_count'], 0);
const multiplier =
count > sampleCount ? get(aggregations, [...aggsPath, 'probability'], 1) : 1;
if (count > 0) {
const cardinality = get(
aggregations,
@ -140,7 +145,7 @@ export const processAggregatableFieldsExistResponse = (
existsInDocs: true,
stats: {
sampleCount,
count,
count: count * multiplier,
cardinality,
},
});

View file

@ -0,0 +1,42 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
import { Bucket } from '../../../../../common/types/field_stats';
/**
 * Utility to calculate the correct sample size, whether or not `_doc_count` is set,
 * and to calculate the percentage (as a fraction) for each top-values bucket.
 * https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-doc-count-field.html
 *
 * The total number of documents is derived from the aggregation result itself:
 * the sum of every bucket's `doc_count` plus `sum_other_doc_count`.
 *
 * @param aggResult - Aggregation result. `buckets` and `sum_other_doc_count` are
 * read when present; anything else yields an empty result.
 * @param sampledCount - Optional number of sampled documents. When truthy, each
 * bucket's `doc_count` is rescaled (and floored) so that the counts are expressed
 * relative to `sampledCount` instead of the derived total.
 * @returns `topValuesSampleSize` (the derived total) and `topValues`, the buckets
 * with an adjusted `doc_count` and an added `percent` fraction.
 */
export const processTopValues = (aggResult: object, sampledCount?: number) => {
  const topValuesBuckets: Bucket[] = isPopulatedObject<'buckets', Bucket[]>(aggResult, ['buckets'])
    ? aggResult.buckets
    : [];

  const sumOtherDocCount = isPopulatedObject<'sum_other_doc_count', number>(aggResult, [
    'sum_other_doc_count',
  ])
    ? aggResult.sum_other_doc_count
    : 0;

  // `|| 0` keeps the total numeric even if a bucket is missing its `doc_count`
  const valuesInTopBuckets =
    topValuesBuckets.reduce((sum, bucket) => sum + bucket.doc_count, 0) || 0;

  // We could use `aggregations.sample.sample_count.value` instead, but it does not always give a correct sum
  // See Github issue #144625
  const realNumberOfDocuments = valuesInTopBuckets + sumOtherDocCount;

  // Guard against dividing by zero: buckets that all report `doc_count: 0`
  // would otherwise produce NaN for both `doc_count` and `percent`.
  const hasDocuments = realNumberOfDocuments > 0;

  const topValues = topValuesBuckets.map((bucket) => ({
    ...bucket,
    doc_count:
      sampledCount && hasDocuments
        ? Math.floor(bucket.doc_count * (sampledCount / realNumberOfDocuments))
        : bucket.doc_count,
    percent: hasDocuments ? bucket.doc_count / realNumberOfDocuments : 0,
  }));

  return {
    topValuesSampleSize: realNumberOfDocuments,
    topValues,
  };
};

View file

@ -9,7 +9,6 @@ import type { Filter } from '@kbn/es-query';
import type { Query } from '@kbn/data-plugin/common/query';
import type { RandomSamplerOption } from '../constants/random_sampler';
import type { SearchQueryLanguage } from './combined_query';
export interface ListingPageUrlState {
pageSize: number;
pageIndex: number;

View file

@ -656,20 +656,7 @@ export class DataVisualizer {
},
};
// If cardinality >= SAMPLE_TOP_TERMS_THRESHOLD, run the top terms aggregation
// in a sampler aggregation, even if no sampling has been specified (samplerShardSize < 1).
if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) {
aggs[`${safeFieldName}_top`] = {
sampler: {
shard_size: SAMPLER_TOP_TERMS_SHARD_SIZE,
},
aggs: {
top,
},
};
} else {
aggs[`${safeFieldName}_top`] = top;
}
aggs[`${safeFieldName}_top`] = top;
});
const searchBody = {
@ -782,20 +769,7 @@ export class DataVisualizer {
},
};
// If cardinality >= SAMPLE_TOP_TERMS_THRESHOLD, run the top terms aggregation
// in a sampler aggregation, even if no sampling has been specified (samplerShardSize < 1).
if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) {
aggs[`${safeFieldName}_top`] = {
sampler: {
shard_size: SAMPLER_TOP_TERMS_SHARD_SIZE,
},
aggs: {
top,
},
};
} else {
aggs[`${safeFieldName}_top`] = top;
}
aggs[`${safeFieldName}_top`] = top;
});
const searchBody = {

View file

@ -9941,8 +9941,6 @@
"xpack.dataVisualizer.dataGrid.field.metricDistributionChart.tooltipValueBetweenLabel": "{percent} % des documents ont des valeurs comprises entre {minValFormatted} et {maxValFormatted}",
"xpack.dataVisualizer.dataGrid.field.metricDistributionChart.tooltipValueEqualLabel": "{percent} % des documents ont une valeur de {valFormatted}",
"xpack.dataVisualizer.dataGrid.field.removeFilterAriaLabel": "Exclure le {fieldName} : \"{value}\"",
"xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromSampleDescription": "Calculé à partir d'un échantillon de {topValuesSamplerShardSize} documents par partition",
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.choroplethMapTopValues.calculatedFromSampleDescription": "Calculé à partir d'un échantillon de {topValuesSamplerShardSize} documents par partition",
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.numberContent.displayingPercentilesLabel": "Affichage de {minPercent} - {maxPercent} centiles",
"xpack.dataVisualizer.dataGrid.fieldText.fieldMayBePopulatedDescription": "Il peut être rempli, par exemple, à l'aide d'un paramètre {copyToParam} dans le mapping du document ou être réduit à partir du champ {sourceParam} après une indexation par l'utilisation des paramètres {includesParam} et {excludesParam}.",
"xpack.dataVisualizer.dataGrid.fieldText.fieldNotPresentDescription": "Ce champ n'était pas présent dans le champ {sourceParam} des documents interrogés.",
@ -9980,7 +9978,6 @@
"xpack.dataVisualizer.nameCollisionMsg": "\"{name}\" existe déjà, veuillez fournir un nom unique",
"xpack.dataVisualizer.randomSamplerSettingsPopUp.probabilityLabel": "Probabilité utilisée : {samplingProbability} %",
"xpack.dataVisualizer.searchPanel.ofFieldsTotal": "sur un total de {totalCount}",
"xpack.dataVisualizer.searchPanel.sampleSizeOptionLabel": "Taille de l'échantillon (par partition) : {wrappedValue}",
"xpack.dataVisualizer.searchPanel.totalDocCountLabel": "Total des documents : {prepend}{strongTotalCount}",
"xpack.dataVisualizer.searchPanel.totalDocCountNumber": "{totalCount, plural, other {#}}",
"xpack.dataVisualizer.addCombinedFieldsLabel": "Ajouter un champ combiné",
@ -10013,8 +10010,6 @@
"xpack.dataVisualizer.dataGrid.field.loadingLabel": "Chargement",
"xpack.dataVisualizer.dataGrid.field.metricDistributionChart.seriesName": "distribution",
"xpack.dataVisualizer.dataGrid.field.topValuesLabel": "Valeurs les plus élevées",
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.booleanContent.falseCountLabel": "faux",
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.booleanContent.trueCountLabel": "vrai",
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.countLabel": "compte",
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.distinctValueLabel": "valeurs distinctes",
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.metaTableTitle": "Statistiques des documents",
@ -10259,13 +10254,10 @@
"xpack.dataVisualizer.removeCombinedFieldsLabel": "Retirer le champ combiné",
"xpack.dataVisualizer.samplingOptionsButton": "Options d'échantillonnage",
"xpack.dataVisualizer.searchPanel.allFieldsLabel": "Tous les champs",
"xpack.dataVisualizer.searchPanel.allOptionLabel": "Tout rechercher",
"xpack.dataVisualizer.searchPanel.invalidSyntax": "Syntaxe non valide",
"xpack.dataVisualizer.searchPanel.numberFieldsLabel": "Champs de numéros",
"xpack.dataVisualizer.searchPanel.queryBarPlaceholder": "La sélection d'une taille d'échantillon plus petite réduira les temps d'exécution de la requête et la charge sur le cluster.",
"xpack.dataVisualizer.searchPanel.queryBarPlaceholderText": "Rechercher… (par exemple, status:200 AND extension:\"PHP\")",
"xpack.dataVisualizer.searchPanel.randomSamplerMessage": "Des valeurs approximatives sont indiquées dans le décompte de documents et le graphique, qui utilisent des agrégations par échantillonnage aléatoire.",
"xpack.dataVisualizer.searchPanel.sampleSizeAriaLabel": "Sélectionner le nombre de documents à échantillonner",
"xpack.dataVisualizer.searchPanel.showEmptyFields": "Afficher les champs vides",
"xpack.dataVisualizer.title": "Charger un fichier",
"xpack.embeddableEnhanced.actions.panelNotifications.manyDrilldowns": "Le panneau comporte {count} recherches",

View file

@ -9928,8 +9928,6 @@
"xpack.dataVisualizer.dataGrid.field.metricDistributionChart.tooltipValueBetweenLabel": "{percent}% のドキュメントに {minValFormatted} から {maxValFormatted} の間の値があります",
"xpack.dataVisualizer.dataGrid.field.metricDistributionChart.tooltipValueEqualLabel": "{percent}% のドキュメントに {valFormatted} の値があります",
"xpack.dataVisualizer.dataGrid.field.removeFilterAriaLabel": "{fieldName}の除外:\"{value}\"",
"xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromSampleDescription": "1 つのシャードにつき {topValuesSamplerShardSize} のドキュメントのサンプルで計算されています",
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.choroplethMapTopValues.calculatedFromSampleDescription": "1 つのシャードにつき {topValuesSamplerShardSize} のドキュメントのサンプルで計算されています",
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.numberContent.displayingPercentilesLabel": "{minPercent} - {maxPercent} パーセンタイルを表示中",
"xpack.dataVisualizer.dataGrid.fieldText.fieldMayBePopulatedDescription": "たとえば、ドキュメントマッピングで {copyToParam} パラメーターを使ったり、{includesParam} と {excludesParam} パラメーターを使用してインデックスした後に {sourceParam} フィールドから切り取ったりして入力される場合があります。",
"xpack.dataVisualizer.dataGrid.fieldText.fieldNotPresentDescription": "このフィールドはクエリが実行されたドキュメントの {sourceParam} フィールドにありませんでした。",
@ -9966,7 +9964,6 @@
"xpack.dataVisualizer.nameCollisionMsg": "「{name}」はすでに存在します。一意の名前を入力してください。",
"xpack.dataVisualizer.randomSamplerSettingsPopUp.probabilityLabel": "使用された確率:{samplingProbability}%",
"xpack.dataVisualizer.searchPanel.ofFieldsTotal": "合計 {totalCount}",
"xpack.dataVisualizer.searchPanel.sampleSizeOptionLabel": "サンプルサイズ(シャード単位):{wrappedValue}",
"xpack.dataVisualizer.searchPanel.totalDocCountLabel": "合計ドキュメント数:{prepend}{strongTotalCount}",
"xpack.dataVisualizer.searchPanel.totalDocCountNumber": "{totalCount, plural, other {#}}",
"xpack.dataVisualizer.addCombinedFieldsLabel": "結合されたフィールドを追加",
@ -9999,8 +9996,6 @@
"xpack.dataVisualizer.dataGrid.field.loadingLabel": "読み込み中",
"xpack.dataVisualizer.dataGrid.field.metricDistributionChart.seriesName": "分布",
"xpack.dataVisualizer.dataGrid.field.topValuesLabel": "トップの値",
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.booleanContent.falseCountLabel": "false",
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.booleanContent.trueCountLabel": "true",
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.countLabel": "カウント",
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.distinctValueLabel": "固有の値",
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.metaTableTitle": "ドキュメント統計情報",
@ -10245,13 +10240,10 @@
"xpack.dataVisualizer.removeCombinedFieldsLabel": "結合されたフィールドを削除",
"xpack.dataVisualizer.samplingOptionsButton": "抽出オプション",
"xpack.dataVisualizer.searchPanel.allFieldsLabel": "すべてのフィールド",
"xpack.dataVisualizer.searchPanel.allOptionLabel": "すべて検索",
"xpack.dataVisualizer.searchPanel.invalidSyntax": "無効な構文",
"xpack.dataVisualizer.searchPanel.numberFieldsLabel": "数値フィールド",
"xpack.dataVisualizer.searchPanel.queryBarPlaceholder": "小さいサンプルサイズを選択することで、クエリの実行時間を短縮しクラスターへの負荷を軽減できます。",
"xpack.dataVisualizer.searchPanel.queryBarPlaceholderText": "検索…status:200 AND extension:\"PHP\"",
"xpack.dataVisualizer.searchPanel.randomSamplerMessage": "近似値は、ランダムサンプラーアグリゲーションを使用する、合計ドキュメント数およびグラフに表示されます。",
"xpack.dataVisualizer.searchPanel.sampleSizeAriaLabel": "サンプリングするドキュメント数を選択してください",
"xpack.dataVisualizer.searchPanel.showEmptyFields": "空のフィールドを表示",
"xpack.dataVisualizer.title": "ファイルをアップロード",
"xpack.embeddableEnhanced.actions.panelNotifications.manyDrilldowns": "パネルには{count}個のドリルダウンがあります",

View file

@ -9946,8 +9946,6 @@
"xpack.dataVisualizer.dataGrid.field.metricDistributionChart.tooltipValueBetweenLabel": "{percent}% 的文档具有介于 {minValFormatted} 和 {maxValFormatted} 之间的值",
"xpack.dataVisualizer.dataGrid.field.metricDistributionChart.tooltipValueEqualLabel": "{percent}% 的文档的值为 {valFormatted}",
"xpack.dataVisualizer.dataGrid.field.removeFilterAriaLabel": "筛除 {fieldName}:“{value}”",
"xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromSampleDescription": "基于每个分片的 {topValuesSamplerShardSize} 文档样例计算",
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.choroplethMapTopValues.calculatedFromSampleDescription": "基于每个分片的 {topValuesSamplerShardSize} 文档样例计算",
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.numberContent.displayingPercentilesLabel": "正在显示 {minPercent} - {maxPercent} 百分位数",
"xpack.dataVisualizer.dataGrid.fieldText.fieldMayBePopulatedDescription": "例如,可以使用文档映射中的 {copyToParam} 参数进行填充,也可以在索引后通过使用 {includesParam} 和 {excludesParam} 参数从 {sourceParam} 字段中修剪。",
"xpack.dataVisualizer.dataGrid.fieldText.fieldNotPresentDescription": "查询的文档的 {sourceParam} 字段中不存在此字段。",
@ -9985,7 +9983,6 @@
"xpack.dataVisualizer.nameCollisionMsg": "“{name}”已存在,请提供唯一名称",
"xpack.dataVisualizer.randomSamplerSettingsPopUp.probabilityLabel": "使用的概率:{samplingProbability}%",
"xpack.dataVisualizer.searchPanel.ofFieldsTotal": ",共 {totalCount} 个",
"xpack.dataVisualizer.searchPanel.sampleSizeOptionLabel": "样本大小(每分片):{wrappedValue}",
"xpack.dataVisualizer.searchPanel.totalDocCountLabel": "文档总数:{prepend}{strongTotalCount}",
"xpack.dataVisualizer.searchPanel.totalDocCountNumber": "{totalCount, plural, other {#}}",
"xpack.dataVisualizer.addCombinedFieldsLabel": "添加组合字段",
@ -10018,8 +10015,6 @@
"xpack.dataVisualizer.dataGrid.field.loadingLabel": "正在加载",
"xpack.dataVisualizer.dataGrid.field.metricDistributionChart.seriesName": "分布",
"xpack.dataVisualizer.dataGrid.field.topValuesLabel": "排名最前值",
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.booleanContent.falseCountLabel": "false",
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.booleanContent.trueCountLabel": "true",
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.countLabel": "计数",
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.distinctValueLabel": "不同值",
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.metaTableTitle": "文档统计",
@ -10264,13 +10259,10 @@
"xpack.dataVisualizer.removeCombinedFieldsLabel": "移除组合字段",
"xpack.dataVisualizer.samplingOptionsButton": "采样选项",
"xpack.dataVisualizer.searchPanel.allFieldsLabel": "所有字段",
"xpack.dataVisualizer.searchPanel.allOptionLabel": "搜索全部",
"xpack.dataVisualizer.searchPanel.invalidSyntax": "语法无效",
"xpack.dataVisualizer.searchPanel.numberFieldsLabel": "字段数目",
"xpack.dataVisualizer.searchPanel.queryBarPlaceholder": "选择较小的样例大小将减少查询运行时间和集群上的负载。",
"xpack.dataVisualizer.searchPanel.queryBarPlaceholderText": "搜索……例如status:200 AND extension:\"PHP\"",
"xpack.dataVisualizer.searchPanel.randomSamplerMessage": "总文档计数和图表中将显示近似值,它们使用随机采样器聚合。",
"xpack.dataVisualizer.searchPanel.sampleSizeAriaLabel": "选择要采样的文档数目",
"xpack.dataVisualizer.searchPanel.showEmptyFields": "显示空字段",
"xpack.dataVisualizer.title": "上传文件",
"xpack.embeddableEnhanced.actions.panelNotifications.manyDrilldowns": "面板有 {count} 个向下钻取",

View file

@ -14,7 +14,7 @@ import {
farequoteKQLSearchTestData,
farequoteLuceneSearchTestData,
sampleLogTestData,
} from './index_test_data';
} from './index_test_data_random_sampler';
export default function ({ getPageObject, getService }: FtrProviderContext) {
const headerPage = getPageObject('header');
@ -62,7 +62,6 @@ export default function ({ getPageObject, getService }: FtrProviderContext) {
}
await ml.dataVisualizerTable.assertSearchPanelExist();
await ml.dataVisualizerTable.assertSampleSizeInputExists();
await ml.dataVisualizerTable.assertFieldTypeInputExists();
await ml.dataVisualizerTable.assertFieldNameInputExists();
@ -113,18 +112,6 @@ export default function ({ getPageObject, getService }: FtrProviderContext) {
);
}
await ml.testExecution.logTestStep(
`${testData.suiteTitle} sample size control changes non-metric fields`
);
for (const sampleSizeCase of testData.sampleSizeValidations) {
const { size, expected } = sampleSizeCase;
await ml.dataVisualizerTable.setSampleSizeInputValue(
size,
expected.field,
expected.docCountFormatted
);
}
await ml.testExecution.logTestStep('sets and resets field type filter correctly');
await ml.dataVisualizerTable.setFieldTypeFilter(
testData.fieldTypeFilters,

View file

@ -63,7 +63,7 @@ export default function ({ getService }: FtrProviderContext) {
aggregatable: true,
loading: false,
exampleCount: 11,
docCountFormatted: '5000 (100%)',
docCountFormatted: '86,274 (100%)',
viewableInLens: true,
hasActionMenu: true,
},
@ -92,7 +92,7 @@ export default function ({ getService }: FtrProviderContext) {
existsInDocs: true,
aggregatable: true,
loading: false,
docCountFormatted: '5000 (100%)',
docCountFormatted: '86,274 (100%)',
statsMaxDecimalPlaces: 3,
topValuesCount: 11,
viewableInLens: true,
@ -153,7 +153,6 @@ export default function ({ getService }: FtrProviderContext) {
}
await ml.dataVisualizerTable.assertSearchPanelExist();
await ml.dataVisualizerTable.assertSampleSizeInputExists();
await ml.dataVisualizerTable.assertFieldTypeInputExists();
await ml.dataVisualizerTable.assertFieldNameInputExists();

View file

@ -15,8 +15,8 @@ export const farequoteDataViewTestData: TestData = {
fieldNameFilters: ['airline', '@timestamp'],
fieldTypeFilters: [ML_JOB_FIELD_TYPES.KEYWORD],
sampleSizeValidations: [
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1000 (100%)' } },
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5000 (100%)' } },
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
],
expected: {
totalDocCountFormatted: '86,274',
@ -27,7 +27,7 @@ export const farequoteDataViewTestData: TestData = {
existsInDocs: true,
aggregatable: true,
loading: false,
docCountFormatted: '5000 (100%)',
docCountFormatted: '5,000 (100%)',
statsMaxDecimalPlaces: 3,
topValuesCount: 11,
viewableInLens: true,
@ -40,7 +40,7 @@ export const farequoteDataViewTestData: TestData = {
existsInDocs: true,
aggregatable: true,
loading: false,
docCountFormatted: '5000 (100%)',
docCountFormatted: '5,000 (100%)',
exampleCount: 2,
viewableInLens: true,
},
@ -61,7 +61,7 @@ export const farequoteDataViewTestData: TestData = {
aggregatable: true,
loading: false,
exampleCount: 1,
docCountFormatted: '5000 (100%)',
docCountFormatted: '5,000 (100%)',
viewableInLens: true,
},
{
@ -71,7 +71,7 @@ export const farequoteDataViewTestData: TestData = {
aggregatable: true,
loading: false,
exampleCount: 11,
docCountFormatted: '5000 (100%)',
docCountFormatted: '5,000 (100%)',
viewableInLens: true,
},
{
@ -91,7 +91,7 @@ export const farequoteDataViewTestData: TestData = {
aggregatable: true,
loading: false,
exampleCount: 1,
docCountFormatted: '5000 (100%)',
docCountFormatted: '5,000 (100%)',
viewableInLens: true,
},
],
@ -112,8 +112,8 @@ export const farequoteKQLSearchTestData: TestData = {
fieldNameFilters: ['@version'],
fieldTypeFilters: [ML_JOB_FIELD_TYPES.DATE, ML_JOB_FIELD_TYPES.TEXT],
sampleSizeValidations: [
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1000 (100%)' } },
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5000 (100%)' } },
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
],
expected: {
totalDocCountFormatted: '34,415',
@ -124,7 +124,7 @@ export const farequoteKQLSearchTestData: TestData = {
existsInDocs: true,
aggregatable: true,
loading: false,
docCountFormatted: '5000 (100%)',
docCountFormatted: '5,000 (100%)',
statsMaxDecimalPlaces: 3,
topValuesCount: 11,
viewableInLens: true,
@ -137,7 +137,7 @@ export const farequoteKQLSearchTestData: TestData = {
existsInDocs: true,
aggregatable: true,
loading: false,
docCountFormatted: '5000 (100%)',
docCountFormatted: '5,000 (100%)',
exampleCount: 2,
viewableInLens: true,
},
@ -158,7 +158,7 @@ export const farequoteKQLSearchTestData: TestData = {
aggregatable: true,
loading: false,
exampleCount: 1,
docCountFormatted: '5000 (100%)',
docCountFormatted: '5,000 (100%)',
viewableInLens: true,
},
{
@ -168,7 +168,7 @@ export const farequoteKQLSearchTestData: TestData = {
aggregatable: true,
loading: false,
exampleCount: 5,
docCountFormatted: '5000 (100%)',
docCountFormatted: '5,000 (100%)',
viewableInLens: true,
},
{
@ -188,7 +188,7 @@ export const farequoteKQLSearchTestData: TestData = {
aggregatable: true,
loading: false,
exampleCount: 1,
docCountFormatted: '5000 (100%)',
docCountFormatted: '5,000 (100%)',
viewableInLens: true,
},
],
@ -209,8 +209,8 @@ export const farequoteKQLFiltersSearchTestData: TestData = {
fieldNameFilters: ['@version'],
fieldTypeFilters: [ML_JOB_FIELD_TYPES.DATE, ML_JOB_FIELD_TYPES.TEXT],
sampleSizeValidations: [
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1000 (100%)' } },
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5000 (100%)' } },
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
],
expected: {
filters: [{ key: 'airline', value: 'ASA' }],
@ -222,7 +222,7 @@ export const farequoteKQLFiltersSearchTestData: TestData = {
existsInDocs: true,
aggregatable: true,
loading: false,
docCountFormatted: '5000 (100%)',
docCountFormatted: '5,000 (100%)',
statsMaxDecimalPlaces: 3,
topValuesCount: 11,
viewableInLens: true,
@ -235,7 +235,7 @@ export const farequoteKQLFiltersSearchTestData: TestData = {
existsInDocs: true,
aggregatable: true,
loading: false,
docCountFormatted: '5000 (100%)',
docCountFormatted: '5,000 (100%)',
exampleCount: 2,
viewableInLens: true,
},
@ -256,7 +256,7 @@ export const farequoteKQLFiltersSearchTestData: TestData = {
aggregatable: true,
loading: false,
exampleCount: 1,
docCountFormatted: '5000 (100%)',
docCountFormatted: '5,000 (100%)',
viewableInLens: true,
},
{
@ -267,7 +267,7 @@ export const farequoteKQLFiltersSearchTestData: TestData = {
loading: false,
exampleCount: 1,
exampleContent: ['ASA'],
docCountFormatted: '5000 (100%)',
docCountFormatted: '5,000 (100%)',
viewableInLens: true,
},
{
@ -287,7 +287,7 @@ export const farequoteKQLFiltersSearchTestData: TestData = {
aggregatable: true,
loading: false,
exampleCount: 1,
docCountFormatted: '5000 (100%)',
docCountFormatted: '5,000 (100%)',
viewableInLens: true,
},
],
@ -308,8 +308,8 @@ export const farequoteLuceneSearchTestData: TestData = {
fieldNameFilters: ['@version.keyword', 'type'],
fieldTypeFilters: [ML_JOB_FIELD_TYPES.NUMBER],
sampleSizeValidations: [
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1000 (100%)' } },
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5000 (100%)' } },
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
],
expected: {
totalDocCountFormatted: '34,416',
@ -320,7 +320,7 @@ export const farequoteLuceneSearchTestData: TestData = {
existsInDocs: true,
aggregatable: true,
loading: false,
docCountFormatted: '5000 (100%)',
docCountFormatted: '5,000 (100%)',
statsMaxDecimalPlaces: 3,
topValuesCount: 11,
viewableInLens: true,
@ -333,7 +333,7 @@ export const farequoteLuceneSearchTestData: TestData = {
existsInDocs: true,
aggregatable: true,
loading: false,
docCountFormatted: '5000 (100%)',
docCountFormatted: '5,000 (100%)',
exampleCount: 2,
viewableInLens: true,
},
@ -354,7 +354,7 @@ export const farequoteLuceneSearchTestData: TestData = {
aggregatable: true,
loading: false,
exampleCount: 1,
docCountFormatted: '5000 (100%)',
docCountFormatted: '5,000 (100%)',
viewableInLens: true,
},
{
@ -364,7 +364,7 @@ export const farequoteLuceneSearchTestData: TestData = {
aggregatable: true,
loading: false,
exampleCount: 5,
docCountFormatted: '5000 (100%)',
docCountFormatted: '5,000 (100%)',
viewableInLens: true,
},
{
@ -384,7 +384,7 @@ export const farequoteLuceneSearchTestData: TestData = {
aggregatable: true,
loading: false,
exampleCount: 1,
docCountFormatted: '5000 (100%)',
docCountFormatted: '5,000 (100%)',
viewableInLens: true,
},
],
@ -405,8 +405,8 @@ export const farequoteLuceneFiltersSearchTestData: TestData = {
fieldNameFilters: ['@version.keyword', 'type'],
fieldTypeFilters: [ML_JOB_FIELD_TYPES.NUMBER],
sampleSizeValidations: [
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1000 (100%)' } },
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5000 (100%)' } },
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
],
expected: {
filters: [{ key: 'airline', value: 'ASA' }],
@ -418,7 +418,7 @@ export const farequoteLuceneFiltersSearchTestData: TestData = {
existsInDocs: true,
aggregatable: true,
loading: false,
docCountFormatted: '5000 (100%)',
docCountFormatted: '5,000 (100%)',
statsMaxDecimalPlaces: 3,
topValuesCount: 11,
viewableInLens: true,
@ -431,7 +431,7 @@ export const farequoteLuceneFiltersSearchTestData: TestData = {
existsInDocs: true,
aggregatable: true,
loading: false,
docCountFormatted: '5000 (100%)',
docCountFormatted: '5,000 (100%)',
exampleCount: 2,
viewableInLens: true,
},
@ -452,7 +452,7 @@ export const farequoteLuceneFiltersSearchTestData: TestData = {
aggregatable: true,
loading: false,
exampleCount: 1,
docCountFormatted: '5000 (100%)',
docCountFormatted: '5,000 (100%)',
viewableInLens: true,
},
{
@ -463,7 +463,7 @@ export const farequoteLuceneFiltersSearchTestData: TestData = {
loading: false,
exampleCount: 1,
exampleContent: ['ASA'],
docCountFormatted: '5000 (100%)',
docCountFormatted: '5,000 (100%)',
viewableInLens: true,
},
{
@ -483,7 +483,7 @@ export const farequoteLuceneFiltersSearchTestData: TestData = {
aggregatable: true,
loading: false,
exampleCount: 1,
docCountFormatted: '5000 (100%)',
docCountFormatted: '5,000 (100%)',
viewableInLens: true,
},
],

View file

@ -0,0 +1,535 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { ML_JOB_FIELD_TYPES } from '@kbn/ml-plugin/common/constants/field_types';
import { TestData } from './types';
/**
 * Test data for the 'farequote index pattern' suite, which targets the
 * `ft_farequote` source directly (not a saved search).
 *
 * Every aggregatable field is expected to report a document count equal to the
 * expected total ('86,274 (100%)'); the non-aggregatable text fields
 * (`@version`, `type`) are expected to show an empty document count.
 *
 * NOTE(review): `sampleSizeValidations` still lists fixed sample sizes (1,000 / 5,000)
 * while the field expectations below use the full total. Confirm whether any
 * suite consuming this data still exercises a sample size control.
 */
export const farequoteDataViewTestData: TestData = {
suiteTitle: 'farequote index pattern',
isSavedSearch: false,
sourceIndexOrSavedSearch: 'ft_farequote',
fieldNameFilters: ['airline', '@timestamp'],
fieldTypeFilters: [ML_JOB_FIELD_TYPES.KEYWORD],
sampleSizeValidations: [
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
],
expected: {
totalDocCountFormatted: '86,274',
metricFields: [
{
fieldName: 'responsetime',
type: ML_JOB_FIELD_TYPES.NUMBER,
existsInDocs: true,
aggregatable: true,
loading: false,
docCountFormatted: '86,274 (100%)',
statsMaxDecimalPlaces: 3,
topValuesCount: 11,
viewableInLens: true,
},
],
nonMetricFields: [
{
fieldName: '@timestamp',
type: ML_JOB_FIELD_TYPES.DATE,
existsInDocs: true,
aggregatable: true,
loading: false,
docCountFormatted: '86,274 (100%)',
exampleCount: 2,
viewableInLens: true,
},
{
fieldName: '@version',
type: ML_JOB_FIELD_TYPES.TEXT,
existsInDocs: true,
aggregatable: false,
loading: false,
exampleCount: 1,
docCountFormatted: '',
viewableInLens: false,
},
{
fieldName: '@version.keyword',
type: ML_JOB_FIELD_TYPES.KEYWORD,
existsInDocs: true,
aggregatable: true,
loading: false,
exampleCount: 1,
docCountFormatted: '86,274 (100%)',
viewableInLens: true,
},
{
fieldName: 'airline',
type: ML_JOB_FIELD_TYPES.KEYWORD,
existsInDocs: true,
aggregatable: true,
loading: false,
exampleCount: 11,
docCountFormatted: '86,274 (100%)',
viewableInLens: true,
},
{
fieldName: 'type',
type: ML_JOB_FIELD_TYPES.TEXT,
existsInDocs: true,
aggregatable: false,
loading: false,
exampleCount: 1,
docCountFormatted: '',
viewableInLens: false,
},
{
fieldName: 'type.keyword',
type: ML_JOB_FIELD_TYPES.KEYWORD,
existsInDocs: true,
aggregatable: true,
loading: false,
exampleCount: 1,
docCountFormatted: '86,274 (100%)',
viewableInLens: true,
},
],
// `sourcetype` is the only field expected to be empty: 7 of the 8 fields are populated.
emptyFields: ['sourcetype'],
visibleMetricFieldsCount: 1,
totalMetricFieldsCount: 1,
populatedFieldsCount: 7,
totalFieldsCount: 8,
fieldNameFiltersResultCount: 2,
fieldTypeFiltersResultCount: 3,
},
};
/**
 * Test data for the 'KQL saved search' suite, which targets the
 * `ft_farequote_kuery` saved search.
 *
 * Every aggregatable field is expected to report a document count equal to the
 * expected total ('34,415 (100%)'); the non-aggregatable text fields
 * (`@version`, `type`) are expected to show an empty document count.
 *
 * NOTE(review): `sampleSizeValidations` still lists fixed sample sizes (1,000 / 5,000)
 * while the field expectations below use the full total. Confirm whether any
 * suite consuming this data still exercises a sample size control.
 */
export const farequoteKQLSearchTestData: TestData = {
suiteTitle: 'KQL saved search',
isSavedSearch: true,
sourceIndexOrSavedSearch: 'ft_farequote_kuery',
fieldNameFilters: ['@version'],
fieldTypeFilters: [ML_JOB_FIELD_TYPES.DATE, ML_JOB_FIELD_TYPES.TEXT],
sampleSizeValidations: [
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
],
expected: {
totalDocCountFormatted: '34,415',
metricFields: [
{
fieldName: 'responsetime',
type: ML_JOB_FIELD_TYPES.NUMBER,
existsInDocs: true,
aggregatable: true,
loading: false,
docCountFormatted: '34,415 (100%)',
statsMaxDecimalPlaces: 3,
topValuesCount: 11,
viewableInLens: true,
},
],
nonMetricFields: [
{
fieldName: '@timestamp',
type: ML_JOB_FIELD_TYPES.DATE,
existsInDocs: true,
aggregatable: true,
loading: false,
docCountFormatted: '34,415 (100%)',
exampleCount: 2,
viewableInLens: true,
},
{
fieldName: '@version',
type: ML_JOB_FIELD_TYPES.TEXT,
existsInDocs: true,
aggregatable: false,
loading: false,
exampleCount: 1,
docCountFormatted: '',
viewableInLens: false,
},
{
fieldName: '@version.keyword',
type: ML_JOB_FIELD_TYPES.KEYWORD,
existsInDocs: true,
aggregatable: true,
loading: false,
exampleCount: 1,
docCountFormatted: '34,415 (100%)',
viewableInLens: true,
},
{
fieldName: 'airline',
type: ML_JOB_FIELD_TYPES.KEYWORD,
existsInDocs: true,
aggregatable: true,
loading: false,
exampleCount: 5,
docCountFormatted: '34,415 (100%)',
viewableInLens: true,
},
{
fieldName: 'type',
type: ML_JOB_FIELD_TYPES.TEXT,
existsInDocs: true,
aggregatable: false,
loading: false,
exampleCount: 1,
docCountFormatted: '',
viewableInLens: false,
},
{
fieldName: 'type.keyword',
type: ML_JOB_FIELD_TYPES.KEYWORD,
existsInDocs: true,
aggregatable: true,
loading: false,
exampleCount: 1,
docCountFormatted: '34,415 (100%)',
viewableInLens: true,
},
],
// `sourcetype` is the only field expected to be empty: 7 of the 8 fields are populated.
emptyFields: ['sourcetype'],
visibleMetricFieldsCount: 1,
totalMetricFieldsCount: 1,
populatedFieldsCount: 7,
totalFieldsCount: 8,
fieldNameFiltersResultCount: 1,
fieldTypeFiltersResultCount: 3,
},
};
// Every aggregatable field in this fixture is expected to report the same document
// count as the overall total, so the formatted strings are derived from one value.
const kqlFiltersTotalDocCount = '5,674';
const kqlFiltersAllDocsCount = `${kqlFiltersTotalDocCount} (100%)`;

/**
 * Expected values for the 'KQL saved search and filters' suite, which runs against the
 * `ft_farequote_filter_and_kuery` saved search with an `airline: ASA` filter.
 *
 * Non-aggregatable text fields carry an empty `docCountFormatted`.
 */
export const farequoteKQLFiltersSearchTestData: TestData = {
  suiteTitle: 'KQL saved search and filters',
  isSavedSearch: true,
  sourceIndexOrSavedSearch: 'ft_farequote_filter_and_kuery',
  fieldNameFilters: ['@version'],
  fieldTypeFilters: [ML_JOB_FIELD_TYPES.DATE, ML_JOB_FIELD_TYPES.TEXT],
  sampleSizeValidations: [
    { size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
    { size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
  ],
  expected: {
    filters: [{ key: 'airline', value: 'ASA' }],
    totalDocCountFormatted: kqlFiltersTotalDocCount,
    metricFields: [
      {
        fieldName: 'responsetime',
        type: ML_JOB_FIELD_TYPES.NUMBER,
        existsInDocs: true,
        aggregatable: true,
        loading: false,
        docCountFormatted: kqlFiltersAllDocsCount,
        statsMaxDecimalPlaces: 3,
        topValuesCount: 11,
        viewableInLens: true,
      },
    ],
    nonMetricFields: [
      {
        fieldName: '@timestamp',
        type: ML_JOB_FIELD_TYPES.DATE,
        existsInDocs: true,
        aggregatable: true,
        loading: false,
        docCountFormatted: kqlFiltersAllDocsCount,
        exampleCount: 2,
        viewableInLens: true,
      },
      {
        fieldName: '@version',
        type: ML_JOB_FIELD_TYPES.TEXT,
        existsInDocs: true,
        aggregatable: false,
        loading: false,
        exampleCount: 1,
        docCountFormatted: '',
        viewableInLens: false,
      },
      {
        fieldName: '@version.keyword',
        type: ML_JOB_FIELD_TYPES.KEYWORD,
        existsInDocs: true,
        aggregatable: true,
        loading: false,
        exampleCount: 1,
        docCountFormatted: kqlFiltersAllDocsCount,
        viewableInLens: true,
      },
      {
        fieldName: 'airline',
        type: ML_JOB_FIELD_TYPES.KEYWORD,
        existsInDocs: true,
        aggregatable: true,
        loading: false,
        exampleCount: 1,
        // The airline filter leaves a single distinct value.
        exampleContent: ['ASA'],
        docCountFormatted: kqlFiltersAllDocsCount,
        viewableInLens: true,
      },
      {
        fieldName: 'type',
        type: ML_JOB_FIELD_TYPES.TEXT,
        existsInDocs: true,
        aggregatable: false,
        loading: false,
        exampleCount: 1,
        docCountFormatted: '',
        viewableInLens: false,
      },
      {
        fieldName: 'type.keyword',
        type: ML_JOB_FIELD_TYPES.KEYWORD,
        existsInDocs: true,
        aggregatable: true,
        loading: false,
        exampleCount: 1,
        docCountFormatted: kqlFiltersAllDocsCount,
        viewableInLens: true,
      },
    ],
    emptyFields: ['sourcetype'],
    visibleMetricFieldsCount: 1,
    totalMetricFieldsCount: 1,
    populatedFieldsCount: 7,
    totalFieldsCount: 8,
    fieldNameFiltersResultCount: 1,
    fieldTypeFiltersResultCount: 3,
  },
};
// Every aggregatable field in this fixture is expected to report the same document
// count as the overall total, so the formatted strings are derived from one value.
const luceneTotalDocCount = '34,416';
const luceneAllDocsCount = `${luceneTotalDocCount} (100%)`;

/**
 * Expected values for the 'lucene saved search' suite, which runs against the
 * `ft_farequote_lucene` saved search.
 *
 * Non-aggregatable text fields carry an empty `docCountFormatted`.
 */
export const farequoteLuceneSearchTestData: TestData = {
  suiteTitle: 'lucene saved search',
  isSavedSearch: true,
  sourceIndexOrSavedSearch: 'ft_farequote_lucene',
  fieldNameFilters: ['@version.keyword', 'type'],
  fieldTypeFilters: [ML_JOB_FIELD_TYPES.NUMBER],
  sampleSizeValidations: [
    { size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
    { size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
  ],
  expected: {
    totalDocCountFormatted: luceneTotalDocCount,
    metricFields: [
      {
        fieldName: 'responsetime',
        type: ML_JOB_FIELD_TYPES.NUMBER,
        existsInDocs: true,
        aggregatable: true,
        loading: false,
        docCountFormatted: luceneAllDocsCount,
        statsMaxDecimalPlaces: 3,
        topValuesCount: 11,
        viewableInLens: true,
      },
    ],
    nonMetricFields: [
      {
        fieldName: '@timestamp',
        type: ML_JOB_FIELD_TYPES.DATE,
        existsInDocs: true,
        aggregatable: true,
        loading: false,
        docCountFormatted: luceneAllDocsCount,
        exampleCount: 2,
        viewableInLens: true,
      },
      {
        fieldName: '@version',
        type: ML_JOB_FIELD_TYPES.TEXT,
        existsInDocs: true,
        aggregatable: false,
        loading: false,
        exampleCount: 1,
        docCountFormatted: '',
        viewableInLens: false,
      },
      {
        fieldName: '@version.keyword',
        type: ML_JOB_FIELD_TYPES.KEYWORD,
        existsInDocs: true,
        aggregatable: true,
        loading: false,
        exampleCount: 1,
        docCountFormatted: luceneAllDocsCount,
        viewableInLens: true,
      },
      {
        fieldName: 'airline',
        type: ML_JOB_FIELD_TYPES.KEYWORD,
        existsInDocs: true,
        aggregatable: true,
        loading: false,
        exampleCount: 5,
        docCountFormatted: luceneAllDocsCount,
        viewableInLens: true,
      },
      {
        fieldName: 'type',
        type: ML_JOB_FIELD_TYPES.TEXT,
        existsInDocs: true,
        aggregatable: false,
        loading: false,
        exampleCount: 1,
        docCountFormatted: '',
        viewableInLens: false,
      },
      {
        fieldName: 'type.keyword',
        type: ML_JOB_FIELD_TYPES.KEYWORD,
        existsInDocs: true,
        aggregatable: true,
        loading: false,
        exampleCount: 1,
        docCountFormatted: luceneAllDocsCount,
        viewableInLens: true,
      },
    ],
    emptyFields: ['sourcetype'],
    visibleMetricFieldsCount: 1,
    totalMetricFieldsCount: 1,
    populatedFieldsCount: 7,
    totalFieldsCount: 8,
    fieldNameFiltersResultCount: 2,
    fieldTypeFiltersResultCount: 1,
  },
};
// Every aggregatable field in this fixture is expected to report the same document
// count as the overall total, so the formatted strings are derived from one value.
const luceneFiltersTotalDocCount = '5,673';
const luceneFiltersAllDocsCount = `${luceneFiltersTotalDocCount} (100%)`;

/**
 * Expected values for the 'lucene saved search and filter' suite, which runs against the
 * `ft_farequote_filter_and_lucene` saved search with an `airline: ASA` filter.
 *
 * Non-aggregatable text fields carry an empty `docCountFormatted`.
 */
export const farequoteLuceneFiltersSearchTestData: TestData = {
  suiteTitle: 'lucene saved search and filter',
  isSavedSearch: true,
  sourceIndexOrSavedSearch: 'ft_farequote_filter_and_lucene',
  fieldNameFilters: ['@version.keyword', 'type'],
  fieldTypeFilters: [ML_JOB_FIELD_TYPES.NUMBER],
  sampleSizeValidations: [
    { size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
    { size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
  ],
  expected: {
    filters: [{ key: 'airline', value: 'ASA' }],
    totalDocCountFormatted: luceneFiltersTotalDocCount,
    metricFields: [
      {
        fieldName: 'responsetime',
        type: ML_JOB_FIELD_TYPES.NUMBER,
        existsInDocs: true,
        aggregatable: true,
        loading: false,
        docCountFormatted: luceneFiltersAllDocsCount,
        statsMaxDecimalPlaces: 3,
        topValuesCount: 11,
        viewableInLens: true,
      },
    ],
    nonMetricFields: [
      {
        fieldName: '@timestamp',
        type: ML_JOB_FIELD_TYPES.DATE,
        existsInDocs: true,
        aggregatable: true,
        loading: false,
        docCountFormatted: luceneFiltersAllDocsCount,
        exampleCount: 2,
        viewableInLens: true,
      },
      {
        fieldName: '@version',
        type: ML_JOB_FIELD_TYPES.TEXT,
        existsInDocs: true,
        aggregatable: false,
        loading: false,
        exampleCount: 1,
        docCountFormatted: '',
        viewableInLens: false,
      },
      {
        fieldName: '@version.keyword',
        type: ML_JOB_FIELD_TYPES.KEYWORD,
        existsInDocs: true,
        aggregatable: true,
        loading: false,
        exampleCount: 1,
        docCountFormatted: luceneFiltersAllDocsCount,
        viewableInLens: true,
      },
      {
        fieldName: 'airline',
        type: ML_JOB_FIELD_TYPES.KEYWORD,
        existsInDocs: true,
        aggregatable: true,
        loading: false,
        exampleCount: 1,
        // The airline filter leaves a single distinct value.
        exampleContent: ['ASA'],
        docCountFormatted: luceneFiltersAllDocsCount,
        viewableInLens: true,
      },
      {
        fieldName: 'type',
        type: ML_JOB_FIELD_TYPES.TEXT,
        existsInDocs: true,
        aggregatable: false,
        loading: false,
        exampleCount: 1,
        docCountFormatted: '',
        viewableInLens: false,
      },
      {
        fieldName: 'type.keyword',
        type: ML_JOB_FIELD_TYPES.KEYWORD,
        existsInDocs: true,
        aggregatable: true,
        loading: false,
        exampleCount: 1,
        docCountFormatted: luceneFiltersAllDocsCount,
        viewableInLens: true,
      },
    ],
    emptyFields: ['sourcetype'],
    visibleMetricFieldsCount: 1,
    totalMetricFieldsCount: 1,
    populatedFieldsCount: 7,
    totalFieldsCount: 8,
    fieldNameFiltersResultCount: 2,
    fieldTypeFiltersResultCount: 1,
  },
};
// The total and every per-field doc count asserted by this fixture are the same,
// so the formatted strings are derived from one value.
const sampleLogTotalDocCount = '408';
const sampleLogAllDocsCount = `${sampleLogTotalDocCount} (100%)`;

/**
 * Expected values for the 'geo point field' suite, which runs directly against the
 * `ft_module_sample_logs` index (not a saved search) with 50 rows per page.
 *
 * Only the geo_point field is listed under `nonMetricFields`, and `metricFields` is
 * left empty, even though the field-count totals cover the whole index.
 */
export const sampleLogTestData: TestData = {
  suiteTitle: 'geo point field',
  isSavedSearch: false,
  sourceIndexOrSavedSearch: 'ft_module_sample_logs',
  fieldNameFilters: ['geo.coordinates'],
  fieldTypeFilters: [ML_JOB_FIELD_TYPES.GEO_POINT],
  rowsPerPage: 50,
  expected: {
    totalDocCountFormatted: sampleLogTotalDocCount,
    metricFields: [],
    // only testing the geo_point fields
    nonMetricFields: [
      {
        fieldName: 'geo.coordinates',
        type: ML_JOB_FIELD_TYPES.GEO_POINT,
        existsInDocs: true,
        aggregatable: true,
        loading: false,
        docCountFormatted: sampleLogAllDocsCount,
        exampleCount: 10,
        viewableInLens: false,
      },
    ],
    emptyFields: [],
    visibleMetricFieldsCount: 4,
    totalMetricFieldsCount: 5,
    populatedFieldsCount: 35,
    totalFieldsCount: 36,
    fieldNameFiltersResultCount: 1,
    fieldTypeFiltersResultCount: 1,
  },
  // Both requested sample sizes exceed the total above, so the expected count stays
  // at the full document count.
  sampleSizeValidations: [
    { size: 1000, expected: { field: 'geo.coordinates', docCountFormatted: sampleLogAllDocsCount } },
    { size: 5000, expected: { field: '@timestamp', docCountFormatted: sampleLogAllDocsCount } },
  ],
};

View file

@ -290,25 +290,6 @@ export function MachineLearningDataVisualizerTableProvider(
await testSubjects.existOrFail('dataVisualizerFieldTypeSelect');
}
/**
 * Fails unless the `dataVisualizerShardSizeSelect` test subject is present.
 */
public async assertSampleSizeInputExists() {
  const sampleSizeSelectSubject = 'dataVisualizerShardSizeSelect';
  await testSubjects.existOrFail(sampleSizeSelectSubject);
}
/**
 * Picks a sample size in the shard-size select and checks the resulting doc count
 * for one field.
 *
 * @param sampleSize Option to choose; interpolated into the option's test subject name.
 * @param fieldName Field whose doc count is checked after the selection.
 * @param docCountFormatted Doc count text expected for `fieldName`.
 */
public async setSampleSizeInputValue(
  sampleSize: number | 'all',
  fieldName: string,
  docCountFormatted: string
) {
  const selectSubject = 'dataVisualizerShardSizeSelect';
  const optionSubject = `dataVisualizerShardSizeOption ${sampleSize}`;

  await this.assertSampleSizeInputExists();

  // Open the select, then make sure the requested option exists before clicking it.
  await testSubjects.clickWhenNotDisabledWithoutRetry(selectSubject);
  await testSubjects.existOrFail(optionSubject);
  await testSubjects.click(optionSubject);

  // The doc count check is wrapped in retry.tryForTime with a 5000 time budget.
  const verifyDocCount = async () => {
    await this.assertFieldDocCount(fieldName, docCountFormatted);
  };
  await retry.tryForTime(5000, verifyDocCount);
}
public async setFieldTypeFilter(fieldTypes: string[], expectedRowCount = 1) {
await this.assertFieldTypeInputExists();
await mlCommonUI.setMultiSelectFilter('dataVisualizerFieldTypeSelect', fieldTypes);

View file

@ -103,11 +103,6 @@ export default function ({ getPageObject, getService }: FtrProviderContext) {
await ml.testExecution.logTestStep('set data visualizer options');
await ml.dataVisualizerIndexBased.assertTimeRangeSelectorSectionExists();
await ml.dataVisualizerIndexBased.clickUseFullDataButton('14,074');
await ml.dataVisualizerTable.setSampleSizeInputValue(
'all',
'geo.coordinates',
'14074 (100%)'
);
await ml.dataVisualizerTable.setFieldTypeFilter([ML_JOB_FIELD_TYPES.GEO_POINT]);
await ml.testExecution.logTestStep('set maps options and take screenshot');

View file

@ -66,11 +66,6 @@ export default function ({ getPageObject, getService }: FtrProviderContext) {
await ml.testExecution.logTestStep('set data visualizer options');
await ml.dataVisualizerIndexBased.assertTimeRangeSelectorSectionExists();
await ml.dataVisualizerIndexBased.clickUseFullDataButton('14,074');
await ml.dataVisualizerTable.setSampleSizeInputValue(
'all',
'geo.coordinates',
'14074 (100%)'
);
await ml.dataVisualizerTable.setFieldNameFilter(['geo.dest']);
await ml.testExecution.logTestStep('set maps options and take screenshot');