mirror of
https://github.com/elastic/kibana.git
synced 2025-04-23 17:28:26 -04:00
[ML] Switch from normal sampling to random sampler for Index data visualizer table (#144646)
Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
parent
8a6e91b23f
commit
22d0fa742d
39 changed files with 1330 additions and 656 deletions
|
@ -24,6 +24,24 @@ import { useDiscoverServices } from '../../../../hooks/use_discover_services';
|
|||
import { FIELD_STATISTICS_LOADED } from './constants';
|
||||
import type { GetStateReturn } from '../../services/discover_state';
|
||||
import { AvailableFields$, DataRefetch$, DataTotalHits$ } from '../../hooks/use_saved_search';
|
||||
/**
 * Sampling settings for the `'random_sampling'` mode.
 * NOTE(review): the code that consumes these fields is not visible here — confirm semantics there.
 */
export interface RandomSamplingOption {
|
||||
// Literal tag that distinguishes this member of the SamplingOption union.
mode: 'random_sampling';
|
||||
// presumably seeds the sampler so repeated requests are reproducible — TODO confirm
seed: string;
|
||||
// NOTE(review): looks like a fraction rather than a percentage — confirm against the consumer
probability: number;
|
||||
}
|
||||
|
||||
/**
 * Sampling settings for the `'normal_sampling'` mode.
 * FieldStatisticsTable in this file passes this variant with `shardSize: 5000` and the
 * search session id as `seed`.
 */
export interface NormalSamplingOption {
|
||||
// Literal tag that distinguishes this member of the SamplingOption union.
mode: 'normal_sampling';
|
||||
// presumably seeds the sampler so repeated requests are reproducible — TODO confirm
seed: string;
|
||||
// presumably the number of documents sampled per shard — TODO confirm against the consumer
shardSize: number;
|
||||
}
|
||||
|
||||
/** Sampling settings for the `'no_sampling'` mode; carries no tuning field besides `seed`. */
export interface NoSamplingOption {
|
||||
// Literal tag that distinguishes this member of the SamplingOption union.
mode: 'no_sampling';
|
||||
// Present on every variant; NOTE(review): unclear whether it is read in this mode — confirm
seed: string;
|
||||
}
|
||||
|
||||
/** Discriminated union of the sampling variants; switch on `mode` to narrow. */
export type SamplingOption = RandomSamplingOption | NormalSamplingOption | NoSamplingOption;
|
||||
|
||||
export interface DataVisualizerGridEmbeddableInput extends EmbeddableInput {
|
||||
dataView: DataView;
|
||||
|
@ -39,6 +57,7 @@ export interface DataVisualizerGridEmbeddableInput extends EmbeddableInput {
|
|||
sessionId?: string;
|
||||
fieldsToFetch?: string[];
|
||||
totalDocuments?: number;
|
||||
samplingOption?: SamplingOption;
|
||||
}
|
||||
export interface DataVisualizerGridEmbeddableOutput extends EmbeddableOutput {
|
||||
showDistributions?: boolean;
|
||||
|
@ -163,6 +182,11 @@ export const FieldStatisticsTable = (props: FieldStatisticsTableProps) => {
|
|||
totalDocuments: savedSearchDataTotalHits$
|
||||
? savedSearchDataTotalHits$.getValue()?.result
|
||||
: undefined,
|
||||
samplingOption: {
|
||||
mode: 'normal_sampling',
|
||||
shardSize: 5000,
|
||||
seed: searchSessionId,
|
||||
} as NormalSamplingOption,
|
||||
});
|
||||
embeddable.reload();
|
||||
}
|
||||
|
|
|
@ -22,10 +22,10 @@
|
|||
* Otherwise you'd just satisfy TS requirements but might still
|
||||
* run into runtime issues.
|
||||
*/
|
||||
export const isPopulatedObject = <U extends string = string>(
|
||||
export const isPopulatedObject = <U extends string = string, T extends unknown = unknown>(
|
||||
arg: unknown,
|
||||
requiredAttributes: U[] = []
|
||||
): arg is Record<U, unknown> => {
|
||||
): arg is Record<U, T> => {
|
||||
return (
|
||||
typeof arg === 'object' &&
|
||||
arg !== null &&
|
||||
|
|
|
@ -64,9 +64,7 @@ export interface FieldVisStats {
|
|||
max?: number;
|
||||
median?: number;
|
||||
min?: number;
|
||||
topValues?: Array<{ key: number | string; doc_count: number }>;
|
||||
topValuesSampleSize?: number;
|
||||
topValuesSamplerShardSize?: number;
|
||||
topValues?: Array<{ key: number | string; doc_count: number; percent: number }>;
|
||||
examples?: Array<string | GeoPointExample | object>;
|
||||
timeRangeEarliest?: number;
|
||||
timeRangeLatest?: number;
|
||||
|
|
|
@ -11,6 +11,25 @@ import { IKibanaSearchResponse } from '@kbn/data-plugin/common';
|
|||
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
|
||||
import { TimeBucketsInterval } from '../services/time_buckets';
|
||||
|
||||
/**
 * Sampling settings for the `'random_sampling'` mode.
 * `isNoSamplingOption` in this file treats this variant with `probability === 1` as "no sampling".
 */
export interface RandomSamplingOption {
|
||||
// Literal tag checked by isRandomSamplingOption.
mode: 'random_sampling';
|
||||
// presumably seeds the sampler so repeated requests are reproducible — TODO confirm
seed: string;
|
||||
// NOTE(review): looks like a fraction where 1 means "all documents" — confirm against the consumer
probability: number;
|
||||
}
|
||||
|
||||
/** Sampling settings for the `'normal_sampling'` mode. */
export interface NormalSamplingOption {
|
||||
// Literal tag checked by isNormalSamplingOption.
mode: 'normal_sampling';
|
||||
// presumably seeds the sampler so repeated requests are reproducible — TODO confirm
seed: string;
|
||||
// presumably the number of documents sampled per shard — TODO confirm against the consumer
shardSize: number;
|
||||
}
|
||||
|
||||
/** Sampling settings for the `'no_sampling'` mode; carries no tuning field besides `seed`. */
export interface NoSamplingOption {
|
||||
// Literal tag checked by isNoSamplingOption.
mode: 'no_sampling';
|
||||
// Present on every variant; NOTE(review): unclear whether it is read in this mode — confirm
seed: string;
|
||||
}
|
||||
|
||||
/** Discriminated union of the sampling variants; narrow it via `mode` or the type guards in this file. */
export type SamplingOption = RandomSamplingOption | NormalSamplingOption | NoSamplingOption;
|
||||
|
||||
export interface FieldData {
|
||||
fieldName: string;
|
||||
existsInDocs: boolean;
|
||||
|
@ -54,7 +73,7 @@ export const isIKibanaSearchResponse = (arg: unknown): arg is IKibanaSearchRespo
|
|||
|
||||
export interface NumericFieldStats {
|
||||
fieldName: string;
|
||||
count: number;
|
||||
count?: number;
|
||||
min: number;
|
||||
max: number;
|
||||
avg: number;
|
||||
|
@ -86,7 +105,8 @@ export interface BooleanFieldStats {
|
|||
count: number;
|
||||
trueCount: number;
|
||||
falseCount: number;
|
||||
[key: string]: number | string;
|
||||
topValues: Bucket[];
|
||||
topValuesSampleSize: number;
|
||||
}
|
||||
|
||||
export interface DocumentCountStats {
|
||||
|
@ -186,6 +206,9 @@ export interface FieldStatsCommonRequestParams {
|
|||
intervalMs?: number;
|
||||
query: estypes.QueryDslQueryContainer;
|
||||
maxExamples?: number;
|
||||
samplingProbability: number | null;
|
||||
browserSessionSeed: number;
|
||||
samplingOption: SamplingOption;
|
||||
}
|
||||
|
||||
export interface OverallStatsSearchStrategyParams {
|
||||
|
@ -202,6 +225,8 @@ export interface OverallStatsSearchStrategyParams {
|
|||
aggregatableFields: string[];
|
||||
nonAggregatableFields: string[];
|
||||
fieldsToFetch?: string[];
|
||||
browserSessionSeed: number;
|
||||
samplingOption: SamplingOption;
|
||||
}
|
||||
|
||||
export interface FieldStatsSearchStrategyReturnBase {
|
||||
|
@ -238,3 +263,20 @@ export interface Field {
|
|||
export interface Aggs {
|
||||
[key: string]: estypes.AggregationsAggregationContainer;
|
||||
}
|
||||
|
||||
/**
 * Sampler mode identifiers for the embeddable.
 *
 * Declared `as const` so each property keeps its string-literal type; without it the
 * properties widen to `string` and the derived union below degenerates to `string`.
 */
export const EMBEDDABLE_SAMPLER_OPTION = {
  RANDOM: 'random_sampling',
  NORMAL: 'normal_sampling',
} as const;

/** Union of the mode strings above: `'random_sampling' | 'normal_sampling'`. */
export type FieldStatsEmbeddableSamplerOption =
  typeof EMBEDDABLE_SAMPLER_OPTION[keyof typeof EMBEDDABLE_SAMPLER_OPTION];
|
||||
|
||||
/**
 * Type guard that narrows a sampling option to the random-sampling variant.
 *
 * @param arg - Sampling option to inspect.
 * @returns `true` when `arg.mode` is `'random_sampling'`.
 */
export function isRandomSamplingOption(arg: SamplingOption): arg is RandomSamplingOption {
  const { mode } = arg;
  return mode === 'random_sampling';
}
|
||||
/**
 * Type guard that narrows a sampling option to the normal-sampling variant.
 *
 * @param arg - Sampling option to inspect.
 * @returns `true` when `arg.mode` is `'normal_sampling'`.
 */
export function isNormalSamplingOption(arg: SamplingOption): arg is NormalSamplingOption {
  const { mode } = arg;
  return mode === 'normal_sampling';
}
|
||||
/**
 * Reports whether a sampling option effectively means "do not sample".
 *
 * Two inputs qualify: the explicit `'no_sampling'` mode, and `'random_sampling'` with a
 * probability of exactly 1.
 *
 * NOTE(review): in the second case the value is narrowed to `NoSamplingOption` although its
 * runtime `mode` is still `'random_sampling'`; callers should not branch on `mode` after this
 * guard returns true.
 *
 * @param arg - Sampling option to inspect.
 * @returns `true` for either of the two cases above, otherwise `false`.
 */
export function isNoSamplingOption(arg: SamplingOption): arg is NoSamplingOption {
  if (arg.mode === 'no_sampling') {
    return true;
  }
  return arg.mode === 'random_sampling' && arg.probability === 1;
}
|
||||
|
|
|
@ -20,7 +20,7 @@ import {
|
|||
EuiFormRow,
|
||||
} from '@elastic/eui';
|
||||
import { i18n } from '@kbn/i18n';
|
||||
import { sortedIndex } from 'lodash';
|
||||
import { debounce, sortedIndex } from 'lodash';
|
||||
import { FormattedMessage } from '@kbn/i18n-react';
|
||||
import { isDefined } from '../../util/is_defined';
|
||||
import type { DocumentCountChartPoint } from './document_count_chart';
|
||||
|
@ -64,6 +64,24 @@ export const DocumentCountContent: FC<Props> = ({
|
|||
setShowSamplingOptionsPopover(false);
|
||||
}, [setShowSamplingOptionsPopover]);
|
||||
|
||||
// Snaps a raw slider value to the nearest entry of RANDOM_SAMPLER_PROBABILITIES and reports
// it, divided by 100, through setSamplingProbability (the list appears to hold percentages —
// confirm). Calls are debounced by 100ms. The debounced function is created inside
// useCallback so one instance is reused until setSamplingProbability changes; the hooks lint
// rule cannot analyse a non-inline callback, hence the disable below.
// eslint-disable-next-line react-hooks/exhaustive-deps
const updateSamplingProbability = useCallback(
  debounce((newProbability: number) => {
    // Nothing to report to, or nothing sensible to report.
    if (!setSamplingProbability || Number.isNaN(newProbability)) {
      return;
    }

    // sortedIndex gives the insertion point, so the neighbours on either side are the two
    // candidates. Either one is undefined when the value falls outside the list.
    const idx = sortedIndex(RANDOM_SAMPLER_PROBABILITIES, newProbability);
    const closestPrev: number | undefined = RANDOM_SAMPLER_PROBABILITIES[idx - 1];
    const closestNext: number | undefined = RANDOM_SAMPLER_PROBABILITIES[idx];

    let closestProbability: number | undefined;
    if (closestPrev === undefined) {
      // Below the smallest supported value: clamp to the first entry.
      closestProbability = closestNext;
    } else if (closestNext === undefined) {
      // Above the largest supported value: clamp to the last entry instead of emitting NaN.
      closestProbability = closestPrev;
    } else {
      // Ties resolve to the larger neighbour, matching the strict `<` comparison.
      closestProbability =
        Math.abs(closestPrev - newProbability) < Math.abs(closestNext - newProbability)
          ? closestPrev
          : closestNext;
    }

    // Only undefined when the list of supported probabilities is empty.
    if (closestProbability === undefined) {
      return;
    }

    setSamplingProbability(closestProbability / 100);
  }, 100),
  [setSamplingProbability]
);
|
||||
|
||||
const calloutInfoMessage = useMemo(() => {
|
||||
switch (randomSamplerPreference) {
|
||||
case RANDOM_SAMPLER_OPTION.OFF:
|
||||
|
@ -125,7 +143,7 @@ export const DocumentCountContent: FC<Props> = ({
|
|||
<>
|
||||
<EuiFlexGroup alignItems="center" gutterSize="xs">
|
||||
<TotalCountHeader totalCount={totalCount} approximate={approximate} loading={loading} />
|
||||
<EuiFlexItem grow={false}>
|
||||
<EuiFlexItem grow={false} style={{ marginLeft: 'auto' }}>
|
||||
<EuiPopover
|
||||
data-test-subj="dvRandomSamplerOptionsPopover"
|
||||
id="dataVisualizerSamplingOptions"
|
||||
|
@ -199,21 +217,7 @@ export const DocumentCountContent: FC<Props> = ({
|
|||
value: d,
|
||||
label: d === 0.001 || d >= 5 ? `${d}%` : '',
|
||||
}))}
|
||||
onChange={(e) => {
|
||||
const newProbability = Number(e.currentTarget.value);
|
||||
const idx = sortedIndex(RANDOM_SAMPLER_PROBABILITIES, newProbability);
|
||||
const closestPrev = RANDOM_SAMPLER_PROBABILITIES[idx - 1];
|
||||
const closestNext = RANDOM_SAMPLER_PROBABILITIES[idx];
|
||||
const closestProbability =
|
||||
Math.abs(closestPrev - newProbability) <
|
||||
Math.abs(closestNext - newProbability)
|
||||
? closestPrev
|
||||
: closestNext;
|
||||
|
||||
if (setSamplingProbability) {
|
||||
setSamplingProbability(closestProbability / 100);
|
||||
}
|
||||
}}
|
||||
onChange={(e) => updateSamplingProbability(Number(e.currentTarget.value))}
|
||||
step={RANDOM_SAMPLER_STEP}
|
||||
data-test-subj="dvRandomSamplerProbabilityRange"
|
||||
/>
|
||||
|
|
|
@ -112,6 +112,7 @@ export const FieldsStatsGrid: FC<Props> = ({ results }) => {
|
|||
pageState={dataVisualizerListState}
|
||||
updatePageState={setDataVisualizerListState}
|
||||
getItemIdToExpandedRowMap={getItemIdToExpandedRowMap}
|
||||
overallStatsRunning={false}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
|
|
|
@ -10,7 +10,6 @@ import { EuiSpacer } from '@elastic/eui';
|
|||
import { Axis, BarSeries, Chart, Settings, ScaleType } from '@elastic/charts';
|
||||
|
||||
import { FormattedMessage } from '@kbn/i18n-react';
|
||||
import { i18n } from '@kbn/i18n';
|
||||
import { TopValues } from '../../../top_values';
|
||||
import type { FieldDataRowProps } from '../../types/field_data_row';
|
||||
import { ExpandedRowFieldHeader } from '../expanded_row_field_header';
|
||||
|
@ -45,32 +44,13 @@ export const BooleanContent: FC<FieldDataRowProps> = ({ config, onAddFilter }) =
|
|||
const theme = useDataVizChartTheme();
|
||||
if (!formattedPercentages) return null;
|
||||
|
||||
const { trueCount, falseCount, count } = formattedPercentages;
|
||||
const stats = {
|
||||
...config.stats,
|
||||
topValues: [
|
||||
{
|
||||
key: i18n.translate(
|
||||
'xpack.dataVisualizer.dataGrid.fieldExpandedRow.booleanContent.trueCountLabel',
|
||||
{ defaultMessage: 'true' }
|
||||
),
|
||||
doc_count: trueCount ?? 0,
|
||||
},
|
||||
{
|
||||
key: i18n.translate(
|
||||
'xpack.dataVisualizer.dataGrid.fieldExpandedRow.booleanContent.falseCountLabel',
|
||||
{ defaultMessage: 'false' }
|
||||
),
|
||||
doc_count: falseCount ?? 0,
|
||||
},
|
||||
],
|
||||
};
|
||||
const { count } = formattedPercentages;
|
||||
return (
|
||||
<ExpandedRowContent dataTestSubj={'dataVisualizerBooleanContent'}>
|
||||
<DocumentStatsTable config={config} />
|
||||
|
||||
<TopValues
|
||||
stats={stats}
|
||||
stats={config.stats}
|
||||
fieldFormat={fieldFormat}
|
||||
barColor="success"
|
||||
onAddFilter={onAddFilter}
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
*/
|
||||
|
||||
import React, { FC, useMemo } from 'react';
|
||||
import { EuiSpacer, EuiText, htmlIdGenerator } from '@elastic/eui';
|
||||
import { EuiText, htmlIdGenerator } from '@elastic/eui';
|
||||
import { i18n } from '@kbn/i18n';
|
||||
import { FormattedMessage } from '@kbn/i18n-react';
|
||||
import {
|
||||
|
@ -18,6 +18,8 @@ import {
|
|||
VectorLayerDescriptor,
|
||||
} from '@kbn/maps-plugin/common';
|
||||
import { EMSTermJoinConfig } from '@kbn/maps-plugin/public';
|
||||
import { ES_FIELD_TYPES, KBN_FIELD_TYPES } from '@kbn/field-types';
|
||||
import { useDataVisualizerKibana } from '../../../../../kibana_context';
|
||||
import { EmbeddedMapComponent } from '../../../embedded_map';
|
||||
import { FieldVisStats } from '../../../../../../../common/types';
|
||||
import { ExpandedRowPanel } from './expanded_row_panel';
|
||||
|
@ -97,13 +99,59 @@ interface Props {
|
|||
}
|
||||
|
||||
export const ChoroplethMap: FC<Props> = ({ stats, suggestion }) => {
|
||||
const { fieldName, isTopValuesSampled, topValues, topValuesSamplerShardSize } = stats!;
|
||||
const {
|
||||
services: {
|
||||
data: { fieldFormats },
|
||||
},
|
||||
} = useDataVisualizerKibana();
|
||||
|
||||
const { fieldName, isTopValuesSampled, topValues, sampleCount } = stats!;
|
||||
|
||||
const layerList: VectorLayerDescriptor[] = useMemo(
|
||||
() => [getChoroplethTopValuesLayer(fieldName || '', topValues || [], suggestion)],
|
||||
[suggestion, fieldName, topValues]
|
||||
);
|
||||
|
||||
if (!stats) return null;
|
||||
|
||||
const totalDocuments = stats.totalDocuments ?? sampleCount ?? 0;
|
||||
|
||||
const countsElement = totalDocuments ? (
|
||||
<EuiText color="subdued" size="xs">
|
||||
{isTopValuesSampled ? (
|
||||
<FormattedMessage
|
||||
id="xpack.dataVisualizer.dataGrid.fieldExpandedRow.choroplethMapTopValues.calculatedFromSampleRecordsLabel"
|
||||
defaultMessage="Calculated from {sampledDocumentsFormatted} sample {sampledDocuments, plural, one {record} other {records}}."
|
||||
values={{
|
||||
sampledDocuments: sampleCount,
|
||||
sampledDocumentsFormatted: (
|
||||
<strong>
|
||||
{fieldFormats
|
||||
.getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER])
|
||||
.convert(sampleCount)}
|
||||
</strong>
|
||||
),
|
||||
}}
|
||||
/>
|
||||
) : (
|
||||
<FormattedMessage
|
||||
id="xpack.dataVisualizer.dataGrid.fieldExpandedRow.choroplethMapTopValues.calculatedFromTotalRecordsLabel"
|
||||
defaultMessage="Calculated from {totalDocumentsFormatted} {totalDocuments, plural, one {record} other {records}}."
|
||||
values={{
|
||||
totalDocuments,
|
||||
totalDocumentsFormatted: (
|
||||
<strong>
|
||||
{fieldFormats
|
||||
.getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER])
|
||||
.convert(totalDocuments ?? 0)}
|
||||
</strong>
|
||||
),
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
</EuiText>
|
||||
) : null;
|
||||
|
||||
return (
|
||||
<ExpandedRowPanel
|
||||
dataTestSubj={'fileDataVisualizerChoroplethMapTopValues'}
|
||||
|
@ -114,20 +162,7 @@ export const ChoroplethMap: FC<Props> = ({ stats, suggestion }) => {
|
|||
<EmbeddedMapComponent layerList={layerList} />
|
||||
</div>
|
||||
|
||||
{isTopValuesSampled === true && (
|
||||
<div>
|
||||
<EuiSpacer size={'s'} />
|
||||
<EuiText size="xs" textAlign={'center'}>
|
||||
<FormattedMessage
|
||||
id="xpack.dataVisualizer.dataGrid.fieldExpandedRow.choroplethMapTopValues.calculatedFromSampleDescription"
|
||||
defaultMessage="Calculated from sample of {topValuesSamplerShardSize} documents per shard"
|
||||
values={{
|
||||
topValuesSamplerShardSize,
|
||||
}}
|
||||
/>
|
||||
</EuiText>
|
||||
</div>
|
||||
)}
|
||||
{countsElement}
|
||||
</ExpandedRowPanel>
|
||||
);
|
||||
};
|
||||
|
|
|
@ -10,7 +10,7 @@ import React, { FC, ReactNode } from 'react';
|
|||
import { i18n } from '@kbn/i18n';
|
||||
import { EuiBasicTable, HorizontalAlignment, LEFT_ALIGNMENT, RIGHT_ALIGNMENT } from '@elastic/eui';
|
||||
import { ExpandedRowFieldHeader } from '../expanded_row_field_header';
|
||||
import { FieldDataRowProps } from '../../types';
|
||||
import { FieldDataRowProps, isIndexBasedFieldVisConfig } from '../../types';
|
||||
import { roundToDecimalPlace } from '../../../utils';
|
||||
import { ExpandedRowPanel } from './expanded_row_panel';
|
||||
|
||||
|
@ -46,6 +46,13 @@ export const DocumentStatsTable: FC<FieldDataRowProps> = ({ config }) => {
|
|||
)
|
||||
return null;
|
||||
const { cardinality, count, sampleCount } = config.stats;
|
||||
|
||||
const valueCount =
|
||||
count ?? (isIndexBasedFieldVisConfig(config) && config.existsInDocs === true ? undefined : 0);
|
||||
const docsPercent =
|
||||
valueCount !== undefined && sampleCount !== undefined
|
||||
? roundToDecimalPlace((valueCount / sampleCount) * 100)
|
||||
: undefined;
|
||||
const metaTableItems = [
|
||||
{
|
||||
function: 'count',
|
||||
|
@ -57,16 +64,20 @@ export const DocumentStatsTable: FC<FieldDataRowProps> = ({ config }) => {
|
|||
),
|
||||
value: count,
|
||||
},
|
||||
{
|
||||
function: 'percentage',
|
||||
display: (
|
||||
<FormattedMessage
|
||||
id="xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.percentageLabel"
|
||||
defaultMessage="percentage"
|
||||
/>
|
||||
),
|
||||
value: `${roundToDecimalPlace((count / sampleCount) * 100)}%`,
|
||||
},
|
||||
...(docsPercent !== undefined
|
||||
? [
|
||||
{
|
||||
function: 'percentage',
|
||||
display: (
|
||||
<FormattedMessage
|
||||
id="xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.percentageLabel"
|
||||
defaultMessage="percentage"
|
||||
/>
|
||||
),
|
||||
value: `${docsPercent}%`,
|
||||
},
|
||||
]
|
||||
: []),
|
||||
{
|
||||
function: 'distinctValues',
|
||||
display: (
|
||||
|
|
|
@ -8,32 +8,46 @@
|
|||
import { EuiIcon, EuiText } from '@elastic/eui';
|
||||
|
||||
import React from 'react';
|
||||
import { ES_FIELD_TYPES, KBN_FIELD_TYPES } from '@kbn/field-types';
|
||||
import { useDataVisualizerKibana } from '../../../../../kibana_context';
|
||||
import { isIndexBasedFieldVisConfig } from '../../../../../../../common/types/field_vis_config';
|
||||
import type { FieldDataRowProps } from '../../types/field_data_row';
|
||||
import { roundToDecimalPlace } from '../../../utils';
|
||||
import { isIndexBasedFieldVisConfig } from '../../types';
|
||||
|
||||
interface Props extends FieldDataRowProps {
|
||||
showIcon?: boolean;
|
||||
totalCount?: number;
|
||||
}
|
||||
export const DocumentStat = ({ config, showIcon }: Props) => {
|
||||
export const DocumentStat = ({ config, showIcon, totalCount }: Props) => {
|
||||
const { stats } = config;
|
||||
const {
|
||||
services: {
|
||||
data: { fieldFormats },
|
||||
},
|
||||
} = useDataVisualizerKibana();
|
||||
|
||||
if (stats === undefined) return null;
|
||||
|
||||
const { count, sampleCount } = stats;
|
||||
const total = sampleCount ?? totalCount;
|
||||
|
||||
// If field exists in docs but we don't have count stats then don't show
|
||||
// Otherwise if field doesn't appear in docs at all, show 0%
|
||||
const docsCount =
|
||||
const valueCount =
|
||||
count ?? (isIndexBasedFieldVisConfig(config) && config.existsInDocs === true ? undefined : 0);
|
||||
const docsPercent =
|
||||
docsCount !== undefined && sampleCount !== undefined
|
||||
? roundToDecimalPlace((docsCount / sampleCount) * 100)
|
||||
: 0;
|
||||
valueCount !== undefined && total !== undefined
|
||||
? `(${roundToDecimalPlace((valueCount / total) * 100)}%)`
|
||||
: null;
|
||||
|
||||
return docsCount !== undefined ? (
|
||||
return valueCount !== undefined ? (
|
||||
<>
|
||||
{showIcon ? <EuiIcon type="document" size={'m'} className={'columnHeader__icon'} /> : null}
|
||||
<EuiText size={'xs'}>
|
||||
{docsCount} ({docsPercent}%)
|
||||
{fieldFormats
|
||||
.getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER])
|
||||
.convert(valueCount)}{' '}
|
||||
{docsPercent}
|
||||
</EuiText>
|
||||
</>
|
||||
) : null;
|
||||
|
|
|
@ -60,6 +60,8 @@ interface DataVisualizerTableProps<T> {
|
|||
/** Callback to receive any updates when table or page state is changed **/
|
||||
onChange?: (update: Partial<DataVisualizerTableState>) => void;
|
||||
loading?: boolean;
|
||||
totalCount?: number;
|
||||
overallStatsRunning: boolean;
|
||||
}
|
||||
|
||||
export const DataVisualizerTable = <T extends DataVisualizerTableItem>({
|
||||
|
@ -71,6 +73,8 @@ export const DataVisualizerTable = <T extends DataVisualizerTableItem>({
|
|||
showPreviewByDefault,
|
||||
onChange,
|
||||
loading,
|
||||
totalCount,
|
||||
overallStatsRunning,
|
||||
}: DataVisualizerTableProps<T>) => {
|
||||
const { euiTheme } = useEuiTheme();
|
||||
|
||||
|
@ -217,12 +221,40 @@ export const DataVisualizerTable = <T extends DataVisualizerTableItem>({
|
|||
},
|
||||
{
|
||||
field: 'docCount',
|
||||
name: i18n.translate('xpack.dataVisualizer.dataGrid.documentsCountColumnName', {
|
||||
defaultMessage: 'Documents (%)',
|
||||
}),
|
||||
render: (value: number | undefined, item: DataVisualizerTableItem) => (
|
||||
<DocumentStat config={item} showIcon={dimensions.showIcon} />
|
||||
name: (
|
||||
<div className={'columnHeader__title'}>
|
||||
{i18n.translate('xpack.dataVisualizer.dataGrid.documentsCountColumnName', {
|
||||
defaultMessage: 'Documents (%)',
|
||||
})}
|
||||
{
|
||||
<EuiToolTip
|
||||
content={i18n.translate(
|
||||
'xpack.dataVisualizer.dataGrid.documentsCountColumnTooltip',
|
||||
{
|
||||
defaultMessage:
|
||||
'Document count found is based on a smaller set of sampled records.',
|
||||
}
|
||||
)}
|
||||
>
|
||||
<EuiIcon type="questionInCircle" />
|
||||
</EuiToolTip>
|
||||
}
|
||||
</div>
|
||||
),
|
||||
|
||||
render: (value: number | undefined, item: DataVisualizerTableItem) => {
|
||||
if (overallStatsRunning) {
|
||||
return (
|
||||
<EuiText textAlign="center">
|
||||
<EuiLoadingSpinner size="s" />
|
||||
</EuiText>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<DocumentStat config={item} showIcon={dimensions.showIcon} totalCount={totalCount} />
|
||||
);
|
||||
},
|
||||
sortable: (item: DataVisualizerTableItem) => item?.stats?.count,
|
||||
align: LEFT_ALIGNMENT as HorizontalAlignment,
|
||||
'data-test-subj': 'dataVisualizerTableColumnDocumentsCount',
|
||||
|
@ -233,9 +265,19 @@ export const DataVisualizerTable = <T extends DataVisualizerTableItem>({
|
|||
name: i18n.translate('xpack.dataVisualizer.dataGrid.distinctValuesColumnName', {
|
||||
defaultMessage: 'Distinct values',
|
||||
}),
|
||||
render: (_: undefined, item: DataVisualizerTableItem) => (
|
||||
<DistinctValues cardinality={item?.stats?.cardinality} showIcon={dimensions.showIcon} />
|
||||
),
|
||||
render: (_: undefined, item: DataVisualizerTableItem) => {
|
||||
if (overallStatsRunning) {
|
||||
return (
|
||||
<EuiText textAlign="center">
|
||||
<EuiLoadingSpinner size="s" />
|
||||
</EuiText>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<DistinctValues cardinality={item?.stats?.cardinality} showIcon={dimensions.showIcon} />
|
||||
);
|
||||
},
|
||||
sortable: (item: DataVisualizerTableItem) => item?.stats?.cardinality,
|
||||
align: LEFT_ALIGNMENT as HorizontalAlignment,
|
||||
'data-test-subj': 'dataVisualizerTableColumnDistinctValues',
|
||||
|
@ -333,6 +375,7 @@ export const DataVisualizerTable = <T extends DataVisualizerTableItem>({
|
|||
extendedColumns,
|
||||
dimensions.breakPoint,
|
||||
toggleExpandAll,
|
||||
overallStatsRunning,
|
||||
]);
|
||||
|
||||
const itemIdToExpandedRowMap = useMemo(() => {
|
||||
|
|
|
@ -36,8 +36,7 @@ interface Props {
|
|||
onAddFilter?: (field: DataViewField | string, value: string, type: '+' | '-') => void;
|
||||
}
|
||||
|
||||
function getPercentLabel(docCount: number, topValuesSampleSize: number): string {
|
||||
const percent = (100 * docCount) / topValuesSampleSize;
|
||||
function getPercentLabel(percent: number): string {
|
||||
if (percent >= 0.1) {
|
||||
return `${roundToDecimalPlace(percent, 1)}%`;
|
||||
} else {
|
||||
|
@ -47,76 +46,54 @@ function getPercentLabel(docCount: number, topValuesSampleSize: number): string
|
|||
|
||||
export const TopValues: FC<Props> = ({ stats, fieldFormat, barColor, compressed, onAddFilter }) => {
|
||||
const {
|
||||
services: { data },
|
||||
services: {
|
||||
data: { fieldFormats },
|
||||
},
|
||||
} = useDataVisualizerKibana();
|
||||
|
||||
const { fieldFormats } = data;
|
||||
|
||||
if (stats === undefined || !stats.topValues) return null;
|
||||
const {
|
||||
topValues,
|
||||
topValuesSampleSize,
|
||||
count,
|
||||
isTopValuesSampled,
|
||||
fieldName,
|
||||
sampleCount,
|
||||
topValuesSamplerShardSize,
|
||||
} = stats;
|
||||
const { topValues, fieldName, sampleCount } = stats;
|
||||
|
||||
const totalDocuments = stats.totalDocuments;
|
||||
const totalDocuments = stats.totalDocuments ?? sampleCount ?? 0;
|
||||
const topValuesOtherCountPercent =
|
||||
1 - (topValues ? topValues.reduce((acc, bucket) => acc + bucket.percent, 0) : 0);
|
||||
const topValuesOtherCount = Math.floor(topValuesOtherCountPercent * (sampleCount ?? 0));
|
||||
|
||||
const progressBarMax = isTopValuesSampled === true ? topValuesSampleSize : count;
|
||||
|
||||
const topValuesOtherCount =
|
||||
(progressBarMax ?? 0) -
|
||||
(topValues ? topValues.map((value) => value.doc_count).reduce((v, acc) => acc + v, 0) : 0);
|
||||
|
||||
const countsElement =
|
||||
totalDocuments !== undefined ? (
|
||||
<EuiText color="subdued" size="xs">
|
||||
{isTopValuesSampled ? (
|
||||
<FormattedMessage
|
||||
id="xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromSampleRecordsLabel"
|
||||
defaultMessage="Calculated from {sampledDocumentsFormatted} sample {sampledDocuments, plural, one {record} other {records}}."
|
||||
values={{
|
||||
sampledDocuments: sampleCount,
|
||||
sampledDocumentsFormatted: (
|
||||
<strong>
|
||||
{fieldFormats
|
||||
.getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER])
|
||||
.convert(sampleCount)}
|
||||
</strong>
|
||||
),
|
||||
}}
|
||||
/>
|
||||
) : (
|
||||
<FormattedMessage
|
||||
id="xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromTotalRecordsLabel"
|
||||
defaultMessage="Calculated from {totalDocumentsFormatted} {totalDocuments, plural, one {record} other {records}}."
|
||||
values={{
|
||||
totalDocuments,
|
||||
totalDocumentsFormatted: (
|
||||
<strong>
|
||||
{fieldFormats
|
||||
.getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER])
|
||||
.convert(totalDocuments ?? 0)}
|
||||
</strong>
|
||||
),
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
</EuiText>
|
||||
) : (
|
||||
<EuiText size="xs" textAlign={'center'}>
|
||||
const countsElement = (
|
||||
<EuiText color="subdued" size="xs">
|
||||
{totalDocuments > (sampleCount ?? 0) ? (
|
||||
<FormattedMessage
|
||||
id="xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromSampleDescription"
|
||||
defaultMessage="Calculated from sample of {topValuesSamplerShardSize} documents per shard"
|
||||
id="xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromSampleRecordsLabel"
|
||||
defaultMessage="Calculated from {sampledDocumentsFormatted} sample {sampledDocuments, plural, one {record} other {records}}."
|
||||
values={{
|
||||
topValuesSamplerShardSize,
|
||||
sampledDocuments: sampleCount,
|
||||
sampledDocumentsFormatted: (
|
||||
<strong>
|
||||
{fieldFormats
|
||||
.getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER])
|
||||
.convert(sampleCount)}
|
||||
</strong>
|
||||
),
|
||||
}}
|
||||
/>
|
||||
</EuiText>
|
||||
);
|
||||
) : (
|
||||
<FormattedMessage
|
||||
id="xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromTotalRecordsLabel"
|
||||
defaultMessage="Calculated from {totalDocumentsFormatted} {totalDocuments, plural, one {record} other {records}}."
|
||||
values={{
|
||||
totalDocuments,
|
||||
totalDocumentsFormatted: (
|
||||
<strong>
|
||||
{fieldFormats
|
||||
.getDefaultInstance(KBN_FIELD_TYPES.NUMBER, [ES_FIELD_TYPES.INTEGER])
|
||||
.convert(totalDocuments ?? 0)}
|
||||
</strong>
|
||||
),
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
</EuiText>
|
||||
);
|
||||
|
||||
return (
|
||||
<ExpandedRowPanel
|
||||
|
@ -139,15 +116,15 @@ export const TopValues: FC<Props> = ({ stats, fieldFormat, barColor, compressed,
|
|||
<EuiFlexGroup gutterSize="xs" alignItems="center" key={value.key}>
|
||||
<EuiFlexItem data-test-subj="dataVisualizerFieldDataTopValueBar">
|
||||
<EuiProgress
|
||||
value={value.doc_count}
|
||||
max={progressBarMax}
|
||||
value={value.percent}
|
||||
max={1}
|
||||
color={barColor}
|
||||
size="xs"
|
||||
label={kibanaFieldFormat(value.key, fieldFormat)}
|
||||
className={classNames('eui-textTruncate', 'topValuesValueLabelContainer')}
|
||||
valueText={`${value.doc_count}${
|
||||
progressBarMax !== undefined
|
||||
? ` (${getPercentLabel(value.doc_count, progressBarMax)})`
|
||||
totalDocuments !== undefined
|
||||
? ` (${getPercentLabel(value.percent * 100)})`
|
||||
: ''
|
||||
}`}
|
||||
/>
|
||||
|
@ -222,7 +199,7 @@ export const TopValues: FC<Props> = ({ stats, fieldFormat, barColor, compressed,
|
|||
<EuiFlexItem data-test-subj="dataVisualizerFieldDataTopValueBar">
|
||||
<EuiProgress
|
||||
value={topValuesOtherCount}
|
||||
max={progressBarMax}
|
||||
max={totalDocuments}
|
||||
color={barColor}
|
||||
size="xs"
|
||||
label={
|
||||
|
@ -233,8 +210,8 @@ export const TopValues: FC<Props> = ({ stats, fieldFormat, barColor, compressed,
|
|||
}
|
||||
className={classNames('eui-textTruncate', 'topValuesValueLabelContainer')}
|
||||
valueText={`${topValuesOtherCount}${
|
||||
progressBarMax !== undefined
|
||||
? ` (${getPercentLabel(topValuesOtherCount, progressBarMax)})`
|
||||
totalDocuments !== undefined
|
||||
? ` (${getPercentLabel(topValuesOtherCountPercent * 100)})`
|
||||
: ''
|
||||
}`}
|
||||
/>
|
||||
|
@ -249,12 +226,10 @@ export const TopValues: FC<Props> = ({ stats, fieldFormat, barColor, compressed,
|
|||
</EuiFlexGroup>
|
||||
) : null}
|
||||
|
||||
{isTopValuesSampled === true && (
|
||||
<Fragment>
|
||||
<EuiSpacer size="xs" />
|
||||
{countsElement}
|
||||
</Fragment>
|
||||
)}
|
||||
<Fragment>
|
||||
<EuiSpacer size="xs" />
|
||||
{countsElement}
|
||||
</Fragment>
|
||||
</div>
|
||||
</ExpandedRowPanel>
|
||||
);
|
||||
|
|
|
@ -551,8 +551,10 @@ export const IndexDataVisualizerView: FC<IndexDataVisualizerViewProps> = (dataVi
|
|||
getItemIdToExpandedRowMap={getItemIdToExpandedRowMap}
|
||||
extendedColumns={extendedColumns}
|
||||
loading={progress < 100}
|
||||
overallStatsRunning={overallStatsProgress.isRunning}
|
||||
showPreviewByDefault={dataVisualizerListState.showDistributions ?? true}
|
||||
onChange={setDataVisualizerListState}
|
||||
totalCount={overallStats.totalCount}
|
||||
/>
|
||||
</EuiPanel>
|
||||
</EuiFlexItem>
|
||||
|
|
|
@ -11,8 +11,8 @@ import { i18n } from '@kbn/i18n';
|
|||
import { Query, Filter } from '@kbn/es-query';
|
||||
import type { TimeRange } from '@kbn/es-query';
|
||||
import { DataView, DataViewField } from '@kbn/data-views-plugin/public';
|
||||
import { css } from '@emotion/react';
|
||||
import { isDefined } from '../../../common/util/is_defined';
|
||||
import { ShardSizeFilter } from './shard_size_select';
|
||||
import { DataVisualizerFieldNamesFilter } from './field_name_filter';
|
||||
import { DataVisualizerFieldTypeFilter } from './field_type_filter';
|
||||
import { SupportedFieldType } from '../../../../../common/types';
|
||||
|
@ -147,12 +147,15 @@ export const SearchPanel: FC<Props> = ({
|
|||
/>
|
||||
</EuiFlexItem>
|
||||
|
||||
<EuiFlexItem grow={2} className={'dvSearchPanel__controls'}>
|
||||
<ShardSizeFilter
|
||||
samplerShardSize={samplerShardSize}
|
||||
setSamplerShardSize={setSamplerShardSize}
|
||||
/>
|
||||
|
||||
<EuiFlexItem
|
||||
grow={2}
|
||||
className={'dvSearchPanel__controls'}
|
||||
css={css`
|
||||
margin-left: 0px !important;
|
||||
padding-left: 0px !important;
|
||||
padding-right: 0px !important;
|
||||
`}
|
||||
>
|
||||
<DataVisualizerFieldNamesFilter
|
||||
overallStats={overallStats}
|
||||
setVisibleFieldNames={setVisibleFieldNames}
|
||||
|
|
|
@ -1,66 +0,0 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { EuiFlexGroup, EuiFlexItem, EuiIconTip, EuiSuperSelect } from '@elastic/eui';
|
||||
import { i18n } from '@kbn/i18n';
|
||||
import React, { FC } from 'react';
|
||||
import { FormattedMessage } from '@kbn/i18n-react';
|
||||
|
||||
interface Props {
|
||||
samplerShardSize: number;
|
||||
setSamplerShardSize(s: number): void;
|
||||
}
|
||||
|
||||
/**
 * Options offered by the shard-size select: one entry per supported
 * per-shard sample size, plus a final non-positive entry (-1) that is
 * rendered as "Search all".
 */
const searchSizeOptions = [1000, 5000, 10000, 100000, -1].map((shardSize) => {
  // Positive sizes are labelled with the size; anything else means "no sampling".
  const isSampled = shardSize > 0;

  const inputDisplay = isSampled ? (
    <span data-test-subj={`dataVisualizerShardSizeOption ${shardSize}`}>
      <FormattedMessage
        id="xpack.dataVisualizer.searchPanel.sampleSizeOptionLabel"
        defaultMessage="Sample size (per shard): {wrappedValue}"
        values={{ wrappedValue: <b>{shardSize}</b> }}
      />
    </span>
  ) : (
    <span data-test-subj={`dataVisualizerShardSizeOption all`}>
      <FormattedMessage
        id="xpack.dataVisualizer.searchPanel.allOptionLabel"
        defaultMessage="Search all"
      />
    </span>
  );

  return { value: String(shardSize), inputDisplay };
});
|
||||
|
||||
/**
 * Select control for the per-shard sample size, with an info tooltip next to it.
 *
 * The selected option is derived from `samplerShardSize`; choosing another
 * option calls `setSamplerShardSize` with the option value converted to a number.
 */
export const ShardSizeFilter: FC<Props> = ({ samplerShardSize, setSamplerShardSize }) => {
  const selectAriaLabel = i18n.translate('xpack.dataVisualizer.searchPanel.sampleSizeAriaLabel', {
    defaultMessage: 'Select number of documents to sample',
  });

  const tooltipContent = i18n.translate('xpack.dataVisualizer.searchPanel.queryBarPlaceholder', {
    defaultMessage:
      'Selecting a smaller sample size will reduce query run times and the load on the cluster.',
  });

  // Option values are strings; hand the numeric form back to the caller.
  const handleSelectionChange = (selected: string) => setSamplerShardSize(Number(selected));

  return (
    <EuiFlexGroup alignItems="center" gutterSize="s" responsive={false}>
      <EuiFlexItem grow={false} style={{ width: 310 }}>
        <EuiSuperSelect
          options={searchSizeOptions}
          valueOfSelected={String(samplerShardSize)}
          onChange={handleSelectionChange}
          aria-label={selectAriaLabel}
          data-test-subj="dataVisualizerShardSizeSelect"
        />
      </EuiFlexItem>
      <EuiFlexItem grow={false}>
        <EuiIconTip content={tooltipContent} position="right" />
      </EuiFlexItem>
    </EuiFlexGroup>
  );
};
|
|
@ -24,6 +24,7 @@ import { KibanaContextProvider, KibanaThemeProvider } from '@kbn/kibana-react-pl
|
|||
import type { Query } from '@kbn/es-query';
|
||||
import { DataView, DataViewField } from '@kbn/data-views-plugin/public';
|
||||
import { SavedSearch } from '@kbn/discover-plugin/public';
|
||||
import { SamplingOption } from '../../../../../common/types/field_stats';
|
||||
import { DATA_VISUALIZER_GRID_EMBEDDABLE_TYPE } from './constants';
|
||||
import { EmbeddableLoading } from './embeddable_loading_fallback';
|
||||
import { DataVisualizerStartDependencies } from '../../../../plugin';
|
||||
|
@ -34,7 +35,7 @@ import {
|
|||
import { FieldVisConfig } from '../../../common/components/stats_table/types';
|
||||
import { getDefaultDataVisualizerListState } from '../../components/index_data_visualizer_view/index_data_visualizer_view';
|
||||
import type { DataVisualizerTableState, SavedSearchSavedObject } from '../../../../../common/types';
|
||||
import { DataVisualizerIndexBasedAppState } from '../../types/index_data_visualizer_state';
|
||||
import type { DataVisualizerIndexBasedAppState } from '../../types/index_data_visualizer_state';
|
||||
import { IndexBasedDataVisualizerExpandedRow } from '../../../common/components/expanded_row/index_based_expanded_row';
|
||||
import { useDataVisualizerGridData } from '../../hooks/use_data_visualizer_grid_data';
|
||||
|
||||
|
@ -55,6 +56,7 @@ export interface DataVisualizerGridInput {
|
|||
sessionId?: string;
|
||||
fieldsToFetch?: string[];
|
||||
totalDocuments?: number;
|
||||
samplingOption?: SamplingOption;
|
||||
}
|
||||
export type DataVisualizerGridEmbeddableInput = EmbeddableInput & DataVisualizerGridInput;
|
||||
export type DataVisualizerGridEmbeddableOutput = EmbeddableOutput;
|
||||
|
@ -83,8 +85,15 @@ export const EmbeddableWrapper = ({
|
|||
[dataVisualizerListState, onOutputChange]
|
||||
);
|
||||
|
||||
const { configs, searchQueryLanguage, searchString, extendedColumns, progress, setLastRefresh } =
|
||||
useDataVisualizerGridData(input, dataVisualizerListState);
|
||||
const {
|
||||
configs,
|
||||
searchQueryLanguage,
|
||||
searchString,
|
||||
extendedColumns,
|
||||
progress,
|
||||
overallStatsProgress,
|
||||
setLastRefresh,
|
||||
} = useDataVisualizerGridData(input, dataVisualizerListState);
|
||||
|
||||
useEffect(() => {
|
||||
setLastRefresh(Date.now());
|
||||
|
@ -143,6 +152,7 @@ export const EmbeddableWrapper = ({
|
|||
showPreviewByDefault={input?.showPreviewByDefault}
|
||||
onChange={onOutputChange}
|
||||
loading={progress < 100}
|
||||
overallStatsRunning={overallStatsProgress.isRunning}
|
||||
/>
|
||||
);
|
||||
};
|
||||
|
|
|
@ -5,22 +5,23 @@
|
|||
* 2.0.
|
||||
*/
|
||||
|
||||
import { Required } from 'utility-types';
|
||||
import type { Required } from 'utility-types';
|
||||
import { useCallback, useEffect, useMemo, useRef, useState } from 'react';
|
||||
import { merge } from 'rxjs';
|
||||
import { EuiTableActionsColumnType } from '@elastic/eui/src/components/basic_table/table_types';
|
||||
import type { EuiTableActionsColumnType } from '@elastic/eui';
|
||||
import { i18n } from '@kbn/i18n';
|
||||
import { DataViewField, KBN_FIELD_TYPES, UI_SETTINGS } from '@kbn/data-plugin/common';
|
||||
import seedrandom from 'seedrandom';
|
||||
import { RandomSamplerOption } from '../constants/random_sampler';
|
||||
import { DataVisualizerIndexBasedAppState } from '../types/index_data_visualizer_state';
|
||||
import type { SamplingOption } from '@kbn/discover-plugin/public/application/main/components/field_stats_table/field_stats_table';
|
||||
import type { RandomSamplerOption } from '../constants/random_sampler';
|
||||
import type { DataVisualizerIndexBasedAppState } from '../types/index_data_visualizer_state';
|
||||
import { useDataVisualizerKibana } from '../../kibana_context';
|
||||
import { getEsQueryFromSavedSearch } from '../utils/saved_search_utils';
|
||||
import { MetricFieldsStats } from '../../common/components/stats_table/components/field_count_stats';
|
||||
import type { MetricFieldsStats } from '../../common/components/stats_table/components/field_count_stats';
|
||||
import { useTimefilter } from './use_time_filter';
|
||||
import { dataVisualizerRefresh$ } from '../services/timefilter_refresh_service';
|
||||
import { TimeBuckets } from '../../../../common/services/time_buckets';
|
||||
import { FieldVisConfig } from '../../common/components/stats_table/types';
|
||||
import type { FieldVisConfig } from '../../common/components/stats_table/types';
|
||||
import {
|
||||
SUPPORTED_FIELD_TYPES,
|
||||
NON_AGGREGATABLE_FIELD_TYPES,
|
||||
|
@ -29,13 +30,13 @@ import {
|
|||
import type { FieldRequestConfig, SupportedFieldType } from '../../../../common/types';
|
||||
import { kbnTypeToJobType } from '../../common/util/field_types_utils';
|
||||
import { getActions } from '../../common/components/field_data_row/action_menu';
|
||||
import { DataVisualizerGridInput } from '../embeddables/grid_embeddable/grid_embeddable';
|
||||
import type { DataVisualizerGridInput } from '../embeddables/grid_embeddable/grid_embeddable';
|
||||
import { getDefaultPageState } from '../components/index_data_visualizer_view/index_data_visualizer_view';
|
||||
import { useFieldStatsSearchStrategy } from './use_field_stats';
|
||||
import { useOverallStats } from './use_overall_stats';
|
||||
import { OverallStatsSearchStrategyParams } from '../../../../common/types/field_stats';
|
||||
import { Dictionary } from '../../common/util/url_state';
|
||||
import { AggregatableField, NonAggregatableField } from '../types/overall_stats';
|
||||
import type { OverallStatsSearchStrategyParams } from '../../../../common/types/field_stats';
|
||||
import type { Dictionary } from '../../common/util/url_state';
|
||||
import type { AggregatableField, NonAggregatableField } from '../types/overall_stats';
|
||||
|
||||
const defaults = getDefaultPageState();
|
||||
|
||||
|
@ -43,6 +44,11 @@ function isDisplayField(fieldName: string): boolean {
|
|||
return !OMIT_FIELDS.includes(fieldName);
|
||||
}
|
||||
|
||||
/**
 * Sampling option used when the grid input does not provide `samplingOption`:
 * random sampling.
 *
 * `seed` and `probability` are placeholders here. The seed is replaced with the
 * browser session seed when the field stats request is assembled, and for random
 * sampling the overall-stats hook assigns the probability reported by the
 * document count stats before issuing the aggregatable-field requests.
 */
const DEFAULT_SAMPLING_OPTION: SamplingOption = {
|
||||
mode: 'random_sampling',
|
||||
seed: '',
|
||||
probability: 0,
|
||||
};
|
||||
export const useDataVisualizerGridData = (
|
||||
input: DataVisualizerGridInput,
|
||||
dataVisualizerListState: Required<DataVisualizerIndexBasedAppState>,
|
||||
|
@ -76,6 +82,7 @@ export const useDataVisualizerGridData = (
|
|||
currentFilters,
|
||||
visibleFieldNames,
|
||||
fieldsToFetch,
|
||||
samplingOption,
|
||||
} = useMemo(
|
||||
() => ({
|
||||
currentSavedSearch: input?.savedSearch,
|
||||
|
@ -84,6 +91,8 @@ export const useDataVisualizerGridData = (
|
|||
visibleFieldNames: input?.visibleFieldNames ?? [],
|
||||
currentFilters: input?.filters,
|
||||
fieldsToFetch: input?.fieldsToFetch,
|
||||
/** By default, use random sampling **/
|
||||
samplingOption: input?.samplingOption ?? DEFAULT_SAMPLING_OPTION,
|
||||
}),
|
||||
[input]
|
||||
);
|
||||
|
@ -203,6 +212,7 @@ export const useDataVisualizerGridData = (
|
|||
}
|
||||
}
|
||||
});
|
||||
|
||||
return {
|
||||
earliest,
|
||||
latest,
|
||||
|
@ -217,6 +227,8 @@ export const useDataVisualizerGridData = (
|
|||
aggregatableFields,
|
||||
nonAggregatableFields,
|
||||
fieldsToFetch,
|
||||
browserSessionSeed,
|
||||
samplingOption: { ...samplingOption, seed: browserSessionSeed.toString() },
|
||||
};
|
||||
},
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
|
@ -226,17 +238,19 @@ export const useDataVisualizerGridData = (
|
|||
currentDataView.id,
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
JSON.stringify(searchQuery),
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
JSON.stringify(samplingOption),
|
||||
samplerShardSize,
|
||||
searchSessionId,
|
||||
lastRefresh,
|
||||
fieldsToFetch,
|
||||
browserSessionSeed,
|
||||
]
|
||||
);
|
||||
|
||||
const { overallStats, progress: overallStatsProgress } = useOverallStats(
|
||||
fieldStatsRequest,
|
||||
lastRefresh,
|
||||
browserSessionSeed,
|
||||
dataVisualizerListState.probability
|
||||
);
|
||||
|
||||
|
@ -269,10 +283,20 @@ export const useDataVisualizerGridData = (
|
|||
return { metricConfigs: existMetricFields, nonMetricConfigs: existNonMetricFields };
|
||||
}, [metricConfigs, nonMetricConfigs, overallStatsProgress.loaded]);
|
||||
|
||||
const probability = useMemo(
|
||||
() =>
|
||||
// If random sampler probability is already manually selected, or is available from the URL
|
||||
// use that instead of using the probability calculated from the doc count
|
||||
(dataVisualizerListState.probability === null
|
||||
? overallStats?.documentCountStats?.probability
|
||||
: dataVisualizerListState.probability) ?? 1,
|
||||
[dataVisualizerListState.probability, overallStats?.documentCountStats?.probability]
|
||||
);
|
||||
const strategyResponse = useFieldStatsSearchStrategy(
|
||||
fieldStatsRequest,
|
||||
configsWithoutStats,
|
||||
dataVisualizerListState
|
||||
dataVisualizerListState,
|
||||
probability
|
||||
);
|
||||
|
||||
const combinedProgress = useMemo(
|
||||
|
|
|
@ -65,7 +65,8 @@ const createBatchedRequests = (fields: Field[], maxBatchSize = 10) => {
|
|||
export function useFieldStatsSearchStrategy(
|
||||
searchStrategyParams: OverallStatsSearchStrategyParams | undefined,
|
||||
fieldStatsParams: FieldStatsParams | undefined,
|
||||
dataVisualizerListState: DataVisualizerIndexBasedAppState
|
||||
dataVisualizerListState: DataVisualizerIndexBasedAppState,
|
||||
samplingProbability: number | null
|
||||
): FieldStatsSearchStrategyReturnBase {
|
||||
const {
|
||||
services: {
|
||||
|
@ -168,6 +169,9 @@ export function useFieldStatsSearchStrategy(
|
|||
},
|
||||
},
|
||||
maxExamples: MAX_EXAMPLES_DEFAULT,
|
||||
samplingProbability,
|
||||
browserSessionSeed: searchStrategyParams.browserSessionSeed,
|
||||
samplingOption: searchStrategyParams.samplingOption,
|
||||
};
|
||||
const searchOptions: ISearchOptions = {
|
||||
abortSignal: abortCtrl.current.signal,
|
||||
|
@ -295,6 +299,7 @@ export function useFieldStatsSearchStrategy(
|
|||
dataVisualizerListState.pageIndex,
|
||||
dataVisualizerListState.sortDirection,
|
||||
dataVisualizerListState.sortField,
|
||||
samplingProbability,
|
||||
]);
|
||||
|
||||
const cancelFetch = useCallback(() => {
|
||||
|
|
|
@ -30,14 +30,14 @@ import {
|
|||
import type { OverallStats } from '../types/overall_stats';
|
||||
import { getDefaultPageState } from '../components/index_data_visualizer_view/index_data_visualizer_view';
|
||||
import { extractErrorProperties } from '../utils/error_utils';
|
||||
import type {
|
||||
import {
|
||||
DataStatsFetchProgress,
|
||||
isRandomSamplingOption,
|
||||
OverallStatsSearchStrategyParams,
|
||||
} from '../../../../common/types/field_stats';
|
||||
import { getDocumentCountStats } from '../search_strategy/requests/get_document_stats';
|
||||
import { getInitialProgress, getReducer } from '../progress_utils';
|
||||
import { MAX_CONCURRENT_REQUESTS } from '../constants/index_data_visualizer_viewer';
|
||||
import { DocumentCountStats } from '../../../../common/types/field_stats';
|
||||
|
||||
/**
|
||||
* Helper function to run forkJoin
|
||||
|
@ -92,7 +92,6 @@ function displayError(toastNotifications: ToastsStart, index: string, err: any)
|
|||
export function useOverallStats<TParams extends OverallStatsSearchStrategyParams>(
|
||||
searchStrategyParams: TParams | undefined,
|
||||
lastRefresh: number,
|
||||
browserSessionSeed: number,
|
||||
probability?: number | null
|
||||
): {
|
||||
progress: DataStatsFetchProgress;
|
||||
|
@ -114,167 +113,163 @@ export function useOverallStats<TParams extends OverallStatsSearchStrategyParams
|
|||
const abortCtrl = useRef(new AbortController());
|
||||
const searchSubscription$ = useRef<Subscription>();
|
||||
|
||||
const startFetch = useCallback(() => {
|
||||
searchSubscription$.current?.unsubscribe();
|
||||
abortCtrl.current.abort();
|
||||
abortCtrl.current = new AbortController();
|
||||
const startFetch = useCallback(async () => {
|
||||
try {
|
||||
searchSubscription$.current?.unsubscribe();
|
||||
abortCtrl.current.abort();
|
||||
abortCtrl.current = new AbortController();
|
||||
|
||||
if (!searchStrategyParams || lastRefresh === 0) return;
|
||||
if (!searchStrategyParams || lastRefresh === 0) return;
|
||||
|
||||
setFetchState({
|
||||
...getInitialProgress(),
|
||||
error: undefined,
|
||||
});
|
||||
setFetchState({
|
||||
...getInitialProgress(),
|
||||
isRunning: true,
|
||||
error: undefined,
|
||||
});
|
||||
|
||||
const {
|
||||
aggregatableFields,
|
||||
nonAggregatableFields,
|
||||
index,
|
||||
searchQuery,
|
||||
timeFieldName,
|
||||
earliest,
|
||||
latest,
|
||||
runtimeFieldMap,
|
||||
samplerShardSize,
|
||||
} = searchStrategyParams;
|
||||
const {
|
||||
aggregatableFields,
|
||||
nonAggregatableFields,
|
||||
index,
|
||||
searchQuery,
|
||||
timeFieldName,
|
||||
earliest,
|
||||
latest,
|
||||
runtimeFieldMap,
|
||||
samplingOption,
|
||||
} = searchStrategyParams;
|
||||
|
||||
const searchOptions: ISearchOptions = {
|
||||
abortSignal: abortCtrl.current.signal,
|
||||
sessionId: searchStrategyParams?.sessionId,
|
||||
};
|
||||
const searchOptions: ISearchOptions = {
|
||||
abortSignal: abortCtrl.current.signal,
|
||||
sessionId: searchStrategyParams?.sessionId,
|
||||
};
|
||||
|
||||
const nonAggregatableFieldsObs = nonAggregatableFields.map((fieldName: string) =>
|
||||
data.search
|
||||
.search<IKibanaSearchRequest, IKibanaSearchResponse>(
|
||||
{
|
||||
params: checkNonAggregatableFieldExistsRequest(
|
||||
index,
|
||||
searchQuery,
|
||||
fieldName,
|
||||
timeFieldName,
|
||||
earliest,
|
||||
latest,
|
||||
runtimeFieldMap
|
||||
),
|
||||
},
|
||||
searchOptions
|
||||
)
|
||||
.pipe(
|
||||
map((resp) => {
|
||||
return {
|
||||
...resp,
|
||||
rawResponse: { ...resp.rawResponse, fieldName },
|
||||
} as IKibanaSearchResponse;
|
||||
})
|
||||
)
|
||||
);
|
||||
const documentCountStats = await getDocumentCountStats(
|
||||
data.search,
|
||||
searchStrategyParams,
|
||||
searchOptions,
|
||||
samplingOption.seed,
|
||||
probability
|
||||
);
|
||||
|
||||
// Have to divide into smaller requests to avoid 413 payload too large
|
||||
const aggregatableFieldsChunks = chunk(aggregatableFields, 30);
|
||||
|
||||
const aggregatableOverallStatsObs = aggregatableFieldsChunks.map((aggregatableFieldsChunk) =>
|
||||
data.search
|
||||
.search(
|
||||
{
|
||||
params: checkAggregatableFieldsExistRequest(
|
||||
index,
|
||||
searchQuery,
|
||||
aggregatableFieldsChunk,
|
||||
samplerShardSize,
|
||||
timeFieldName,
|
||||
earliest,
|
||||
latest,
|
||||
undefined,
|
||||
runtimeFieldMap
|
||||
),
|
||||
},
|
||||
searchOptions
|
||||
)
|
||||
.pipe(
|
||||
map((resp) => {
|
||||
return {
|
||||
...resp,
|
||||
aggregatableFields: aggregatableFieldsChunk,
|
||||
} as AggregatableFieldOverallStats;
|
||||
})
|
||||
)
|
||||
);
|
||||
|
||||
const sub = rateLimitingForkJoin<
|
||||
| DocumentCountStats
|
||||
| AggregatableFieldOverallStats
|
||||
| NonAggregatableFieldOverallStats
|
||||
| undefined
|
||||
>(
|
||||
[
|
||||
from(
|
||||
getDocumentCountStats(
|
||||
data.search,
|
||||
searchStrategyParams,
|
||||
searchOptions,
|
||||
browserSessionSeed,
|
||||
probability
|
||||
const nonAggregatableFieldsObs = nonAggregatableFields.map((fieldName: string) =>
|
||||
data.search
|
||||
.search<IKibanaSearchRequest, IKibanaSearchResponse>(
|
||||
{
|
||||
params: checkNonAggregatableFieldExistsRequest(
|
||||
index,
|
||||
searchQuery,
|
||||
fieldName,
|
||||
timeFieldName,
|
||||
earliest,
|
||||
latest,
|
||||
runtimeFieldMap
|
||||
),
|
||||
},
|
||||
searchOptions
|
||||
)
|
||||
),
|
||||
...aggregatableOverallStatsObs,
|
||||
...nonAggregatableFieldsObs,
|
||||
],
|
||||
MAX_CONCURRENT_REQUESTS
|
||||
);
|
||||
.pipe(
|
||||
map((resp) => {
|
||||
return {
|
||||
...resp,
|
||||
rawResponse: { ...resp.rawResponse, fieldName },
|
||||
} as IKibanaSearchResponse;
|
||||
})
|
||||
)
|
||||
);
|
||||
|
||||
searchSubscription$.current = sub.subscribe({
|
||||
next: (value) => {
|
||||
const aggregatableOverallStatsResp: AggregatableFieldOverallStats[] = [];
|
||||
const nonAggregatableOverallStatsResp: NonAggregatableFieldOverallStats[] = [];
|
||||
const documentCountStats = value[0] as DocumentCountStats;
|
||||
// Have to divide into smaller requests to avoid 413 payload too large
|
||||
const aggregatableFieldsChunks = chunk(aggregatableFields, 30);
|
||||
|
||||
value.forEach((resp, idx) => {
|
||||
if (!resp || idx === 0) return;
|
||||
if (isAggregatableFieldOverallStats(resp)) {
|
||||
aggregatableOverallStatsResp.push(resp);
|
||||
}
|
||||
if (isRandomSamplingOption(samplingOption)) {
|
||||
samplingOption.probability = documentCountStats.probability ?? 1;
|
||||
}
|
||||
const aggregatableOverallStatsObs = aggregatableFieldsChunks.map((aggregatableFieldsChunk) =>
|
||||
data.search
|
||||
.search(
|
||||
{
|
||||
params: checkAggregatableFieldsExistRequest(
|
||||
index,
|
||||
searchQuery,
|
||||
aggregatableFieldsChunk,
|
||||
samplingOption,
|
||||
timeFieldName,
|
||||
earliest,
|
||||
latest,
|
||||
undefined,
|
||||
runtimeFieldMap
|
||||
),
|
||||
},
|
||||
searchOptions
|
||||
)
|
||||
.pipe(
|
||||
map((resp) => {
|
||||
return {
|
||||
...resp,
|
||||
aggregatableFields: aggregatableFieldsChunk,
|
||||
} as AggregatableFieldOverallStats;
|
||||
})
|
||||
)
|
||||
);
|
||||
|
||||
if (isNonAggregatableFieldOverallStats(resp)) {
|
||||
nonAggregatableOverallStatsResp.push(resp);
|
||||
}
|
||||
});
|
||||
const sub = rateLimitingForkJoin<
|
||||
AggregatableFieldOverallStats | NonAggregatableFieldOverallStats | undefined
|
||||
>([...aggregatableOverallStatsObs, ...nonAggregatableFieldsObs], MAX_CONCURRENT_REQUESTS);
|
||||
|
||||
const totalCount = documentCountStats?.totalCount ?? 0;
|
||||
searchSubscription$.current = sub.subscribe({
|
||||
next: (value) => {
|
||||
const aggregatableOverallStatsResp: AggregatableFieldOverallStats[] = [];
|
||||
const nonAggregatableOverallStatsResp: NonAggregatableFieldOverallStats[] = [];
|
||||
|
||||
const aggregatableOverallStats = processAggregatableFieldsExistResponse(
|
||||
aggregatableOverallStatsResp,
|
||||
aggregatableFields,
|
||||
samplerShardSize,
|
||||
totalCount
|
||||
);
|
||||
value.forEach((resp, idx) => {
|
||||
if (isAggregatableFieldOverallStats(resp)) {
|
||||
aggregatableOverallStatsResp.push(resp);
|
||||
}
|
||||
|
||||
const nonAggregatableOverallStats = processNonAggregatableFieldsExistResponse(
|
||||
nonAggregatableOverallStatsResp,
|
||||
nonAggregatableFields
|
||||
);
|
||||
if (isNonAggregatableFieldOverallStats(resp)) {
|
||||
nonAggregatableOverallStatsResp.push(resp);
|
||||
}
|
||||
});
|
||||
|
||||
setOverallStats({
|
||||
documentCountStats,
|
||||
...nonAggregatableOverallStats,
|
||||
...aggregatableOverallStats,
|
||||
totalCount,
|
||||
});
|
||||
},
|
||||
error: (error) => {
|
||||
displayError(toasts, searchStrategyParams.index, extractErrorProperties(error));
|
||||
setFetchState({
|
||||
isRunning: false,
|
||||
error,
|
||||
});
|
||||
},
|
||||
complete: () => {
|
||||
setFetchState({
|
||||
loaded: 100,
|
||||
isRunning: false,
|
||||
});
|
||||
},
|
||||
});
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
const totalCount = documentCountStats?.totalCount ?? 0;
|
||||
|
||||
const aggregatableOverallStats = processAggregatableFieldsExistResponse(
|
||||
aggregatableOverallStatsResp,
|
||||
aggregatableFields
|
||||
);
|
||||
|
||||
const nonAggregatableOverallStats = processNonAggregatableFieldsExistResponse(
|
||||
nonAggregatableOverallStatsResp,
|
||||
nonAggregatableFields
|
||||
);
|
||||
|
||||
setOverallStats({
|
||||
documentCountStats,
|
||||
...nonAggregatableOverallStats,
|
||||
...aggregatableOverallStats,
|
||||
totalCount,
|
||||
});
|
||||
},
|
||||
error: (error) => {
|
||||
displayError(toasts, searchStrategyParams.index, extractErrorProperties(error));
|
||||
setFetchState({
|
||||
isRunning: false,
|
||||
error,
|
||||
});
|
||||
},
|
||||
complete: () => {
|
||||
setFetchState({
|
||||
loaded: 100,
|
||||
isRunning: false,
|
||||
});
|
||||
},
|
||||
});
|
||||
} catch (error) {
|
||||
// An `AbortError` gets triggered when a user cancels a request by navigating away, we need to ignore these errors.
|
||||
if (error.name !== 'AbortError') {
|
||||
displayError(toasts, searchStrategyParams!.index, extractErrorProperties(error));
|
||||
}
|
||||
}
|
||||
}, [data.search, searchStrategyParams, toasts, lastRefresh, probability]);
|
||||
|
||||
const cancelFetch = useCallback(() => {
|
||||
|
@ -286,8 +281,11 @@ export function useOverallStats<TParams extends OverallStatsSearchStrategyParams
|
|||
// auto-update
|
||||
useEffect(() => {
|
||||
startFetch();
|
||||
}, [startFetch]);
|
||||
|
||||
useEffect(() => {
|
||||
return cancelFetch;
|
||||
}, [startFetch, cancelFetch]);
|
||||
}, [cancelFetch]);
|
||||
|
||||
return useMemo(
|
||||
() => ({
|
||||
|
|
|
@ -0,0 +1,103 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
|
||||
import {
|
||||
Aggs,
|
||||
SamplingOption,
|
||||
isNormalSamplingOption,
|
||||
isRandomSamplingOption,
|
||||
} from '../../../../../common/types/field_stats';
|
||||
|
||||
/**
 * Nests the supplied aggregations under a single `sample` aggregation chosen
 * from the sampling option:
 *
 * - normal sampling: a `sampler` aggregation using the option's `shardSize`;
 * - random sampling: a `random_sampler` aggregation using the option's `probability`;
 * - otherwise (no sampling, or no option supplied at runtime): a
 *   `random_sampler` with `probability: 1`.
 *
 * Every branch returns the aggregations under the `sample` key, so callers can
 * always read the response from the `['sample', ...]` path. Previously a missing
 * option returned the aggregations un-nested, which left those callers reading
 * from a path that did not exist in the response.
 *
 * @param aggs Aggregations to nest under `sample`.
 * @param samplingOption Sampling mode and its settings. The seed is only sent
 *   to the random sampler when it is a non-empty value.
 * @returns A request `aggs` object with one top-level `sample` aggregation.
 */
export function buildAggregationWithSamplingOption(
  aggs: Aggs,
  samplingOption: SamplingOption
): Record<string, estypes.AggregationsAggregationContainer> {
  // Defensive: tolerate a missing option at runtime instead of throwing on destructuring.
  const seed = samplingOption?.seed;

  if (samplingOption && isNormalSamplingOption(samplingOption)) {
    return {
      sample: {
        sampler: {
          shard_size: samplingOption.shardSize,
        },
        aggs,
      },
    };
  }

  if (samplingOption && isRandomSamplingOption(samplingOption)) {
    return {
      sample: {
        // @ts-expect-error AggregationsAggregationContainer needs to be updated with random_sampler
        random_sampler: {
          probability: samplingOption.probability,
          ...(seed ? { seed } : {}),
        },
        aggs,
      },
    };
  }

  // No sampling (or no option at all): use a random sampler with probability 1
  // so that all results are still returned under the 'sample' path.
  return {
    sample: {
      aggs,
      // @ts-expect-error AggregationsAggregationContainer needs to be updated with random_sampler
      random_sampler: {
        probability: 1,
        ...(seed ? { seed } : {}),
      },
    },
  };
}
|
||||
|
||||
/**
 * Nests the given aggregations under a `sample` aggregation backed by
 * `random_sampler`.
 *
 * @param aggs Aggregations to nest.
 * @param probability Sampling probability. When it is `null`, not greater than
 *   0, or greater than 1, no sampler is applied and `aggs` is returned as is.
 * @param seed Seed forwarded to the sampler; omitted from the request when falsy.
 * @returns Either `aggs` unchanged or `{ sample: { aggs, random_sampler } }`.
 */
export function buildRandomSamplerAggregation(
  aggs: Aggs,
  probability: number | null,
  seed: number
): Record<string, estypes.AggregationsAggregationContainer> {
  const samplingDisabled = probability === null || probability <= 0 || probability > 1;
  if (samplingDisabled) {
    return aggs;
  }

  // Only include the seed key when a truthy seed was provided.
  const seedOption = seed ? { seed } : {};

  return {
    sample: {
      aggs,
      // @ts-expect-error AggregationsAggregationContainer needs to be updated with random_sampler
      random_sampler: {
        probability,
        ...seedOption,
      },
    },
  };
}
|
||||
|
||||
/**
 * Nests the given aggregations under a `sample` aggregation backed by the
 * `sampler` aggregation with the requested shard size.
 *
 * @param aggs Aggregations to nest.
 * @param shardSize Value sent as `shard_size`. When it is 0 or negative no
 *   sampler is applied and `aggs` is returned as is.
 * @returns Either `aggs` unchanged or `{ sample: { aggs, sampler } }`.
 */
export function buildSamplerAggregation(
  aggs: Aggs,
  shardSize: number
): Record<string, estypes.AggregationsAggregationContainer> {
  return shardSize <= 0
    ? aggs
    : {
        sample: {
          aggs,
          sampler: { shard_size: shardSize },
        },
      };
}
|
|
@ -14,9 +14,10 @@ import type {
|
|||
ISearchOptions,
|
||||
ISearchStart,
|
||||
} from '@kbn/data-plugin/public';
|
||||
import { buildSamplerAggregation, getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
|
||||
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
|
||||
|
||||
import { processTopValues } from './utils';
|
||||
import { buildAggregationWithSamplingOption } from './build_random_sampler_agg';
|
||||
import type {
|
||||
Field,
|
||||
BooleanFieldStats,
|
||||
|
@ -30,7 +31,7 @@ export const getBooleanFieldsStatsRequest = (
|
|||
params: FieldStatsCommonRequestParams,
|
||||
fields: Field[]
|
||||
) => {
|
||||
const { index, query, runtimeFieldMap, samplerShardSize } = params;
|
||||
const { index, query, runtimeFieldMap } = params;
|
||||
|
||||
const size = 0;
|
||||
const aggs: Aggs = {};
|
||||
|
@ -48,7 +49,7 @@ export const getBooleanFieldsStatsRequest = (
|
|||
});
|
||||
const searchBody = {
|
||||
query,
|
||||
aggs: buildSamplerAggregation(aggs, samplerShardSize),
|
||||
aggs: buildAggregationWithSamplingOption(aggs, params.samplingOption),
|
||||
...(isPopulatedObject(runtimeFieldMap) ? { runtime_mappings: runtimeFieldMap } : {}),
|
||||
};
|
||||
|
||||
|
@ -65,7 +66,6 @@ export const fetchBooleanFieldsStats = (
|
|||
fields: Field[],
|
||||
options: ISearchOptions
|
||||
): Observable<BooleanFieldStats[] | FieldStatsError> => {
|
||||
const { samplerShardSize } = params;
|
||||
const request: estypes.SearchRequest = getBooleanFieldsStatsRequest(params, fields);
|
||||
return dataSearch
|
||||
.search<IKibanaSearchRequest, IKibanaSearchResponse>({ params: request }, options)
|
||||
|
@ -80,15 +80,34 @@ export const fetchBooleanFieldsStats = (
|
|||
if (!isIKibanaSearchResponse(resp)) return resp;
|
||||
|
||||
const aggregations = resp.rawResponse.aggregations;
|
||||
const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
|
||||
const aggsPath = ['sample'];
|
||||
const sampleCount = get(aggregations, [...aggsPath, 'doc_count'], 0);
|
||||
|
||||
const batchStats: BooleanFieldStats[] = fields.map((field, i) => {
|
||||
const safeFieldName = field.fieldName;
|
||||
// Sampler agg will yield doc_count that's bigger than the actual # of sampled records
|
||||
// because it uses the stored _doc_count if available
|
||||
// https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-doc-count-field.html
|
||||
// therefore we need to correct it by multiplying by the sampled probability
|
||||
const count = get(
|
||||
aggregations,
|
||||
[...aggsPath, `${safeFieldName}_value_count`, 'doc_count'],
|
||||
0
|
||||
);
|
||||
|
||||
const fieldAgg = get(aggregations, [...aggsPath, `${safeFieldName}_values`], {});
|
||||
const { topValuesSampleSize, topValues } = processTopValues(fieldAgg);
|
||||
|
||||
const multiplier =
|
||||
count > sampleCount ? get(aggregations, [...aggsPath, 'probability'], 1) : 1;
|
||||
|
||||
const stats: BooleanFieldStats = {
|
||||
fieldName: field.fieldName,
|
||||
count: get(aggregations, [...aggsPath, `${safeFieldName}_value_count`, 'doc_count'], 0),
|
||||
count: count * multiplier,
|
||||
trueCount: 0,
|
||||
falseCount: 0,
|
||||
topValues,
|
||||
topValuesSampleSize,
|
||||
};
|
||||
|
||||
const valueBuckets: Array<{ [key: string]: number }> = get(
|
||||
|
@ -97,7 +116,7 @@ export const fetchBooleanFieldsStats = (
|
|||
[]
|
||||
);
|
||||
valueBuckets.forEach((bucket) => {
|
||||
stats[`${bucket.key_as_string}Count`] = bucket.doc_count;
|
||||
stats[`${bucket.key_as_string}Count` as 'trueCount' | 'falseCount'] = bucket.doc_count;
|
||||
});
|
||||
return stats;
|
||||
});
|
||||
|
|
|
@ -15,8 +15,8 @@ import type {
|
|||
ISearchOptions,
|
||||
ISearchStart,
|
||||
} from '@kbn/data-plugin/public';
|
||||
import { buildSamplerAggregation, getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
|
||||
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
|
||||
import { buildAggregationWithSamplingOption } from './build_random_sampler_agg';
|
||||
import type { FieldStatsCommonRequestParams } from '../../../../../common/types/field_stats';
|
||||
import type { Field, DateFieldStats, Aggs } from '../../../../../common/types/field_stats';
|
||||
import { FieldStatsError, isIKibanaSearchResponse } from '../../../../../common/types/field_stats';
|
||||
|
@ -26,7 +26,7 @@ export const getDateFieldsStatsRequest = (
|
|||
params: FieldStatsCommonRequestParams,
|
||||
fields: Field[]
|
||||
) => {
|
||||
const { index, query, runtimeFieldMap, samplerShardSize } = params;
|
||||
const { index, query, runtimeFieldMap } = params;
|
||||
|
||||
const size = 0;
|
||||
|
||||
|
@ -45,7 +45,7 @@ export const getDateFieldsStatsRequest = (
|
|||
|
||||
const searchBody = {
|
||||
query,
|
||||
aggs: buildSamplerAggregation(aggs, samplerShardSize),
|
||||
aggs: buildAggregationWithSamplingOption(aggs, params.samplingOption),
|
||||
...(isPopulatedObject(runtimeFieldMap) ? { runtime_mappings: runtimeFieldMap } : {}),
|
||||
};
|
||||
return {
|
||||
|
@ -61,8 +61,6 @@ export const fetchDateFieldsStats = (
|
|||
fields: Field[],
|
||||
options: ISearchOptions
|
||||
): Observable<DateFieldStats[] | FieldStatsError> => {
|
||||
const { samplerShardSize } = params;
|
||||
|
||||
const request: estypes.SearchRequest = getDateFieldsStatsRequest(params, fields);
|
||||
return dataSearch
|
||||
.search<IKibanaSearchRequest, IKibanaSearchResponse>({ params: request }, options)
|
||||
|
@ -76,15 +74,10 @@ export const fetchDateFieldsStats = (
|
|||
map((resp) => {
|
||||
if (!isIKibanaSearchResponse(resp)) return resp;
|
||||
const aggregations = resp.rawResponse.aggregations;
|
||||
const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
|
||||
const aggsPath = ['sample'];
|
||||
|
||||
const batchStats: DateFieldStats[] = fields.map((field, i) => {
|
||||
const safeFieldName = field.safeFieldName;
|
||||
const docCount = get(
|
||||
aggregations,
|
||||
[...aggsPath, `${safeFieldName}_field_stats`, 'doc_count'],
|
||||
0
|
||||
);
|
||||
const fieldStatsResp = get(
|
||||
aggregations,
|
||||
[...aggsPath, `${safeFieldName}_field_stats`, 'actual_stats'],
|
||||
|
@ -92,7 +85,6 @@ export const fetchDateFieldsStats = (
|
|||
);
|
||||
return {
|
||||
fieldName: field.fieldName,
|
||||
count: docCount,
|
||||
earliest: get(fieldStatsResp, 'min', 0),
|
||||
latest: get(fieldStatsResp, 'max', 0),
|
||||
} as DateFieldStats;
|
||||
|
|
|
@ -19,6 +19,8 @@ import type {
|
|||
} from '../../../../../common/types/field_stats';
|
||||
|
||||
const MINIMUM_RANDOM_SAMPLER_DOC_COUNT = 100000;
|
||||
const DEFAULT_INITIAL_RANDOM_SAMPLER_PROBABILITY = 0.000001;
|
||||
|
||||
export const getDocumentCountStatsRequest = (params: OverallStatsSearchStrategyParams) => {
|
||||
const {
|
||||
index,
|
||||
|
@ -69,11 +71,11 @@ export const getDocumentCountStats = async (
|
|||
search: DataPublicPluginStart['search'],
|
||||
params: OverallStatsSearchStrategyParams,
|
||||
searchOptions: ISearchOptions,
|
||||
browserSessionSeed: number,
|
||||
browserSessionSeed: string,
|
||||
probability?: number | null,
|
||||
minimumRandomSamplerDocCount?: number
|
||||
): Promise<DocumentCountStats> => {
|
||||
const seed = browserSessionSeed ?? Math.abs(seedrandom().int32());
|
||||
const seed = browserSessionSeed ?? Math.abs(seedrandom().int32()).toString();
|
||||
|
||||
const {
|
||||
index,
|
||||
|
@ -83,10 +85,11 @@ export const getDocumentCountStats = async (
|
|||
runtimeFieldMap,
|
||||
searchQuery,
|
||||
intervalMs,
|
||||
fieldsToFetch,
|
||||
} = params;
|
||||
|
||||
const result = { randomlySampled: false, took: 0, totalCount: 0 };
|
||||
// Probability = 1 represents no sampling
|
||||
const result = { randomlySampled: false, took: 0, totalCount: 0, probability: 1 };
|
||||
|
||||
const filterCriteria = buildBaseFilterCriteria(timeFieldName, earliestMs, latestMs, searchQuery);
|
||||
|
||||
const query = {
|
||||
|
@ -109,7 +112,7 @@ export const getDocumentCountStats = async (
|
|||
// If probability is provided, use that
|
||||
// Else, make an initial query using very low p
|
||||
// so that we can calculate the next p value that's appropriate for the data set
|
||||
const initialDefaultProbability = probability ?? 0.000001;
|
||||
const initialDefaultProbability = probability ?? DEFAULT_INITIAL_RANDOM_SAMPLER_PROBABILITY;
|
||||
|
||||
const getAggsWithRandomSampling = (p: number) => ({
|
||||
sampler: {
|
||||
|
@ -121,16 +124,13 @@ export const getDocumentCountStats = async (
|
|||
},
|
||||
});
|
||||
|
||||
const hasTimeField = timeFieldName !== undefined && intervalMs !== undefined && intervalMs > 0;
|
||||
|
||||
const getSearchParams = (aggregations: unknown, trackTotalHits = false) => ({
|
||||
index,
|
||||
body: {
|
||||
query,
|
||||
...(!fieldsToFetch &&
|
||||
timeFieldName !== undefined &&
|
||||
intervalMs !== undefined &&
|
||||
intervalMs > 0
|
||||
? { aggs: aggregations }
|
||||
: {}),
|
||||
...(hasTimeField ? { aggs: aggregations } : {}),
|
||||
...(isPopulatedObject(runtimeFieldMap) ? { runtime_mappings: runtimeFieldMap } : {}),
|
||||
},
|
||||
track_total_hits: trackTotalHits,
|
||||
|
@ -142,7 +142,7 @@ export const getDocumentCountStats = async (
|
|||
params: getSearchParams(
|
||||
getAggsWithRandomSampling(initialDefaultProbability),
|
||||
// Track total hits if time field is not defined
|
||||
timeFieldName === undefined
|
||||
!hasTimeField
|
||||
),
|
||||
},
|
||||
searchOptions
|
||||
|
@ -189,13 +189,10 @@ export const getDocumentCountStats = async (
|
|||
const newProbability =
|
||||
(initialDefaultProbability * numDocs) / (numSampled - 2 * Math.sqrt(numSampled));
|
||||
|
||||
// If the number of docs sampled is indicative of query with < 10 million docs
|
||||
// proceed to make a vanilla aggregation without any sampling
|
||||
if (
|
||||
numSampled === 0 ||
|
||||
newProbability === Infinity ||
|
||||
numSampled / initialDefaultProbability < 1e7
|
||||
) {
|
||||
// If the number of docs is < 3 million
|
||||
// proceed to make a vanilla aggregation without any sampling (probability = 1)
|
||||
// Minimum of 4 docs (3e6 * 0.000001 + 1) sampled gives us 90% confidence interval # docs is within
|
||||
if (newProbability === Infinity || numSampled <= 4) {
|
||||
const vanillaAggResp = await search
|
||||
.search(
|
||||
{
|
||||
|
@ -241,7 +238,7 @@ export const processDocumentCountStats = (
|
|||
body: estypes.SearchResponse | undefined,
|
||||
params: OverallStatsSearchStrategyParams,
|
||||
randomlySampled = false
|
||||
): DocumentCountStats | undefined => {
|
||||
): Omit<DocumentCountStats, 'probability'> | undefined => {
|
||||
if (!body) return undefined;
|
||||
|
||||
let totalCount = 0;
|
||||
|
|
|
@ -16,30 +16,33 @@ import {
|
|||
ISearchOptions,
|
||||
} from '@kbn/data-plugin/common';
|
||||
import type { ISearchStart } from '@kbn/data-plugin/public';
|
||||
import { buildSamplerAggregation, getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
|
||||
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
|
||||
import {
|
||||
MAX_PERCENT,
|
||||
PERCENTILE_SPACING,
|
||||
SAMPLER_TOP_TERMS_SHARD_SIZE,
|
||||
SAMPLER_TOP_TERMS_THRESHOLD,
|
||||
} from './constants';
|
||||
import type { Aggs, FieldStatsCommonRequestParams } from '../../../../../common/types/field_stats';
|
||||
import { processTopValues } from './utils';
|
||||
import { isDefined } from '../../../common/util/is_defined';
|
||||
import { buildAggregationWithSamplingOption } from './build_random_sampler_agg';
|
||||
import { MAX_PERCENT, PERCENTILE_SPACING, SAMPLER_TOP_TERMS_THRESHOLD } from './constants';
|
||||
import type {
|
||||
Aggs,
|
||||
Bucket,
|
||||
FieldStatsCommonRequestParams,
|
||||
} from '../../../../../common/types/field_stats';
|
||||
import type {
|
||||
Field,
|
||||
NumericFieldStats,
|
||||
Bucket,
|
||||
FieldStatsError,
|
||||
} from '../../../../../common/types/field_stats';
|
||||
import { processDistributionData } from '../../utils/process_distribution_data';
|
||||
import { extractErrorProperties } from '../../utils/error_utils';
|
||||
import { isIKibanaSearchResponse } from '../../../../../common/types/field_stats';
|
||||
import {
|
||||
isIKibanaSearchResponse,
|
||||
isNormalSamplingOption,
|
||||
} from '../../../../../common/types/field_stats';
|
||||
|
||||
export const getNumericFieldsStatsRequest = (
|
||||
params: FieldStatsCommonRequestParams,
|
||||
fields: Field[]
|
||||
) => {
|
||||
const { index, query, runtimeFieldMap, samplerShardSize } = params;
|
||||
const { index, query, runtimeFieldMap } = params;
|
||||
|
||||
const size = 0;
|
||||
|
||||
|
@ -83,23 +86,12 @@ export const getNumericFieldsStatsRequest = (
|
|||
} as AggregationsTermsAggregation,
|
||||
};
|
||||
|
||||
// If cardinality >= SAMPLE_TOP_TERMS_THRESHOLD, run the top terms aggregation
|
||||
// in a sampler aggregation, even if no sampling has been specified (samplerShardSize < 1).
|
||||
if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) {
|
||||
aggs[`${safeFieldName}_top`] = buildSamplerAggregation(
|
||||
{
|
||||
top,
|
||||
},
|
||||
0.05
|
||||
);
|
||||
} else {
|
||||
aggs[`${safeFieldName}_top`] = top;
|
||||
}
|
||||
aggs[`${safeFieldName}_top`] = top;
|
||||
});
|
||||
|
||||
const searchBody = {
|
||||
query,
|
||||
aggs: buildSamplerAggregation(aggs, samplerShardSize),
|
||||
aggs: buildAggregationWithSamplingOption(aggs, params.samplingOption),
|
||||
...(isPopulatedObject(runtimeFieldMap) ? { runtime_mappings: runtimeFieldMap } : {}),
|
||||
};
|
||||
|
||||
|
@ -132,7 +124,7 @@ export const fetchNumericFieldsStats = (
|
|||
if (!isIKibanaSearchResponse(resp)) return resp;
|
||||
|
||||
const aggregations = resp.rawResponse.aggregations;
|
||||
const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
|
||||
const aggsPath = ['sample'];
|
||||
|
||||
const batchStats: NumericFieldStats[] = [];
|
||||
|
||||
|
@ -154,28 +146,23 @@ export const fetchNumericFieldsStats = (
|
|||
topAggsPath.push('top');
|
||||
}
|
||||
|
||||
const topValues: Bucket[] = get(aggregations, [...topAggsPath, 'buckets'], []);
|
||||
const fieldAgg = get(aggregations, [...topAggsPath], {}) as { buckets: Bucket[] };
|
||||
const { topValuesSampleSize, topValues } = processTopValues(fieldAgg);
|
||||
|
||||
const stats: NumericFieldStats = {
|
||||
fieldName: field.fieldName,
|
||||
count: docCount,
|
||||
min: get(fieldStatsResp, 'min', 0),
|
||||
max: get(fieldStatsResp, 'max', 0),
|
||||
avg: get(fieldStatsResp, 'avg', 0),
|
||||
isTopValuesSampled:
|
||||
field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD || samplerShardSize > 0,
|
||||
isNormalSamplingOption(params.samplingOption) ||
|
||||
(isDefined(params.samplingProbability) && params.samplingProbability < 1),
|
||||
topValues,
|
||||
topValuesSampleSize: topValues.reduce(
|
||||
(acc, curr) => acc + curr.doc_count,
|
||||
get(aggregations, [...topAggsPath, 'sum_other_doc_count'], 0)
|
||||
),
|
||||
topValuesSamplerShardSize:
|
||||
field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD
|
||||
? SAMPLER_TOP_TERMS_SHARD_SIZE
|
||||
: samplerShardSize,
|
||||
topValuesSampleSize,
|
||||
topValuesSamplerShardSize: get(aggregations, ['sample', 'doc_count']),
|
||||
};
|
||||
|
||||
if (stats.count > 0) {
|
||||
if (docCount > 0) {
|
||||
const percentiles = get(
|
||||
aggregations,
|
||||
[...aggsPath, `${safeFieldName}_percentiles`, 'values'],
|
||||
|
|
|
@ -15,12 +15,12 @@ import type {
|
|||
ISearchOptions,
|
||||
ISearchStart,
|
||||
} from '@kbn/data-plugin/public';
|
||||
import { buildSamplerAggregation, getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
|
||||
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
|
||||
import { SAMPLER_TOP_TERMS_SHARD_SIZE, SAMPLER_TOP_TERMS_THRESHOLD } from './constants';
|
||||
import { processTopValues } from './utils';
|
||||
import { buildAggregationWithSamplingOption } from './build_random_sampler_agg';
|
||||
import { SAMPLER_TOP_TERMS_THRESHOLD } from './constants';
|
||||
import type {
|
||||
Aggs,
|
||||
Bucket,
|
||||
Field,
|
||||
FieldStatsCommonRequestParams,
|
||||
StringFieldStats,
|
||||
|
@ -32,7 +32,7 @@ export const getStringFieldStatsRequest = (
|
|||
params: FieldStatsCommonRequestParams,
|
||||
fields: Field[]
|
||||
) => {
|
||||
const { index, query, runtimeFieldMap, samplerShardSize } = params;
|
||||
const { index, query, runtimeFieldMap } = params;
|
||||
|
||||
const size = 0;
|
||||
|
||||
|
@ -49,25 +49,12 @@ export const getStringFieldStatsRequest = (
|
|||
} as AggregationsTermsAggregation,
|
||||
};
|
||||
|
||||
// If cardinality >= SAMPLE_TOP_TERMS_THRESHOLD, run the top terms aggregation
|
||||
// in a sampler aggregation, even if no sampling has been specified (samplerShardSize < 1).
|
||||
if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) {
|
||||
aggs[`${safeFieldName}_top`] = {
|
||||
sampler: {
|
||||
shard_size: SAMPLER_TOP_TERMS_SHARD_SIZE,
|
||||
},
|
||||
aggs: {
|
||||
top,
|
||||
},
|
||||
};
|
||||
} else {
|
||||
aggs[`${safeFieldName}_top`] = top;
|
||||
}
|
||||
aggs[`${safeFieldName}_top`] = top;
|
||||
});
|
||||
|
||||
const searchBody = {
|
||||
query,
|
||||
aggs: buildSamplerAggregation(aggs, samplerShardSize),
|
||||
aggs: buildAggregationWithSamplingOption(aggs, params.samplingOption),
|
||||
...(isPopulatedObject(runtimeFieldMap) ? { runtime_mappings: runtimeFieldMap } : {}),
|
||||
};
|
||||
|
||||
|
@ -99,7 +86,8 @@ export const fetchStringFieldsStats = (
|
|||
map((resp) => {
|
||||
if (!isIKibanaSearchResponse(resp)) return resp;
|
||||
const aggregations = resp.rawResponse.aggregations;
|
||||
const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
|
||||
|
||||
const aggsPath = ['sample'];
|
||||
const batchStats: StringFieldStats[] = [];
|
||||
|
||||
fields.forEach((field, i) => {
|
||||
|
@ -110,21 +98,18 @@ export const fetchStringFieldsStats = (
|
|||
topAggsPath.push('top');
|
||||
}
|
||||
|
||||
const topValues: Bucket[] = get(aggregations, [...topAggsPath, 'buckets'], []);
|
||||
const fieldAgg = get(aggregations, [...topAggsPath], {});
|
||||
|
||||
const { topValuesSampleSize, topValues } = processTopValues(
|
||||
fieldAgg,
|
||||
get(aggregations, ['sample', 'doc_count'])
|
||||
);
|
||||
const stats = {
|
||||
fieldName: field.fieldName,
|
||||
isTopValuesSampled:
|
||||
field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD || samplerShardSize > 0,
|
||||
isTopValuesSampled: true,
|
||||
topValues,
|
||||
topValuesSampleSize: topValues.reduce(
|
||||
(acc, curr) => acc + curr.doc_count,
|
||||
get(aggregations, [...topAggsPath, 'sum_other_doc_count'], 0)
|
||||
),
|
||||
topValuesSamplerShardSize:
|
||||
field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD
|
||||
? SAMPLER_TOP_TERMS_SHARD_SIZE
|
||||
: samplerShardSize,
|
||||
topValuesSampleSize,
|
||||
topValuesSamplerShardSize: get(aggregations, ['sample', 'doc_count']),
|
||||
};
|
||||
|
||||
batchStats.push(stats);
|
||||
|
|
|
@ -10,21 +10,21 @@ import { get } from 'lodash';
|
|||
import { Query } from '@kbn/es-query';
|
||||
import { IKibanaSearchResponse } from '@kbn/data-plugin/common';
|
||||
import type { AggCardinality } from '@kbn/ml-agg-utils';
|
||||
import { buildSamplerAggregation, getSamplerAggregationsResponsePath } from '@kbn/ml-agg-utils';
|
||||
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
|
||||
import { buildAggregationWithSamplingOption } from './build_random_sampler_agg';
|
||||
import {
|
||||
buildBaseFilterCriteria,
|
||||
getSafeAggregationName,
|
||||
} from '../../../../../common/utils/query_utils';
|
||||
import { getDatafeedAggregations } from '../../../../../common/utils/datafeed_utils';
|
||||
import { AggregatableField, NonAggregatableField } from '../../types/overall_stats';
|
||||
import { Aggs } from '../../../../../common/types/field_stats';
|
||||
import { Aggs, SamplingOption } from '../../../../../common/types/field_stats';
|
||||
|
||||
export const checkAggregatableFieldsExistRequest = (
|
||||
dataViewTitle: string,
|
||||
query: Query['query'],
|
||||
aggregatableFields: string[],
|
||||
samplerShardSize: number,
|
||||
samplingOption: SamplingOption,
|
||||
timeFieldName: string | undefined,
|
||||
earliestMs?: number,
|
||||
latestMs?: number,
|
||||
|
@ -73,7 +73,9 @@ export const checkAggregatableFieldsExistRequest = (
|
|||
filter: filterCriteria,
|
||||
},
|
||||
},
|
||||
...(isPopulatedObject(aggs) ? { aggs: buildSamplerAggregation(aggs, samplerShardSize) } : {}),
|
||||
...(isPopulatedObject(aggs)
|
||||
? { aggs: buildAggregationWithSamplingOption(aggs, samplingOption) }
|
||||
: {}),
|
||||
...(isPopulatedObject(combinedRuntimeMappings)
|
||||
? { runtime_mappings: combinedRuntimeMappings }
|
||||
: {}),
|
||||
|
@ -109,8 +111,6 @@ export function isNonAggregatableFieldOverallStats(
|
|||
export const processAggregatableFieldsExistResponse = (
|
||||
responses: AggregatableFieldOverallStats[] | undefined,
|
||||
aggregatableFields: string[],
|
||||
samplerShardSize: number,
|
||||
totalCount: number,
|
||||
datafeedConfig?: estypes.MlDatafeed
|
||||
) => {
|
||||
const stats = {
|
||||
|
@ -123,12 +123,17 @@ export const processAggregatableFieldsExistResponse = (
|
|||
responses.forEach(({ rawResponse: body, aggregatableFields: aggregatableFieldsChunk }) => {
|
||||
const aggregations = body.aggregations;
|
||||
|
||||
const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize);
|
||||
const sampleCount =
|
||||
samplerShardSize > 0 ? get(aggregations, ['sample', 'doc_count'], 0) : totalCount;
|
||||
const aggsPath = ['sample'];
|
||||
const sampleCount = aggregations.sample.doc_count;
|
||||
aggregatableFieldsChunk.forEach((field, i) => {
|
||||
const safeFieldName = getSafeAggregationName(field, i);
|
||||
// Sampler agg will yield doc_count that's bigger than the actual # of sampled records
|
||||
// because it uses the stored _doc_count if available
|
||||
// https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-doc-count-field.html
|
||||
// therefore we need to correct it by multiplying by the sampled probability
|
||||
const count = get(aggregations, [...aggsPath, `${safeFieldName}_count`, 'doc_count'], 0);
|
||||
const multiplier =
|
||||
count > sampleCount ? get(aggregations, [...aggsPath, 'probability'], 1) : 1;
|
||||
if (count > 0) {
|
||||
const cardinality = get(
|
||||
aggregations,
|
||||
|
@ -140,7 +145,7 @@ export const processAggregatableFieldsExistResponse = (
|
|||
existsInDocs: true,
|
||||
stats: {
|
||||
sampleCount,
|
||||
count,
|
||||
count: count * multiplier,
|
||||
cardinality,
|
||||
},
|
||||
});
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
|
||||
import { Bucket } from '../../../../../common/types/field_stats';
|
||||
|
||||
/**
 * Utility to calculate the correct sample size, whether or not `_doc_count` is set,
 * and the percentage (as a fraction) for each top-values bucket.
 * https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-doc-count-field.html
 *
 * The sample size is the sum of every returned bucket's `doc_count` plus
 * `sum_other_doc_count`; both are read from `aggResult` only when present.
 *
 * @param aggResult Aggregation result object; `buckets` and `sum_other_doc_count`
 * are used when they exist, otherwise an empty list and 0 are assumed.
 * @param sampledCount Optional number of sampled documents. When truthy, each
 * bucket's `doc_count` is rescaled proportionally to this total and rounded down.
 * @returns `topValuesSampleSize` (bucket counts + other count) and `topValues`,
 * the input buckets with the (possibly rescaled) `doc_count` and a `percent`
 * fraction relative to the sample size.
 */
export const processTopValues = (aggResult: object, sampledCount?: number) => {
  const topValuesBuckets: Bucket[] = isPopulatedObject<'buckets', Bucket[]>(aggResult, ['buckets'])
    ? aggResult.buckets
    : [];
  const sumOtherDocCount = isPopulatedObject<'sum_other_doc_count', number>(aggResult, [
    'sum_other_doc_count',
  ])
    ? aggResult.sum_other_doc_count
    : 0;
  // `|| 0` keeps the total usable if a bucket carries a non-numeric doc_count.
  const valuesInTopBuckets =
    topValuesBuckets.reduce((sum, bucket) => sum + bucket.doc_count, 0) || 0;
  // We could use `aggregations.sample.sample_count.value` instead, but it does not always give a correct sum
  // See Github issue #144625
  const realNumberOfDocuments = valuesInTopBuckets + sumOtherDocCount;
  // Avoid dividing by zero: with an empty total every fraction would become NaN
  // and leak into the rendered counts and percentages.
  const hasDocuments = realNumberOfDocuments > 0;

  const topValues = topValuesBuckets.map((bucket) => ({
    ...bucket,
    doc_count:
      sampledCount && hasDocuments
        ? Math.floor(bucket.doc_count * (sampledCount / realNumberOfDocuments))
        : bucket.doc_count,
    percent: hasDocuments ? bucket.doc_count / realNumberOfDocuments : 0,
  }));

  return {
    topValuesSampleSize: realNumberOfDocuments,
    topValues,
  };
};
|
|
@ -9,7 +9,6 @@ import type { Filter } from '@kbn/es-query';
|
|||
import type { Query } from '@kbn/data-plugin/common/query';
|
||||
import type { RandomSamplerOption } from '../constants/random_sampler';
|
||||
import type { SearchQueryLanguage } from './combined_query';
|
||||
|
||||
export interface ListingPageUrlState {
|
||||
pageSize: number;
|
||||
pageIndex: number;
|
||||
|
|
|
@ -656,20 +656,7 @@ export class DataVisualizer {
|
|||
},
|
||||
};
|
||||
|
||||
// If cardinality >= SAMPLE_TOP_TERMS_THRESHOLD, run the top terms aggregation
|
||||
// in a sampler aggregation, even if no sampling has been specified (samplerShardSize < 1).
|
||||
if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) {
|
||||
aggs[`${safeFieldName}_top`] = {
|
||||
sampler: {
|
||||
shard_size: SAMPLER_TOP_TERMS_SHARD_SIZE,
|
||||
},
|
||||
aggs: {
|
||||
top,
|
||||
},
|
||||
};
|
||||
} else {
|
||||
aggs[`${safeFieldName}_top`] = top;
|
||||
}
|
||||
aggs[`${safeFieldName}_top`] = top;
|
||||
});
|
||||
|
||||
const searchBody = {
|
||||
|
@ -782,20 +769,7 @@ export class DataVisualizer {
|
|||
},
|
||||
};
|
||||
|
||||
// If cardinality >= SAMPLE_TOP_TERMS_THRESHOLD, run the top terms aggregation
|
||||
// in a sampler aggregation, even if no sampling has been specified (samplerShardSize < 1).
|
||||
if (samplerShardSize < 1 && field.cardinality >= SAMPLER_TOP_TERMS_THRESHOLD) {
|
||||
aggs[`${safeFieldName}_top`] = {
|
||||
sampler: {
|
||||
shard_size: SAMPLER_TOP_TERMS_SHARD_SIZE,
|
||||
},
|
||||
aggs: {
|
||||
top,
|
||||
},
|
||||
};
|
||||
} else {
|
||||
aggs[`${safeFieldName}_top`] = top;
|
||||
}
|
||||
aggs[`${safeFieldName}_top`] = top;
|
||||
});
|
||||
|
||||
const searchBody = {
|
||||
|
|
|
@ -9941,8 +9941,6 @@
|
|||
"xpack.dataVisualizer.dataGrid.field.metricDistributionChart.tooltipValueBetweenLabel": "{percent} % des documents ont des valeurs comprises entre {minValFormatted} et {maxValFormatted}",
|
||||
"xpack.dataVisualizer.dataGrid.field.metricDistributionChart.tooltipValueEqualLabel": "{percent} % des documents ont une valeur de {valFormatted}",
|
||||
"xpack.dataVisualizer.dataGrid.field.removeFilterAriaLabel": "Exclure le {fieldName} : \"{value}\"",
|
||||
"xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromSampleDescription": "Calculé à partir d'un échantillon de {topValuesSamplerShardSize} documents par partition",
|
||||
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.choroplethMapTopValues.calculatedFromSampleDescription": "Calculé à partir d'un échantillon de {topValuesSamplerShardSize} documents par partition",
|
||||
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.numberContent.displayingPercentilesLabel": "Affichage de {minPercent} - {maxPercent} centiles",
|
||||
"xpack.dataVisualizer.dataGrid.fieldText.fieldMayBePopulatedDescription": "Il peut être rempli, par exemple, à l'aide d'un paramètre {copyToParam} dans le mapping du document ou être réduit à partir du champ {sourceParam} après une indexation par l'utilisation des paramètres {includesParam} et {excludesParam}.",
|
||||
"xpack.dataVisualizer.dataGrid.fieldText.fieldNotPresentDescription": "Ce champ n'était pas présent dans le champ {sourceParam} des documents interrogés.",
|
||||
|
@ -9980,7 +9978,6 @@
|
|||
"xpack.dataVisualizer.nameCollisionMsg": "\"{name}\" existe déjà, veuillez fournir un nom unique",
|
||||
"xpack.dataVisualizer.randomSamplerSettingsPopUp.probabilityLabel": "Probabilité utilisée : {samplingProbability} %",
|
||||
"xpack.dataVisualizer.searchPanel.ofFieldsTotal": "sur un total de {totalCount}",
|
||||
"xpack.dataVisualizer.searchPanel.sampleSizeOptionLabel": "Taille de l'échantillon (par partition) : {wrappedValue}",
|
||||
"xpack.dataVisualizer.searchPanel.totalDocCountLabel": "Total des documents : {prepend}{strongTotalCount}",
|
||||
"xpack.dataVisualizer.searchPanel.totalDocCountNumber": "{totalCount, plural, other {#}}",
|
||||
"xpack.dataVisualizer.addCombinedFieldsLabel": "Ajouter un champ combiné",
|
||||
|
@ -10013,8 +10010,6 @@
|
|||
"xpack.dataVisualizer.dataGrid.field.loadingLabel": "Chargement",
|
||||
"xpack.dataVisualizer.dataGrid.field.metricDistributionChart.seriesName": "distribution",
|
||||
"xpack.dataVisualizer.dataGrid.field.topValuesLabel": "Valeurs les plus élevées",
|
||||
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.booleanContent.falseCountLabel": "faux",
|
||||
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.booleanContent.trueCountLabel": "vrai",
|
||||
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.countLabel": "compte",
|
||||
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.distinctValueLabel": "valeurs distinctes",
|
||||
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.metaTableTitle": "Statistiques des documents",
|
||||
|
@ -10259,13 +10254,10 @@
|
|||
"xpack.dataVisualizer.removeCombinedFieldsLabel": "Retirer le champ combiné",
|
||||
"xpack.dataVisualizer.samplingOptionsButton": "Options d’échantillonnage",
|
||||
"xpack.dataVisualizer.searchPanel.allFieldsLabel": "Tous les champs",
|
||||
"xpack.dataVisualizer.searchPanel.allOptionLabel": "Tout rechercher",
|
||||
"xpack.dataVisualizer.searchPanel.invalidSyntax": "Syntaxe non valide",
|
||||
"xpack.dataVisualizer.searchPanel.numberFieldsLabel": "Champs de numéros",
|
||||
"xpack.dataVisualizer.searchPanel.queryBarPlaceholder": "La sélection d'une taille d'échantillon plus petite réduira les temps d'exécution de la requête et la charge sur le cluster.",
|
||||
"xpack.dataVisualizer.searchPanel.queryBarPlaceholderText": "Rechercher… (par exemple, status:200 AND extension:\"PHP\")",
|
||||
"xpack.dataVisualizer.searchPanel.randomSamplerMessage": "Des valeurs approximatives sont indiquées dans le décompte de documents et le graphique, qui utilisent des agrégations par échantillonnage aléatoire.",
|
||||
"xpack.dataVisualizer.searchPanel.sampleSizeAriaLabel": "Sélectionner le nombre de documents à échantillonner",
|
||||
"xpack.dataVisualizer.searchPanel.showEmptyFields": "Afficher les champs vides",
|
||||
"xpack.dataVisualizer.title": "Charger un fichier",
|
||||
"xpack.embeddableEnhanced.actions.panelNotifications.manyDrilldowns": "Le panneau comporte {count} recherches",
|
||||
|
|
|
@ -9928,8 +9928,6 @@
|
|||
"xpack.dataVisualizer.dataGrid.field.metricDistributionChart.tooltipValueBetweenLabel": "{percent}% のドキュメントに {minValFormatted} から {maxValFormatted} の間の値があります",
|
||||
"xpack.dataVisualizer.dataGrid.field.metricDistributionChart.tooltipValueEqualLabel": "{percent}% のドキュメントに {valFormatted} の値があります",
|
||||
"xpack.dataVisualizer.dataGrid.field.removeFilterAriaLabel": "{fieldName}の除外:\"{value}\"",
|
||||
"xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromSampleDescription": "1 つのシャードにつき {topValuesSamplerShardSize} のドキュメントのサンプルで計算されています",
|
||||
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.choroplethMapTopValues.calculatedFromSampleDescription": "1 つのシャードにつき {topValuesSamplerShardSize} のドキュメントのサンプルで計算されています",
|
||||
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.numberContent.displayingPercentilesLabel": "{minPercent} - {maxPercent} パーセンタイルを表示中",
|
||||
"xpack.dataVisualizer.dataGrid.fieldText.fieldMayBePopulatedDescription": "たとえば、ドキュメントマッピングで {copyToParam} パラメーターを使ったり、{includesParam} と {excludesParam} パラメーターを使用してインデックスした後に {sourceParam} フィールドから切り取ったりして入力される場合があります。",
|
||||
"xpack.dataVisualizer.dataGrid.fieldText.fieldNotPresentDescription": "このフィールドはクエリが実行されたドキュメントの {sourceParam} フィールドにありませんでした。",
|
||||
|
@ -9966,7 +9964,6 @@
|
|||
"xpack.dataVisualizer.nameCollisionMsg": "「{name}」はすでに存在します。一意の名前を入力してください。",
|
||||
"xpack.dataVisualizer.randomSamplerSettingsPopUp.probabilityLabel": "使用された確率:{samplingProbability}%",
|
||||
"xpack.dataVisualizer.searchPanel.ofFieldsTotal": "合計 {totalCount}",
|
||||
"xpack.dataVisualizer.searchPanel.sampleSizeOptionLabel": "サンプルサイズ(シャード単位):{wrappedValue}",
|
||||
"xpack.dataVisualizer.searchPanel.totalDocCountLabel": "合計ドキュメント数:{prepend}{strongTotalCount}",
|
||||
"xpack.dataVisualizer.searchPanel.totalDocCountNumber": "{totalCount, plural, other {#}}",
|
||||
"xpack.dataVisualizer.addCombinedFieldsLabel": "結合されたフィールドを追加",
|
||||
|
@ -9999,8 +9996,6 @@
|
|||
"xpack.dataVisualizer.dataGrid.field.loadingLabel": "読み込み中",
|
||||
"xpack.dataVisualizer.dataGrid.field.metricDistributionChart.seriesName": "分布",
|
||||
"xpack.dataVisualizer.dataGrid.field.topValuesLabel": "トップの値",
|
||||
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.booleanContent.falseCountLabel": "false",
|
||||
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.booleanContent.trueCountLabel": "true",
|
||||
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.countLabel": "カウント",
|
||||
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.distinctValueLabel": "固有の値",
|
||||
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.metaTableTitle": "ドキュメント統計情報",
|
||||
|
@ -10245,13 +10240,10 @@
|
|||
"xpack.dataVisualizer.removeCombinedFieldsLabel": "結合されたフィールドを削除",
|
||||
"xpack.dataVisualizer.samplingOptionsButton": "抽出オプション",
|
||||
"xpack.dataVisualizer.searchPanel.allFieldsLabel": "すべてのフィールド",
|
||||
"xpack.dataVisualizer.searchPanel.allOptionLabel": "すべて検索",
|
||||
"xpack.dataVisualizer.searchPanel.invalidSyntax": "無効な構文",
|
||||
"xpack.dataVisualizer.searchPanel.numberFieldsLabel": "数値フィールド",
|
||||
"xpack.dataVisualizer.searchPanel.queryBarPlaceholder": "小さいサンプルサイズを選択することで、クエリの実行時間を短縮しクラスターへの負荷を軽減できます。",
|
||||
"xpack.dataVisualizer.searchPanel.queryBarPlaceholderText": "検索…(例:status:200 AND extension:\"PHP\")",
|
||||
"xpack.dataVisualizer.searchPanel.randomSamplerMessage": "近似値は、ランダムサンプラーアグリゲーションを使用する、合計ドキュメント数およびグラフに表示されます。",
|
||||
"xpack.dataVisualizer.searchPanel.sampleSizeAriaLabel": "サンプリングするドキュメント数を選択してください",
|
||||
"xpack.dataVisualizer.searchPanel.showEmptyFields": "空のフィールドを表示",
|
||||
"xpack.dataVisualizer.title": "ファイルをアップロード",
|
||||
"xpack.embeddableEnhanced.actions.panelNotifications.manyDrilldowns": "パネルには{count}個のドリルダウンがあります",
|
||||
|
|
|
@ -9946,8 +9946,6 @@
|
|||
"xpack.dataVisualizer.dataGrid.field.metricDistributionChart.tooltipValueBetweenLabel": "{percent}% 的文档具有介于 {minValFormatted} 和 {maxValFormatted} 之间的值",
|
||||
"xpack.dataVisualizer.dataGrid.field.metricDistributionChart.tooltipValueEqualLabel": "{percent}% 的文档的值为 {valFormatted}",
|
||||
"xpack.dataVisualizer.dataGrid.field.removeFilterAriaLabel": "筛除 {fieldName}:“{value}”",
|
||||
"xpack.dataVisualizer.dataGrid.field.topValues.calculatedFromSampleDescription": "基于每个分片的 {topValuesSamplerShardSize} 文档样例计算",
|
||||
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.choroplethMapTopValues.calculatedFromSampleDescription": "基于每个分片的 {topValuesSamplerShardSize} 文档样例计算",
|
||||
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.numberContent.displayingPercentilesLabel": "正在显示 {minPercent} - {maxPercent} 百分位数",
|
||||
"xpack.dataVisualizer.dataGrid.fieldText.fieldMayBePopulatedDescription": "例如,可以使用文档映射中的 {copyToParam} 参数进行填充,也可以在索引后通过使用 {includesParam} 和 {excludesParam} 参数从 {sourceParam} 字段中修剪。",
|
||||
"xpack.dataVisualizer.dataGrid.fieldText.fieldNotPresentDescription": "查询的文档的 {sourceParam} 字段中不存在此字段。",
|
||||
|
@ -9985,7 +9983,6 @@
|
|||
"xpack.dataVisualizer.nameCollisionMsg": "“{name}”已存在,请提供唯一名称",
|
||||
"xpack.dataVisualizer.randomSamplerSettingsPopUp.probabilityLabel": "使用的概率:{samplingProbability}%",
|
||||
"xpack.dataVisualizer.searchPanel.ofFieldsTotal": ",共 {totalCount} 个",
|
||||
"xpack.dataVisualizer.searchPanel.sampleSizeOptionLabel": "样本大小(每分片):{wrappedValue}",
|
||||
"xpack.dataVisualizer.searchPanel.totalDocCountLabel": "文档总数:{prepend}{strongTotalCount}",
|
||||
"xpack.dataVisualizer.searchPanel.totalDocCountNumber": "{totalCount, plural, other {#}}",
|
||||
"xpack.dataVisualizer.addCombinedFieldsLabel": "添加组合字段",
|
||||
|
@ -10018,8 +10015,6 @@
|
|||
"xpack.dataVisualizer.dataGrid.field.loadingLabel": "正在加载",
|
||||
"xpack.dataVisualizer.dataGrid.field.metricDistributionChart.seriesName": "分布",
|
||||
"xpack.dataVisualizer.dataGrid.field.topValuesLabel": "排名最前值",
|
||||
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.booleanContent.falseCountLabel": "false",
|
||||
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.booleanContent.trueCountLabel": "true",
|
||||
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.countLabel": "计数",
|
||||
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.distinctValueLabel": "不同值",
|
||||
"xpack.dataVisualizer.dataGrid.fieldExpandedRow.documentStatsTable.metaTableTitle": "文档统计",
|
||||
|
@ -10264,13 +10259,10 @@
|
|||
"xpack.dataVisualizer.removeCombinedFieldsLabel": "移除组合字段",
|
||||
"xpack.dataVisualizer.samplingOptionsButton": "采样选项",
|
||||
"xpack.dataVisualizer.searchPanel.allFieldsLabel": "所有字段",
|
||||
"xpack.dataVisualizer.searchPanel.allOptionLabel": "搜索全部",
|
||||
"xpack.dataVisualizer.searchPanel.invalidSyntax": "语法无效",
|
||||
"xpack.dataVisualizer.searchPanel.numberFieldsLabel": "字段数目",
|
||||
"xpack.dataVisualizer.searchPanel.queryBarPlaceholder": "选择较小的样例大小将减少查询运行时间和集群上的负载。",
|
||||
"xpack.dataVisualizer.searchPanel.queryBarPlaceholderText": "搜索……(例如,status:200 AND extension:\"PHP\")",
|
||||
"xpack.dataVisualizer.searchPanel.randomSamplerMessage": "总文档计数和图表中将显示近似值,它们使用随机采样器聚合。",
|
||||
"xpack.dataVisualizer.searchPanel.sampleSizeAriaLabel": "选择要采样的文档数目",
|
||||
"xpack.dataVisualizer.searchPanel.showEmptyFields": "显示空字段",
|
||||
"xpack.dataVisualizer.title": "上传文件",
|
||||
"xpack.embeddableEnhanced.actions.panelNotifications.manyDrilldowns": "面板有 {count} 个向下钻取",
|
||||
|
|
|
@ -14,7 +14,7 @@ import {
|
|||
farequoteKQLSearchTestData,
|
||||
farequoteLuceneSearchTestData,
|
||||
sampleLogTestData,
|
||||
} from './index_test_data';
|
||||
} from './index_test_data_random_sampler';
|
||||
|
||||
export default function ({ getPageObject, getService }: FtrProviderContext) {
|
||||
const headerPage = getPageObject('header');
|
||||
|
@ -62,7 +62,6 @@ export default function ({ getPageObject, getService }: FtrProviderContext) {
|
|||
}
|
||||
|
||||
await ml.dataVisualizerTable.assertSearchPanelExist();
|
||||
await ml.dataVisualizerTable.assertSampleSizeInputExists();
|
||||
await ml.dataVisualizerTable.assertFieldTypeInputExists();
|
||||
await ml.dataVisualizerTable.assertFieldNameInputExists();
|
||||
|
||||
|
@ -113,18 +112,6 @@ export default function ({ getPageObject, getService }: FtrProviderContext) {
|
|||
);
|
||||
}
|
||||
|
||||
await ml.testExecution.logTestStep(
|
||||
`${testData.suiteTitle} sample size control changes non-metric fields`
|
||||
);
|
||||
for (const sampleSizeCase of testData.sampleSizeValidations) {
|
||||
const { size, expected } = sampleSizeCase;
|
||||
await ml.dataVisualizerTable.setSampleSizeInputValue(
|
||||
size,
|
||||
expected.field,
|
||||
expected.docCountFormatted
|
||||
);
|
||||
}
|
||||
|
||||
await ml.testExecution.logTestStep('sets and resets field type filter correctly');
|
||||
await ml.dataVisualizerTable.setFieldTypeFilter(
|
||||
testData.fieldTypeFilters,
|
||||
|
|
|
@ -63,7 +63,7 @@ export default function ({ getService }: FtrProviderContext) {
|
|||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 11,
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '86,274 (100%)',
|
||||
viewableInLens: true,
|
||||
hasActionMenu: true,
|
||||
},
|
||||
|
@ -92,7 +92,7 @@ export default function ({ getService }: FtrProviderContext) {
|
|||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '86,274 (100%)',
|
||||
statsMaxDecimalPlaces: 3,
|
||||
topValuesCount: 11,
|
||||
viewableInLens: true,
|
||||
|
@ -153,7 +153,6 @@ export default function ({ getService }: FtrProviderContext) {
|
|||
}
|
||||
|
||||
await ml.dataVisualizerTable.assertSearchPanelExist();
|
||||
await ml.dataVisualizerTable.assertSampleSizeInputExists();
|
||||
await ml.dataVisualizerTable.assertFieldTypeInputExists();
|
||||
await ml.dataVisualizerTable.assertFieldNameInputExists();
|
||||
|
||||
|
|
|
@ -15,8 +15,8 @@ export const farequoteDataViewTestData: TestData = {
|
|||
fieldNameFilters: ['airline', '@timestamp'],
|
||||
fieldTypeFilters: [ML_JOB_FIELD_TYPES.KEYWORD],
|
||||
sampleSizeValidations: [
|
||||
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1000 (100%)' } },
|
||||
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5000 (100%)' } },
|
||||
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
|
||||
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
|
||||
],
|
||||
expected: {
|
||||
totalDocCountFormatted: '86,274',
|
||||
|
@ -27,7 +27,7 @@ export const farequoteDataViewTestData: TestData = {
|
|||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '5,000 (100%)',
|
||||
statsMaxDecimalPlaces: 3,
|
||||
topValuesCount: 11,
|
||||
viewableInLens: true,
|
||||
|
@ -40,7 +40,7 @@ export const farequoteDataViewTestData: TestData = {
|
|||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '5,000 (100%)',
|
||||
exampleCount: 2,
|
||||
viewableInLens: true,
|
||||
},
|
||||
|
@ -61,7 +61,7 @@ export const farequoteDataViewTestData: TestData = {
|
|||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '5,000 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
{
|
||||
|
@ -71,7 +71,7 @@ export const farequoteDataViewTestData: TestData = {
|
|||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 11,
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '5,000 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
{
|
||||
|
@ -91,7 +91,7 @@ export const farequoteDataViewTestData: TestData = {
|
|||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '5,000 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
],
|
||||
|
@ -112,8 +112,8 @@ export const farequoteKQLSearchTestData: TestData = {
|
|||
fieldNameFilters: ['@version'],
|
||||
fieldTypeFilters: [ML_JOB_FIELD_TYPES.DATE, ML_JOB_FIELD_TYPES.TEXT],
|
||||
sampleSizeValidations: [
|
||||
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1000 (100%)' } },
|
||||
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5000 (100%)' } },
|
||||
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
|
||||
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
|
||||
],
|
||||
expected: {
|
||||
totalDocCountFormatted: '34,415',
|
||||
|
@ -124,7 +124,7 @@ export const farequoteKQLSearchTestData: TestData = {
|
|||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '5,000 (100%)',
|
||||
statsMaxDecimalPlaces: 3,
|
||||
topValuesCount: 11,
|
||||
viewableInLens: true,
|
||||
|
@ -137,7 +137,7 @@ export const farequoteKQLSearchTestData: TestData = {
|
|||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '5,000 (100%)',
|
||||
exampleCount: 2,
|
||||
viewableInLens: true,
|
||||
},
|
||||
|
@ -158,7 +158,7 @@ export const farequoteKQLSearchTestData: TestData = {
|
|||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '5,000 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
{
|
||||
|
@ -168,7 +168,7 @@ export const farequoteKQLSearchTestData: TestData = {
|
|||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 5,
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '5,000 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
{
|
||||
|
@ -188,7 +188,7 @@ export const farequoteKQLSearchTestData: TestData = {
|
|||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '5,000 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
],
|
||||
|
@ -209,8 +209,8 @@ export const farequoteKQLFiltersSearchTestData: TestData = {
|
|||
fieldNameFilters: ['@version'],
|
||||
fieldTypeFilters: [ML_JOB_FIELD_TYPES.DATE, ML_JOB_FIELD_TYPES.TEXT],
|
||||
sampleSizeValidations: [
|
||||
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1000 (100%)' } },
|
||||
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5000 (100%)' } },
|
||||
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
|
||||
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
|
||||
],
|
||||
expected: {
|
||||
filters: [{ key: 'airline', value: 'ASA' }],
|
||||
|
@ -222,7 +222,7 @@ export const farequoteKQLFiltersSearchTestData: TestData = {
|
|||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '5,000 (100%)',
|
||||
statsMaxDecimalPlaces: 3,
|
||||
topValuesCount: 11,
|
||||
viewableInLens: true,
|
||||
|
@ -235,7 +235,7 @@ export const farequoteKQLFiltersSearchTestData: TestData = {
|
|||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '5,000 (100%)',
|
||||
exampleCount: 2,
|
||||
viewableInLens: true,
|
||||
},
|
||||
|
@ -256,7 +256,7 @@ export const farequoteKQLFiltersSearchTestData: TestData = {
|
|||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '5,000 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
{
|
||||
|
@ -267,7 +267,7 @@ export const farequoteKQLFiltersSearchTestData: TestData = {
|
|||
loading: false,
|
||||
exampleCount: 1,
|
||||
exampleContent: ['ASA'],
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '5,000 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
{
|
||||
|
@ -287,7 +287,7 @@ export const farequoteKQLFiltersSearchTestData: TestData = {
|
|||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '5,000 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
],
|
||||
|
@ -308,8 +308,8 @@ export const farequoteLuceneSearchTestData: TestData = {
|
|||
fieldNameFilters: ['@version.keyword', 'type'],
|
||||
fieldTypeFilters: [ML_JOB_FIELD_TYPES.NUMBER],
|
||||
sampleSizeValidations: [
|
||||
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1000 (100%)' } },
|
||||
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5000 (100%)' } },
|
||||
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
|
||||
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
|
||||
],
|
||||
expected: {
|
||||
totalDocCountFormatted: '34,416',
|
||||
|
@ -320,7 +320,7 @@ export const farequoteLuceneSearchTestData: TestData = {
|
|||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '5,000 (100%)',
|
||||
statsMaxDecimalPlaces: 3,
|
||||
topValuesCount: 11,
|
||||
viewableInLens: true,
|
||||
|
@ -333,7 +333,7 @@ export const farequoteLuceneSearchTestData: TestData = {
|
|||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '5,000 (100%)',
|
||||
exampleCount: 2,
|
||||
viewableInLens: true,
|
||||
},
|
||||
|
@ -354,7 +354,7 @@ export const farequoteLuceneSearchTestData: TestData = {
|
|||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '5,000 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
{
|
||||
|
@ -364,7 +364,7 @@ export const farequoteLuceneSearchTestData: TestData = {
|
|||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 5,
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '5,000 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
{
|
||||
|
@ -384,7 +384,7 @@ export const farequoteLuceneSearchTestData: TestData = {
|
|||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '5,000 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
],
|
||||
|
@ -405,8 +405,8 @@ export const farequoteLuceneFiltersSearchTestData: TestData = {
|
|||
fieldNameFilters: ['@version.keyword', 'type'],
|
||||
fieldTypeFilters: [ML_JOB_FIELD_TYPES.NUMBER],
|
||||
sampleSizeValidations: [
|
||||
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1000 (100%)' } },
|
||||
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5000 (100%)' } },
|
||||
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
|
||||
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
|
||||
],
|
||||
expected: {
|
||||
filters: [{ key: 'airline', value: 'ASA' }],
|
||||
|
@ -418,7 +418,7 @@ export const farequoteLuceneFiltersSearchTestData: TestData = {
|
|||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '5,000 (100%)',
|
||||
statsMaxDecimalPlaces: 3,
|
||||
topValuesCount: 11,
|
||||
viewableInLens: true,
|
||||
|
@ -431,7 +431,7 @@ export const farequoteLuceneFiltersSearchTestData: TestData = {
|
|||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '5,000 (100%)',
|
||||
exampleCount: 2,
|
||||
viewableInLens: true,
|
||||
},
|
||||
|
@ -452,7 +452,7 @@ export const farequoteLuceneFiltersSearchTestData: TestData = {
|
|||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '5,000 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
{
|
||||
|
@ -463,7 +463,7 @@ export const farequoteLuceneFiltersSearchTestData: TestData = {
|
|||
loading: false,
|
||||
exampleCount: 1,
|
||||
exampleContent: ['ASA'],
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '5,000 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
{
|
||||
|
@ -483,7 +483,7 @@ export const farequoteLuceneFiltersSearchTestData: TestData = {
|
|||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '5000 (100%)',
|
||||
docCountFormatted: '5,000 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
],
|
||||
|
|
|
@ -0,0 +1,535 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0; you may not use this file except in compliance with the Elastic License
|
||||
* 2.0.
|
||||
*/
|
||||
|
||||
import { ML_JOB_FIELD_TYPES } from '@kbn/ml-plugin/common/constants/field_types';
|
||||
import { TestData } from './types';
|
||||
|
||||
export const farequoteDataViewTestData: TestData = {
|
||||
suiteTitle: 'farequote index pattern',
|
||||
isSavedSearch: false,
|
||||
sourceIndexOrSavedSearch: 'ft_farequote',
|
||||
fieldNameFilters: ['airline', '@timestamp'],
|
||||
fieldTypeFilters: [ML_JOB_FIELD_TYPES.KEYWORD],
|
||||
sampleSizeValidations: [
|
||||
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
|
||||
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
|
||||
],
|
||||
expected: {
|
||||
totalDocCountFormatted: '86,274',
|
||||
metricFields: [
|
||||
{
|
||||
fieldName: 'responsetime',
|
||||
type: ML_JOB_FIELD_TYPES.NUMBER,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
docCountFormatted: '86,274 (100%)',
|
||||
statsMaxDecimalPlaces: 3,
|
||||
topValuesCount: 11,
|
||||
viewableInLens: true,
|
||||
},
|
||||
],
|
||||
nonMetricFields: [
|
||||
{
|
||||
fieldName: '@timestamp',
|
||||
type: ML_JOB_FIELD_TYPES.DATE,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
docCountFormatted: '86,274 (100%)',
|
||||
exampleCount: 2,
|
||||
viewableInLens: true,
|
||||
},
|
||||
{
|
||||
fieldName: '@version',
|
||||
type: ML_JOB_FIELD_TYPES.TEXT,
|
||||
existsInDocs: true,
|
||||
aggregatable: false,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '',
|
||||
viewableInLens: false,
|
||||
},
|
||||
{
|
||||
fieldName: '@version.keyword',
|
||||
type: ML_JOB_FIELD_TYPES.KEYWORD,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '86,274 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
{
|
||||
fieldName: 'airline',
|
||||
type: ML_JOB_FIELD_TYPES.KEYWORD,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 11,
|
||||
docCountFormatted: '86,274 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
{
|
||||
fieldName: 'type',
|
||||
type: ML_JOB_FIELD_TYPES.TEXT,
|
||||
existsInDocs: true,
|
||||
aggregatable: false,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '',
|
||||
viewableInLens: false,
|
||||
},
|
||||
{
|
||||
fieldName: 'type.keyword',
|
||||
type: ML_JOB_FIELD_TYPES.KEYWORD,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '86,274 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
],
|
||||
emptyFields: ['sourcetype'],
|
||||
visibleMetricFieldsCount: 1,
|
||||
totalMetricFieldsCount: 1,
|
||||
populatedFieldsCount: 7,
|
||||
totalFieldsCount: 8,
|
||||
fieldNameFiltersResultCount: 2,
|
||||
fieldTypeFiltersResultCount: 3,
|
||||
},
|
||||
};
|
||||
|
||||
export const farequoteKQLSearchTestData: TestData = {
|
||||
suiteTitle: 'KQL saved search',
|
||||
isSavedSearch: true,
|
||||
sourceIndexOrSavedSearch: 'ft_farequote_kuery',
|
||||
fieldNameFilters: ['@version'],
|
||||
fieldTypeFilters: [ML_JOB_FIELD_TYPES.DATE, ML_JOB_FIELD_TYPES.TEXT],
|
||||
sampleSizeValidations: [
|
||||
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
|
||||
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
|
||||
],
|
||||
expected: {
|
||||
totalDocCountFormatted: '34,415',
|
||||
metricFields: [
|
||||
{
|
||||
fieldName: 'responsetime',
|
||||
type: ML_JOB_FIELD_TYPES.NUMBER,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
docCountFormatted: '34,415 (100%)',
|
||||
statsMaxDecimalPlaces: 3,
|
||||
topValuesCount: 11,
|
||||
viewableInLens: true,
|
||||
},
|
||||
],
|
||||
nonMetricFields: [
|
||||
{
|
||||
fieldName: '@timestamp',
|
||||
type: ML_JOB_FIELD_TYPES.DATE,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
docCountFormatted: '34,415 (100%)',
|
||||
exampleCount: 2,
|
||||
viewableInLens: true,
|
||||
},
|
||||
{
|
||||
fieldName: '@version',
|
||||
type: ML_JOB_FIELD_TYPES.TEXT,
|
||||
existsInDocs: true,
|
||||
aggregatable: false,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '',
|
||||
viewableInLens: false,
|
||||
},
|
||||
{
|
||||
fieldName: '@version.keyword',
|
||||
type: ML_JOB_FIELD_TYPES.KEYWORD,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '34,415 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
{
|
||||
fieldName: 'airline',
|
||||
type: ML_JOB_FIELD_TYPES.KEYWORD,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 5,
|
||||
docCountFormatted: '34,415 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
{
|
||||
fieldName: 'type',
|
||||
type: ML_JOB_FIELD_TYPES.TEXT,
|
||||
existsInDocs: true,
|
||||
aggregatable: false,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '',
|
||||
viewableInLens: false,
|
||||
},
|
||||
{
|
||||
fieldName: 'type.keyword',
|
||||
type: ML_JOB_FIELD_TYPES.KEYWORD,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '34,415 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
],
|
||||
emptyFields: ['sourcetype'],
|
||||
visibleMetricFieldsCount: 1,
|
||||
totalMetricFieldsCount: 1,
|
||||
populatedFieldsCount: 7,
|
||||
totalFieldsCount: 8,
|
||||
fieldNameFiltersResultCount: 1,
|
||||
fieldTypeFiltersResultCount: 3,
|
||||
},
|
||||
};
|
||||
|
||||
export const farequoteKQLFiltersSearchTestData: TestData = {
|
||||
suiteTitle: 'KQL saved search and filters',
|
||||
isSavedSearch: true,
|
||||
sourceIndexOrSavedSearch: 'ft_farequote_filter_and_kuery',
|
||||
fieldNameFilters: ['@version'],
|
||||
fieldTypeFilters: [ML_JOB_FIELD_TYPES.DATE, ML_JOB_FIELD_TYPES.TEXT],
|
||||
sampleSizeValidations: [
|
||||
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
|
||||
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
|
||||
],
|
||||
expected: {
|
||||
filters: [{ key: 'airline', value: 'ASA' }],
|
||||
totalDocCountFormatted: '5,674',
|
||||
metricFields: [
|
||||
{
|
||||
fieldName: 'responsetime',
|
||||
type: ML_JOB_FIELD_TYPES.NUMBER,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
docCountFormatted: '5,674 (100%)',
|
||||
statsMaxDecimalPlaces: 3,
|
||||
topValuesCount: 11,
|
||||
viewableInLens: true,
|
||||
},
|
||||
],
|
||||
nonMetricFields: [
|
||||
{
|
||||
fieldName: '@timestamp',
|
||||
type: ML_JOB_FIELD_TYPES.DATE,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
docCountFormatted: '5,674 (100%)',
|
||||
exampleCount: 2,
|
||||
viewableInLens: true,
|
||||
},
|
||||
{
|
||||
fieldName: '@version',
|
||||
type: ML_JOB_FIELD_TYPES.TEXT,
|
||||
existsInDocs: true,
|
||||
aggregatable: false,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '',
|
||||
viewableInLens: false,
|
||||
},
|
||||
{
|
||||
fieldName: '@version.keyword',
|
||||
type: ML_JOB_FIELD_TYPES.KEYWORD,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '5,674 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
{
|
||||
fieldName: 'airline',
|
||||
type: ML_JOB_FIELD_TYPES.KEYWORD,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
exampleContent: ['ASA'],
|
||||
docCountFormatted: '5,674 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
{
|
||||
fieldName: 'type',
|
||||
type: ML_JOB_FIELD_TYPES.TEXT,
|
||||
existsInDocs: true,
|
||||
aggregatable: false,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '',
|
||||
viewableInLens: false,
|
||||
},
|
||||
{
|
||||
fieldName: 'type.keyword',
|
||||
type: ML_JOB_FIELD_TYPES.KEYWORD,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '5,674 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
],
|
||||
emptyFields: ['sourcetype'],
|
||||
visibleMetricFieldsCount: 1,
|
||||
totalMetricFieldsCount: 1,
|
||||
populatedFieldsCount: 7,
|
||||
totalFieldsCount: 8,
|
||||
fieldNameFiltersResultCount: 1,
|
||||
fieldTypeFiltersResultCount: 3,
|
||||
},
|
||||
};
|
||||
|
||||
export const farequoteLuceneSearchTestData: TestData = {
|
||||
suiteTitle: 'lucene saved search',
|
||||
isSavedSearch: true,
|
||||
sourceIndexOrSavedSearch: 'ft_farequote_lucene',
|
||||
fieldNameFilters: ['@version.keyword', 'type'],
|
||||
fieldTypeFilters: [ML_JOB_FIELD_TYPES.NUMBER],
|
||||
sampleSizeValidations: [
|
||||
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
|
||||
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
|
||||
],
|
||||
expected: {
|
||||
totalDocCountFormatted: '34,416',
|
||||
metricFields: [
|
||||
{
|
||||
fieldName: 'responsetime',
|
||||
type: ML_JOB_FIELD_TYPES.NUMBER,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
docCountFormatted: '34,416 (100%)',
|
||||
statsMaxDecimalPlaces: 3,
|
||||
topValuesCount: 11,
|
||||
viewableInLens: true,
|
||||
},
|
||||
],
|
||||
nonMetricFields: [
|
||||
{
|
||||
fieldName: '@timestamp',
|
||||
type: ML_JOB_FIELD_TYPES.DATE,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
docCountFormatted: '34,416 (100%)',
|
||||
exampleCount: 2,
|
||||
viewableInLens: true,
|
||||
},
|
||||
{
|
||||
fieldName: '@version',
|
||||
type: ML_JOB_FIELD_TYPES.TEXT,
|
||||
existsInDocs: true,
|
||||
aggregatable: false,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '',
|
||||
viewableInLens: false,
|
||||
},
|
||||
{
|
||||
fieldName: '@version.keyword',
|
||||
type: ML_JOB_FIELD_TYPES.KEYWORD,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '34,416 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
{
|
||||
fieldName: 'airline',
|
||||
type: ML_JOB_FIELD_TYPES.KEYWORD,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 5,
|
||||
docCountFormatted: '34,416 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
{
|
||||
fieldName: 'type',
|
||||
type: ML_JOB_FIELD_TYPES.TEXT,
|
||||
existsInDocs: true,
|
||||
aggregatable: false,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '',
|
||||
viewableInLens: false,
|
||||
},
|
||||
{
|
||||
fieldName: 'type.keyword',
|
||||
type: ML_JOB_FIELD_TYPES.KEYWORD,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '34,416 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
],
|
||||
emptyFields: ['sourcetype'],
|
||||
visibleMetricFieldsCount: 1,
|
||||
totalMetricFieldsCount: 1,
|
||||
populatedFieldsCount: 7,
|
||||
totalFieldsCount: 8,
|
||||
fieldNameFiltersResultCount: 2,
|
||||
fieldTypeFiltersResultCount: 1,
|
||||
},
|
||||
};
|
||||
|
||||
export const farequoteLuceneFiltersSearchTestData: TestData = {
|
||||
suiteTitle: 'lucene saved search and filter',
|
||||
isSavedSearch: true,
|
||||
sourceIndexOrSavedSearch: 'ft_farequote_filter_and_lucene',
|
||||
fieldNameFilters: ['@version.keyword', 'type'],
|
||||
fieldTypeFilters: [ML_JOB_FIELD_TYPES.NUMBER],
|
||||
sampleSizeValidations: [
|
||||
{ size: 1000, expected: { field: 'airline', docCountFormatted: '1,000 (100%)' } },
|
||||
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '5,000 (100%)' } },
|
||||
],
|
||||
expected: {
|
||||
filters: [{ key: 'airline', value: 'ASA' }],
|
||||
totalDocCountFormatted: '5,673',
|
||||
metricFields: [
|
||||
{
|
||||
fieldName: 'responsetime',
|
||||
type: ML_JOB_FIELD_TYPES.NUMBER,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
docCountFormatted: '5,673 (100%)',
|
||||
statsMaxDecimalPlaces: 3,
|
||||
topValuesCount: 11,
|
||||
viewableInLens: true,
|
||||
},
|
||||
],
|
||||
nonMetricFields: [
|
||||
{
|
||||
fieldName: '@timestamp',
|
||||
type: ML_JOB_FIELD_TYPES.DATE,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
docCountFormatted: '5,673 (100%)',
|
||||
exampleCount: 2,
|
||||
viewableInLens: true,
|
||||
},
|
||||
{
|
||||
fieldName: '@version',
|
||||
type: ML_JOB_FIELD_TYPES.TEXT,
|
||||
existsInDocs: true,
|
||||
aggregatable: false,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '',
|
||||
viewableInLens: false,
|
||||
},
|
||||
{
|
||||
fieldName: '@version.keyword',
|
||||
type: ML_JOB_FIELD_TYPES.KEYWORD,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '5,673 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
{
|
||||
fieldName: 'airline',
|
||||
type: ML_JOB_FIELD_TYPES.KEYWORD,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
exampleContent: ['ASA'],
|
||||
docCountFormatted: '5,673 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
{
|
||||
fieldName: 'type',
|
||||
type: ML_JOB_FIELD_TYPES.TEXT,
|
||||
existsInDocs: true,
|
||||
aggregatable: false,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '',
|
||||
viewableInLens: false,
|
||||
},
|
||||
{
|
||||
fieldName: 'type.keyword',
|
||||
type: ML_JOB_FIELD_TYPES.KEYWORD,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
exampleCount: 1,
|
||||
docCountFormatted: '5,673 (100%)',
|
||||
viewableInLens: true,
|
||||
},
|
||||
],
|
||||
emptyFields: ['sourcetype'],
|
||||
visibleMetricFieldsCount: 1,
|
||||
totalMetricFieldsCount: 1,
|
||||
populatedFieldsCount: 7,
|
||||
totalFieldsCount: 8,
|
||||
fieldNameFiltersResultCount: 2,
|
||||
fieldTypeFiltersResultCount: 1,
|
||||
},
|
||||
};
|
||||
|
||||
export const sampleLogTestData: TestData = {
|
||||
suiteTitle: 'geo point field',
|
||||
isSavedSearch: false,
|
||||
sourceIndexOrSavedSearch: 'ft_module_sample_logs',
|
||||
fieldNameFilters: ['geo.coordinates'],
|
||||
fieldTypeFilters: [ML_JOB_FIELD_TYPES.GEO_POINT],
|
||||
rowsPerPage: 50,
|
||||
expected: {
|
||||
totalDocCountFormatted: '408',
|
||||
metricFields: [],
|
||||
// only testing the geo_point fields
|
||||
nonMetricFields: [
|
||||
{
|
||||
fieldName: 'geo.coordinates',
|
||||
type: ML_JOB_FIELD_TYPES.GEO_POINT,
|
||||
existsInDocs: true,
|
||||
aggregatable: true,
|
||||
loading: false,
|
||||
docCountFormatted: '408 (100%)',
|
||||
exampleCount: 10,
|
||||
viewableInLens: false,
|
||||
},
|
||||
],
|
||||
emptyFields: [],
|
||||
visibleMetricFieldsCount: 4,
|
||||
totalMetricFieldsCount: 5,
|
||||
populatedFieldsCount: 35,
|
||||
totalFieldsCount: 36,
|
||||
fieldNameFiltersResultCount: 1,
|
||||
fieldTypeFiltersResultCount: 1,
|
||||
},
|
||||
sampleSizeValidations: [
|
||||
{ size: 1000, expected: { field: 'geo.coordinates', docCountFormatted: '408 (100%)' } },
|
||||
{ size: 5000, expected: { field: '@timestamp', docCountFormatted: '408 (100%)' } },
|
||||
],
|
||||
};
|
|
@ -290,25 +290,6 @@ export function MachineLearningDataVisualizerTableProvider(
|
|||
await testSubjects.existOrFail('dataVisualizerFieldTypeSelect');
|
||||
}
|
||||
|
||||
public async assertSampleSizeInputExists() {
|
||||
await testSubjects.existOrFail('dataVisualizerShardSizeSelect');
|
||||
}
|
||||
|
||||
public async setSampleSizeInputValue(
|
||||
sampleSize: number | 'all',
|
||||
fieldName: string,
|
||||
docCountFormatted: string
|
||||
) {
|
||||
await this.assertSampleSizeInputExists();
|
||||
await testSubjects.clickWhenNotDisabledWithoutRetry('dataVisualizerShardSizeSelect');
|
||||
await testSubjects.existOrFail(`dataVisualizerShardSizeOption ${sampleSize}`);
|
||||
await testSubjects.click(`dataVisualizerShardSizeOption ${sampleSize}`);
|
||||
|
||||
await retry.tryForTime(5000, async () => {
|
||||
await this.assertFieldDocCount(fieldName, docCountFormatted);
|
||||
});
|
||||
}
|
||||
|
||||
public async setFieldTypeFilter(fieldTypes: string[], expectedRowCount = 1) {
|
||||
await this.assertFieldTypeInputExists();
|
||||
await mlCommonUI.setMultiSelectFilter('dataVisualizerFieldTypeSelect', fieldTypes);
|
||||
|
|
|
@ -103,11 +103,6 @@ export default function ({ getPageObject, getService }: FtrProviderContext) {
|
|||
await ml.testExecution.logTestStep('set data visualizer options');
|
||||
await ml.dataVisualizerIndexBased.assertTimeRangeSelectorSectionExists();
|
||||
await ml.dataVisualizerIndexBased.clickUseFullDataButton('14,074');
|
||||
await ml.dataVisualizerTable.setSampleSizeInputValue(
|
||||
'all',
|
||||
'geo.coordinates',
|
||||
'14074 (100%)'
|
||||
);
|
||||
await ml.dataVisualizerTable.setFieldTypeFilter([ML_JOB_FIELD_TYPES.GEO_POINT]);
|
||||
|
||||
await ml.testExecution.logTestStep('set maps options and take screenshot');
|
||||
|
|
|
@ -66,11 +66,6 @@ export default function ({ getPageObject, getService }: FtrProviderContext) {
|
|||
await ml.testExecution.logTestStep('set data visualizer options');
|
||||
await ml.dataVisualizerIndexBased.assertTimeRangeSelectorSectionExists();
|
||||
await ml.dataVisualizerIndexBased.clickUseFullDataButton('14,074');
|
||||
await ml.dataVisualizerTable.setSampleSizeInputValue(
|
||||
'all',
|
||||
'geo.coordinates',
|
||||
'14074 (100%)'
|
||||
);
|
||||
await ml.dataVisualizerTable.setFieldNameFilter(['geo.dest']);
|
||||
|
||||
await ml.testExecution.logTestStep('set maps options and take screenshot');
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue