[ML] Data Frame Analytics: Highlight filtered data in scatterplot charts (#144871)

## Summary

Related meta issue: https://github.com/elastic/kibana/issues/131551

This PR updates the scatterplot charts to show the full data sample.
When the user has added a filter/query in the query bar, the portion of
the data matching the filter is highlighted so it can be differentiated
from the background data.

Classification results view with query for `AvgTicketPrice > 400`

<img width="1032" alt="image"
src="https://user-images.githubusercontent.com/6446462/200716771-b2012e9b-c620-46a8-9dc3-92df23ef4476.png">

Outlier detection results view with same filter

<img width="1026" alt="image"
src="https://user-images.githubusercontent.com/6446462/200716858-01407906-34de-43d6-892b-7bbfede05eac.png">

Regression results view with same filter

<img width="1007" alt="image"
src="https://user-images.githubusercontent.com/6446462/200716910-41165b81-a300-420c-8976-47a0ea9612bf.png">

Help text:

<img width="1005" alt="image"
src="https://user-images.githubusercontent.com/6446462/201484563-9f4ca87b-3025-485f-ac0e-4a30deee847f.png">




### Checklist

Delete any items that are not applicable to this PR.

- [ ] Any text added follows [EUI's writing
guidelines](https://elastic.github.io/eui/#/guidelines/writing), uses
sentence case text and includes [i18n
support](https://github.com/elastic/kibana/blob/main/packages/kbn-i18n/README.md)
- [ ]
[Documentation](https://www.elastic.co/guide/en/kibana/master/development-documentation.html)
was added for features that require explanation or tutorials
- [ ] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios
- [ ] Any UI touched in this PR is usable by keyboard only (learn more
about [keyboard accessibility](https://webaim.org/techniques/keyboard/))
- [ ] Any UI touched in this PR does not create any new axe failures
(run axe in browser:
[FF](https://addons.mozilla.org/en-US/firefox/addon/axe-devtools/),
[Chrome](https://chrome.google.com/webstore/detail/axe-web-accessibility-tes/lhdoppojpmngadmnindnejefpokejbdd?hl=en-US))
- [ ] If a plugin configuration key changed, check if it needs to be
allowlisted in the cloud and added to the [docker
list](https://github.com/elastic/kibana/blob/main/src/dev/build/tasks/os_packages/docker_generator/resources/base/bin/kibana-docker)
- [ ] This renders correctly on smaller devices using a responsive
layout. (You can test this [in your
browser](https://www.browserstack.com/guide/responsive-testing-on-local-server))
- [ ] This was checked for [cross-browser
compatibility](https://www.elastic.co/support/matrix#matrix_browsers)

Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
Melissa Alvarez 2022-11-15 10:33:28 -05:00 committed by GitHub
parent 75ce1e397a
commit 295a267dae
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 301 additions and 170 deletions

View file

@ -29,7 +29,6 @@ import { stringHash } from '@kbn/ml-string-hash';
import { extractErrorMessage } from '../../../../common';
import { isRuntimeMappings } from '../../../../common/util/runtime_field_utils';
import { RuntimeMappings } from '../../../../common/types/fields';
import type { ResultsSearchQuery } from '../../data_frame_analytics/common/analytics';
import { getCombinedRuntimeMappings } from '../data_grid';
import { useMlApiContext } from '../../contexts/kibana';
@ -81,13 +80,25 @@ const OptionLabelWithIconTip: FC<OptionLabelWithIconTipProps> = ({ label, toolti
</>
);
/**
 * Converts raw search hits into the records handed to the scatterplot matrix.
 *
 * Each hit's `fields` (an empty object when absent) is passed through
 * `getProcessedFields` together with a predicate that matches keys starting
 * with `${resultsField}.feature_importance`. Any resulting record that holds
 * an array under at least one key is dropped.
 *
 * @param items Search hits to convert.
 * @param resultsField Name of the results field. When omitted, it is
 *   interpolated into the key prefix as the literal text `undefined`.
 * @returns The processed records that contain no array values.
 */
function filterChartableItems(items: estypes.SearchHit[], resultsField?: string) {
  const featureImportancePrefix = `${resultsField}.feature_importance`;
  const isFeatureImportanceKey = (key: string) => key.startsWith(featureImportancePrefix);

  const processed = items.map((hit) =>
    getProcessedFields(hit.fields ?? {}, isFeatureImportanceKey)
  );

  // Keep only records in which every value is a non-array.
  return processed.filter((record) =>
    Object.keys(record).every((field) => !Array.isArray(record[field]))
  );
}
export interface ScatterplotMatrixProps {
fields: string[];
index: string;
resultsField?: string;
color?: string;
legendType?: LegendType;
searchQuery?: ResultsSearchQuery;
searchQuery?: estypes.QueryDslQueryContainer;
runtimeMappings?: RuntimeMappings;
indexPattern?: DataView;
}
@ -128,7 +139,7 @@ export const ScatterplotMatrix: FC<ScatterplotMatrixProps> = ({
// contains the fetched documents and columns to be passed on to the Vega spec.
const [splom, setSplom] = useState<
{ items: any[]; columns: string[]; messages: string[] } | undefined
{ items: any[]; backgroundItems: any[]; columns: string[]; messages: string[] } | undefined
>();
// formats the array of field names for EuiComboBox
@ -165,7 +176,7 @@ export const ScatterplotMatrix: FC<ScatterplotMatrixProps> = ({
useEffect(() => {
if (fields.length === 0) {
setSplom({ columns: [], items: [], messages: [] });
setSplom({ columns: [], items: [], backgroundItems: [], messages: [] });
setIsLoading(false);
return;
}
@ -184,7 +195,7 @@ export const ScatterplotMatrix: FC<ScatterplotMatrixProps> = ({
...(includeOutlierScoreField ? [outlierScoreField] : []),
];
const query = randomizeQuery
const foregroundQuery = randomizeQuery
? {
function_score: {
query: searchQuery,
@ -193,33 +204,65 @@ export const ScatterplotMatrix: FC<ScatterplotMatrixProps> = ({
}
: searchQuery;
let backgroundQuery;
// If it's not the default query then we do a background search excluding the current query
if (
searchQuery &&
((searchQuery.match_all && Object.keys(searchQuery.match_all).length > 0) ||
(searchQuery.bool && Object.keys(searchQuery.bool).length > 0))
) {
backgroundQuery = randomizeQuery
? {
function_score: {
query: { bool: { must_not: [searchQuery] } },
random_score: { seed: 10, field: '_seq_no' },
},
}
: { bool: { must_not: [searchQuery] } };
}
const combinedRuntimeMappings =
indexPattern && getCombinedRuntimeMappings(indexPattern, runtimeMappings);
const resp: estypes.SearchResponse = await esSearch({
index,
body: {
fields: queryFields,
_source: false,
query,
from: 0,
size: fetchSize,
...(isRuntimeMappings(combinedRuntimeMappings)
? { runtime_mappings: combinedRuntimeMappings }
: {}),
},
});
const body = {
fields: queryFields,
_source: false,
query: foregroundQuery,
from: 0,
size: fetchSize,
...(isRuntimeMappings(combinedRuntimeMappings)
? { runtime_mappings: combinedRuntimeMappings }
: {}),
};
const promises = [
esSearch({
index,
body,
}),
];
if (backgroundQuery) {
promises.push(
esSearch({
index,
body: { ...body, query: backgroundQuery },
})
);
}
const [foregroundResp, backgroundResp] = await Promise.all<estypes.SearchResponse>(
promises
);
if (!options.didCancel) {
const items = resp.hits.hits
.map((d) =>
getProcessedFields(d.fields ?? {}, (key: string) =>
key.startsWith(`${resultsField}.feature_importance`)
)
)
.filter((d) => !Object.keys(d).some((field) => Array.isArray(d[field])));
const items = filterChartableItems(foregroundResp.hits.hits, resultsField);
const backgroundItems = filterChartableItems(
backgroundResp?.hits.hits ?? [],
resultsField
);
const originalDocsCount = resp.hits.hits.length;
const originalDocsCount = foregroundResp.hits.hits.length;
const filteredDocsCount = originalDocsCount - items.length;
if (originalDocsCount === filteredDocsCount) {
@ -229,7 +272,7 @@ export const ScatterplotMatrix: FC<ScatterplotMatrixProps> = ({
'All fetched documents included fields with arrays of values and cannot be visualized.',
})
);
} else if (resp.hits.hits.length !== items.length) {
} else if (foregroundResp.hits.hits.length !== items.length) {
messages.push(
i18n.translate('xpack.ml.splom.arrayFieldsWarningMessage', {
defaultMessage:
@ -242,12 +285,17 @@ export const ScatterplotMatrix: FC<ScatterplotMatrixProps> = ({
);
}
setSplom({ columns: fields, items, messages });
setSplom({ columns: fields, items, backgroundItems, messages });
setIsLoading(false);
}
} catch (e) {
setIsLoading(false);
setSplom({ columns: [], items: [], messages: [extractErrorMessage(e)] });
setSplom({
columns: [],
items: [],
backgroundItems: [],
messages: [extractErrorMessage(e)],
});
}
}
@ -265,10 +313,11 @@ export const ScatterplotMatrix: FC<ScatterplotMatrixProps> = ({
return;
}
const { items, columns } = splom;
const { items, backgroundItems, columns } = splom;
return getScatterplotMatrixVegaLiteSpec(
items,
backgroundItems,
columns,
euiTheme,
resultsField,
@ -409,7 +458,25 @@ export const ScatterplotMatrix: FC<ScatterplotMatrixProps> = ({
</>
)}
{splom.items.length > 0 && <VegaChart vegaSpec={vegaSpec} />}
{splom.items.length > 0 && (
<>
<VegaChart vegaSpec={vegaSpec} />
{splom.backgroundItems.length ? (
<>
<EuiSpacer size="s" />
<EuiFormRow
fullWidth
helpText={i18n.translate('xpack.ml.splom.backgroundLayerHelpText', {
defaultMessage:
"If the data points match your filter, they're shown in color; otherwise, they're blurred gray.",
})}
>
<></>
</EuiFormRow>
</>
) : null}
</>
)}
</div>
)}
</>

View file

@ -73,7 +73,8 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => {
it('should return the default spec for non-outliers without a legend', () => {
const data = [{ x: 1, y: 1 }];
const vegaLiteSpec = getScatterplotMatrixVegaLiteSpec(data, ['x', 'y'], euiThemeLight);
const vegaLiteSpec = getScatterplotMatrixVegaLiteSpec(data, [], ['x', 'y'], euiThemeLight);
const specForegroundLayer = vegaLiteSpec.spec.layer[0];
// A valid Vega Lite spec shouldn't throw an error when compiled.
expect(() => compile(vegaLiteSpec)).not.toThrow();
@ -82,17 +83,17 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => {
column: ['x', 'y'],
row: ['y', 'x'],
});
expect(vegaLiteSpec.spec.data.values).toEqual(data);
expect(vegaLiteSpec.spec.mark).toEqual({
expect(specForegroundLayer.data.values).toEqual(data);
expect(specForegroundLayer.mark).toEqual({
opacity: 0.75,
size: 8,
type: 'circle',
});
expect(vegaLiteSpec.spec.encoding.color).toEqual({
expect(specForegroundLayer.encoding.color).toEqual({
condition: [{ selection: USER_SELECTION }, { selection: SINGLE_POINT_CLICK }],
value: COLOR_BLUR,
});
expect(vegaLiteSpec.spec.encoding.tooltip).toEqual([
expect(specForegroundLayer.encoding.tooltip).toEqual([
{ field: 'x', type: 'quantitative' },
{ field: 'y', type: 'quantitative' },
]);
@ -101,7 +102,14 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => {
it('should return the spec for outliers', () => {
const data = [{ x: 1, y: 1 }];
const vegaLiteSpec = getScatterplotMatrixVegaLiteSpec(data, ['x', 'y'], euiThemeLight, 'ml');
const vegaLiteSpec = getScatterplotMatrixVegaLiteSpec(
data,
[],
['x', 'y'],
euiThemeLight,
'ml'
);
const specForegroundLayer = vegaLiteSpec.spec.layer[0];
// A valid Vega Lite spec shouldn't throw an error when compiled.
expect(() => compile(vegaLiteSpec)).not.toThrow();
@ -110,13 +118,13 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => {
column: ['x', 'y'],
row: ['y', 'x'],
});
expect(vegaLiteSpec.spec.data.values).toEqual(data);
expect(vegaLiteSpec.spec.mark).toEqual({
expect(specForegroundLayer.data.values).toEqual(data);
expect(specForegroundLayer.mark).toEqual({
opacity: 0.75,
size: 8,
type: 'circle',
});
expect(vegaLiteSpec.spec.encoding.color).toEqual({
expect(specForegroundLayer.encoding.color).toEqual({
condition: {
selection: USER_SELECTION,
field: 'is_outlier',
@ -127,7 +135,7 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => {
},
value: COLOR_BLUR,
});
expect(vegaLiteSpec.spec.encoding.tooltip).toEqual([
expect(specForegroundLayer.encoding.tooltip).toEqual([
{ field: 'x', type: 'quantitative' },
{ field: 'y', type: 'quantitative' },
{
@ -144,12 +152,14 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => {
const vegaLiteSpec = getScatterplotMatrixVegaLiteSpec(
data,
[],
['x', 'y'],
euiThemeLight,
undefined,
'the-color-field',
LEGEND_TYPES.NOMINAL
);
const specForegroundLayer = vegaLiteSpec.spec.layer[0];
// A valid Vega Lite spec shouldn't throw an error when compiled.
expect(() => compile(vegaLiteSpec)).not.toThrow();
@ -158,13 +168,13 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => {
column: ['x', 'y'],
row: ['y', 'x'],
});
expect(vegaLiteSpec.spec.data.values).toEqual(data);
expect(vegaLiteSpec.spec.mark).toEqual({
expect(specForegroundLayer.data.values).toEqual(data);
expect(specForegroundLayer.mark).toEqual({
opacity: 0.75,
size: 8,
type: 'circle',
});
expect(vegaLiteSpec.spec.encoding.color).toEqual({
expect(specForegroundLayer.encoding.color).toEqual({
condition: {
selection: USER_SELECTION,
field: 'the-color-field',
@ -175,7 +185,7 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => {
},
value: COLOR_BLUR,
});
expect(vegaLiteSpec.spec.encoding.tooltip).toEqual([
expect(specForegroundLayer.encoding.tooltip).toEqual([
{ field: 'the-color-field', type: 'nominal' },
{ field: 'x', type: 'quantitative' },
{ field: 'y', type: 'quantitative' },
@ -187,12 +197,14 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => {
const vegaLiteSpec = getScatterplotMatrixVegaLiteSpec(
data,
[],
['x.a', 'y[a]'],
euiThemeLight,
undefined,
'the-color-field',
LEGEND_TYPES.NOMINAL
);
const specForegroundLayer = vegaLiteSpec.spec.layer[0];
// column values should be escaped
expect(vegaLiteSpec.repeat).toEqual({
@ -200,6 +212,6 @@ describe('getScatterplotMatrixVegaLiteSpec()', () => {
row: ['y\\[a\\]', 'x\\.a'],
});
// raw data should not be escaped
expect(vegaLiteSpec.spec.data.values).toEqual(data);
expect(specForegroundLayer.data.values).toEqual(data);
});
});

View file

@ -75,6 +75,151 @@ export const getColorSpec = (
};
};
/**
 * Builds a single layer object for the scatterplot matrix Vega-Lite spec.
 *
 * @param isBackground When true, points are colored with `COLOR_BLUR` instead
 *   of `colorSpec` and the layer carries no `selection` definitions.
 * @param values Data rows for the layer; a shallow copy is placed in the spec.
 * @param colorSpec Color encoding used for foreground points, and for the
 *   stroke whenever `dynamicSize` is set.
 * @param escapedOutlierScoreField Outlier score field name, already escaped
 *   for use in Vega field references and expressions.
 * @param outliers Whether to add the outlier transform, ordering, size
 *   encoding, score tooltip and the threshold selection.
 * @param dynamicSize Whether point size and opacity are driven by the outlier
 *   score rather than by the fixed-size / user-selection defaults.
 * @param vegaColumns Column names used for the quantitative tooltips.
 * @param color Optional color field; when defined it is added to the tooltip.
 * @returns The layer object (data, mark, encoding, optional transform and
 *   selection, width and height).
 */
const getVegaSpecLayer = (
  isBackground: boolean,
  values: VegaValue[],
  colorSpec: any,
  escapedOutlierScoreField: string,
  outliers: boolean,
  dynamicSize: boolean,
  vegaColumns: string[],
  color?: string
) => {
  // Vega expression comparing a datum's outlier score with the slider cutoff.
  const outlierExpression = `datum['${escapedOutlierScoreField}'] >= mlOutlierScoreThreshold.cutoff`;
  const outlierTest = `(${outlierExpression})`;

  // Selection definitions; only merged into the layer for the foreground.
  const interactions = outliers
    ? {
        selection: {
          [USER_SELECTION]: { type: 'interval' },
          [SINGLE_POINT_CLICK]: { type: 'single' },
          mlOutlierScoreThreshold: {
            type: 'single',
            fields: ['cutoff'],
            bind: {
              input: 'range',
              max: 1,
              min: 0,
              name: i18n.translate('xpack.ml.splomSpec.outlierScoreThresholdName', {
                defaultMessage: 'Outlier score threshold: ',
              }),
              step: 0.01,
            },
            init: { cutoff: 0.99 },
          },
        },
      }
    : {
        selection: {
          // Always allow user selection
          [USER_SELECTION]: {
            type: 'interval',
          },
          [SINGLE_POINT_CLICK]: { type: 'single', empty: 'none' },
        },
      };

  const mark =
    outliers && dynamicSize
      ? {
          type: 'circle',
          strokeWidth: 1.2,
          strokeOpacity: 0.75,
          fillOpacity: 0.1,
        }
      : { type: 'circle', opacity: 0.75, size: 8 };

  // With dynamic sizing, opacity follows the outlier threshold; otherwise it
  // follows the user's interval selection.
  const opacity = dynamicSize
    ? {
        condition: {
          value: 1,
          test: outlierTest,
        },
        value: 0.5,
      }
    : {
        condition: {
          selection: USER_SELECTION,
          value: 0.8,
        },
        value: 0.5,
      };

  const size = dynamicSize
    ? {
        type: LEGEND_TYPES.QUANTITATIVE,
        field: escapedOutlierScoreField,
        scale: {
          type: 'linear',
          range: [8, 200],
          domain: [0, 1],
        },
      }
    : {
        condition: {
          value: 40,
          test: outlierTest,
        },
        value: 8,
      };

  // Shared shape of the x/y encodings, differing only in the repeated channel.
  const repeatedAxis = (channel: 'column' | 'row') => ({
    type: LEGEND_TYPES.QUANTITATIVE,
    field: { repeat: channel },
    scale: { zero: false },
  });

  const tooltip = [
    ...(color === undefined
      ? []
      : [{ type: colorSpec.condition.type, field: getEscapedVegaFieldName(color) }]),
    ...vegaColumns.map((column) => ({
      type: LEGEND_TYPES.QUANTITATIVE,
      field: column,
    })),
    ...(outliers
      ? [
          {
            type: LEGEND_TYPES.QUANTITATIVE,
            field: escapedOutlierScoreField,
            format: '.3f',
          },
        ]
      : []),
  ];

  const encoding = {
    color: isBackground ? { value: COLOR_BLUR } : colorSpec,
    opacity,
    ...(dynamicSize ? { stroke: colorSpec } : {}),
    ...(outliers
      ? {
          order: { field: escapedOutlierScoreField },
          size,
        }
      : {}),
    x: repeatedAxis('column'),
    y: repeatedAxis('row'),
    tooltip,
  };

  return {
    data: { values: values.slice() },
    mark,
    // transformation to apply outlier threshold as category
    ...(outliers
      ? {
          transform: [
            {
              calculate: outlierExpression,
              as: 'is_outlier',
            },
          ],
        }
      : {}),
    encoding,
    ...(isBackground ? {} : interactions),
    width: SCATTERPLOT_SIZE,
    height: SCATTERPLOT_SIZE,
  };
};
// Escapes the characters .[] in field names with double backslashes
// since VEGA treats dots/brackets in field names as nested values.
// See https://vega.github.io/vega-lite/docs/field.html for details.
@ -86,6 +231,7 @@ type VegaValue = Record<string, string | number>;
export const getScatterplotMatrixVegaLiteSpec = (
values: VegaValue[],
backgroundValues: VegaValue[],
columns: string[],
euiTheme: typeof euiThemeLight,
resultsField?: string,
@ -106,7 +252,7 @@ export const getScatterplotMatrixVegaLiteSpec = (
legendType
);
return {
const schema: TopLevelSpec = {
$schema: 'https://vega.github.io/schema/vega-lite/v4.17.0.json',
background: 'transparent',
// There seems to be a bug in Vega which doesn't propagate these settings
@ -134,129 +280,35 @@ export const getScatterplotMatrixVegaLiteSpec = (
row: vegaColumns.slice().reverse(),
},
spec: {
data: { values: [...vegaValues] },
mark: {
...(outliers && dynamicSize
? {
type: 'circle',
strokeWidth: 1.2,
strokeOpacity: 0.75,
fillOpacity: 0.1,
}
: { type: 'circle', opacity: 0.75, size: 8 }),
},
// transformation to apply outlier threshold as category
...(outliers
? {
transform: [
{
calculate: `datum['${escapedOutlierScoreField}'] >= mlOutlierScoreThreshold.cutoff`,
as: 'is_outlier',
},
],
}
: {}),
encoding: {
color: colorSpec,
opacity: {
condition: {
selection: USER_SELECTION,
value: 0.8,
},
value: 0.5,
},
...(dynamicSize
? {
stroke: colorSpec,
opacity: {
condition: {
value: 1,
test: `(datum['${escapedOutlierScoreField}'] >= mlOutlierScoreThreshold.cutoff)`,
},
value: 0.5,
},
}
: {}),
...(outliers
? {
order: { field: escapedOutlierScoreField },
size: {
...(!dynamicSize
? {
condition: {
value: 40,
test: `(datum['${escapedOutlierScoreField}'] >= mlOutlierScoreThreshold.cutoff)`,
},
value: 8,
}
: {
type: LEGEND_TYPES.QUANTITATIVE,
field: escapedOutlierScoreField,
scale: {
type: 'linear',
range: [8, 200],
domain: [0, 1],
},
}),
},
}
: {}),
x: {
type: LEGEND_TYPES.QUANTITATIVE,
field: { repeat: 'column' },
scale: { zero: false },
},
y: {
type: LEGEND_TYPES.QUANTITATIVE,
field: { repeat: 'row' },
scale: { zero: false },
},
tooltip: [
...(color !== undefined
? // @ts-ignore
[{ type: colorSpec.condition.type, field: getEscapedVegaFieldName(color) }]
: []),
...vegaColumns.map((d) => ({
type: LEGEND_TYPES.QUANTITATIVE,
field: d,
})),
...(outliers
? [{ type: LEGEND_TYPES.QUANTITATIVE, field: escapedOutlierScoreField, format: '.3f' }]
: []),
],
},
...(outliers
? {
selection: {
[USER_SELECTION]: { type: 'interval' },
[SINGLE_POINT_CLICK]: { type: 'single' },
mlOutlierScoreThreshold: {
type: 'single',
fields: ['cutoff'],
bind: {
input: 'range',
max: 1,
min: 0,
name: i18n.translate('xpack.ml.splomSpec.outlierScoreThresholdName', {
defaultMessage: 'Outlier score threshold: ',
}),
step: 0.01,
},
init: { cutoff: 0.99 },
},
},
}
: {
selection: {
// Always allow user selection
[USER_SELECTION]: {
type: 'interval',
},
[SINGLE_POINT_CLICK]: { type: 'single', empty: 'none' },
},
}),
width: SCATTERPLOT_SIZE,
height: SCATTERPLOT_SIZE,
layer: [
getVegaSpecLayer(
false,
vegaValues,
colorSpec,
escapedOutlierScoreField,
outliers,
!!dynamicSize,
vegaColumns,
color
),
],
},
};
if (backgroundValues.length) {
schema.spec.layer.unshift(
getVegaSpecLayer(
true,
backgroundValues,
colorSpec,
escapedOutlierScoreField,
outliers,
!!dynamicSize,
vegaColumns,
color
)
);
}
return schema;
};