[ML] Data Frame Analytics: Fix field name escaping for Vega based scatterplot matrix. (#193386)

## Summary

Field names with `\n` would fail to render the DFA scatterplot matrix:

<img width="804" alt="image"
src="https://github.com/user-attachments/assets/26e356b8-236d-4255-b556-2ebc2e5db4fc">

This fixes the escaping and adds unit tests.

The fix isn't 100% ideal because there are cases when we may end up with
an additional backslash being rendered for labels of the scatterplot.
However, all other variations I tried caused rendering problems of the
charts and rendering would fail completely.

For example, just escaping `\n` without the general backslash escaping
causes the following Vega error: `Duplicate scale or projection name:
"child__row_my_numbercolumn_my_number_x"`

On the other hand escaping just the backslash without the additional
`\n` escaping causes an "expression parse error" in Vega and the
chart wouldn't render.

Note this PR just focuses on escaping for the Vega spec for the
scatterplot matrix. There are still other places in the UI (data grid
headers, fields selector).

<img width="792" alt="image"
src="https://github.com/user-attachments/assets/35532741-7a13-4707-b8da-c72dcc8c935b">

### Checklist

- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios
- [x] This was checked for breaking API changes and was [labeled
appropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)
This commit is contained in:
Walter Rafelsberger 2024-09-24 10:41:46 +02:00 committed by GitHub
parent 0b4f8774dd
commit 194d6307dc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 71 additions and 3 deletions

View file

@ -16,6 +16,7 @@ import { LEGEND_TYPES } from '../vega_chart/common';
import {
getColorSpec,
getEscapedVegaFieldName,
getScatterplotMatrixVegaLiteSpec,
COLOR_RANGE_NOMINAL,
COLOR_RANGE_OUTLIER,
@ -75,6 +76,56 @@ describe('getColorSpec()', () => {
});
});
// Unit tests for getEscapedVegaFieldName(): each case feeds a raw field name
// and checks the exact escaped string that is returned.
describe('getEscapedVegaFieldName()', () => {
  it('should escape dots in field names', () => {
    const fieldName = 'field.name';
    const escapedFieldName = getEscapedVegaFieldName(fieldName);
    expect(escapedFieldName).toBe('field\\.name');
  });

  it('should escape brackets in field names', () => {
    const fieldName = 'field[name]';
    const escapedFieldName = getEscapedVegaFieldName(fieldName);
    expect(escapedFieldName).toBe('field\\[name\\]');
  });

  it('should escape both dots and brackets in field names', () => {
    const fieldName = 'field.name[0]';
    const escapedFieldName = getEscapedVegaFieldName(fieldName);
    expect(escapedFieldName).toBe('field\\.name\\[0\\]');
  });

  it('should return the same string if there are no special characters', () => {
    const fieldName = 'fieldname';
    const escapedFieldName = getEscapedVegaFieldName(fieldName);
    expect(escapedFieldName).toBe('fieldname');
  });

  it('should prepend a string if provided', () => {
    const fieldName = 'field.name';
    const prependString = 'prefix_';
    const escapedFieldName = getEscapedVegaFieldName(fieldName, prependString);
    expect(escapedFieldName).toBe('prefix_field\\.name');
  });

  it('should escape a literal backslash followed by "n" in field names', () => {
    // String quotes process backslashes, so we need to escape them for
    // the test string to contain a backslash. The input here is the two
    // characters `\` and `n`, NOT a newline character.
    const fieldName = 'field\\name';
    const escapedFieldName = getEscapedVegaFieldName(fieldName);
    expect(escapedFieldName).toBe('field\\\\name');
  });

  it('should escape newline characters in field names', () => {
    // Single backslash on purpose: this string contains a real newline
    // character. It is first rewritten to backslash + "n", and that
    // backslash is then escaped again by the general escaping step.
    const fieldName = 'field\name';
    const escapedFieldName = getEscapedVegaFieldName(fieldName);
    expect(escapedFieldName).toBe('field\\\\name');
  });

  it('should escape backslashes in field names', () => {
    // String quotes process backslashes, so we need to escape them for
    // the test string to contain a backslash.
    const fieldName = 'fieldname\\withbackslash';
    const escapedFieldName = getEscapedVegaFieldName(fieldName);
    expect(escapedFieldName).toBe('fieldname\\\\withbackslash');
  });
});
describe('getScatterplotMatrixVegaLiteSpec()', () => {
const forCustomLink = false;

View file

@ -249,11 +249,28 @@ const getVegaSpecLayer = (
};
};
// Escapes the characters .[] in field names with double backslashes
// Escapes the characters .[]\ in field names with double backslashes
// since VEGA treats dots/brackets in field names as nested values.
// See https://vega.github.io/vega-lite/docs/field.html for details.
function getEscapedVegaFieldName(fieldName: string, prependString: string = '') {
return `${prependString}${fieldName.replace(/([\.|\[|\]])/g, '\\$1')}`;
/**
 * Builds an escaped field name for use in the Vega spec.
 *
 * Escaping happens in two passes:
 * 1. every newline character is rewritten to the two characters `\` and `n`;
 * 2. every `.`, `[`, `]` and `\` is prefixed with a backslash. Because the
 *    character class also contains `|` literally, pipe characters are
 *    prefixed with a backslash as well.
 *
 * @param fieldName - The raw field name to escape.
 * @param prependString - Optional text placed, unescaped, in front of the result.
 * @returns `prependString` followed by the escaped field name.
 */
export function getEscapedVegaFieldName(fieldName: string, prependString: string = '') {
  // Known limitation: in some cases an additional backslash may end up being
  // rendered in the scatterplot labels. The alternatives that were tried made
  // chart rendering fail completely:
  // - Escaping only \n in the first pass, without the general backslash
  //   escaping, causes the Vega error:
  //   Duplicate scale or projection name: "child__row_my_numbercolumn_my_number_x"
  // - Escaping only the backslash, without the additional \n escaping,
  //   causes an "expression parse error" in Vega and the chart wouldn't render.
  const escapedFieldName = fieldName
    // Pass 1: newline characters.
    .replace(/\n/g, '\\n')
    // Pass 2: .[]\ (and, via the character class, |).
    .replace(/([\.|\[|\]|\\])/g, '\\$1');

  return prependString + escapedFieldName;
}
type VegaValue = Record<string, string | number>;