mirror of
https://github.com/elastic/kibana.git
synced 2025-04-23 09:19:04 -04:00
[ML] Fixing missing final new line character issue (#109274)
* [ML] Fixing missing final new line character issue * adding tests * tiny refactor * test fixes based on review Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
parent
c7f55db17f
commit
fb1c3ca5a6
8 changed files with 95 additions and 5 deletions
|
@ -40,7 +40,10 @@ export abstract class Importer implements IImporter {
|
|||
let remainder = 0;
|
||||
for (let i = 0; i < parts; i++) {
|
||||
const byteArray = decoder.decode(data.slice(i * size - remainder, (i + 1) * size));
|
||||
const { success, docs, remainder: tempRemainder } = this._createDocs(byteArray);
|
||||
const { success, docs, remainder: tempRemainder } = this._createDocs(
|
||||
byteArray,
|
||||
i === parts - 1
|
||||
);
|
||||
if (success) {
|
||||
this._docArray = this._docArray.concat(docs);
|
||||
remainder = tempRemainder;
|
||||
|
@ -52,7 +55,7 @@ export abstract class Importer implements IImporter {
|
|||
return { success: true };
|
||||
}
|
||||
|
||||
protected abstract _createDocs(t: string): CreateDocsResponse;
|
||||
protected abstract _createDocs(t: string, isLastPart: boolean): CreateDocsResponse;
|
||||
|
||||
public async initializeImport(
|
||||
index: string,
|
||||
|
|
|
@ -30,7 +30,7 @@ export class MessageImporter extends Importer {
|
|||
// multiline_start_pattern regex
|
||||
// if it does, it is a legitimate end of line and can be pushed into the list,
|
||||
// if not, it must be a newline char inside a field value, so keep looking.
|
||||
protected _createDocs(text: string): CreateDocsResponse {
|
||||
protected _createDocs(text: string, isLastPart: boolean): CreateDocsResponse {
|
||||
let remainder = 0;
|
||||
try {
|
||||
const docs: Doc[] = [];
|
||||
|
@ -39,9 +39,17 @@ export class MessageImporter extends Importer {
|
|||
let line = '';
|
||||
for (let i = 0; i < text.length; i++) {
|
||||
const char = text[i];
|
||||
const isLastChar = i === text.length - 1;
|
||||
if (char === '\n') {
|
||||
message = this._processLine(docs, message, line);
|
||||
line = '';
|
||||
} else if (isLastPart && isLastChar) {
|
||||
// If this is the last character of the last chunk of data,
|
||||
// add whatever has been collected as a final line,
|
||||
// in case the data doesn't end with a newline character.
|
||||
line += char;
|
||||
message = this._processLine(docs, message, line);
|
||||
line = '';
|
||||
} else {
|
||||
line += char;
|
||||
}
|
||||
|
|
|
@ -13,7 +13,7 @@ export class NdjsonImporter extends Importer {
|
|||
super();
|
||||
}
|
||||
|
||||
protected _createDocs(json: string): CreateDocsResponse {
|
||||
protected _createDocs(json: string, isLastPart: boolean): CreateDocsResponse {
|
||||
let remainder = 0;
|
||||
try {
|
||||
const splitJson = json.split(/}\s*\n/);
|
||||
|
|
|
@ -111,6 +111,7 @@ export default function ({ getService }: FtrProviderContext) {
|
|||
totalFieldsCount: 12,
|
||||
fieldTypeFiltersResultCount: 4,
|
||||
fieldNameFiltersResultCount: 1,
|
||||
ingestedDocCount: 20,
|
||||
},
|
||||
},
|
||||
{
|
||||
|
@ -152,6 +153,51 @@ export default function ({ getService }: FtrProviderContext) {
|
|||
totalFieldsCount: 3,
|
||||
fieldTypeFiltersResultCount: 1,
|
||||
fieldNameFiltersResultCount: 1,
|
||||
ingestedDocCount: 13,
|
||||
},
|
||||
},
|
||||
{
|
||||
suiteSuffix: 'with a file with a missing new line char at the end',
|
||||
filePath: path.join(__dirname, 'files_to_import', 'missing_end_of_file_newline.csv'),
|
||||
indexName: 'user-import_3',
|
||||
createIndexPattern: false,
|
||||
fieldTypeFilters: [],
|
||||
fieldNameFilters: [],
|
||||
expected: {
|
||||
results: {
|
||||
title: 'missing_end_of_file_newline.csv',
|
||||
numberOfFields: 3,
|
||||
},
|
||||
metricFields: [
|
||||
{
|
||||
fieldName: 'value',
|
||||
type: ML_JOB_FIELD_TYPES.NUMBER,
|
||||
docCountFormatted: '3 (100%)',
|
||||
exampleCount: 3,
|
||||
topValuesCount: 3,
|
||||
},
|
||||
],
|
||||
nonMetricFields: [
|
||||
{
|
||||
fieldName: 'title',
|
||||
type: ML_JOB_FIELD_TYPES.UNKNOWN,
|
||||
docCountFormatted: '3 (100%)',
|
||||
exampleCount: 3,
|
||||
},
|
||||
{
|
||||
fieldName: 'description',
|
||||
type: ML_JOB_FIELD_TYPES.KEYWORD,
|
||||
docCountFormatted: '3 (100%)',
|
||||
exampleCount: 3,
|
||||
},
|
||||
],
|
||||
visibleMetricFieldsCount: 0,
|
||||
totalMetricFieldsCount: 0,
|
||||
populatedFieldsCount: 3,
|
||||
totalFieldsCount: 3,
|
||||
fieldTypeFiltersResultCount: 3,
|
||||
fieldNameFiltersResultCount: 3,
|
||||
ingestedDocCount: 3,
|
||||
},
|
||||
},
|
||||
];
|
||||
|
@ -271,6 +317,10 @@ export default function ({ getService }: FtrProviderContext) {
|
|||
await ml.testExecution.logTestStep('imports the file');
|
||||
await ml.dataVisualizerFileBased.startImportAndWaitForProcessing();
|
||||
|
||||
await ml.dataVisualizerFileBased.assertIngestedDocCount(
|
||||
testData.expected.ingestedDocCount
|
||||
);
|
||||
|
||||
await ml.testExecution.logTestStep('creates filebeat config');
|
||||
await ml.dataVisualizerFileBased.selectCreateFilebeatConfig();
|
||||
|
||||
|
|
|
@ -11,4 +11,4 @@ POINT (-2.509384 51.40959),On or near Barnard Walk,
|
|||
POINT (-2.495055 51.422132),On or near Cross Street,
|
||||
POINT (-2.509384 51.40959),On or near Barnard Walk,
|
||||
POINT (-2.495055 51.422132),On or near Cross Street,
|
||||
POINT (-2.509126 51.416137),On or near St Francis Road,
|
||||
POINT (-2.509126 51.416137),On or near St Francis Road,
|
||||
|
|
|
|
@ -0,0 +1,4 @@
|
|||
title,description,value
|
||||
first title,this is the first description,22
|
||||
second title,this is the second description,66
|
||||
third title,this is the third description,88
|
|
|
@ -285,6 +285,20 @@ export function MachineLearningCommonUIProvider({
|
|||
await this.assertRowsNumberPerPage(testSubj, rowsNumber);
|
||||
},
|
||||
|
||||
/**
 * Looks up a description-list entry by its title inside the element found via `testSubj`.
 *
 * Collects every `dt` and every `dd` tag under that element, then returns the `html()`
 * of the `dd` at the same index as the first `dt` whose `html()` strictly equals `title`.
 *
 * @param testSubj test subject handed to `testSubjects.find` to locate the container
 * @param title exact content to match against each `dt`
 * @returns the content of the matching `dd`, or `null` when no `dt` matches
 */
async getEuiDescriptionListDescriptionFromTitle(testSubj: string, title: string) {
|
||||
const subj = await testSubjects.find(testSubj);
|
||||
const titles = await subj.findAllByTagName('dt');
|
||||
const descriptions = await subj.findAllByTagName('dd');
|
||||

||||
// NOTE(review): titles and descriptions are paired purely by index — this assumes
// every dt has exactly one dd and that both lists come back in the same order; confirm.
for (let i = 0; i < titles.length; i++) {
|
||||
const titleText = (await titles[i].parseDomContent()).html();
|
||||
if (titleText === title) {
|
||||
return (await descriptions[i].parseDomContent()).html();
|
||||
}
|
||||
}
|
||||
// No dt matched the requested title.
return null;
|
||||
},
|
||||
|
||||
async changeToSpace(spaceId: string) {
|
||||
await PageObjects.spaceSelector.openSpacesNav();
|
||||
await PageObjects.spaceSelector.goToSpecificSpace(spaceId);
|
||||
|
|
|
@ -132,6 +132,17 @@ export function MachineLearningDataVisualizerFileBasedProvider(
|
|||
});
|
||||
},
|
||||
|
||||
/**
 * Asserts the "Documents ingested" value shown in the file import success callout.
 *
 * Reads the description listed under the 'Documents ingested' title inside the
 * 'dataVisualizerFileImportSuccessCallout' test subject and checks it against `count`
 * with `expect(...).to.eql(...)`.
 *
 * @param count the number of documents expected to have been ingested
 */
async assertIngestedDocCount(count: number) {
|
||||
const docCount = await mlCommonUI.getEuiDescriptionListDescriptionFromTitle(
|
||||
'dataVisualizerFileImportSuccessCallout',
|
||||
'Documents ingested'
|
||||
);
|
||||
// NOTE(review): docCount is the result of `.html()` (or null when the title is not found),
// so it is presumably a string while `count` is a number — this check relies on how
// `eql` compares the two; confirm.
expect(docCount).to.eql(
|
||||
count,
|
||||
`Expected Documents ingested count to be '${count}' (got '${docCount}')`
|
||||
);
|
||||
},
|
||||
|
||||
async selectCreateFilebeatConfig() {
|
||||
await testSubjects.scrollIntoView('fileDataVisFilebeatConfigLink', {
|
||||
bottomOffset: fixedFooterHeight,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue