[ML] Fixing missing final new line character issue (#109274)

* [ML] Fixing missing final new line character issue
* adding tests
* tiny refactor
* test fixes based on review

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
2025-04-23 09:19:04 -04:00 · 2021-08-25 14:00:32 +01:00 · 2021-08-25 14:00:32 +01:00 · fb1c3ca5a6
commit fb1c3ca5a6
parent c7f55db17f
8 changed files with 95 additions and 5 deletions
--- a/x-pack/plugins/file_upload/public/importer/importer.ts
+++ b/x-pack/plugins/file_upload/public/importer/importer.ts
@ -40,7 +40,10 @@ export abstract class Importer implements IImporter {
    let remainder = 0;
    for (let i = 0; i < parts; i++) {
      const byteArray = decoder.decode(data.slice(i * size - remainder, (i + 1) * size));
-      const { success, docs, remainder: tempRemainder } = this._createDocs(byteArray);
+      const { success, docs, remainder: tempRemainder } = this._createDocs(
+        byteArray,
+        i === parts - 1
+      );
      if (success) {
        this._docArray = this._docArray.concat(docs);
        remainder = tempRemainder;
@ -52,7 +55,7 @@ export abstract class Importer implements IImporter {
    return { success: true };
  }

-  protected abstract _createDocs(t: string): CreateDocsResponse;
+  protected abstract _createDocs(t: string, isLastPart: boolean): CreateDocsResponse;

  public async initializeImport(
    index: string,
--- a/x-pack/plugins/file_upload/public/importer/message_importer.ts
+++ b/x-pack/plugins/file_upload/public/importer/message_importer.ts
@ -30,7 +30,7 @@ export class MessageImporter extends Importer {
  // multiline_start_pattern regex
  // if it does, it is a legitimate end of line and can be pushed into the list,
  // if not, it must be a newline char inside a field value, so keep looking.
-  protected _createDocs(text: string): CreateDocsResponse {
+  protected _createDocs(text: string, isLastPart: boolean): CreateDocsResponse {
    let remainder = 0;
    try {
      const docs: Doc[] = [];
@ -39,9 +39,17 @@ export class MessageImporter extends Importer {
      let line = '';
      for (let i = 0; i < text.length; i++) {
        const char = text[i];
+        const isLastChar = i === text.length - 1;
        if (char === '\n') {
          message = this._processLine(docs, message, line);
          line = '';
+        } else if (isLastPart && isLastChar) {
+          // if this is the end of the last line and the last chunk of data,
+          // add the remainder as a final line.
+          // just in case the last line doesn't end in a new line char.
+          line += char;
+          message = this._processLine(docs, message, line);
+          line = '';
        } else {
          line += char;
        }
--- a/x-pack/plugins/file_upload/public/importer/ndjson_importer.ts
+++ b/x-pack/plugins/file_upload/public/importer/ndjson_importer.ts
@ -13,7 +13,7 @@ export class NdjsonImporter extends Importer {
    super();
  }

-  protected _createDocs(json: string): CreateDocsResponse {
+  protected _createDocs(json: string, isLastPart: boolean): CreateDocsResponse {
    let remainder = 0;
    try {
      const splitJson = json.split(/}\s*\n/);
--- a/x-pack/test/functional/apps/ml/data_visualizer/file_data_visualizer.ts
+++ b/x-pack/test/functional/apps/ml/data_visualizer/file_data_visualizer.ts
@ -111,6 +111,7 @@ export default function ({ getService }: FtrProviderContext) {
        totalFieldsCount: 12,
        fieldTypeFiltersResultCount: 4,
        fieldNameFiltersResultCount: 1,
+        ingestedDocCount: 20,
      },
    },
    {
@ -152,6 +153,51 @@ export default function ({ getService }: FtrProviderContext) {
        totalFieldsCount: 3,
        fieldTypeFiltersResultCount: 1,
        fieldNameFiltersResultCount: 1,
+        ingestedDocCount: 13,
+      },
+    },
+    {
+      suiteSuffix: 'with a file with a missing new line char at the end',
+      filePath: path.join(__dirname, 'files_to_import', 'missing_end_of_file_newline.csv'),
+      indexName: 'user-import_3',
+      createIndexPattern: false,
+      fieldTypeFilters: [],
+      fieldNameFilters: [],
+      expected: {
+        results: {
+          title: 'missing_end_of_file_newline.csv',
+          numberOfFields: 3,
+        },
+        metricFields: [
+          {
+            fieldName: 'value',
+            type: ML_JOB_FIELD_TYPES.NUMBER,
+            docCountFormatted: '3 (100%)',
+            exampleCount: 3,
+            topValuesCount: 3,
+          },
+        ],
+        nonMetricFields: [
+          {
+            fieldName: 'title',
+            type: ML_JOB_FIELD_TYPES.UNKNOWN,
+            docCountFormatted: '3 (100%)',
+            exampleCount: 3,
+          },
+          {
+            fieldName: 'description',
+            type: ML_JOB_FIELD_TYPES.KEYWORD,
+            docCountFormatted: '3 (100%)',
+            exampleCount: 3,
+          },
+        ],
+        visibleMetricFieldsCount: 0,
+        totalMetricFieldsCount: 0,
+        populatedFieldsCount: 3,
+        totalFieldsCount: 3,
+        fieldTypeFiltersResultCount: 3,
+        fieldNameFiltersResultCount: 3,
+        ingestedDocCount: 3,
      },
    },
  ];
@ -271,6 +317,10 @@ export default function ({ getService }: FtrProviderContext) {
          await ml.testExecution.logTestStep('imports the file');
          await ml.dataVisualizerFileBased.startImportAndWaitForProcessing();

+          await ml.dataVisualizerFileBased.assertIngestedDocCount(
+            testData.expected.ingestedDocCount
+          );
+
          await ml.testExecution.logTestStep('creates filebeat config');
          await ml.dataVisualizerFileBased.selectCreateFilebeatConfig();

--- a/x-pack/test/functional/apps/ml/data_visualizer/files_to_import/geo_file.csv
+++ b/x-pack/test/functional/apps/ml/data_visualizer/files_to_import/geo_file.csv
@ -11,4 +11,4 @@ POINT (-2.509384 51.40959),On or near Barnard Walk,
 POINT (-2.495055 51.422132),On or near Cross Street,
 POINT (-2.509384 51.40959),On or near Barnard Walk,
 POINT (-2.495055 51.422132),On or near Cross Street,
-POINT (-2.509126 51.416137),On or near St Francis Road,
+POINT (-2.509126 51.416137),On or near St Francis Road,
--- a/x-pack/test/functional/apps/ml/data_visualizer/files_to_import/missing_end_of_file_newline.csv
+++ b/x-pack/test/functional/apps/ml/data_visualizer/files_to_import/missing_end_of_file_newline.csv
@ -0,0 +1,4 @@
+title,description,value
+first title,this is the first description,22
+second title,this is the second description,66
+third title,this is the third description,88
--- a/x-pack/test/functional/services/ml/common_ui.ts
+++ b/x-pack/test/functional/services/ml/common_ui.ts
@ -285,6 +285,20 @@ export function MachineLearningCommonUIProvider({
      await this.assertRowsNumberPerPage(testSubj, rowsNumber);
    },

+    async getEuiDescriptionListDescriptionFromTitle(testSubj: string, title: string) {
+      const subj = await testSubjects.find(testSubj);
+      const titles = await subj.findAllByTagName('dt');
+      const descriptions = await subj.findAllByTagName('dd');
+
+      for (let i = 0; i < titles.length; i++) {
+        const titleText = (await titles[i].parseDomContent()).html();
+        if (titleText === title) {
+          return (await descriptions[i].parseDomContent()).html();
+        }
+      }
+      return null;
+    },
+
    async changeToSpace(spaceId: string) {
      await PageObjects.spaceSelector.openSpacesNav();
      await PageObjects.spaceSelector.goToSpecificSpace(spaceId);
--- a/x-pack/test/functional/services/ml/data_visualizer_file_based.ts
+++ b/x-pack/test/functional/services/ml/data_visualizer_file_based.ts
@ -132,6 +132,17 @@ export function MachineLearningDataVisualizerFileBasedProvider(
      });
    },

+    async assertIngestedDocCount(count: number) {
+      const docCount = await mlCommonUI.getEuiDescriptionListDescriptionFromTitle(
+        'dataVisualizerFileImportSuccessCallout',
+        'Documents ingested'
+      );
+      expect(docCount).to.eql(
+        count,
+        `Expected Documents ingested count to be '${count}' (got '${docCount}')`
+      );
+    },
+
    async selectCreateFilebeatConfig() {
      await testSubjects.scrollIntoView('fileDataVisFilebeatConfigLink', {
        bottomOffset: fixedFooterHeight,