Mirror of https://github.com/elastic/kibana.git, synced 2025-04-24 17:59:23 -04:00
* [ML] File data viz limiting upload chunk size
* adding comments
* refactor
* fixing incorrect overwrite of array
This commit is contained in:
parent b72debb66c
commit 5bc76f8438

1 changed file with 43 additions and 7 deletions
@@ -10,7 +10,8 @@ import { chunk } from 'lodash';
 import moment from 'moment';
 import { i18n } from '@kbn/i18n';
 
-const CHUNK_SIZE = 10000;
+const CHUNK_SIZE = 5000;
+const MAX_CHUNK_CHAR_COUNT = 1000000;
 const IMPORT_RETRIES = 5;
 
 export class Importer {
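For context, a synthetic illustration (not part of the commit) of why the new MAX_CHUNK_CHAR_COUNT limit is layered on top of the document-count cap: a fixed number of documents does not bound the request size, because individual documents can be arbitrarily large.

// Synthetic example only: 5000 documents can still produce a huge request body.
const docs = new Array(5000).fill({ message: 'x'.repeat(2000) });
console.log(JSON.stringify(docs).length); // roughly 10,000,000 characters, about ten times MAX_CHUNK_CHAR_COUNT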
@@ -21,6 +22,7 @@ export class Importer {
 
     this.data = [];
     this.docArray = [];
+    this.docSizeArray = [];
   }
 
   async initializeImport(index) {
@@ -58,7 +60,7 @@ export class Importer {
       };
     }
 
-    const chunks = chunk(this.docArray, CHUNK_SIZE);
+    const chunks = createDocumentChunks(this.docArray);
 
     const ingestPipeline = {
       id: pipelineId,
@@ -86,13 +88,18 @@ export class Importer {
     };
 
     while (resp.success === false && retries > 0) {
-      resp = await ml.fileDatavisualizer.import(aggs);
+      try {
+        resp = await ml.fileDatavisualizer.import(aggs);
 
-      if (retries < IMPORT_RETRIES) {
-        console.log(`Retrying import ${IMPORT_RETRIES - retries}`);
+        if (retries < IMPORT_RETRIES) {
+          console.log(`Retrying import ${IMPORT_RETRIES - retries}`);
+        }
+
+        retries--;
+      } catch (err) {
+        resp = { success: false, error: err };
+        retries = 0;
       }
-
-      retries--;
     }
 
     if (resp.success) {
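The retry loop previously let an error thrown by the import call escape the method; the change above converts it into a failed response and stops retrying. A minimal standalone sketch of the same pattern, with a hypothetical sendChunk function standing in for ml.fileDatavisualizer.import:

// Sketch of the retry-with-catch pattern introduced above.
// sendChunk is a hypothetical stand-in for ml.fileDatavisualizer.import.
const IMPORT_RETRIES = 5;

async function importWithRetries(sendChunk, payload) {
  let resp = { success: false };
  let retries = IMPORT_RETRIES;

  while (resp.success === false && retries > 0) {
    try {
      resp = await sendChunk(payload);

      if (retries < IMPORT_RETRIES) {
        // only logged from the second attempt onwards
        console.log(`Retrying import ${IMPORT_RETRIES - retries}`);
      }

      retries--;
    } catch (err) {
      // a thrown error becomes a failed response and ends the loop immediately
      resp = { success: false, error: err };
      retries = 0;
    }
  }

  return resp;
}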
@@ -152,3 +159,32 @@ function updatePipelineTimezone(ingestPipeline) {
     }
   }
 }
+
+function createDocumentChunks(docArray) {
+  const chunks = [];
+  // chop docArray into 5000 doc chunks
+  const tempChunks = chunk(docArray, CHUNK_SIZE);
+
+  // loop over tempChunks and check that the total character length
+  // for each chunk is within the MAX_CHUNK_CHAR_COUNT.
+  // if the length is too long, split the chunk into smaller chunks
+  // based on how much larger it is than MAX_CHUNK_CHAR_COUNT
+  // note, each document is a different size, so dividing by charCountOfDocs
+  // only produces an average chunk size that should be smaller than the max length
+  for (let i = 0; i < tempChunks.length; i++) {
+    const docs = tempChunks[i];
+    const numberOfDocs = docs.length;
+
+    const charCountOfDocs = JSON.stringify(docs).length;
+    if (charCountOfDocs > MAX_CHUNK_CHAR_COUNT) {
+      // calculate a new chunk size which should produce chunks
+      // whose length is on average around MAX_CHUNK_CHAR_COUNT
+      const adjustedChunkSize = Math.floor((MAX_CHUNK_CHAR_COUNT / charCountOfDocs) * numberOfDocs);
+      const smallerChunks = chunk(docs, adjustedChunkSize);
+      chunks.push(...smallerChunks);
+    } else {
+      chunks.push(docs);
+    }
+  }
+  return chunks;
+}
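A synthetic usage sketch of the new helper (documents and counts are invented for illustration; it assumes lodash's chunk and the constants above are in scope). With documents that each serialize to about 500 characters, a 5000-document chunk is roughly 2,500,000 characters, so adjustedChunkSize works out to about Math.floor((1000000 / 2500000) * 5000), i.e. around 2000 documents per re-split piece.

// Synthetic usage sketch; document contents and sizes are made up.
const docs = new Array(10000).fill({ message: 'x'.repeat(490) }); // ~500 chars each once stringified

const uploadChunks = createDocumentChunks(docs);

// A pure count-based split would give Math.ceil(10000 / CHUNK_SIZE) = 2 chunks of
// ~2.5M characters each; the helper re-splits those, so every piece here stays
// close to or under MAX_CHUNK_CHAR_COUNT.
console.log(uploadChunks.length > 2); // true
console.log(uploadChunks.map((piece) => JSON.stringify(piece).length));

As the in-code comment notes, the re-split only targets an average size, so an individual piece can still exceed the limit when document sizes vary widely within a chunk.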