[Reporting] Fix performance of CSV generation (#120309) (#120446)

* [Reporting] Fix performance of CSV generation

* use a for loop with 1 operation instead of 3 chained maps

* do without the callback

* update comment

Co-authored-by: Tim Sullivan <tsullivan@users.noreply.github.com>
This commit is contained in:
Kibana Machine 2021-12-04 18:39:01 -05:00 committed by GitHub
parent 6deaeb592c
commit b752da9c23
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -242,7 +242,7 @@ export class CsvGenerator {
/*
* Format a Datatable into rows of CSV content
*/
private generateRows(
private async generateRows(
columns: string[],
table: Datatable,
builder: MaxSizeStringBuilder,
@ -255,14 +255,38 @@ export class CsvGenerator {
break;
}
const row =
columns
.map((f) => ({ column: f, data: dataTableRow[f] }))
.map(this.formatCellValues(formatters))
.map(this.escapeValues(settings))
.join(settings.separator) + '\n';
/*
* Intrinsically, generating the rows is a synchronous process. Awaiting
* on a setImmediate call here partitions what could be a very long and
* CPU-intensive synchronous process into an asynchronous process. This
* gives NodeJS a chance to process other asynchronous events that wait on
* the Event Loop.
*
* See: https://nodejs.org/en/docs/guides/dont-block-the-event-loop/
*
* It's likely this creates a lot of context switching, and adds to the
* time it would take to generate the CSV. There are alternatives to the
* chosen performance solution:
*
* 1. Partition the synchronous process with fewer partitions, by using
* the loop counter to call setImmediate only every N amount of rows.
* Testing is required to see what the best N value for most data will
* be.
*
* 2. Use a C++ add-on to generate the CSV using the Node Worker Pool
* instead of using the Event Loop
*/
await new Promise(setImmediate);
if (!builder.tryAppend(row)) {
const rowDefinition: string[] = [];
const format = this.formatCellValues(formatters);
const escape = this.escapeValues(settings);
for (const column of columns) {
rowDefinition.push(escape(format({ column, data: dataTableRow[column] })));
}
if (!builder.tryAppend(rowDefinition.join(settings.separator) + '\n')) {
this.logger.warn(`Max Size Reached after ${this.csvRowCount} rows.`);
this.maxSizeReached = true;
if (this.cancellationToken) {
@ -377,7 +401,7 @@ export class CsvGenerator {
}
const formatters = this.getFormatters(table);
this.generateRows(columns, table, builder, formatters, settings);
await this.generateRows(columns, table, builder, formatters, settings);
// update iterator
currentRecord += table.rows.length;