[CSV/Reporting] Refinements for logging hit totals (#171811)

## Summary

This PR fixes a **logging** issue noticed when analyzing CSV Export
logs. There is an "info" log giving the total number of hits, which
includes the `total.relation` value in the message. This log message
incorrectly printed the relationship as `eq` even when the relation was
not given in the Elasticsearch response.

Correcting this log message will help us verify that our search
requests are configured to track the total hits of the query.

```
# Before
Total hits eq 12345.

# After
Received total hits: 12345. Accuracy: eq.
Received total hits: 12345. Accuracy: unknown.
```

### Other changes
* A new private method for logging the result metadata
* Syntax cleanup
* More tests

### Checklist

Delete any items that are not applicable to this PR.

- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios
This commit is contained in:
Tim Sullivan 2023-11-28 09:16:28 -07:00 committed by GitHub
parent e94a977873
commit 3f9f3e649e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 214 additions and 46 deletions

View file

@ -66,6 +66,77 @@ exports[`CsvGenerator formulas escapes formula values in a header, doesn't warn
`;
exports[`CsvGenerator keeps order of the columns during the scroll 1`] = `
Array [
Array [
"Requesting PIT for: [logstash-*]...",
],
Array [
"Opened PIT ID: oju9fs3698s3[39 bytes]",
],
Array [
"Executing search request with PIT ID: [oju9fs3698s3[39 bytes]]",
],
Array [
"Received total hits: 3. Accuracy: unknown.",
],
Array [
"Result details: {\\"rawResponse\\":{\\"took\\":1,\\"timed_out\\":false,\\"_shards\\":{\\"total\\":1,\\"successful\\":1,\\"failed\\":0,\\"skipped\\":0},\\"hits\\":{\\"total\\":3,\\"max_score\\":0},\\"pit_id\\":\\"oju9fs3698s3[39 bytes]\\"}}",
],
Array [
"Received PIT ID: [oju9fs3698s3[39 bytes]]",
],
Array [
"Received search_after: [undefined]",
],
Array [
"Building CSV header row",
],
Array [
"Building 1 CSV data rows",
],
Array [
"Executing search request with PIT ID: [oju9fs3698s3[39 bytes]]",
],
Array [
"Received total hits: 3. Accuracy: unknown.",
],
Array [
"Result details: {\\"rawResponse\\":{\\"took\\":1,\\"timed_out\\":false,\\"_shards\\":{\\"total\\":1,\\"successful\\":1,\\"failed\\":0,\\"skipped\\":0},\\"hits\\":{\\"total\\":3,\\"max_score\\":0},\\"pit_id\\":\\"oju9fs3698s3[39 bytes]\\"}}",
],
Array [
"Received PIT ID: [oju9fs3698s3[39 bytes]]",
],
Array [
"Received search_after: [undefined]",
],
Array [
"Building 1 CSV data rows",
],
Array [
"Executing search request with PIT ID: [oju9fs3698s3[39 bytes]]",
],
Array [
"Received total hits: 3. Accuracy: unknown.",
],
Array [
"Result details: {\\"rawResponse\\":{\\"took\\":1,\\"timed_out\\":false,\\"_shards\\":{\\"total\\":1,\\"successful\\":1,\\"failed\\":0,\\"skipped\\":0},\\"hits\\":{\\"total\\":3,\\"max_score\\":0},\\"pit_id\\":\\"oju9fs3698s3[39 bytes]\\"}}",
],
Array [
"Received PIT ID: [oju9fs3698s3[39 bytes]]",
],
Array [
"Received search_after: [undefined]",
],
Array [
"Building 1 CSV data rows",
],
Array [
"Closing PIT oju9fs3698s3[39 bytes]",
],
]
`;
exports[`CsvGenerator keeps order of the columns during the scroll 2`] = `
"\\"_id\\",\\"_index\\",\\"_score\\",a,b
\\"'-\\",\\"'-\\",\\"'-\\",a1,b1
\\"'-\\",\\"'-\\",\\"'-\\",\\"'-\\",b2

View file

@ -393,6 +393,8 @@ describe('CsvGenerator', () => {
})
);
const debugLogSpy = jest.spyOn(mockLogger, 'debug');
const generateCsv = new CsvGenerator(
createMockJob({ searchSource: {}, columns: [] }),
mockConfig,
@ -411,6 +413,8 @@ describe('CsvGenerator', () => {
);
await generateCsv.generateData();
expect(debugLogSpy.mock.calls).toMatchSnapshot();
expect(content).toMatchSnapshot();
});
@ -896,6 +900,82 @@ describe('CsvGenerator', () => {
`);
});
describe('debug logging', () => {
// Verifies that the "Accuracy" portion of the total-hits log line echoes the
// `relation` value when Elasticsearch returns a tracked total
// (`total: { relation, value }`).
it('logs the total hits relation if relation is provided', async () => {
  mockDataClient.search = jest.fn().mockImplementation(() =>
    Rx.of({
      rawResponse: {
        took: 1,
        timed_out: false,
        pit_id: mockPitId,
        _shards: { total: 1, successful: 1, failed: 0, skipped: 0 },
        // tracked total: ES explicitly reports the relation ('eq')
        hits: { hits: [], total: { relation: 'eq', value: 12345 }, max_score: 0 },
      },
    })
  );
  const debugLogSpy = jest.spyOn(mockLogger, 'debug');
  const generateCsv = new CsvGenerator(
    createMockJob({ columns: ['date', 'ip', 'message'] }),
    mockConfig,
    {
      es: mockEsClient,
      data: mockDataClient,
      uiSettings: uiSettingsClient,
    },
    {
      searchSourceStart: mockSearchSourceService,
      fieldFormatsRegistry: mockFieldFormatsRegistry,
    },
    new CancellationToken(),
    mockLogger,
    stream
  );
  await generateCsv.generateData();
  // the relation from the response must appear verbatim in the log line
  expect(debugLogSpy).toHaveBeenCalledWith('Received total hits: 12345. Accuracy: eq.');
});
// Verifies that the "Accuracy" portion of the total-hits log line falls back
// to "unknown" when Elasticsearch returns a plain numeric total, i.e. no
// `relation` is present in the response.
it('logs the total hits relation as "unknown" if relation is not provided', async () => {
  mockDataClient.search = jest.fn().mockImplementation(() =>
    Rx.of({
      rawResponse: {
        took: 1,
        timed_out: false,
        pit_id: mockPitId,
        _shards: { total: 1, successful: 1, failed: 0, skipped: 0 },
        // untracked total: a bare number, with no relation reported by ES
        hits: { hits: [], total: 12345, max_score: 0 },
      },
    })
  );
  const debugLogSpy = jest.spyOn(mockLogger, 'debug');
  const generateCsv = new CsvGenerator(
    createMockJob({ columns: ['date', 'ip', 'message'] }),
    mockConfig,
    {
      es: mockEsClient,
      data: mockDataClient,
      uiSettings: uiSettingsClient,
    },
    {
      searchSourceStart: mockSearchSourceService,
      fieldFormatsRegistry: mockFieldFormatsRegistry,
    },
    new CancellationToken(),
    mockLogger,
    stream
  );
  await generateCsv.generateData();
  // must NOT claim "eq" accuracy when ES never reported a relation
  expect(debugLogSpy).toHaveBeenCalledWith('Received total hits: 12345. Accuracy: unknown.');
});
});
it('will return partial data if the scroll or search fails', async () => {
mockDataClient.search = jest.fn().mockImplementation(() => {
throw new esErrors.ResponseError({

View file

@ -11,7 +11,11 @@ import type { Writable } from 'stream';
import { errors as esErrors, estypes } from '@elastic/elasticsearch';
import type { IScopedClusterClient, IUiSettingsClient, Logger } from '@kbn/core/server';
import type { ISearchSource, ISearchStartSearchSource } from '@kbn/data-plugin/common';
import type {
IKibanaSearchResponse,
ISearchSource,
ISearchStartSearchSource,
} from '@kbn/data-plugin/common';
import { ES_SEARCH_STRATEGY, cellHasFormulas, tabifyDocs } from '@kbn/data-plugin/common';
import type { IScopedSearchClient } from '@kbn/data-plugin/server';
import type { Datatable } from '@kbn/expressions-plugin/server';
@ -94,6 +98,38 @@ export class CsvGenerator {
return pitId;
}
/**
 * Logs a summary of a single search response: the total hit count together
 * with its accuracy (the `relation` reported by ES, or "unknown" when hit
 * tracking did not report one), followed by the response metadata with the
 * hit documents and the full PIT ID stripped out, since both can be very
 * large.
 *
 * @param clientDetails Details from the data.search client (everything except `rawResponse`)
 * @param results Raw response data from ES
 */
private logResults(
  clientDetails: Omit<IKibanaSearchResponse<unknown>, 'rawResponse'>,
  results: estypes.SearchResponse<unknown>
) {
  const { hits: resultsHits, ...headerWithPit } = results;
  const { hits, ...hitsMeta } = resultsHits;
  // `total` is either a plain number (hit tracking off) or an object with
  // { value, relation }. When no relation was reported, log "unknown"
  // instead of implying an exact ("eq") count.
  const trackedTotal = resultsHits.total as estypes.SearchTotalHits;
  const currentTotal = trackedTotal?.value ?? resultsHits.total;
  const totalAccuracy = trackedTotal?.relation ?? 'unknown';
  this.logger.debug(`Received total hits: ${currentTotal}. Accuracy: ${totalAccuracy}.`);

  // reconstruct the data.search response (w/out the data) for logging
  const { pit_id: newPitId, ...header } = headerWithPit;
  const logInfo = {
    ...clientDetails,
    rawResponse: {
      ...header,
      hits: hitsMeta,
      // abbreviated — the full PIT ID could be megabytes long
      pit_id: this.formatPit(newPitId),
    },
  };
  this.logger.debug(`Result details: ${JSON.stringify(logInfo)}`);

  // log the (abbreviated) PIT ID received in this response
  this.logger.debug(`Received PIT ID: [${this.formatPit(results.pit_id)}]`);
}
private async doSearch(
searchSource: ISearchSource,
settings: CsvExportSettings,
@ -117,25 +153,20 @@ export class CsvGenerator {
throw new Error('Could not retrieve the search body!');
}
const searchParams = {
params: {
body: searchBody,
},
};
let results: estypes.SearchResponse<unknown> | undefined;
const searchParams = { params: { body: searchBody } };
let results: estypes.SearchResponse<unknown>;
try {
results = (
await lastValueFrom(
this.clients.data.search(searchParams, {
strategy: ES_SEARCH_STRATEGY,
transport: {
maxRetries: 0, // retrying reporting jobs is handled in the task manager scheduling logic
requestTimeout: scrollSettings.duration,
},
})
)
).rawResponse;
const { rawResponse, ...rawDetails } = await lastValueFrom(
this.clients.data.search(searchParams, {
strategy: ES_SEARCH_STRATEGY,
transport: {
maxRetries: 0, // retrying reporting jobs is handled in the task manager scheduling logic
requestTimeout: settings.scroll.duration,
},
})
);
results = rawResponse;
this.logResults(rawDetails, rawResponse);
} catch (err) {
this.logger.error(`CSV export search error: ${err}`);
throw err;
@ -327,7 +358,6 @@ export class CsvGenerator {
let first = true;
let currentRecord = -1;
let totalRecords: number | undefined;
let totalRelation = 'eq';
let searchAfter: estypes.SortResults | undefined;
let pitId = await this.openPointInTime(indexPatternTitle, settings);
@ -360,47 +390,31 @@ export class CsvGenerator {
searchSource.setField('pit', { id: pitId, keep_alive: settings.scroll.duration });
const results = await this.doSearch(searchSource, settings, searchAfter);
const { hits } = results;
if (first && hits.total != null) {
if (typeof hits.total === 'number') {
totalRecords = hits.total;
} else {
totalRecords = hits.total?.value;
totalRelation = hits.total?.relation ?? 'unknown';
}
this.logger.info(`Total hits ${totalRelation} ${totalRecords}.`);
}
if (!results) {
this.logger.warn(`Search results are undefined!`);
break;
}
const {
hits: { hits: _hits, ...hitsMeta },
...headerWithPit
} = results;
const { hits: resultsHits } = results;
const { hits, total } = resultsHits;
const trackedTotal = total as estypes.SearchTotalHits;
const currentTotal = trackedTotal?.value ?? total;
const { pit_id: newPitId, ...header } = headerWithPit;
const logInfo = {
header: { pit_id: `${this.formatPit(newPitId)}`, ...header },
hitsMeta,
};
this.logger.debug(`Results metadata: ${JSON.stringify(logInfo)}`);
if (first) {
// export stops when totalRecords have been accumulated (or the results have run out)
totalRecords = currentTotal;
}
// use the most recently received id for the next search request
this.logger.debug(`Received PIT ID: [${this.formatPit(results.pit_id)}]`);
pitId = results.pit_id ?? pitId;
// Update last sort results for next query. PIT is used, so the sort results
// automatically include _shard_doc as a tiebreaker
searchAfter = hits.hits[hits.hits.length - 1]?.sort as estypes.SortResults | undefined;
searchAfter = hits[hits.length - 1]?.sort as estypes.SortResults | undefined;
this.logger.debug(`Received search_after: [${searchAfter}]`);
// check for shard failures, log them and add a warning if found
const { _shards: shards } = header;
const { _shards: shards } = results;
if (shards.failures) {
shards.failures.forEach(({ reason }) => {
warnings.push(`Shard failure: ${JSON.stringify(reason)}`);
@ -499,6 +513,9 @@ export class CsvGenerator {
};
}
/**
 * Abbreviates a point-in-time (PIT) ID for logging: a full PIT ID could be
 * megabytes long, so only the first 12 characters are kept, suffixed with
 * the ID's total size in bytes (e.g. "oju9fs3698s3[39 bytes]").
 *
 * NOTE(review): when `pitId` is undefined, `pitId?.substring(...)` yields
 * undefined and the concatenation produces "undefined[0 bytes]" — confirm
 * this is the intended log output for the missing-PIT case.
 */
private formatPit(pitId: string | undefined) {
const byteSize = pitId ? Buffer.byteLength(pitId, 'utf-8') : 0;
return pitId?.substring(0, 12) + `[${byteSize} bytes]`;