[Profiling] Improve performance when decoding stacktraces (#143084)

* Derive address and file ID from base64 encoding

This skips the intermediate deserialization step to a buffer object.

* Move run-length encoding methods

* Decode run-length directly from base64 encoding

This skips the intermediate deserialization step to a buffer object.

* Minor refactor

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
Joseph Crail 2022-10-24 16:24:31 -07:00 committed by GitHub
parent 21c7f5e074
commit a4b20e6e89
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 440 additions and 191 deletions

View file

@ -6,12 +6,8 @@
*/
import { createStackFrameID, StackTrace } from '../../common/profiling';
import {
decodeStackTrace,
EncodedStackTrace,
runLengthDecode,
runLengthEncode,
} from './stacktrace';
import { runLengthEncode } from '../../common/run_length_encoding';
import { decodeStackTrace, EncodedStackTrace } from './stacktrace';
enum fileID {
A = 'aQpJmTLWydNvOapSFZOwKg',
@ -89,100 +85,4 @@ describe('Stack trace operations', () => {
expect(decodeStackTrace(t.original)).toEqual(t.expected);
}
});
// Encoding a list and decoding the result must give back the same list.
test('run length is fully reversible', () => {
  const inputs: number[][] = [[], [0], [0, 1, 2, 3], [0, 1, 1, 2, 2, 2, 3, 3, 3, 3]];

  inputs.forEach((numbers) => {
    const roundTripped = runLengthDecode(runLengthEncode(numbers));
    expect(roundTripped).toEqual(numbers);
  });
});
// Decoding with an explicit output size equal to the decoded length.
test('runLengthDecode with optional parameter', () => {
  // Each case is [encoded bytes, expected decoded values].
  const cases: Array<[Buffer, number[]]> = [
    [Buffer.from([0x5, 0x0, 0x2, 0x2]), [0, 0, 0, 0, 0, 2, 2]],
    [Buffer.from([0x1, 0x8]), [8]],
  ];

  cases.forEach(([bytes, expected]) => {
    const decoded = runLengthDecode(bytes, expected.length);
    expect(decoded).toEqual(expected);
  });
});
// Requesting twice as many elements as the input encodes: the expected result
// is the decoded values followed by the same number of zeroes.
test('runLengthDecode with larger output than available input', () => {
  const decoded = [0, 0, 0, 0, 0, 2, 2];
  const padding: number[] = Array(decoded.length).fill(0);
  const expected = [...decoded, ...padding];

  const encoded = Buffer.from([0x5, 0x0, 0x2, 0x2]);
  expect(runLengthDecode(encoded, expected.length)).toEqual(expected);
});
// Decoding without an explicit output size.
test('runLengthDecode without optional parameter', () => {
  // Each case is [encoded bytes, expected decoded values].
  const cases: Array<[Buffer, number[]]> = [
    [Buffer.from([0x5, 0x0, 0x2, 0x2]), [0, 0, 0, 0, 0, 2, 2]],
    [Buffer.from([0x1, 0x8]), [8]],
  ];

  cases.forEach(([bytes, expected]) => {
    const decoded = runLengthDecode(bytes);
    expect(decoded).toEqual(expected);
  });
});
// Runs at the single-byte count limit (255) and a 256-element run that is
// encoded as two pairs.
test('runLengthDecode works for very long runs', () => {
  const zeroes255: number[] = Array(255).fill(0);
  const twos256: number[] = Array(256).fill(2);

  // Each case is [encoded bytes, expected decoded values].
  const cases: Array<[Buffer, number[]]> = [
    [Buffer.from([0x5, 0x2, 0xff, 0x0]), [2, 2, 2, 2, 2, ...zeroes255]],
    [Buffer.from([0xff, 0x2, 0x1, 0x2]), twos256],
  ];

  cases.forEach(([bytes, expected]) => {
    expect(runLengthDecode(bytes)).toEqual(expected);
  });
});
// A 255-element run fits in one pair; a 256-element run is expected to be
// split into a 255 pair followed by a 1 pair.
test('runLengthEncode works for very long runs', () => {
  const zeroes255: number[] = Array(255).fill(0);
  const twos256: number[] = Array(256).fill(2);

  // Each case is [input numbers, expected encoded bytes].
  const cases: Array<[number[], Buffer]> = [
    [[2, 2, 2, 2, 2, ...zeroes255], Buffer.from([0x5, 0x2, 0xff, 0x0])],
    [twos256, Buffer.from([0xff, 0x2, 0x1, 0x2])],
  ];

  cases.forEach(([numbers, expected]) => {
    expect(runLengthEncode(numbers)).toEqual(expected);
  });
});
});

View file

@ -21,11 +21,14 @@ import {
emptyStackFrame,
Executable,
FileID,
getAddressFromStackFrameID,
getFileIDFromStackFrameID,
StackFrame,
StackFrameID,
StackTrace,
StackTraceID,
} from '../../common/profiling';
import { runLengthDecodeBase64Url } from '../../common/run_length_encoding';
import { ProfilingESClient } from '../utils/create_profiling_es_client';
import { withProfilingSpan } from '../utils/with_profiling_span';
import { DownsampledEventsIndex } from './downsampling';
@ -52,87 +55,6 @@ export type EncodedStackTrace = DedotObject<{
[ProfilingESField.StacktraceFrameTypes]: string;
}>;
// runLengthEncode compresses a list of uint8s with run-length encoding.
//
// Each run of identical consecutive values becomes a 2-byte pair in the
// output: the first byte is the run length and the second byte is the
// repeated value. Pairs are emitted in the order the runs appear in the
// input. A run length is stored in a single byte, so runs longer than 255
// are split into several pairs.
//
// E.g. the input [0, 0, 0, 0, 0, 2, 2, 2] yields the bytes [5, 0, 3, 2].
// An empty input yields an empty buffer.
export function runLengthEncode(input: number[]): Buffer {
  const pairs: number[] = [];

  let runStart = 0;
  while (runStart < input.length) {
    const value = input[runStart];

    // Extend the run while the value repeats and the count still fits in a byte.
    let runEnd = runStart + 1;
    while (runEnd < input.length && runEnd - runStart < 255 && input[runEnd] === value) {
      runEnd++;
    }

    pairs.push(runEnd - runStart, value);
    runStart = runEnd;
  }

  return Buffer.from(pairs);
}
// runLengthDecode decodes a run-length encoding for the input array.
//
// The input is a binary stream of 2-byte pairs (first byte is the length and the
// second byte is the value to repeat). The output is a list of uint8s.
//
// E.g. byte array [5, 0, 3, 2] is converted into an uint8 array like
// [0, 0, 0, 0, 0, 2, 2, 2].
//
// Parameters:
//   input      - the encoded (length, value) pairs. A trailing length byte that
//                has no matching value byte is ignored.
//   outputSize - optional expected number of decoded elements. When omitted,
//                it is computed as the sum of all run lengths.
//
// Returns the decoded values. If fewer than outputSize values are decoded, the
// remainder is filled with zeroes. If the runs expand to more than outputSize
// values, all of them are still returned, so the result can be longer than
// outputSize.
export function runLengthDecode(input: Buffer, outputSize?: number): number[] {
  // Only complete (length, value) pairs can be decoded. Without this bound, an
  // odd-length input would read past the end of the buffer and write
  // `undefined` entries into the output.
  const pairedLength = input.length - (input.length % 2);

  let size: number;
  if (outputSize === undefined) {
    size = 0;
    for (let i = 0; i < pairedLength; i += 2) {
      size += input[i];
    }
  } else {
    size = outputSize;
  }

  const output: number[] = new Array(size);

  let idx = 0;
  for (let i = 0; i < pairedLength; i += 2) {
    const count = input[i];
    const value = input[i + 1];
    for (let j = 0; j < count; j++) {
      output[idx] = value;
      idx++;
    }
  }

  // Due to truncation of the frame types for stacktraces longer than 255,
  // the expected output size and the actual decoded size can be different.
  // Ordinarily, these two values should be the same.
  //
  // We have decided to fill in the remainder of the output array with zeroes
  // as a reasonable default. Without this step, the output array would have
  // undefined values.
  for (let i = idx; i < size; i++) {
    output[i] = 0;
  }

  return output;
}
// decodeStackTrace unpacks an encoded stack trace from Elasticsearch
export function decodeStackTrace(input: EncodedStackTrace): StackTrace {
const inputFrameIDs = input.Stacktrace.frame.ids;
@ -152,19 +74,15 @@ export function decodeStackTrace(input: EncodedStackTrace): StackTrace {
// However, since the file ID is base64-encoded using 21.33 bytes
// (16 * 4 / 3), then the 22 bytes have an extra 4 bits from the
// address (see diagram in definition of EncodedStackTrace).
for (let i = 0; i < countsFrameIDs; i++) {
const pos = i * BASE64_FRAME_ID_LENGTH;
for (let i = 0, pos = 0; i < countsFrameIDs; i++, pos += BASE64_FRAME_ID_LENGTH) {
const frameID = inputFrameIDs.slice(pos, pos + BASE64_FRAME_ID_LENGTH);
const buf = Buffer.from(frameID, 'base64url');
fileIDs[i] = buf.toString('base64url', 0, 16);
addressOrLines[i] = Number(buf.readBigUInt64BE(16));
frameIDs[i] = frameID;
fileIDs[i] = getFileIDFromStackFrameID(frameID);
addressOrLines[i] = getAddressFromStackFrameID(frameID);
}
// Step 2: Convert the run-length byte encoding into a list of uint8s.
const types = Buffer.from(inputFrameTypes, 'base64url');
const typeIDs = runLengthDecode(types, countsFrameIDs);
const typeIDs = runLengthDecodeBase64Url(inputFrameTypes, inputFrameTypes.length, countsFrameIDs);
return {
AddressOrLines: addressOrLines,