[Profiling] Improve performance when decoding stacktraces (#143084)

* Derive address and file ID from base64 encoding

This skips the intermediate deserialization step to a buffer object.

* Move run-length encoding methods

* Decode run-length directly from base64 encoding

This skips the intermediate deserialization step to a buffer object.

* Minor refactor

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
Joseph Crail 2022-10-24 16:24:31 -07:00 committed by GitHub
parent 21c7f5e074
commit a4b20e6e89
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 440 additions and 191 deletions

View file

@ -0,0 +1,22 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
// Lookup table from a 7-bit ASCII code to the 6-bit value that character has
// in the URL-safe base64 alphabet (A-Z, a-z, 0-9, '-', '_'). Every code that
// is not part of the alphabet maps to 0. The 128 entries are laid out as
// eight rows of sixteen, one row per high nibble of the ASCII code.
// prettier-ignore
export const safeBase64Decoder = [
  // 0x00-0x0f
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // 0x10-0x1f
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  // 0x20-0x2f: '-' (0x2d) is 62
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0,
  // 0x30-0x3f: '0'-'9' are 52-61
  52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 0, 0, 0, 0, 0,
  // 0x40-0x4f: 'A'-'O' are 0-14
  0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
  // 0x50-0x5f: 'P'-'Z' are 15-25, '_' (0x5f) is 63
  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 63,
  // 0x60-0x6f: 'a'-'o' are 26-40
  0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
  // 0x70-0x7f: 'p'-'z' are 41-51
  41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 0, 0, 0, 0,
];
// URL-safe base64 alphabet: the character at index i encodes the 6-bit
// value i. The string must contain exactly 64 distinct characters so that
// every 6-bit value has exactly one character.
export const safeBase64Encoder =
  'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_';
/* eslint no-bitwise: ["error", { "allow": ["&"] }] */
/**
 * Looks up the base64url value of one character of `input`.
 *
 * The UTF-16 code unit at position `i` is reduced to its low seven bits so
 * that it always falls inside the 128-entry decoder table; the table entry
 * for that code is returned.
 *
 * @param input base64url-encoded string
 * @param i index of the character to decode
 * @returns the table entry for the character at `i`
 */
export function charCodeAt(input: string, i: number): number {
  const asciiCode = input.charCodeAt(i) & 0x7f;
  return safeBase64Decoder[asciiCode];
}

View file

@ -6,12 +6,23 @@
*/
import {
createStackFrameID,
createStackFrameMetadata,
FrameType,
getAddressFromStackFrameID,
getCalleeFunction,
getCalleeSource,
getFileIDFromStackFrameID,
} from './profiling';
describe('Stack frame operations', () => {
  // A frame ID built from a file ID and an address must yield both values
  // again when it is taken apart.
  test('decode stack frame ID', () => {
    const fileID = 'ABCDEFGHIJKLMNOPQRSTUw';
    const address = 123456789;
    const encoded = createStackFrameID(fileID, address);

    expect(getAddressFromStackFrameID(encoded)).toEqual(address);
    expect(getFileIDFromStackFrameID(encoded)).toEqual(fileID);
  });
});
describe('Stack frame metadata operations', () => {
test('metadata has executable and function names', () => {
const metadata = createStackFrameMetadata({

View file

@ -5,6 +5,8 @@
* 2.0.
*/
import { charCodeAt, safeBase64Encoder } from './base64';
export type StackTraceID = string;
export type StackFrameID = string;
export type FileID = string;
@ -16,6 +18,37 @@ export function createStackFrameID(fileID: FileID, addressOrLine: number): Stack
return buf.toString('base64url');
}
/* eslint no-bitwise: ["error", { "allow": ["&"] }] */
/**
 * Extracts the base64url-encoded file ID from a base64url-encoded frame ID.
 *
 * The first 21 characters of the frame ID belong entirely to the file ID.
 * The character at index 21 is shared: the upper two bits of its 6-bit value
 * belong to the file ID and the lower four bits belong to the address.
 *
 * @param frameID base64url-encoded stack frame ID
 * @returns the 22-character base64url-encoded file ID
 */
export function getFileIDFromStackFrameID(frameID: StackFrameID): FileID {
  // The mask must be applied to the decoded 6-bit value, not to the raw
  // character code: the raw code only happens to give the right answer when
  // the four address bits in this character are all zero.
  const fileBits = charCodeAt(frameID, 21) & 0x30;
  return frameID.slice(0, 21) + safeBase64Encoder[fileBits];
}
/* eslint no-bitwise: ["error", { "allow": ["<<=", "&"] }] */
/**
 * Extracts the address (or line number) from a base64url-encoded frame ID.
 *
 * The value is assembled from the lower four bits of the character at index
 * 21 followed by the ten characters at indices 22 through 31, six bits each,
 * most significant first (4 + 10 * 6 = 64 bits).
 *
 * The result is a JavaScript number, so values above
 * Number.MAX_SAFE_INTEGER are only approximate.
 *
 * @param frameID base64url-encoded stack frame ID
 * @returns the decoded address or line number
 */
export function getAddressFromStackFrameID(frameID: StackFrameID): number {
  let address = charCodeAt(frameID, 21) & 0xf;
  for (let i = 22; i < 32; i++) {
    // Multiply rather than shift: bitwise shifts convert their operand to a
    // 32-bit signed integer, which would wrap any address of 2^31 or more.
    address = address * 64 + charCodeAt(frameID, i);
  }
  return address;
}
export enum FrameType {
Unsymbolized = 0,
Python,

View file

@ -0,0 +1,166 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { runLengthDecode, runLengthDecodeBase64Url, runLengthEncode } from './run_length_encoding';
describe('Run-length encoding operations', () => {
  // A run-length encoded buffer together with the sequence it decodes to.
  interface BufferCase {
    bytes: Buffer;
    expected: number[];
  }

  // A base64url string together with the sequence it decodes to.
  interface Base64Case {
    data: string;
    expected: number[];
  }

  // Returns an array holding `value` repeated `times` times.
  const repeat = (value: number, times: number): number[] => Array(times).fill(value);

  test('run length is fully reversible', () => {
    const samples: number[][] = [[], [0], [0, 1, 2, 3], [0, 1, 1, 2, 2, 2, 3, 3, 3, 3]];

    samples.forEach((sample) => {
      const encoded = runLengthEncode(sample);
      expect(runLengthDecode(encoded)).toEqual(sample);
    });
  });

  test('runLengthDecode with optional parameter', () => {
    const cases: BufferCase[] = [
      { bytes: Buffer.from([0x5, 0x0, 0x2, 0x2]), expected: [0, 0, 0, 0, 0, 2, 2] },
      { bytes: Buffer.from([0x1, 0x8]), expected: [8] },
    ];

    cases.forEach(({ bytes, expected }) => {
      expect(runLengthDecode(bytes, expected.length)).toEqual(expected);
    });
  });

  test('runLengthDecode with larger output than available input', () => {
    const encoded = Buffer.from([0x5, 0x0, 0x2, 0x2]);
    const payload = [0, 0, 0, 0, 0, 2, 2];
    // Ask for twice as many entries as are encoded; the tail must be zeroes.
    const padded = [...payload, ...repeat(0, payload.length)];

    expect(runLengthDecode(encoded, padded.length)).toEqual(padded);
  });

  test('runLengthDecode without optional parameter', () => {
    const cases: BufferCase[] = [
      { bytes: Buffer.from([0x5, 0x0, 0x2, 0x2]), expected: [0, 0, 0, 0, 0, 2, 2] },
      { bytes: Buffer.from([0x1, 0x8]), expected: [8] },
    ];

    cases.forEach(({ bytes, expected }) => {
      expect(runLengthDecode(bytes)).toEqual(expected);
    });
  });

  test('runLengthDecode works for very long runs', () => {
    const cases: BufferCase[] = [
      {
        bytes: Buffer.from([0x5, 0x2, 0xff, 0x0]),
        expected: [...repeat(2, 5), ...repeat(0, 255)],
      },
      {
        bytes: Buffer.from([0xff, 0x2, 0x1, 0x2]),
        expected: repeat(2, 256),
      },
    ];

    cases.forEach(({ bytes, expected }) => {
      expect(runLengthDecode(bytes)).toEqual(expected);
    });
  });

  test('runLengthEncode works for very long runs', () => {
    const cases: Array<{ numbers: number[]; expected: Buffer }> = [
      {
        numbers: [...repeat(2, 5), ...repeat(0, 255)],
        expected: Buffer.from([0x5, 0x2, 0xff, 0x0]),
      },
      {
        numbers: repeat(2, 256),
        expected: Buffer.from([0xff, 0x2, 0x1, 0x2]),
      },
    ];

    cases.forEach(({ numbers, expected }) => {
      expect(runLengthEncode(numbers)).toEqual(expected);
    });
  });

  test('runLengthDecodeBase64Url', () => {
    const cases: Base64Case[] = [
      { data: 'CQM', expected: repeat(3, 9) },
      { data: 'EgMHBA', expected: [...repeat(3, 18), ...repeat(4, 7)] },
      {
        data: 'CAMfBQIDEAQ',
        expected: [...repeat(3, 8), ...repeat(5, 31), 3, 3, ...repeat(4, 16)],
      },
    ];

    cases.forEach(({ data, expected }) => {
      const decoded = runLengthDecodeBase64Url(data, data.length, expected.length);
      expect(decoded).toEqual(expected);
    });
  });

  test('runLengthDecodeBase64Url with larger output than available input', () => {
    const encoded = Buffer.from([0x5, 0x0, 0x3, 0x2]).toString('base64url');
    const payload = [0, 0, 0, 0, 0, 2, 2, 2];
    // Ask for twice as many entries as are encoded; the tail must be zeroes.
    const padded = [...payload, ...repeat(0, payload.length)];

    expect(runLengthDecodeBase64Url(encoded, encoded.length, padded.length)).toEqual(padded);
  });

  test('runLengthDecodeBase64Url works for very long runs', () => {
    const cases: Base64Case[] = [
      {
        data: Buffer.from([0x5, 0x2, 0xff, 0x0]).toString('base64url'),
        expected: [...repeat(2, 5), ...repeat(0, 255)],
      },
      {
        data: Buffer.from([0xff, 0x2, 0x1, 0x2]).toString('base64url'),
        expected: repeat(2, 256),
      },
    ];

    cases.forEach(({ data, expected }) => {
      const decoded = runLengthDecodeBase64Url(data, data.length, expected.length);
      expect(decoded).toEqual(expected);
    });
  });
});

View file

@ -0,0 +1,199 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { charCodeAt } from './base64';
// runLengthEncode compresses a list of numbers with run-length encoding.
//
// The input is expected to be a list of uint8s. The output is a buffer of
// 2-byte pairs: the first byte of each pair is the length of a run and the
// second byte is the value repeated in that run. A run is at most 255
// entries long; longer runs are split into several pairs.
//
// E.g. [0, 0, 0, 0, 0, 2, 2, 2] becomes the byte array [5, 0, 3, 2].
export function runLengthEncode(input: number[]): Buffer {
  const pairs: number[] = [];
  if (input.length === 0) {
    return Buffer.from(pairs);
  }

  let runValue = input[0];
  let runLength = 1;
  let pos = 1;

  while (pos < input.length) {
    const candidate = input[pos];
    pos += 1;

    const extendsRun = candidate === runValue && runLength < 255;
    if (extendsRun) {
      runLength += 1;
    } else {
      // The run ended (or hit the one-byte limit): emit it and start anew.
      pairs.push(runLength, runValue);
      runValue = candidate;
      runLength = 1;
    }
  }

  // Emit the run that was still open when the input ended.
  pairs.push(runLength, runValue);
  return Buffer.from(pairs);
}
// copyNumber stores `value` in every slot of `target` from index `offset` up
// to, but not including, index `end`. Nothing is written when `end` is not
// greater than `offset`.
function copyNumber(target: number[], value: number, offset: number, end: number) {
  let cursor = offset;
  while (cursor < end) {
    target[cursor] = value;
    cursor += 1;
  }
}
// runLengthDecode expands a run-length encoded buffer into a list of numbers.
//
// The input is a buffer of 2-byte pairs: the first byte of each pair is the
// length of a run and the second byte is the value repeated in that run.
// When outputSize is omitted, the output size is the sum of all run lengths.
//
// E.g. the byte array [5, 0, 3, 2] becomes [0, 0, 0, 0, 0, 2, 2, 2].
export function runLengthDecode(input: Buffer, outputSize?: number): number[] {
  let capacity: number;
  if (outputSize !== undefined) {
    capacity = outputSize;
  } else {
    capacity = 0;
    for (let pair = 0; pair < input.length; pair += 2) {
      capacity += input[pair];
    }
  }

  const decoded: number[] = new Array(capacity);
  let written = 0;

  for (let pair = 0; pair < input.length; pair += 2) {
    const runLength = input[pair];
    const runValue = input[pair + 1];
    copyNumber(decoded, runValue, written, written + runLength);
    written += runLength;
  }

  // Frame types are truncated for stacktraces longer than 255, so the
  // requested size can exceed what was actually decoded. Normally the two
  // are equal.
  //
  // The rest of the output is filled with zeroes as a reasonable default;
  // otherwise those slots would be left undefined.
  copyNumber(decoded, 0, written, capacity);

  return decoded;
}
// runLengthDecodeBase64Url decodes a run-length encoding for the
// base64-encoded input string.
//
// The input is a base64url-encoded string of 2-byte pairs (run length, then
// value). `size` is the number of characters of `input` to decode and
// `capacity` is the expected number of decoded entries. The output is a list
// of uint8s.
//
// E.g. string 'BQADAg' is converted into an uint8 array like
// [0, 0, 0, 0, 0, 2, 2, 2].
//
// The motivating intent for this method is to unpack a base64-encoded
// run-length encoding without using intermediate storage.
//
// This method relies on these assumptions and details:
// - array encoded using run-length and base64 always returns string of length
//   0, 3, or 6 (mod 8)
// - since original array is composed of uint8s, we ignore Unicode codepoints
// - JavaScript bitwise operators operate on 32-bits so decoding must be done
//   in 32-bit chunks
//
// Each group of 8 characters carries 48 bits, i.e. three (length, value)
// pairs: the first two pairs are decoded from a 32-bit chunk and the third
// from the remaining 16 bits.
/* eslint no-bitwise: ["error", { "allow": ["<<", ">>", ">>=", "&", "|"] }] */
export function runLengthDecodeBase64Url(input: string, size: number, capacity: number): number[] {
  const output = new Array<number>(capacity);
  const remainder = size % 8;
  // Character index just past the last complete 8-character group. The loop
  // below walks character indices, so its bound must be an index as well and
  // not the number of groups.
  const fullGroupsEnd = size - remainder;

  let n = 0;
  let count = 0;
  let value = 0;
  let i = 0;
  let j = 0;

  for (i = 0; i < fullGroupsEnd; i += 8) {
    // First 32 bits of the group: two (length, value) pairs.
    n =
      (charCodeAt(input, i) << 26) |
      (charCodeAt(input, i + 1) << 20) |
      (charCodeAt(input, i + 2) << 14) |
      (charCodeAt(input, i + 3) << 8) |
      (charCodeAt(input, i + 4) << 2) |
      (charCodeAt(input, i + 5) >> 4);

    count = (n >> 24) & 0xff;
    value = (n >> 16) & 0xff;
    copyNumber(output, value, j, j + count);
    j += count;

    count = (n >> 8) & 0xff;
    value = n & 0xff;
    copyNumber(output, value, j, j + count);
    j += count;

    // Last 16 bits of the group: one (length, value) pair.
    n =
      ((charCodeAt(input, i + 5) & 0xf) << 12) |
      (charCodeAt(input, i + 6) << 6) |
      charCodeAt(input, i + 7);

    count = (n >> 8) & 0xff;
    value = n & 0xff;
    copyNumber(output, value, j, j + count);
    j += count;
  }

  // After the loop, i is the index of the first character of the tail.
  if (remainder === 6) {
    // Six characters carry 36 bits: two pairs plus four padding bits, which
    // are dropped by taking only the top two bits of the last character.
    n =
      (charCodeAt(input, i) << 26) |
      (charCodeAt(input, i + 1) << 20) |
      (charCodeAt(input, i + 2) << 14) |
      (charCodeAt(input, i + 3) << 8) |
      (charCodeAt(input, i + 4) << 2) |
      (charCodeAt(input, i + 5) >> 4);

    count = (n >> 24) & 0xff;
    value = (n >> 16) & 0xff;
    copyNumber(output, value, j, j + count);
    j += count;

    count = (n >> 8) & 0xff;
    value = n & 0xff;
    copyNumber(output, value, j, j + count);
    j += count;
  } else if (remainder === 3) {
    // Three characters carry 18 bits: one pair plus two padding bits.
    n = (charCodeAt(input, i) << 12) | (charCodeAt(input, i + 1) << 6) | charCodeAt(input, i + 2);
    n >>= 2;

    count = (n >> 8) & 0xff;
    value = n & 0xff;
    copyNumber(output, value, j, j + count);
    j += count;
  }

  // Due to truncation of the frame types for stacktraces longer than 255,
  // the expected output size and the actual decoded size can be different.
  // Ordinarily, these two values should be the same.
  //
  // We have decided to fill in the remainder of the output array with zeroes
  // as a reasonable default. Without this step, the output array would have
  // undefined values.
  copyNumber(output, 0, j, capacity);

  return output;
}

View file

@ -6,12 +6,8 @@
*/
import { createStackFrameID, StackTrace } from '../../common/profiling';
import {
decodeStackTrace,
EncodedStackTrace,
runLengthDecode,
runLengthEncode,
} from './stacktrace';
import { runLengthEncode } from '../../common/run_length_encoding';
import { decodeStackTrace, EncodedStackTrace } from './stacktrace';
enum fileID {
A = 'aQpJmTLWydNvOapSFZOwKg',
@ -89,100 +85,4 @@ describe('Stack trace operations', () => {
expect(decodeStackTrace(t.original)).toEqual(t.expected);
}
});
test('run length is fully reversible', () => {
  // Encoding and then decoding must give back the original sequence.
  const samples: number[][] = [[], [0], [0, 1, 2, 3], [0, 1, 1, 2, 2, 2, 3, 3, 3, 3]];

  samples.forEach((sample) => {
    const encoded = runLengthEncode(sample);
    expect(runLengthDecode(encoded)).toEqual(sample);
  });
});
test('runLengthDecode with optional parameter', () => {
  // Each case passes the exact decoded length as the explicit output size.
  const cases: Array<[Buffer, number[]]> = [
    [Buffer.from([0x5, 0x0, 0x2, 0x2]), [0, 0, 0, 0, 0, 2, 2]],
    [Buffer.from([0x1, 0x8]), [8]],
  ];

  cases.forEach(([bytes, expected]) => {
    expect(runLengthDecode(bytes, expected.length)).toEqual(expected);
  });
});
test('runLengthDecode with larger output than available input', () => {
  const encoded = Buffer.from([0x5, 0x0, 0x2, 0x2]);
  const payload = [0, 0, 0, 0, 0, 2, 2];
  // Ask for twice as many entries as are encoded; the tail must be zeroes.
  const zeroTail: number[] = Array(payload.length).fill(0);
  const padded = [...payload, ...zeroTail];

  expect(runLengthDecode(encoded, padded.length)).toEqual(padded);
});
test('runLengthDecode without optional parameter', () => {
  // No output size is passed, so the decoder derives it from the run lengths.
  const cases: Array<[Buffer, number[]]> = [
    [Buffer.from([0x5, 0x0, 0x2, 0x2]), [0, 0, 0, 0, 0, 2, 2]],
    [Buffer.from([0x1, 0x8]), [8]],
  ];

  cases.forEach(([bytes, expected]) => {
    expect(runLengthDecode(bytes)).toEqual(expected);
  });
});
test('runLengthDecode works for very long runs', () => {
  // Covers a run of exactly 255 entries and a 256-entry run split in two.
  const fiveTwos: number[] = Array(5).fill(2);
  const manyZeroes: number[] = Array(255).fill(0);
  const manyTwos: number[] = Array(256).fill(2);

  const cases: Array<[Buffer, number[]]> = [
    [Buffer.from([0x5, 0x2, 0xff, 0x0]), [...fiveTwos, ...manyZeroes]],
    [Buffer.from([0xff, 0x2, 0x1, 0x2]), manyTwos],
  ];

  cases.forEach(([bytes, expected]) => {
    expect(runLengthDecode(bytes)).toEqual(expected);
  });
});
test('runLengthEncode works for very long runs', () => {
  // Covers a run of exactly 255 entries and a 256-entry run split in two.
  const fiveTwos: number[] = Array(5).fill(2);
  const manyZeroes: number[] = Array(255).fill(0);
  const manyTwos: number[] = Array(256).fill(2);

  const cases: Array<[number[], Buffer]> = [
    [[...fiveTwos, ...manyZeroes], Buffer.from([0x5, 0x2, 0xff, 0x0])],
    [manyTwos, Buffer.from([0xff, 0x2, 0x1, 0x2])],
  ];

  cases.forEach(([numbers, expected]) => {
    expect(runLengthEncode(numbers)).toEqual(expected);
  });
});
});

View file

@ -21,11 +21,14 @@ import {
emptyStackFrame,
Executable,
FileID,
getAddressFromStackFrameID,
getFileIDFromStackFrameID,
StackFrame,
StackFrameID,
StackTrace,
StackTraceID,
} from '../../common/profiling';
import { runLengthDecodeBase64Url } from '../../common/run_length_encoding';
import { ProfilingESClient } from '../utils/create_profiling_es_client';
import { withProfilingSpan } from '../utils/with_profiling_span';
import { DownsampledEventsIndex } from './downsampling';
@ -52,87 +55,6 @@ export type EncodedStackTrace = DedotObject<{
[ProfilingESField.StacktraceFrameTypes]: string;
}>;
// runLengthEncode compresses a list of numbers with run-length encoding.
//
// The input is expected to be a list of uint8s. The output is a buffer of
// 2-byte pairs: the first byte of each pair is the length of a run and the
// second byte is the value repeated in that run. A run is at most 255
// entries long; longer runs are split into several pairs.
//
// E.g. [0, 0, 0, 0, 0, 2, 2, 2] becomes the byte array [5, 0, 3, 2].
export function runLengthEncode(input: number[]): Buffer {
  const encoded: number[] = [];
  const total = input.length;
  if (total === 0) {
    return Buffer.from(encoded);
  }

  let repeated = input[0];
  let runLength = 1;

  for (let pos = 1; pos < total; pos += 1) {
    const item = input[pos];
    if (item !== repeated || runLength >= 255) {
      // The run ended (or hit the one-byte limit): emit it and start anew.
      encoded.push(runLength, repeated);
      repeated = item;
      runLength = 1;
    } else {
      runLength += 1;
    }
  }

  // Emit the run that was still open when the input ended.
  encoded.push(runLength, repeated);
  return Buffer.from(encoded);
}
// runLengthDecode expands a run-length encoded buffer into a list of numbers.
//
// The input is a buffer of 2-byte pairs: the first byte of each pair is the
// length of a run and the second byte is the value repeated in that run.
// When outputSize is omitted, the output size is the sum of all run lengths.
//
// E.g. the byte array [5, 0, 3, 2] becomes [0, 0, 0, 0, 0, 2, 2, 2].
export function runLengthDecode(input: Buffer, outputSize?: number): number[] {
  let capacity: number;
  if (outputSize !== undefined) {
    capacity = outputSize;
  } else {
    capacity = 0;
    for (let pair = 0; pair < input.length; pair += 2) {
      capacity += input[pair];
    }
  }

  const decoded: number[] = new Array(capacity);
  let written = 0;

  for (let pair = 0; pair < input.length; pair += 2) {
    const runLength = input[pair];
    const runValue = input[pair + 1];
    for (let remaining = runLength; remaining > 0; remaining -= 1) {
      decoded[written] = runValue;
      written += 1;
    }
  }

  // Frame types are truncated for stacktraces longer than 255, so the
  // requested size can exceed what was actually decoded. Normally the two
  // are equal.
  //
  // The rest of the output is filled with zeroes as a reasonable default;
  // otherwise those slots would be left undefined.
  while (written < capacity) {
    decoded[written] = 0;
    written += 1;
  }

  return decoded;
}
// decodeStackTrace unpacks an encoded stack trace from Elasticsearch
export function decodeStackTrace(input: EncodedStackTrace): StackTrace {
const inputFrameIDs = input.Stacktrace.frame.ids;
@ -152,19 +74,15 @@ export function decodeStackTrace(input: EncodedStackTrace): StackTrace {
// However, since the file ID is base64-encoded using 21.33 bytes
// (16 * 4 / 3), then the 22 bytes have an extra 4 bits from the
// address (see diagram in definition of EncodedStackTrace).
for (let i = 0; i < countsFrameIDs; i++) {
const pos = i * BASE64_FRAME_ID_LENGTH;
for (let i = 0, pos = 0; i < countsFrameIDs; i++, pos += BASE64_FRAME_ID_LENGTH) {
const frameID = inputFrameIDs.slice(pos, pos + BASE64_FRAME_ID_LENGTH);
const buf = Buffer.from(frameID, 'base64url');
fileIDs[i] = buf.toString('base64url', 0, 16);
addressOrLines[i] = Number(buf.readBigUInt64BE(16));
frameIDs[i] = frameID;
fileIDs[i] = getFileIDFromStackFrameID(frameID);
addressOrLines[i] = getAddressFromStackFrameID(frameID);
}
// Step 2: Convert the run-length byte encoding into a list of uint8s.
const types = Buffer.from(inputFrameTypes, 'base64url');
const typeIDs = runLengthDecode(types, countsFrameIDs);
const typeIDs = runLengthDecodeBase64Url(inputFrameTypes, inputFrameTypes.length, countsFrameIDs);
return {
AddressOrLines: addressOrLines,