[Profiling] Improve performance when decoding stacktraces (#143084)

* Derive address and file ID from base64 encoding. This skips the intermediate deserialization step to a buffer object.
* Move run-length encoding methods.
* Decode run-length directly from base64 encoding. This skips the intermediate deserialization step to a buffer object.
* Minor refactor.

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
2025-04-24 17:59:23 -04:00 · 2022-10-24 16:24:31 -07:00 · 2022-10-24 16:24:31 -07:00 · a4b20e6e89
commit a4b20e6e89
parent 21c7f5e074
7 changed files with 440 additions and 191 deletions
--- a/x-pack/plugins/profiling/server/routes/stacktrace.test.ts
+++ b/x-pack/plugins/profiling/server/routes/stacktrace.test.ts
@@ -6,12 +6,8 @@
 */

 import { createStackFrameID, StackTrace } from '../../common/profiling';
-import {
-  decodeStackTrace,
-  EncodedStackTrace,
-  runLengthDecode,
-  runLengthEncode,
-} from './stacktrace';
+import { runLengthEncode } from '../../common/run_length_encoding';
+import { decodeStackTrace, EncodedStackTrace } from './stacktrace';

 enum fileID {
  A = 'aQpJmTLWydNvOapSFZOwKg',
@@ -89,100 +85,4 @@ describe('Stack trace operations', () => {
      expect(decodeStackTrace(t.original)).toEqual(t.expected);
    }
  });
-
-  test('run length is fully reversible', () => {
-    const tests: number[][] = [[], [0], [0, 1, 2, 3], [0, 1, 1, 2, 2, 2, 3, 3, 3, 3]];
-
-    for (const t of tests) {
-      expect(runLengthDecode(runLengthEncode(t))).toEqual(t);
-    }
-  });
-
-  test('runLengthDecode with optional parameter', () => {
-    const tests: Array<{
-      bytes: Buffer;
-      expected: number[];
-    }> = [
-      {
-        bytes: Buffer.from([0x5, 0x0, 0x2, 0x2]),
-        expected: [0, 0, 0, 0, 0, 2, 2],
-      },
-      {
-        bytes: Buffer.from([0x1, 0x8]),
-        expected: [8],
-      },
-    ];
-
-    for (const t of tests) {
-      expect(runLengthDecode(t.bytes, t.expected.length)).toEqual(t.expected);
-    }
-  });
-
-  test('runLengthDecode with larger output than available input', () => {
-    const bytes = Buffer.from([0x5, 0x0, 0x2, 0x2]);
-    const decoded = [0, 0, 0, 0, 0, 2, 2];
-    const expected = decoded.concat(Array(decoded.length).fill(0));
-
-    expect(runLengthDecode(bytes, expected.length)).toEqual(expected);
-  });
-
-  test('runLengthDecode without optional parameter', () => {
-    const tests: Array<{
-      bytes: Buffer;
-      expected: number[];
-    }> = [
-      {
-        bytes: Buffer.from([0x5, 0x0, 0x2, 0x2]),
-        expected: [0, 0, 0, 0, 0, 2, 2],
-      },
-      {
-        bytes: Buffer.from([0x1, 0x8]),
-        expected: [8],
-      },
-    ];
-
-    for (const t of tests) {
-      expect(runLengthDecode(t.bytes)).toEqual(t.expected);
-    }
-  });
-
-  test('runLengthDecode works for very long runs', () => {
-    const tests: Array<{
-      bytes: Buffer;
-      expected: number[];
-    }> = [
-      {
-        bytes: Buffer.from([0x5, 0x2, 0xff, 0x0]),
-        expected: [2, 2, 2, 2, 2].concat(Array(255).fill(0)),
-      },
-      {
-        bytes: Buffer.from([0xff, 0x2, 0x1, 0x2]),
-        expected: Array(256).fill(2),
-      },
-    ];
-
-    for (const t of tests) {
-      expect(runLengthDecode(t.bytes)).toEqual(t.expected);
-    }
-  });
-
-  test('runLengthEncode works for very long runs', () => {
-    const tests: Array<{
-      numbers: number[];
-      expected: Buffer;
-    }> = [
-      {
-        numbers: [2, 2, 2, 2, 2].concat(Array(255).fill(0)),
-        expected: Buffer.from([0x5, 0x2, 0xff, 0x0]),
-      },
-      {
-        numbers: Array(256).fill(2),
-        expected: Buffer.from([0xff, 0x2, 0x1, 0x2]),
-      },
-    ];
-
-    for (const t of tests) {
-      expect(runLengthEncode(t.numbers)).toEqual(t.expected);
-    }
-  });
 });
--- a/x-pack/plugins/profiling/server/routes/stacktrace.ts
+++ b/x-pack/plugins/profiling/server/routes/stacktrace.ts
@@ -21,11 +21,14 @@ import {
  emptyStackFrame,
  Executable,
  FileID,
+  getAddressFromStackFrameID,
+  getFileIDFromStackFrameID,
  StackFrame,
  StackFrameID,
  StackTrace,
  StackTraceID,
 } from '../../common/profiling';
+import { runLengthDecodeBase64Url } from '../../common/run_length_encoding';
 import { ProfilingESClient } from '../utils/create_profiling_es_client';
 import { withProfilingSpan } from '../utils/with_profiling_span';
 import { DownsampledEventsIndex } from './downsampling';
@@ -52,87 +55,6 @@ export type EncodedStackTrace = DedotObject<{
  [ProfilingESField.StacktraceFrameTypes]: string;
 }>;

-// runLengthEncode run-length encodes the input array.
-//
-// The input is a list of uint8s. The output is a binary stream of
-// 2-byte pairs (first byte is the length and the second byte is the
-// binary representation of the object) in reverse order.
-//
-// E.g. uint8 array [0, 0, 0, 0, 0, 2, 2, 2] is converted into the byte
-// array [5, 0, 3, 2].
-export function runLengthEncode(input: number[]): Buffer {
-  const output: number[] = [];
-
-  if (input.length === 0) {
-    return Buffer.from(output);
-  }
-
-  let count = 1;
-  let current = input[0];
-
-  for (let i = 1; i < input.length; i++) {
-    const next = input[i];
-
-    if (next === current && count < 255) {
-      count++;
-      continue;
-    }
-
-    output.push(count, current);
-
-    count = 1;
-    current = next;
-  }
-
-  output.push(count, current);
-
-  return Buffer.from(output);
-}
-
-// runLengthDecode decodes a run-length encoding for the input array.
-//
-// The input is a binary stream of 2-byte pairs (first byte is the length and the
-// second byte is the binary representation of the object). The output is a list of
-// uint8s.
-//
-// E.g. byte array [5, 0, 3, 2] is converted into an uint8 array like
-// [0, 0, 0, 0, 0, 2, 2, 2].
-export function runLengthDecode(input: Buffer, outputSize?: number): number[] {
-  let size;
-
-  if (typeof outputSize === 'undefined') {
-    size = 0;
-    for (let i = 0; i < input.length; i += 2) {
-      size += input[i];
-    }
-  } else {
-    size = outputSize;
-  }
-
-  const output: number[] = new Array(size);
-
-  let idx = 0;
-  for (let i = 0; i < input.length; i += 2) {
-    for (let j = 0; j < input[i]; j++) {
-      output[idx] = input[i + 1];
-      idx++;
-    }
-  }
-
-  // Due to truncation of the frame types for stacktraces longer than 255,
-  // the expected output size and the actual decoded size can be different.
-  // Ordinarily, these two values should be the same.
-  //
-  // We have decided to fill in the remainder of the output array with zeroes
-  // as a reasonable default. Without this step, the output array would have
-  // undefined values.
-  for (let i = idx; i < size; i++) {
-    output[i] = 0;
-  }
-
-  return output;
-}
-
 // decodeStackTrace unpacks an encoded stack trace from Elasticsearch
 export function decodeStackTrace(input: EncodedStackTrace): StackTrace {
  const inputFrameIDs = input.Stacktrace.frame.ids;
@@ -152,19 +74,15 @@ export function decodeStackTrace(input: EncodedStackTrace): StackTrace {
  // However, since the file ID is base64-encoded using 21.33 bytes
  // (16 * 4 / 3), then the 22 bytes have an extra 4 bits from the
  // address (see diagram in definition of EncodedStackTrace).
-  for (let i = 0; i < countsFrameIDs; i++) {
-    const pos = i * BASE64_FRAME_ID_LENGTH;
+  for (let i = 0, pos = 0; i < countsFrameIDs; i++, pos += BASE64_FRAME_ID_LENGTH) {
    const frameID = inputFrameIDs.slice(pos, pos + BASE64_FRAME_ID_LENGTH);
-    const buf = Buffer.from(frameID, 'base64url');
-
-    fileIDs[i] = buf.toString('base64url', 0, 16);
-    addressOrLines[i] = Number(buf.readBigUInt64BE(16));
    frameIDs[i] = frameID;
+    fileIDs[i] = getFileIDFromStackFrameID(frameID);
+    addressOrLines[i] = getAddressFromStackFrameID(frameID);
  }

  // Step 2: Convert the run-length byte encoding into a list of uint8s.
-  const types = Buffer.from(inputFrameTypes, 'base64url');
-  const typeIDs = runLengthDecode(types, countsFrameIDs);
+  const typeIDs = runLengthDecodeBase64Url(inputFrameTypes, inputFrameTypes.length, countsFrameIDs);

  return {
    AddressOrLines: addressOrLines,