[Profiling] New Profiling ES Flamegraph API (#167477)

A new setting was added to switch back to the old API:
<img width="1005" alt="Screenshot 2023-09-28 at 10 03 08"
src="051931d6-01f5-4a33-81a4-41e3f6dd047b">

In the old API, some of the logic to create the flamegraph data was still
handled by Kibana. This logic was moved to the Profiling ES plugin and
a new API was created: `_profiling/flamegraph`. We've proven that this
has improved the loading time to render the profiling in ~3s.

---------

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
Cauê Marcondes 2023-10-02 16:22:49 +01:00 committed by GitHub
parent 7d777423b9
commit 0b48b92310
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 93561 additions and 25 deletions

View file

@ -593,4 +593,8 @@ export const stackManagementSchema: MakeSchemaFrom<UsageStats> = {
type: 'boolean',
_meta: { description: 'Non-default value of setting.' },
},
'observability:profilingUseLegacyFlamegraphAPI': {
type: 'boolean',
_meta: { description: 'Non-default value of setting.' },
},
};

View file

@ -155,6 +155,7 @@ export interface UsageStats {
'securitySolution:enableGroupedNav': boolean;
'securitySolution:showRelatedIntegrations': boolean;
'visualization:visualize:legacyGaugeChartsLibrary': boolean;
'observability:profilingUseLegacyFlamegraphAPI': boolean;
'observability:profilingPerCoreWatt': number;
'observability:profilingCo2PerKWH': number;
'observability:profilingDatacenterPUE': number;

View file

@ -10078,6 +10078,12 @@
"_meta": {
"description": "Non-default value of setting."
}
},
"observability:profilingUseLegacyFlamegraphAPI": {
"type": "boolean",
"_meta": {
"description": "Non-default value of setting."
}
}
}
},

View file

@ -8,6 +8,7 @@
import { toNumberRt } from '@kbn/io-ts-utils';
import type { BaseFlameGraph, TopNFunctions } from '@kbn/profiling-utils';
import * as t from 'io-ts';
import { profilingUseLegacyFlamegraphAPI } from '@kbn/observability-plugin/common';
import { HOST_NAME } from '../../../common/es_fields/apm';
import { toKueryFilterFormat } from '../../../common/utils/to_kuery_filter_format';
import { getApmEventClient } from '../../lib/helpers/get_apm_event_client';
@ -36,6 +37,10 @@ const profilingFlamegraphRoute = createApmServerRoute({
{ flamegraph: BaseFlameGraph; hostNames: string[] } | undefined
> => {
const { context, plugins, params } = resources;
const useLegacyFlamegraphAPI = await (
await context.core
).uiSettings.client.get<boolean>(profilingUseLegacyFlamegraphAPI);
const [esClient, apmEventClient, profilingDataAccessStart] =
await Promise.all([
(await context.core).elasticsearch.client,
@ -67,6 +72,7 @@ const profilingFlamegraphRoute = createApmServerRoute({
rangeFromMs: start,
rangeToMs: end,
kuery: toKueryFilterFormat(HOST_NAME, serviceHostNames),
useLegacyFlamegraphAPI,
});
return { flamegraph, hostNames: serviceHostNames };

View file

@ -41,6 +41,7 @@ export {
enableCriticalPath,
syntheticsThrottlingEnabled,
apmEnableProfilingIntegration,
profilingUseLegacyFlamegraphAPI,
profilingCo2PerKWH,
profilingDatacenterPUE,
profilingPerCoreWatt,

View file

@ -27,6 +27,7 @@ export const apmEnableContinuousRollups = 'observability:apmEnableContinuousRoll
export const syntheticsThrottlingEnabled = 'observability:syntheticsThrottlingEnabled';
export const enableLegacyUptimeApp = 'observability:enableLegacyUptimeApp';
export const apmEnableProfilingIntegration = 'observability:apmEnableProfilingIntegration';
// uiSettings key for the toggle that makes Universal Profiling use the legacy flamegraph API.
export const profilingUseLegacyFlamegraphAPI = 'observability:profilingUseLegacyFlamegraphAPI';
export const profilingPerCoreWatt = 'observability:profilingPerCoreWatt';
export const profilingCo2PerKWH = 'observability:profilingCo2PerKWH';
export const profilingDatacenterPUE = 'observability:profilingDatacenterPUE';

View file

@ -30,6 +30,7 @@ import {
syntheticsThrottlingEnabled,
enableLegacyUptimeApp,
apmEnableProfilingIntegration,
profilingUseLegacyFlamegraphAPI,
profilingCo2PerKWH,
profilingDatacenterPUE,
profilingPerCoreWatt,
@ -377,6 +378,14 @@ export const uiSettings: Record<string, UiSettings> = {
schema: schema.boolean(),
requiresPageReload: false,
},
// Boolean UI setting, off by default, that opts Universal Profiling back into the legacy Flamegraph API.
// NOTE(review): this entry declares neither `description` nor `requiresPageReload`, while the
// preceding entry sets `requiresPageReload: false` — confirm the defaults are intended.
[profilingUseLegacyFlamegraphAPI]: {
category: [observabilityFeatureId],
name: i18n.translate('xpack.observability.profilingUseLegacyFlamegraphAPI', {
defaultMessage: 'Use legacy Flamegraph API in Universal Profiling',
}),
value: false,
schema: schema.boolean(),
},
[profilingPerCoreWatt]: {
category: [observabilityFeatureId],
name: i18n.translate('xpack.observability.profilingPerCoreWattUiSettingName', {

View file

@ -6,6 +6,7 @@
*/
import { schema } from '@kbn/config-schema';
import { profilingUseLegacyFlamegraphAPI } from '@kbn/observability-plugin/common';
import { RouteRegisterParameters } from '.';
import { getRoutePaths } from '../../common';
import { handleRouteHandlerError } from '../utils/handle_route_error_handler';
@ -33,6 +34,9 @@ export function registerFlameChartSearchRoute({
},
async (context, request, response) => {
const { timeFrom, timeTo, kuery } = request.query;
const useLegacyFlamegraphAPI = await (
await context.core
).uiSettings.client.get<boolean>(profilingUseLegacyFlamegraphAPI);
try {
const esClient = await getClient(context);
@ -41,6 +45,7 @@ export function registerFlameChartSearchRoute({
rangeFromMs: timeFrom,
rangeToMs: timeTo,
kuery,
useLegacyFlamegraphAPI,
});
return response.ok({ body: flamegraph });

View file

@ -58,6 +58,17 @@ describe('TopN data from Elasticsearch', () => {
}) as Promise<any>
),
getEsClient: jest.fn(() => context.elasticsearch.client.asCurrentUser),
profilingFlamegraph: jest.fn(
(request) =>
context.elasticsearch.client.asCurrentUser.transport.request({
method: 'POST',
path: encodeURI('_profiling/flamegraph'),
body: {
query: request.query,
sample_size: request.sampleSize,
},
}) as Promise<any>
),
};
const logger = loggerMock.create();

View file

@ -10,7 +10,11 @@ import type { ESSearchRequest, InferSearchResponseOf } from '@kbn/es-types';
import type { KibanaRequest } from '@kbn/core/server';
import { unwrapEsResponse } from '@kbn/observability-plugin/server';
import { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import type { ProfilingStatusResponse, StackTraceResponse } from '@kbn/profiling-utils';
import type {
BaseFlameGraph,
ProfilingStatusResponse,
StackTraceResponse,
} from '@kbn/profiling-utils';
import { withProfilingSpan } from './with_profiling_span';
export function cancelEsRequestOnAbort<T extends Promise<any>>(
@ -36,6 +40,10 @@ export interface ProfilingESClient {
}): Promise<StackTraceResponse>;
profilingStatus(): Promise<ProfilingStatusResponse>;
getEsClient(): ElasticsearchClient;
profilingFlamegraph({}: {
query: QueryDslQueryContainer;
sampleSize: number;
}): Promise<BaseFlameGraph>;
}
export function createProfilingEsClient({
@ -118,5 +126,26 @@ export function createProfilingEsClient({
getEsClient() {
return esClient;
},
/**
 * Fetches a flamegraph from the `_profiling/flamegraph` Elasticsearch endpoint.
 *
 * Sends a POST whose body carries the given query and `sample_size`, wrapped
 * in a profiling span named after the endpoint.
 *
 * @param query - Elasticsearch query DSL used to select the profiling data.
 * @param sampleSize - value forwarded as `sample_size` in the request body.
 * @returns the result of `unwrapEsResponse`, typed as a `BaseFlameGraph`.
 *
 * NOTE(review): nothing in this method ever calls `abort()` on the controller,
 * although `cancelEsRequestOnAbort` is declared in this file — confirm whether
 * the request should be wrapped with it.
 */
profilingFlamegraph({ query, sampleSize }) {
  const abortController = new AbortController();
  const endpoint = encodeURI('/_profiling/flamegraph');
  const payload = { query, sample_size: sampleSize };

  const pendingResponse = withProfilingSpan('_profiling/flamegraph', () =>
    esClient.transport.request(
      { method: 'POST', path: endpoint, body: payload },
      // `meta: true` is kept inline so its literal type is preserved.
      { signal: abortController.signal, meta: true }
    )
  );

  return unwrapEsResponse(pendingResponse) as Promise<BaseFlameGraph>;
},
};
}

View file

@ -8,7 +8,11 @@
import { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/typesWithBodyKey';
import { ElasticsearchClient } from '@kbn/core/server';
import type { ESSearchRequest, InferSearchResponseOf } from '@kbn/es-types';
import type { ProfilingStatusResponse, StackTraceResponse } from '@kbn/profiling-utils';
import type {
BaseFlameGraph,
ProfilingStatusResponse,
StackTraceResponse,
} from '@kbn/profiling-utils';
export interface ProfilingESClient {
search<TDocument = unknown, TSearchRequest extends ESSearchRequest = ESSearchRequest>(
@ -21,4 +25,8 @@ export interface ProfilingESClient {
}): Promise<StackTraceResponse>;
profilingStatus(): Promise<ProfilingStatusResponse>;
getEsClient(): ElasticsearchClient;
profilingFlamegraph({}: {
query: QueryDslQueryContainer;
sampleSize: number;
}): Promise<BaseFlameGraph>;
}

View file

@ -7,6 +7,7 @@
import { ElasticsearchClient } from '@kbn/core/server';
import { createBaseFlameGraph, createCalleeTree } from '@kbn/profiling-utils';
import { kqlQuery } from '../../utils/query';
import { withProfilingSpan } from '../../utils/with_profiling_span';
import { RegisterServicesParams } from '../register_services';
import { searchStackTraces } from '../search_stack_traces';
@ -16,40 +17,69 @@ export interface FetchFlamechartParams {
rangeFromMs: number;
rangeToMs: number;
kuery: string;
useLegacyFlamegraphAPI?: boolean;
}
const targetSampleSize = 20000; // minimum number of samples to get statistically sound results
export function createFetchFlamechart({ createProfilingEsClient }: RegisterServicesParams) {
return async ({ esClient, rangeFromMs, rangeToMs, kuery }: FetchFlamechartParams) => {
return async ({
esClient,
rangeFromMs,
rangeToMs,
kuery,
useLegacyFlamegraphAPI = false,
}: FetchFlamechartParams) => {
const rangeFromSecs = rangeFromMs / 1000;
const rangeToSecs = rangeToMs / 1000;
const profilingEsClient = createProfilingEsClient({ esClient });
const targetSampleSize = 20000; // minimum number of samples to get statistically sound results
const totalSeconds = rangeToSecs - rangeFromSecs;
// Use legacy stack traces API to fetch the flamegraph
if (useLegacyFlamegraphAPI) {
const { events, stackTraces, executables, stackFrames, totalFrames, samplingRate } =
await searchStackTraces({
client: profilingEsClient,
rangeFrom: rangeFromSecs,
rangeTo: rangeToSecs,
kuery,
sampleSize: targetSampleSize,
});
const { events, stackTraces, executables, stackFrames, totalFrames, samplingRate } =
await searchStackTraces({
client: profilingEsClient,
rangeFrom: rangeFromSecs,
rangeTo: rangeToSecs,
kuery,
sampleSize: targetSampleSize,
return await withProfilingSpan('create_flamegraph', async () => {
const tree = createCalleeTree(
events,
stackTraces,
stackFrames,
executables,
totalFrames,
samplingRate
);
return createBaseFlameGraph(tree, samplingRate, totalSeconds);
});
}
const flamegraph = await withProfilingSpan('create_flamegraph', async () => {
const tree = createCalleeTree(
events,
stackTraces,
stackFrames,
executables,
totalFrames,
samplingRate
);
return createBaseFlameGraph(tree, samplingRate, totalSeconds);
const flamegraph = await profilingEsClient.profilingFlamegraph({
query: {
bool: {
filter: [
...kqlQuery(kuery),
{
range: {
['@timestamp']: {
gte: String(rangeFromSecs),
lt: String(rangeToSecs),
format: 'epoch_second',
},
},
},
],
},
},
sampleSize: targetSampleSize,
});
return flamegraph;
return { ...flamegraph, TotalSeconds: totalSeconds };
};
}

View file

@ -7,7 +7,11 @@
import { ElasticsearchClient } from '@kbn/core/server';
import type { ESSearchRequest, InferSearchResponseOf } from '@kbn/es-types';
import type { ProfilingStatusResponse, StackTraceResponse } from '@kbn/profiling-utils';
import type {
BaseFlameGraph,
ProfilingStatusResponse,
StackTraceResponse,
} from '@kbn/profiling-utils';
import { ProfilingESClient } from '../../common/profiling_es_client';
import { unwrapEsResponse } from './unwrap_es_response';
import { withProfilingSpan } from './with_profiling_span';
@ -77,5 +81,26 @@ export function createProfilingEsClient({
getEsClient() {
return esClient;
},
/**
 * Asks Elasticsearch for a flamegraph via `POST /_profiling/flamegraph`.
 *
 * The request body contains the supplied query plus `sample_size`, and the
 * call runs inside a `_profiling/flamegraph` profiling span.
 *
 * @param query - Elasticsearch query DSL used to select the profiling data.
 * @param sampleSize - value forwarded as `sample_size` in the request body.
 * @returns the result of `unwrapEsResponse`, typed as a `BaseFlameGraph`.
 *
 * NOTE(review): the AbortController's signal is passed to the transport, but
 * this method never aborts it — confirm whether cancellation is wired elsewhere.
 */
profilingFlamegraph({ query, sampleSize }) {
  const abortController = new AbortController();
  const flamegraphPath = encodeURI('/_profiling/flamegraph');
  const requestBody = { query, sample_size: sampleSize };

  const inFlight = withProfilingSpan('_profiling/flamegraph', () => {
    return esClient.transport.request(
      { method: 'POST', path: flamegraphPath, body: requestBody },
      // `meta: true` is kept inline so its literal type is preserved.
      { signal: abortController.signal, meta: true }
    );
  });

  return unwrapEsResponse(inFlight) as Promise<BaseFlameGraph>;
},
};
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,81 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { getRoutePaths } from '@kbn/profiling-plugin/common';
import { BaseFlameGraph } from '@kbn/profiling-utils';
import { sortBy } from 'lodash';
import { getBettertest } from '../common/bettertest';
import { FtrProviderContext } from '../common/ftr_provider_context';
import { loadProfilingData, setupProfiling } from '../utils/profiling_data';
const profilingRoutePaths = getRoutePaths();
type BaseFlameGraphKeys = keyof BaseFlameGraph;
/**
 * API integration tests for the profiling Flamechart route.
 *
 * Runs `setupProfiling` and `loadProfilingData`, requests the flamegraph for a
 * fixed 30-second window through `profilingApiClient.adminUser`, and
 * snapshot-tests individual fields of the returned `BaseFlameGraph`.
 */
export default function featureControlsTests({ getService }: FtrProviderContext) {
  const registry = getService('registry');
  const profilingApiClient = getService('profilingApiClient');
  const log = getService('log');
  const supertest = getService('supertest');
  const bettertest = getBettertest(supertest);
  const es = getService('es');

  // Fixed query window (30 seconds), expressed as epoch milliseconds.
  const start = Date.parse('2023-03-17T01:00:00.000Z');
  const end = Date.parse('2023-03-17T01:00:30.000Z');

  // Fields whose values are sorted before being compared to the snapshot.
  // NOTE(review): 'AddressOrLine' is listed twice, which registers two tests with
  // the same title — confirm whether the first entry was meant to be another field.
  const sortedFields = [
    'AddressOrLine',
    'FileID',
    'FrameType',
    'Inline',
    'ExeFilename',
    'AddressOrLine',
    'FunctionName',
    'FunctionOffset',
    'SourceFilename',
    'SourceLine',
    'CountInclusive',
    'CountExclusive',
  ] as BaseFlameGraphKeys[];

  // Fields compared to the snapshot as-is.
  const plainFields = ['SamplingRate', 'Size', 'TotalSeconds'] as BaseFlameGraphKeys[];

  // Shared preparation step used by both `before` hooks below.
  const prepareProfilingData = async () => {
    await setupProfiling(bettertest, log);
    await loadProfilingData(es, log);
  };

  registry.when('Flamegraph api', { config: 'cloud' }, () => {
    before(async () => {
      await prepareProfilingData();
    });

    describe('With data', () => {
      let flamegraph: BaseFlameGraph;

      before(async () => {
        // NOTE(review): setup and data loading run again here after the outer hook —
        // confirm the repetition is intentional.
        await prepareProfilingData();

        const query = { timeFrom: start, timeTo: end, kuery: '' };
        const response = await profilingApiClient.adminUser({
          endpoint: `GET ${profilingRoutePaths.Flamechart}`,
          params: { query },
        });
        flamegraph = response.body as BaseFlameGraph;
      });

      for (const item of sortedFields) {
        it(`returns correct ${item}`, async () => {
          const sortedValues = sortBy(flamegraph[item] as any[]);
          expectSnapshot(sortedValues).toMatch();
        });
      }

      for (const item of plainFields) {
        it(`returns correct ${item}`, async () => {
          expectSnapshot(flamegraph[item]).toMatch();
        });
      }
    });
  });
}