mirror of
https://github.com/elastic/kibana.git
synced 2025-04-23 09:19:04 -04:00
Apm per service telemetry (#137216)
* Add per_service telemetry (cloud_provider + cloud_region) * Clean up types and add availability_zone * Add top_hits for agent version/name * Use top_metrics instead of top_hits * Switch to metrics index + clean up constants * Add service.framework/language/runtime, plus kubernetes * Add container ID * Add faas trigger-type * Add timed_out * Update the schema Also switched to an array of dictionaries since I couldn't figure out how to make a dictionary with arbitrary keys in the schema. * Use the correct (?) syntax for the per_service schema * Fix bad unions and generate schema * Use flatmap+map to create the data immutably * Force string to match types * Generate schema * Add note about updating the jest snapshot
This commit is contained in:
parent
998b11a9a8
commit
3dff85bcdd
6 changed files with 504 additions and 2 deletions
|
@ -1078,6 +1078,102 @@ exports[`APM telemetry helpers getApmTelemetry generates a JSON object with the
|
|||
}
|
||||
}
|
||||
},
|
||||
"per_service": {
|
||||
"properties": {
|
||||
"service_id": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"timed_out": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"cloud": {
|
||||
"properties": {
|
||||
"availability_zones": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"regions": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"providers": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
},
|
||||
"faas": {
|
||||
"properties": {
|
||||
"trigger": {
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"agent": {
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"version": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
},
|
||||
"service": {
|
||||
"properties": {
|
||||
"language": {
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"version": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
},
|
||||
"framework": {
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"version": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
},
|
||||
"runtime": {
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"version": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"kubernetes": {
|
||||
"properties": {
|
||||
"pod": {
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"container": {
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tasks": {
|
||||
"properties": {
|
||||
"aggregated_transactions": {
|
||||
|
@ -1233,6 +1329,17 @@ exports[`APM telemetry helpers getApmTelemetry generates a JSON object with the
|
|||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"per_service": {
|
||||
"properties": {
|
||||
"took": {
|
||||
"properties": {
|
||||
"ms": {
|
||||
"type": "long"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -61,6 +61,9 @@ The collection tasks also use the [`APMDataTelemetry` type](../server/lib/apm_te
|
|||
|
||||
Running `node scripts/telemetry_check --fix` from the root Kibana directory will update the schemas. When a pull request containing the updated schemas is opened, the Infra team should be notified automatically so they can update the mapping in the telemetry clusters.
|
||||
|
||||
Running `node scripts/test/jest --updateSnapshot` from the `x-pack/plugins/apm` directory will update the
|
||||
mappings snapshot used in the jest tests.
|
||||
|
||||
## Behavioral Telemetry
|
||||
|
||||
Behavioral telemetry is recorded with the ui_metrics and application_usage methods from the Usage Collection plugin.
|
||||
|
|
|
@ -25,6 +25,7 @@ import {
|
|||
CLOUD_REGION,
|
||||
CONTAINER_ID,
|
||||
ERROR_GROUP_ID,
|
||||
FAAS_TRIGGER_TYPE,
|
||||
HOST_NAME,
|
||||
HOST_OS_PLATFORM,
|
||||
OBSERVER_HOSTNAME,
|
||||
|
@ -49,7 +50,7 @@ import { APMError } from '../../../../typings/es_schemas/ui/apm_error';
|
|||
import { AgentName } from '../../../../typings/es_schemas/ui/fields/agent';
|
||||
import { Span } from '../../../../typings/es_schemas/ui/span';
|
||||
import { Transaction } from '../../../../typings/es_schemas/ui/transaction';
|
||||
import { APMTelemetry } from '../types';
|
||||
import { APMTelemetry, APMPerService } from '../types';
|
||||
const TIME_RANGES = ['1d', 'all'] as const;
|
||||
type TimeRange = typeof TIME_RANGES[number];
|
||||
|
||||
|
@ -1150,4 +1151,184 @@ export const tasks: TelemetryTask[] = [
|
|||
};
|
||||
},
|
||||
},
|
||||
{
  // Telemetry task producing one `APMPerService` record per
  // (environment, service name) pair found in the metric indices.
  name: 'per_service',
  /**
   * Runs a single aggregation-only search over `indices.metric`, restricted
   * to documents whose `@timestamp` is within the last hour, and flattens
   * the nested environment -> service buckets into a list of records.
   *
   * @returns an object whose `per_service` property holds the records.
   */
  executor: async ({ indices, search }) => {
    const response = await search({
      index: [indices.metric],
      body: {
        size: 0,
        timeout,
        query: {
          bool: {
            filter: [{ range: { '@timestamp': { gte: 'now-1h' } } }],
          },
        },
        aggs: {
          // Outer split by environment, inner split by service name
          // (at most 1000 buckets on each level).
          // NOTE(review): documents without SERVICE_ENVIRONMENT presumably
          // never reach a bucket here — confirm that is intended.
          environments: {
            terms: {
              field: SERVICE_ENVIRONMENT,
              size: 1000,
            },
            aggs: {
              service_names: {
                terms: {
                  field: SERVICE_NAME,
                  size: 1000,
                },
                aggs: {
                  // Single-valued attributes are read from one document
                  // of the service bucket.
                  // NOTE(review): sorting by `_score` in a filter-only
                  // query presumably makes the chosen document arbitrary
                  // — confirm.
                  top_metrics: {
                    top_metrics: {
                      sort: '_score',
                      metrics: [
                        { field: AGENT_NAME },
                        { field: AGENT_VERSION },
                        { field: SERVICE_LANGUAGE_NAME },
                        { field: SERVICE_LANGUAGE_VERSION },
                        { field: SERVICE_FRAMEWORK_NAME },
                        { field: SERVICE_FRAMEWORK_VERSION },
                        { field: SERVICE_RUNTIME_NAME },
                        { field: SERVICE_RUNTIME_VERSION },
                        { field: POD_NAME },
                        { field: CONTAINER_ID },
                      ],
                    },
                  },
                  // Multi-valued attributes are collected as the top terms
                  // of the service bucket.
                  [CLOUD_REGION]: {
                    terms: {
                      field: CLOUD_REGION,
                      size: 5,
                    },
                  },
                  [CLOUD_PROVIDER]: {
                    terms: {
                      field: CLOUD_PROVIDER,
                      size: 3,
                    },
                  },
                  [CLOUD_AVAILABILITY_ZONE]: {
                    terms: {
                      field: CLOUD_AVAILABILITY_ZONE,
                      size: 5,
                    },
                  },
                  [FAAS_TRIGGER_TYPE]: {
                    terms: {
                      field: FAAS_TRIGGER_TYPE,
                      size: 5,
                    },
                  },
                },
              },
            },
          },
        },
      },
    });

    // Returns the bucket keys of an optional terms aggregation as strings,
    // or an empty list when the aggregation is absent from the response.
    const bucketKeys = (agg?: {
      buckets: ReadonlyArray<{ key: unknown }>;
    }): string[] => agg?.buckets.map((bucket) => bucket.key as string) ?? [];

    const envBuckets = response.aggregations?.environments.buckets ?? [];
    const data: APMPerService[] = envBuckets.flatMap((envBucket) => {
      const env = envBucket.key;
      const serviceBuckets = envBucket.service_names?.buckets ?? [];

      return serviceBuckets.map((serviceBucket) => {
        // `top[0]?.` guards against an empty `top` array; without it a
        // single service bucket with no top hit would throw and fail the
        // whole task.
        const topMetrics: Record<string, unknown> | undefined =
          serviceBucket.top_metrics?.top[0]?.metrics;
        // The cast forces the value to the `string` type required by
        // APMPerService; it performs no runtime conversion.
        const metric = (field: string) => topMetrics?.[field] as string;

        return {
          // The environment and service name are joined with `~`.
          service_id: `${env}~${serviceBucket.key}`,
          // Copied from the search response, so every record of one run
          // carries the same value.
          timed_out: response.timed_out,
          cloud: {
            availability_zones: bucketKeys(
              serviceBucket[CLOUD_AVAILABILITY_ZONE]
            ),
            regions: bucketKeys(serviceBucket[CLOUD_REGION]),
            providers: bucketKeys(serviceBucket[CLOUD_PROVIDER]),
          },
          faas: {
            trigger: {
              type: bucketKeys(serviceBucket[FAAS_TRIGGER_TYPE]),
            },
          },
          agent: {
            name: metric(AGENT_NAME),
            version: metric(AGENT_VERSION),
          },
          service: {
            language: {
              name: metric(SERVICE_LANGUAGE_NAME),
              version: metric(SERVICE_LANGUAGE_VERSION),
            },
            framework: {
              name: metric(SERVICE_FRAMEWORK_NAME),
              version: metric(SERVICE_FRAMEWORK_VERSION),
            },
            runtime: {
              name: metric(SERVICE_RUNTIME_NAME),
              version: metric(SERVICE_RUNTIME_VERSION),
            },
          },
          kubernetes: {
            pod: {
              name: metric(POD_NAME),
            },
          },
          container: {
            id: metric(CONTAINER_ID),
          },
        };
      });
    });

    return {
      per_service: data,
    };
  },
},
|
||||
];
|
||||
|
|
|
@ -12,11 +12,14 @@ import {
|
|||
TimeframeMap,
|
||||
TimeframeMap1d,
|
||||
TimeframeMapAll,
|
||||
APMPerService,
|
||||
} from './types';
|
||||
import { ElasticAgentName } from '../../../typings/es_schemas/ui/fields/agent';
|
||||
|
||||
const long: { type: 'long' } = { type: 'long' };
|
||||
|
||||
const keyword: { type: 'keyword' } = { type: 'keyword' };
|
||||
|
||||
const aggregatedTransactionCountSchema: MakeSchemaFrom<AggregatedTransactionsCounts> =
|
||||
{
|
||||
expected_metric_document_count: long,
|
||||
|
@ -113,6 +116,47 @@ const apmPerAgentSchema: Pick<
|
|||
},
|
||||
};
|
||||
|
||||
/**
 * Telemetry schema for a single `per_service` entry.
 *
 * The shape follows `APMPerService` field by field: scalar string fields
 * map to `keyword`, `timed_out` maps to `boolean`, and the string-list
 * fields map to arrays of `keyword`.
 */
export const apmPerServiceSchema: MakeSchemaFrom<APMPerService> = {
  service_id: keyword,
  timed_out: { type: 'boolean' },
  // Multi-valued cloud attributes.
  cloud: {
    availability_zones: { type: 'array', items: { type: 'keyword' } },
    regions: { type: 'array', items: { type: 'keyword' } },
    providers: { type: 'array', items: { type: 'keyword' } },
  },
  faas: {
    trigger: {
      type: { type: 'array', items: { type: 'keyword' } },
    },
  },
  // Single-valued attributes.
  agent: { name: keyword, version: keyword },
  service: {
    language: { name: keyword, version: keyword },
    framework: { name: keyword, version: keyword },
    runtime: { name: keyword, version: keyword },
  },
  kubernetes: {
    pod: { name: keyword },
  },
  container: { id: keyword },
};
|
||||
|
||||
export const apmSchema: MakeSchemaFrom<APMUsage> = {
|
||||
...apmPerAgentSchema,
|
||||
has_any_services: { type: 'boolean' },
|
||||
|
@ -192,6 +236,7 @@ export const apmSchema: MakeSchemaFrom<APMUsage> = {
|
|||
service_groups: {
|
||||
kuery_fields: { type: 'array', items: { type: 'keyword' } },
|
||||
},
|
||||
per_service: { type: 'array', items: { ...apmPerServiceSchema } },
|
||||
tasks: {
|
||||
aggregated_transactions: { took: { ms: long } },
|
||||
cloud: { took: { ms: long } },
|
||||
|
@ -207,5 +252,6 @@ export const apmSchema: MakeSchemaFrom<APMUsage> = {
|
|||
cardinality: { took: { ms: long } },
|
||||
environments: { took: { ms: long } },
|
||||
service_groups: { took: { ms: long } },
|
||||
per_service: { took: { ms: long } },
|
||||
},
|
||||
};
|
||||
|
|
|
@ -24,6 +24,47 @@ export interface AggregatedTransactionsCounts {
|
|||
transaction_count: number;
|
||||
}
|
||||
|
||||
/**
 * One telemetry record describing a single service, as reported in the
 * `per_service` list of the APM usage payload.
 */
export interface APMPerService {
  /** Identifier of the service this record describes. */
  service_id: string;
  /** Timed-out flag reported alongside the record. */
  timed_out: boolean;
  /** Cloud attributes; each one is a list of values. */
  cloud: {
    availability_zones: string[];
    regions: string[];
    providers: string[];
  };
  /** Function-as-a-service attributes; `trigger.type` is a list of values. */
  faas: {
    trigger: { type: string[] };
  };
  /** Agent name and version. */
  agent: { name: string; version: string };
  /** Language, framework and runtime of the service. */
  service: {
    language: { name: string; version: string };
    framework: { name: string; version: string };
    runtime: { name: string; version: string };
  };
  /** Kubernetes pod name. */
  kubernetes: {
    pod: { name: string };
  };
  /** Container id. */
  container: { id: string };
}
|
||||
|
||||
export interface APMUsage {
|
||||
has_any_services: boolean;
|
||||
services_per_agent: Record<AgentName, number>;
|
||||
|
@ -133,6 +174,7 @@ export interface APMUsage {
|
|||
service_groups: {
|
||||
kuery_fields: string[];
|
||||
};
|
||||
per_service: APMPerService[];
|
||||
tasks: Record<
|
||||
| 'aggregated_transactions'
|
||||
| 'cloud'
|
||||
|
@ -147,7 +189,8 @@ export interface APMUsage {
|
|||
| 'indices_stats'
|
||||
| 'cardinality'
|
||||
| 'environments'
|
||||
| 'service_groups',
|
||||
| 'service_groups'
|
||||
| 'per_service',
|
||||
{ took: { ms: number } }
|
||||
>;
|
||||
}
|
||||
|
|
|
@ -4015,6 +4015,117 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"per_service": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"properties": {
|
||||
"service_id": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"timed_out": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"cloud": {
|
||||
"properties": {
|
||||
"availability_zones": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "keyword"
|
||||
}
|
||||
},
|
||||
"regions": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "keyword"
|
||||
}
|
||||
},
|
||||
"providers": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"faas": {
|
||||
"properties": {
|
||||
"trigger": {
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"agent": {
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"version": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
},
|
||||
"service": {
|
||||
"properties": {
|
||||
"language": {
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"version": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
},
|
||||
"framework": {
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"version": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
},
|
||||
"runtime": {
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"version": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"kubernetes": {
|
||||
"properties": {
|
||||
"pod": {
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"container": {
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tasks": {
|
||||
"properties": {
|
||||
"aggregated_transactions": {
|
||||
|
@ -4170,6 +4281,17 @@
|
|||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"per_service": {
|
||||
"properties": {
|
||||
"took": {
|
||||
"properties": {
|
||||
"ms": {
|
||||
"type": "long"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue