Apm per service telemetry (#137216)

* Add per_service telemetry (cloud_provider + cloud_region)

* Clean up types and add availability_zone

* Add top_hits for agent version/name

* Use top_metrics instead of top_hits

* Switch to metrics index + clean up constants

* Add service.framework/language/runtime, plus kubernetes

* Add container ID

* Add faas trigger-type

* Add timed_out

* Update the schema

Also switched to an array of dictionaries since I couldn't figure
out how to make a dictionary with arbitrary keys in the schema.

* Use the correct (?) syntax for the per_service schema

* Fix bad unions and generate schema

* Use flatmap+map to create the data immutably

* Force string to match types

* Generate schema

* Add note about updating the jest snapshot
This commit is contained in:
Colton Myers 2022-07-27 10:50:35 -06:00 committed by GitHub
parent 998b11a9a8
commit 3dff85bcdd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 504 additions and 2 deletions

View file

@ -1078,6 +1078,102 @@ exports[`APM telemetry helpers getApmTelemetry generates a JSON object with the
}
}
},
"per_service": {
"properties": {
"service_id": {
"type": "keyword"
},
"timed_out": {
"type": "boolean"
},
"cloud": {
"properties": {
"availability_zones": {
"type": "keyword"
},
"regions": {
"type": "keyword"
},
"providers": {
"type": "keyword"
}
}
},
"faas": {
"properties": {
"trigger": {
"properties": {
"type": {
"type": "keyword"
}
}
}
}
},
"agent": {
"properties": {
"name": {
"type": "keyword"
},
"version": {
"type": "keyword"
}
}
},
"service": {
"properties": {
"language": {
"properties": {
"name": {
"type": "keyword"
},
"version": {
"type": "keyword"
}
}
},
"framework": {
"properties": {
"name": {
"type": "keyword"
},
"version": {
"type": "keyword"
}
}
},
"runtime": {
"properties": {
"name": {
"type": "keyword"
},
"version": {
"type": "keyword"
}
}
}
}
},
"kubernetes": {
"properties": {
"pod": {
"properties": {
"name": {
"type": "keyword"
}
}
}
}
},
"container": {
"properties": {
"id": {
"type": "keyword"
}
}
}
}
},
"tasks": {
"properties": {
"aggregated_transactions": {
@ -1233,6 +1329,17 @@ exports[`APM telemetry helpers getApmTelemetry generates a JSON object with the
}
}
}
},
"per_service": {
"properties": {
"took": {
"properties": {
"ms": {
"type": "long"
}
}
}
}
}
}
}

View file

@ -61,6 +61,9 @@ The collection tasks also use the [`APMDataTelemetry` type](../server/lib/apm_te
Running `node scripts/telemetry_check --fix` from the root Kibana directory will update the schemas, which should automatically notify the Infra team when a pull request is opened so they can update the mapping in the telemetry clusters.
Running `node scripts/test/jest --updateSnapshot` from the `x-pack/plugins/apm` directory will update the
mappings snapshot used in the jest tests.
## Behavioral Telemetry
Behavioral telemetry is recorded with the ui_metrics and application_usage methods from the Usage Collection plugin.

View file

@ -25,6 +25,7 @@ import {
CLOUD_REGION,
CONTAINER_ID,
ERROR_GROUP_ID,
FAAS_TRIGGER_TYPE,
HOST_NAME,
HOST_OS_PLATFORM,
OBSERVER_HOSTNAME,
@ -49,7 +50,7 @@ import { APMError } from '../../../../typings/es_schemas/ui/apm_error';
import { AgentName } from '../../../../typings/es_schemas/ui/fields/agent';
import { Span } from '../../../../typings/es_schemas/ui/span';
import { Transaction } from '../../../../typings/es_schemas/ui/transaction';
import { APMTelemetry } from '../types';
import { APMTelemetry, APMPerService } from '../types';
// Time-range labels as a readonly tuple; `TimeRange` is the union of its
// literal members ('1d' | 'all').
const TIME_RANGES = ['1d', 'all'] as const;
type TimeRange = typeof TIME_RANGES[number];
@ -1150,4 +1151,184 @@ export const tasks: TelemetryTask[] = [
};
},
},
{
  name: 'per_service',
  /**
   * Collects one telemetry entry per (environment, service name) pair.
   *
   * Searches the metric indices for documents whose `@timestamp` falls within
   * the last hour, buckets them by environment and then by service name (up
   * to 1000 buckets each), and for every service gathers:
   *  - agent / language / framework / runtime / pod / container values taken
   *    from a single document selected by a `top_metrics` aggregation, and
   *  - the most frequent cloud regions, providers, availability zones and
   *    FaaS trigger types via `terms` aggregations.
   *
   * @returns `{ per_service }` holding an array of `APMPerService` entries;
   *   the array is empty when the response carries no aggregations.
   */
  executor: async ({ indices, search }) => {
    const response = await search({
      index: [indices.metric],
      body: {
        size: 0,
        timeout,
        query: {
          bool: {
            filter: [{ range: { '@timestamp': { gte: 'now-1h' } } }],
          },
        },
        aggs: {
          environments: {
            terms: {
              field: SERVICE_ENVIRONMENT,
              size: 1000,
            },
            aggs: {
              service_names: {
                terms: {
                  field: SERVICE_NAME,
                  size: 1000,
                },
                aggs: {
                  top_metrics: {
                    top_metrics: {
                      sort: '_score',
                      metrics: [
                        {
                          field: AGENT_NAME,
                        },
                        {
                          field: AGENT_VERSION,
                        },
                        {
                          field: SERVICE_LANGUAGE_NAME,
                        },
                        {
                          field: SERVICE_LANGUAGE_VERSION,
                        },
                        {
                          field: SERVICE_FRAMEWORK_NAME,
                        },
                        {
                          field: SERVICE_FRAMEWORK_VERSION,
                        },
                        {
                          field: SERVICE_RUNTIME_NAME,
                        },
                        {
                          field: SERVICE_RUNTIME_VERSION,
                        },
                        {
                          field: POD_NAME,
                        },
                        {
                          field: CONTAINER_ID,
                        },
                      ],
                    },
                  },
                  [CLOUD_REGION]: {
                    terms: {
                      field: CLOUD_REGION,
                      size: 5,
                    },
                  },
                  [CLOUD_PROVIDER]: {
                    terms: {
                      field: CLOUD_PROVIDER,
                      size: 3,
                    },
                  },
                  [CLOUD_AVAILABILITY_ZONE]: {
                    terms: {
                      field: CLOUD_AVAILABILITY_ZONE,
                      size: 5,
                    },
                  },
                  [FAAS_TRIGGER_TYPE]: {
                    terms: {
                      field: FAAS_TRIGGER_TYPE,
                      size: 5,
                    },
                  },
                },
              },
            },
          },
        },
      },
    });

    const envBuckets = response.aggregations?.environments.buckets ?? [];

    const data: APMPerService[] = envBuckets.flatMap((envBucket) => {
      const env = envBucket.key;
      const serviceBuckets = envBucket.service_names?.buckets ?? [];

      return serviceBuckets.map((serviceBucket) => {
        const name = serviceBucket.key;
        // The environment is part of the id so the same service name in two
        // environments yields two distinct entries.
        const fullServiceName = `${env}~${name}`;

        // Resolve the sampled document's metrics once. `top[0]` is guarded as
        // well as `top_metrics`, so an empty `top` array produces undefined
        // fields instead of throwing a TypeError.
        const metrics = serviceBucket.top_metrics?.top[0]?.metrics;

        // NOTE(review): the `as string` casts below only satisfy the
        // `APMPerService` type; a field absent from the sampled document
        // presumably still surfaces as null/undefined at runtime — confirm
        // that downstream consumers tolerate that.
        return {
          service_id: fullServiceName,
          // Search-level flag, repeated on every entry of this response.
          timed_out: response.timed_out,
          cloud: {
            availability_zones:
              serviceBucket[CLOUD_AVAILABILITY_ZONE]?.buckets.map(
                (inner) => inner.key as string
              ) ?? [],
            regions:
              serviceBucket[CLOUD_REGION]?.buckets.map(
                (inner) => inner.key as string
              ) ?? [],
            providers:
              serviceBucket[CLOUD_PROVIDER]?.buckets.map(
                (inner) => inner.key as string
              ) ?? [],
          },
          faas: {
            trigger: {
              type:
                serviceBucket[FAAS_TRIGGER_TYPE]?.buckets.map(
                  (inner) => inner.key as string
                ) ?? [],
            },
          },
          agent: {
            name: metrics?.[AGENT_NAME] as string,
            version: metrics?.[AGENT_VERSION] as string,
          },
          service: {
            language: {
              name: metrics?.[SERVICE_LANGUAGE_NAME] as string,
              version: metrics?.[SERVICE_LANGUAGE_VERSION] as string,
            },
            framework: {
              name: metrics?.[SERVICE_FRAMEWORK_NAME] as string,
              version: metrics?.[SERVICE_FRAMEWORK_VERSION] as string,
            },
            runtime: {
              name: metrics?.[SERVICE_RUNTIME_NAME] as string,
              version: metrics?.[SERVICE_RUNTIME_VERSION] as string,
            },
          },
          kubernetes: {
            pod: {
              name: metrics?.[POD_NAME] as string,
            },
          },
          container: {
            id: metrics?.[CONTAINER_ID] as string,
          },
        };
      });
    });

    return {
      per_service: data,
    };
  },
},
];

View file

@ -12,11 +12,14 @@ import {
TimeframeMap,
TimeframeMap1d,
TimeframeMapAll,
APMPerService,
} from './types';
import { ElasticAgentName } from '../../../typings/es_schemas/ui/fields/agent';
// Shared leaf definitions reused by the schema objects in this file for
// `long` and `keyword` fields.
const long: { type: 'long' } = { type: 'long' };
const keyword: { type: 'keyword' } = { type: 'keyword' };
const aggregatedTransactionCountSchema: MakeSchemaFrom<AggregatedTransactionsCounts> =
{
expected_metric_document_count: long,
@ -113,6 +116,47 @@ const apmPerAgentSchema: Pick<
},
};
/**
 * Telemetry schema for a single `per_service` entry. It is typed as
 * `MakeSchemaFrom<APMPerService>`, so its keys follow the `APMPerService`
 * interface field for field.
 */
export const apmPerServiceSchema: MakeSchemaFrom<APMPerService> = {
service_id: keyword,
timed_out: { type: 'boolean' },
// Cloud metadata: each field is declared as an array of keywords.
cloud: {
availability_zones: { type: 'array', items: { type: 'keyword' } },
regions: { type: 'array', items: { type: 'keyword' } },
providers: { type: 'array', items: { type: 'keyword' } },
},
// FaaS trigger types, also an array of keywords.
faas: {
trigger: {
type: { type: 'array', items: { type: 'keyword' } },
},
},
// The remaining groups are single keyword values.
agent: {
name: keyword,
version: keyword,
},
service: {
language: {
name: keyword,
version: keyword,
},
framework: {
name: keyword,
version: keyword,
},
runtime: {
name: keyword,
version: keyword,
},
},
kubernetes: {
pod: {
name: keyword,
},
},
container: {
id: keyword,
},
};
export const apmSchema: MakeSchemaFrom<APMUsage> = {
...apmPerAgentSchema,
has_any_services: { type: 'boolean' },
@ -192,6 +236,7 @@ export const apmSchema: MakeSchemaFrom<APMUsage> = {
service_groups: {
kuery_fields: { type: 'array', items: { type: 'keyword' } },
},
per_service: { type: 'array', items: { ...apmPerServiceSchema } },
tasks: {
aggregated_transactions: { took: { ms: long } },
cloud: { took: { ms: long } },
@ -207,5 +252,6 @@ export const apmSchema: MakeSchemaFrom<APMUsage> = {
cardinality: { took: { ms: long } },
environments: { took: { ms: long } },
service_groups: { took: { ms: long } },
per_service: { took: { ms: long } },
},
};

View file

@ -24,6 +24,47 @@ export interface AggregatedTransactionsCounts {
transaction_count: number;
}
/**
 * Shape of one entry in the `per_service` telemetry array: metadata reported
 * for a single service.
 */
export interface APMPerService {
/** Identifier of the service this entry describes. */
service_id: string;
/** Whether the search that produced this entry timed out. */
timed_out: boolean;
/** Cloud metadata; each field may hold several values. */
cloud: {
availability_zones: string[];
regions: string[];
providers: string[];
};
/** Function-as-a-service metadata. */
faas: {
trigger: {
/** Trigger types; may hold several values. */
type: string[];
};
};
/** Name and version of the agent. */
agent: {
name: string;
version: string;
};
/** Language, framework and runtime (name + version each) of the service. */
service: {
language: {
name: string;
version: string;
};
framework: {
name: string;
version: string;
};
runtime: {
name: string;
version: string;
};
};
/** Kubernetes metadata (a single pod name). */
kubernetes: {
pod: {
name: string;
};
};
/** Container metadata (a single container id). */
container: {
id: string;
};
}
export interface APMUsage {
has_any_services: boolean;
services_per_agent: Record<AgentName, number>;
@ -133,6 +174,7 @@ export interface APMUsage {
service_groups: {
kuery_fields: string[];
};
per_service: APMPerService[];
tasks: Record<
| 'aggregated_transactions'
| 'cloud'
@ -147,7 +189,8 @@ export interface APMUsage {
| 'indices_stats'
| 'cardinality'
| 'environments'
| 'service_groups',
| 'service_groups'
| 'per_service',
{ took: { ms: number } }
>;
}

View file

@ -4015,6 +4015,117 @@
}
}
},
"per_service": {
"type": "array",
"items": {
"properties": {
"service_id": {
"type": "keyword"
},
"timed_out": {
"type": "boolean"
},
"cloud": {
"properties": {
"availability_zones": {
"type": "array",
"items": {
"type": "keyword"
}
},
"regions": {
"type": "array",
"items": {
"type": "keyword"
}
},
"providers": {
"type": "array",
"items": {
"type": "keyword"
}
}
}
},
"faas": {
"properties": {
"trigger": {
"properties": {
"type": {
"type": "array",
"items": {
"type": "keyword"
}
}
}
}
}
},
"agent": {
"properties": {
"name": {
"type": "keyword"
},
"version": {
"type": "keyword"
}
}
},
"service": {
"properties": {
"language": {
"properties": {
"name": {
"type": "keyword"
},
"version": {
"type": "keyword"
}
}
},
"framework": {
"properties": {
"name": {
"type": "keyword"
},
"version": {
"type": "keyword"
}
}
},
"runtime": {
"properties": {
"name": {
"type": "keyword"
},
"version": {
"type": "keyword"
}
}
}
}
},
"kubernetes": {
"properties": {
"pod": {
"properties": {
"name": {
"type": "keyword"
}
}
}
}
},
"container": {
"properties": {
"id": {
"type": "keyword"
}
}
}
}
}
},
"tasks": {
"properties": {
"aggregated_transactions": {
@ -4170,6 +4281,17 @@
}
}
}
},
"per_service": {
"properties": {
"took": {
"properties": {
"ms": {
"type": "long"
}
}
}
}
}
}
}