[Data Forge] Add service.logs dataset as a data stream (#188786)

This PR adds the `service.logs` dataset to Data Forge. The EEM project
needs this dataset to test the default service logs entity definition.
This dataset is different because I wanted to create a fully compliant
data stream. This change also includes changes to the name of the
default ingest pipeline to `logs@custom` to work with the `logs-*-*`
component templates and ingest pipelines. If a document has
`data_stream.dataset` it will now be routed to
`logs-${doc.data_stream.dataset}-default`. If the document has
`data_stream.type`, `data_stream.dataset`, and `data_stream.namespace`
it will be indexed to `{type}-{dataset}-{namespace}` following the default
data stream conventions.

Because I've changed the name of the ingest pipeline, I also had to
update the index templates for the other datasets.

### Testing

Use the following YAML:

```yaml
---
elasticsearch:
  installKibanaUser: false

kibana:
  installAssets: false
  host: "http://localhost:5601/kibana"

indexing:
  dataset: "service.logs"
  eventsPerCycle: 100
  interval: 10000

schedule:
  - template: "good"
    start: "now-1h"
    end: false
    eventsPerCycle: 100
```

Click on "Logs" under "Observability", you should see something like:

<img width="2048" alt="image"
src="https://github.com/user-attachments/assets/64837c5c-9380-4897-9ccc-acae313683ee">

To check the other data sources, change `dataset` to `fake_stack`,
`fake_logs`, `fake_hosts` and check `event.ingested` is set on the
documents; none of these show up in the "Logs Explorer", you'll have to
check them out separately via "Dev Console".
This commit is contained in:
Chris Cowan 2024-07-23 11:52:16 -06:00 committed by GitHub
parent 6203f9b6a4
commit cc92c478c8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
34 changed files with 317 additions and 44 deletions

View file

@ -8,6 +8,7 @@
// Dataset identifiers; each one names a data source that can be selected for generation.
export const FAKE_HOSTS = 'fake_hosts';
export const FAKE_LOGS = 'fake_logs';
export const FAKE_STACK = 'fake_stack';
export const SERVICE_LOGS = 'service.logs';
// Prefix of the index names built for documents that do not set `data_stream.dataset`.
export const INDEX_PREFIX = 'kbn-data-forge';
@ -35,4 +36,5 @@ export const DEFAULTS = {
REDUCE_WEEKEND_TRAFFIC_BY: 0,
EPHEMERAL_PROJECT_IDS: 0,
ALIGN_EVENTS_TO_INTERVAL: true,
CARDINALITY: 1,
};

View file

@ -4,7 +4,7 @@
"template": {
"settings": {
"index": {
"final_pipeline": "kbn-data-forge-add-event-ingested",
"final_pipeline": "logs@custom",
"codec" : "best_compression",
"mapping": {
"total_fields": {

View file

@ -41,7 +41,7 @@
"settings": {
"index": {
"codec": "best_compression",
"final_pipeline": "kbn-data-forge-add-event-ingested",
"final_pipeline": "logs@custom",
"mapping": {
"total_fields": {
"limit": 2000

View file

@ -25,7 +25,7 @@ const components = [
];
export const indexTemplate: IndexTemplateDef = {
namespace: 'fake_hosts',
name: 'metrics-fake_hosts@template',
template: { ...template, composed_of: components.map(({ name }) => name) },
components,
};

View file

@ -4,7 +4,7 @@
"template": {
"settings": {
"index": {
"final_pipeline": "kbn-data-forge-add-event-ingested",
"final_pipeline": "logs@custom",
"codec" : "best_compression",
"mapping": {
"total_fields": {

View file

@ -40,7 +40,7 @@
},
"settings": {
"index": {
"final_pipeline": "kbn-data-forge-add-event-ingested",
"final_pipeline": "logs@custom",
"codec": "best_compression",
"mapping": {
"total_fields": {

View file

@ -25,7 +25,7 @@ const components = [
];
export const indexTemplate: IndexTemplateDef = {
namespace: 'fake_logs',
name: 'logs-fake_logs@template',
template: { ...template, composed_of: components.map(({ name }) => name) },
components,
};

View file

@ -4,7 +4,7 @@
"template": {
"settings": {
"index": {
"final_pipeline": "kbn-data-forge-add-event-ingested",
"final_pipeline": "logs@custom",
"codec" : "best_compression",
"mapping": {
"total_fields": {

View file

@ -43,7 +43,7 @@
},
"settings": {
"index": {
"final_pipeline": "kbn-data-forge-add-event-ingested",
"final_pipeline": "logs@custom",
"codec": "best_compression",
"mapping": {
"total_fields": {

View file

@ -33,7 +33,7 @@ const components = [
];
export const indexTemplate: IndexTemplateDef = {
namespace: ADMIN_CONSOLE,
name: `logs-${ADMIN_CONSOLE}@template`,
template: {
...template,
composed_of: components.map(({ name }) => name),

View file

@ -4,7 +4,7 @@
"template": {
"settings": {
"index": {
"final_pipeline": "kbn-data-forge-add-event-ingested",
"final_pipeline": "logs@custom",
"codec" : "best_compression",
"mapping": {
"total_fields": {

View file

@ -38,7 +38,7 @@
},
"settings": {
"index": {
"final_pipeline": "kbn-data-forge-add-event-ingested",
"final_pipeline": "logs@custom",
"codec": "best_compression",
"mapping": {
"total_fields": {

View file

@ -23,7 +23,7 @@ const components = [
];
export const indexTemplate: IndexTemplateDef = {
namespace: HEARTBEAT,
name: `logs-${HEARTBEAT}@template`,
template: { ...template, composed_of: components.map(({ name }) => name) },
components,
};

View file

@ -4,7 +4,7 @@
"template": {
"settings": {
"index": {
"final_pipeline": "kbn-data-forge-add-event-ingested",
"final_pipeline": "logs@custom",
"codec" : "best_compression",
"mapping": {
"total_fields": {

View file

@ -39,7 +39,7 @@
},
"settings": {
"index": {
"final_pipeline": "kbn-data-forge-add-event-ingested",
"final_pipeline": "logs@custom",
"codec": "best_compression",
"mapping": {
"total_fields": {

View file

@ -23,7 +23,7 @@ const components = [
];
export const indexTemplate: IndexTemplateDef = {
namespace: MESSAGE_PROCESSOR,
name: `logs-${MESSAGE_PROCESSOR}@template`,
template: { ...template, composed_of: components.map(({ name }) => name) },
components,
};

View file

@ -4,7 +4,7 @@
"template": {
"settings": {
"index": {
"final_pipeline": "kbn-data-forge-add-event-ingested",
"final_pipeline": "logs@custom",
"codec" : "best_compression",
"mapping": {
"total_fields": {

View file

@ -39,7 +39,7 @@
},
"settings": {
"index": {
"final_pipeline": "kbn-data-forge-add-event-ingested",
"final_pipeline": "logs@custom",
"codec": "best_compression",
"mapping": {
"total_fields": {

View file

@ -25,7 +25,7 @@ const components = [
];
export const indexTemplate: IndexTemplateDef = {
namespace: MONGODB,
name: `logs-${MONGODB}@template`,
template: { ...template, composed_of: components.map(({ name }) => name) },
components,
};

View file

@ -4,7 +4,7 @@
"template": {
"settings": {
"index": {
"final_pipeline": "kbn-data-forge-add-event-ingested",
"final_pipeline": "logs@custom",
"codec" : "best_compression",
"mapping": {
"total_fields": {

View file

@ -40,7 +40,7 @@
},
"settings": {
"index": {
"final_pipeline": "kbn-data-forge-add-event-ingested",
"final_pipeline": "logs@custom",
"codec": "best_compression",
"mapping": {
"total_fields": {

View file

@ -23,7 +23,7 @@ const components = [
];
export const indexTemplate: IndexTemplateDef = {
namespace: NGINX_PROXY,
name: `logs-${NGINX_PROXY}@template`,
template: { ...template, composed_of: components.map(({ name }) => name) },
components,
};

View file

@ -6,26 +6,30 @@
*/
import { GeneratorFunction, Dataset, IndexTemplateDef } from '../types';
import { FAKE_HOSTS, FAKE_LOGS, FAKE_STACK } from '../constants';
import { FAKE_HOSTS, FAKE_LOGS, FAKE_STACK, SERVICE_LOGS } from '../constants';
import * as fakeLogs from './fake_logs';
import * as fakeHosts from './fake_hosts';
import * as fakeStack from './fake_stack';
import * as serviceLogs from './service_logs';
// Index template definitions for each dataset, keyed by dataset identifier.
// An empty list means no dataset-specific template is listed here for that dataset.
export const indexTemplates: Record<Dataset, IndexTemplateDef[]> = {
[FAKE_HOSTS]: [fakeHosts.indexTemplate],
[FAKE_LOGS]: [fakeLogs.indexTemplate],
[FAKE_STACK]: fakeStack.indexTemplate,
[SERVICE_LOGS]: [], // uses logs-*-* index templates
};
// Event generator function for each dataset, keyed by dataset identifier.
// NOTE(review): `generteEvent` is the spelling referenced on the fake_stack module here;
// presumably it matches that module's export name — confirm before renaming.
export const generateEvents: Record<Dataset, GeneratorFunction> = {
[FAKE_HOSTS]: fakeHosts.generateEvent,
[FAKE_LOGS]: fakeLogs.generateEvent,
[FAKE_STACK]: fakeStack.generteEvent,
[SERVICE_LOGS]: serviceLogs.generateEvent,
};
// Kibana asset entries (strings) for each dataset, keyed by dataset identifier.
// Only the fake_stack dataset lists any; the others are empty.
export const kibanaAssets: Record<Dataset, string[]> = {
[FAKE_HOSTS]: [],
[FAKE_LOGS]: [],
[FAKE_STACK]: fakeStack.kibanaAssets,
[SERVICE_LOGS]: [],
};

View file

@ -0,0 +1,28 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { faker } from '@faker-js/faker';
import { omit, sample } from 'lodash';
import { SERVICE_LOGS } from '../../constants';
import { GeneratorFunction } from '../../types';
import { generateService } from './lib/generate_service';
/**
 * Builds a single `service.logs` document for one event.
 *
 * The service is looked up with `generateService(index + 1)`, one of its
 * host/cloud pairs is picked at random, and the pair's fields are spread onto
 * the top level of the document next to the service fields.
 *
 * @param _config unused
 * @param _schedule unused
 * @param index event index; the service id passed to `generateService` is `index + 1`
 * @param timestamp event time, serialised with `toISOString()`
 * @returns an array containing exactly one document
 */
export const generateEvent: GeneratorFunction = (_config, _schedule, index, timestamp) => {
  const serviceId = index + 1;
  const generatedService = generateService(serviceId);
  // Pick the host/cloud pair this particular event is attributed to.
  const pickedHostWithCloud = sample(generatedService.hostsWithCloud);
  return [
    {
      namespace: SERVICE_LOGS,
      '@timestamp': timestamp.toISOString(),
      message: faker.git.commitMessage(),
      data_stream: { type: 'logs', dataset: SERVICE_LOGS, namespace: 'default' },
      // The list of host/cloud pairs is generator bookkeeping, so it is left out of `service`.
      service: omit(generatedService, 'hostsWithCloud'),
      ...pickedHostWithCloud,
    },
  ];
};

View file

@ -0,0 +1,132 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { faker } from '@faker-js/faker';
/** Shape of the object returned by `generateCloud`. */
interface Cloud {
  availability_zone: string;
  image: { id: string };
  instance: { id: string };
  provider: string;
  service: { name: string };
  machine: { type: string };
  region: string;
  account: { id: string };
}
/** Region identifiers that `generateRegion` chooses from. */
const AWS_REGIONS: readonly string[] = [
  'us-east-2',
  'us-east-1',
  'us-west-1',
  'us-west-2',
  'af-south-1',
  'ap-east-1',
  'ap-south-2',
  'ap-southeast-3',
  'ap-southeast-4',
  'ap-south-1',
  'ap-northeast-3',
  'ap-northeast-2',
  'ap-southeast-1',
  'ap-southeast-2',
  'ap-northeast-1',
  'ca-central-1',
  'ca-west-1',
  'eu-central-1',
  'eu-west-1',
  'eu-west-2',
  'eu-south-1',
  'eu-west-3',
  'eu-south-2',
  'eu-north-1',
  'eu-central-2',
  'il-central-1',
  'me-south-1',
  'me-central-1',
  'sa-east-1',
];

/**
 * Returns one region identifier chosen at random from `AWS_REGIONS`.
 *
 * A single element is picked directly instead of shuffling the whole list and
 * popping the last entry; the list is never empty, so no fallback is needed.
 *
 * @returns a region identifier such as `us-east-1`
 */
export function generateRegion(): string {
  return faker.helpers.arrayElement(AWS_REGIONS);
}
/** Machine type identifiers that `machineType` chooses from. */
const MACHINE_TYPES: readonly string[] = [
  't2.micro',
  't2.small',
  't2.medium',
  't3.micro',
  't3.small',
  't3.medium',
  'm4.large',
  'm4.xlarge',
  'm4.2xlarge',
  'm5.large',
  'm5.xlarge',
  'm5.2xlarge',
  'c4.large',
  'c4.xlarge',
  'c4.2xlarge',
  'c5.large',
  'c5.xlarge',
  'c5.2xlarge',
  'r4.large',
  'r4.xlarge',
];

/**
 * Returns one machine type chosen at random from `MACHINE_TYPES`.
 *
 * A single element is picked directly instead of shuffling the whole list and
 * popping the last entry; the list is never empty, so no fallback is needed.
 *
 * @returns a machine type such as `c4.large`
 */
function machineType(): string {
  return faker.helpers.arrayElement(MACHINE_TYPES);
}
/**
 * Builds an availability zone name by appending a randomly chosen zone letter
 * (`a`, `b`, `c` or `d`) to the given region.
 *
 * The letter is picked with `arrayElement`, which always yields a string, so
 * the result can never contain the text "undefined".
 *
 * @param region region identifier used as the prefix
 * @returns the region followed by one zone letter, e.g. `us-east-1a`
 */
function generateAvailabilityZone(region: string): string {
  const zoneLetter = faker.helpers.arrayElement(['a', 'b', 'c', 'd']);
  return `${region}${zoneLetter}`;
}
/**
 * Generates a random account id made of 12 numeric characters.
 *
 * @returns the value produced by `faker.string.numeric(12)`
 */
export function generateAccountId() {
  const accountIdLength = 12;
  return faker.string.numeric(accountIdLength);
}
/** Optional overrides for `generateCloud`; any value left out is generated randomly. */
interface Options {
  region?: string;
  accountId?: string;
}

/**
 * Generates a `Cloud` object describing an EC2 instance on the `aws` provider.
 *
 * @param options optional `region` and `accountId` to use instead of random values
 * @returns the cloud object; its availability zone is derived from the chosen region
 */
export function generateCloud(options?: Options): Cloud {
  // Values are produced in the same order as the fields of the returned object.
  const region = options?.region ?? generateRegion();
  const availabilityZone = generateAvailabilityZone(region);
  const imageId = faker.string.hexadecimal({ length: 12, prefix: 'ami-' });
  const instanceId = faker.string.hexadecimal({ length: 12, prefix: 'i-' });
  const machine = { type: machineType() };
  const accountId = options?.accountId ?? generateAccountId();

  return {
    availability_zone: availabilityZone,
    image: { id: imageId },
    instance: { id: instanceId },
    provider: 'aws',
    service: { name: 'EC2' },
    machine,
    region,
    account: { id: accountId },
  };
}

View file

@ -0,0 +1,31 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { faker } from '@faker-js/faker';
import { times } from 'lodash';
/**
 * Generates a host object with a random IPv4/IPv6 pair, a hostname derived
 * from the IPv4 address, between 5 and 10 random MAC addresses, and fixed
 * OS and architecture details.
 *
 * @returns the host object; `hostname` and `name` hold the same value
 */
export function generateHost() {
  const ipv4 = faker.internet.ipv4();
  // Dots in the address become underscores, e.g. 10.1.2.3 -> ip-10_1_2_3.internal
  const internalName = `ip-${ipv4.split('.').join('_')}.internal`;
  const ipv6 = faker.internet.ipv6();
  const macCount = faker.helpers.rangeToNumber({ min: 5, max: 10 });
  const macAddresses = times(macCount).map(() => faker.internet.mac());

  return {
    hostname: internalName,
    os: {
      kernel: '5.10.210-201.855.amzn2.aarch64',
      codename: 'focal',
      name: 'Ubuntu',
      type: 'linux',
      family: 'debian',
      version: '20.04.6 LTS (Focal Fossa)',
      platform: 'ubuntu',
    },
    containerized: true,
    ip: [ipv4, ipv6],
    name: internalName,
    mac: macAddresses,
    architecture: 'aarch64',
  };
}

View file

@ -0,0 +1,61 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { faker } from '@faker-js/faker';
import { sample, times } from 'lodash';
import { generateHost } from './generate_host';
import { generateAccountId, generateCloud, generateRegion } from './generate_cloud';
/**
 * A generated service together with the host/cloud pairs it runs on.
 *
 * NOTE(review): `hostsWithCloud` is typed `any[]`; the entries built by
 * `generateService` are `{ host, cloud }` objects — confirm callers before narrowing.
 */
interface Service {
  id: string;
  name: string;
  environment: string;
  namespace: string;
  instance: { id: string };
  node: { name: string; roles: string[] };
  type: string;
  version: string;
  hostsWithCloud: any[];
}
/** Cache of generated services keyed by id, so the same id always yields the same service. */
const services: Record<number, Service> = {};

/**
 * Returns the service for `id`, generating and caching it on first use.
 *
 * A new service gets 1 to 3 random regions, one account id, and 20 to 60
 * host/cloud pairs whose cloud region is drawn from those regions and whose
 * account id is shared.
 *
 * @param id numeric service id; also embedded in the service name and node role
 * @returns the cached or newly generated service
 */
export function generateService(id: number): Service {
  if (services[id]) return services[id];

  const regions = times(faker.helpers.rangeToNumber({ min: 1, max: 3 })).map(() =>
    generateRegion()
  );
  const accountId = generateAccountId();
  const hostsWithCloud = times(faker.helpers.rangeToNumber({ min: 20, max: 60 })).map(() => ({
    host: generateHost(),
    cloud: generateCloud({ region: sample(regions) || regions[0], accountId }),
  }));

  const service: Service = {
    id: faker.string.nanoid(),
    name: `${faker.git.branch()}-${id}`,
    // Pick one value directly; the list is never empty, so no fallback is needed.
    environment: faker.helpers.arrayElement(['production', 'staging', 'qa', 'development']),
    namespace: faker.hacker.noun(),
    instance: {
      id: faker.string.uuid(),
    },
    node: {
      roles: [`service-${id}`],
      name: `instance-${faker.string.nanoid()}`,
    },
    type: 'fake',
    version: faker.system.semver(),
    hostsWithCloud,
  };

  services[id] = service;
  return service;
}

View file

@ -10,19 +10,19 @@ import { indexTemplates } from '../data_sources';
import { Config } from '../types';
export async function deleteIndexTemplate(config: Config, client: Client, logger: ToolingLog) {
const namespace = config.indexing.dataset;
const templates = indexTemplates[namespace];
const templateNames = templates.map((templateDef) => templateDef.namespace).join(',');
const dataset = config.indexing.dataset;
const templates = indexTemplates[dataset];
const templateNames = templates.map((templateDef) => templateDef.name).join(',');
logger.info(`Deleteing index templates (${templateNames})`);
try {
for (const indexTemplateDef of templates) {
logger.info(`Deleteing index template (${indexTemplateDef.namespace})`);
logger.info(`Deleteing index template (${indexTemplateDef.name})`);
await client.indices.deleteIndexTemplate({
name: indexTemplateDef.namespace,
name: indexTemplateDef.name,
});
const componentNames = indexTemplateDef.components.map(({ name }) => name);
logger.info(`Deleteing components for ${indexTemplateDef.namespace} (${componentNames})`);
logger.info(`Deleteing components for ${indexTemplateDef.name} (${componentNames})`);
for (const component of indexTemplateDef.components) {
await client.cluster.deleteComponentTemplate({ name: component.name });
}

View file

@ -32,7 +32,7 @@ export const getEsClient = (config: Config) => {
tls: caCert
? {
ca: caCert,
rejectUnauthorized: true,
rejectUnauthorized: false,
}
: undefined,
});

View file

@ -36,7 +36,7 @@ export async function installDefaultComponentTemplate(
) {
logger.info('Installing base component template: kbn-data-forge_base');
await client.cluster.putComponentTemplate({
name: `kbn-data-forge_base`,
name: `kbn-data-forge@mappings`,
...eventIngestedCommonComponentTemplate,
});
}

View file

@ -25,7 +25,7 @@ export async function installDefaultIngestPipeline(
) {
logger.info('Installing default ingest pipeline: kbn-data-forge-add-event-ingested');
return client.ingest.putPipeline({
id: 'kbn-data-forge-add-event-ingested',
id: 'logs@custom',
processors,
version: 1,
});

View file

@ -16,24 +16,24 @@ export async function installIndexTemplate(
client: Client,
logger: ToolingLog
): Promise<void> {
const namespace = config.indexing.dataset;
const templates = indexTemplates[namespace];
const templateNames = templates.map((templateDef) => templateDef.namespace).join(',');
const { dataset } = config.indexing;
const templates = indexTemplates[dataset];
const templateNames = templates.map((templateDef) => templateDef.name).join(',');
logger.info(`Installing index templates (${templateNames})`);
for (const indexTemplateDef of templates) {
const componentNames = indexTemplateDef.components.map(({ name }) => name);
logger.info(`Installing components for ${indexTemplateDef.namespace} (${componentNames})`);
logger.info(`Installing components for ${indexTemplateDef.name} (${componentNames})`);
for (const component of indexTemplateDef.components) {
await client.cluster.putComponentTemplate({ name: component.name, ...component.template });
}
logger.info(`Installing index template (${indexTemplateDef.namespace})`);
logger.info(`Installing index template (${indexTemplateDef.name})`);
// Clone the template and add the base component name
const template = { ...indexTemplateDef.template };
if (isArray(template.composed_of)) {
template.composed_of.push('kbn-data-forge_base');
template.composed_of.push('kbn-data-forge@mappings');
}
await client.indices.putIndexTemplate({
name: indexTemplateDef.namespace,
name: indexTemplateDef.name,
body: template,
});
}

View file

@ -17,6 +17,18 @@ import { INDEX_PREFIX } from '../constants';
type CargoQueue = ReturnType<typeof cargoQueue<Doc, Error>>;
let queue: CargoQueue;
/**
 * Works out which index or data stream a document is written to.
 *
 * Documents with `data_stream.dataset` go to `{type}-{dataset}-{namespace}`,
 * where a missing type becomes `logs` and a missing namespace becomes `default`.
 * All other documents go to a monthly index named from `INDEX_PREFIX`, the
 * configured dataset, the document namespace and the document timestamp.
 *
 * @param config supplies `indexing.dataset` for the monthly-index name
 * @param doc document being indexed
 * @returns the target index or data stream name
 */
function calculateIndexName(config: Config, doc: Doc) {
  const stream = doc.data_stream;

  if (!stream?.dataset) {
    // No data stream routing: use the prefixed index, bucketed by month.
    const legacyNamespace = `${config.indexing.dataset}.${doc.namespace}`;
    const monthBucket = moment(doc['@timestamp']).format('YYYY-MM-01');
    return `${INDEX_PREFIX}-${legacyNamespace}-${monthBucket}`;
  }

  const streamType = stream.type ?? 'logs';
  const streamNamespace = stream.namespace ?? 'default';
  return `${streamType}-${stream.dataset}-${streamNamespace}`;
}
export const createQueue = (config: Config, client: Client, logger: ToolingLog): CargoQueue => {
if (queue != null) return queue;
queue = cargoQueue<Doc, Error>(
@ -24,10 +36,7 @@ export const createQueue = (config: Config, client: Client, logger: ToolingLog):
const body: object[] = [];
const startTs = Date.now();
docs.forEach((doc) => {
const namespace = `${config.indexing.dataset}.${doc.namespace}`;
const indexName = `${INDEX_PREFIX}-${namespace}-${moment(doc['@timestamp']).format(
'YYYY-MM-01'
)}`;
const indexName = calculateIndexName(config, doc);
indices.add(indexName);
body.push({ create: { _index: indexName } });
body.push(omit(doc, 'namespace'));

View file

@ -8,19 +8,25 @@
import type { Moment } from 'moment';
import { Client } from '@elastic/elasticsearch';
import * as rt from 'io-ts';
import { FAKE_HOSTS, FAKE_LOGS, FAKE_STACK } from '../constants';
import { FAKE_HOSTS, FAKE_LOGS, FAKE_STACK, SERVICE_LOGS } from '../constants';
/** Minimal shape shared by the documents handed to the indexing queue. */
export interface Doc {
// Used to build the index name when `data_stream.dataset` is not set.
namespace: string;
'@timestamp': Moment | string;
labels?: object;
tags?: string[];
// Optional data stream routing; when `dataset` is set the document is written to
// `{type}-{dataset}-{namespace}`, with `type` defaulting to `logs` and
// `namespace` defaulting to `default`.
data_stream?: {
dataset?: string;
namespace?: string;
type?: 'logs' | 'metrics';
};
}
// io-ts codec whose accepted values are the dataset identifier constants listed as keys.
export const DatasetRT = rt.keyof({
[FAKE_HOSTS]: null,
[FAKE_LOGS]: null,
[FAKE_STACK]: null,
[SERVICE_LOGS]: null,
});
// Static type derived from the codec above.
export type Dataset = rt.TypeOf<typeof DatasetRT>;
@ -147,7 +153,7 @@ export type EventTemplate = Array<[EventFunction, number]>;
export type ElasticSearchService = (client: Client) => Promise<any>;
export const IndexTemplateDefRT = rt.type({
namespace: rt.string,
name: rt.string,
template: rt.UnknownRecord,
components: rt.array(rt.type({ name: rt.string, template: rt.UnknownRecord })),
});