kibana/x-pack/platform/packages/shared/kbn-sample-parser/cli/index.ts
Dario Gieselaar ba13e86a70
[Streams] Replay loghub data with synthtrace (#212120)
Download, parse and replay loghub data with Synthtrace, for use in the
Streams project. In summary:

- adds a `@kbn/sample-log-parser` package which parses Loghub sample
data, creates valid parsers for extracting and replacing timestamps,
using the LLM
- adds a `sample_logs` scenario which uses the parsed data sets to replay
Loghub data continuously as if it were live data
- refactors some parts of Synthtrace (follow-up work captured in
https://github.com/elastic/kibana/issues/212179)

## Synthtrace changes

- Replace custom Logger object with Kibana-standard ToolingLog
- Report progress and estimated time to completion for long-running jobs
- Simplify scenarioOpts (allow comma-separated key-value pairs instead
of just JSON)
- Simplify client initialization
- When using workers, only bootstrap once (in the main thread)
- Allow workers to shut down gracefully
- Downgrade some logging levels for less noise

---------

Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
2025-03-11 13:30:06 +01:00

103 lines
2.8 KiB
TypeScript

/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { ToolingLog } from '@kbn/tooling-log';
import { Command } from 'commander';
import pLimit from 'p-limit';
import { partition } from 'lodash';
import { ensureLoghubRepo } from '../src/ensure_loghub_repo';
import { readLoghubSystemFiles } from '../src/read_loghub_system_files';
import { ensureValidParser } from '../src/ensure_valid_parser';
import { createOpenAIClient } from '../src/create_openai_client';
import { ensureValidQueries } from '../src/ensure_valid_queries';
/**
 * Ensures the Loghub repo is present locally, then — for each sample
 * system, with at most 5 systems in flight at once — ensures a valid
 * timestamp parser and validated queries exist, generating them via the
 * OpenAI client where needed.
 *
 * @param log - ToolingLog used for progress and error reporting.
 * @throws when one or more systems could not produce a valid parser;
 *   each failure is logged before the summary error is thrown.
 */
async function run({ log }: { log: ToolingLog }): Promise<void> {
  await ensureLoghubRepo({ log });

  const systems = await readLoghubSystemFiles({ log });

  // Cap concurrency: each system triggers LLM calls, so avoid hammering
  // the API with one request per system all at once.
  const limiter = pLimit(5);

  const openAIClient = createOpenAIClient();

  const results = await Promise.all(
    systems.map((system) =>
      limiter(async () => {
        try {
          // Parser and query generation are independent per system; run
          // them in parallel inside the per-system concurrency slot.
          await Promise.all([
            ensureValidParser({ openAIClient, log, system }),
            ensureValidQueries({ openAIClient, system, log }),
          ]);
          return { name: system.name, error: null };
        } catch (error) {
          // Capture the failure instead of rejecting, so one bad system
          // does not abort the other systems' work.
          return { name: system.name, error };
        }
      })
    )
  );

  const [valid, invalid] = partition(results, (result) => !result.error);

  if (invalid.length === 0) {
    log.info(`Ensured ${valid.length} parsers`);
    return;
  }

  invalid.forEach((result) => {
    log.error(`Failed generating a valid parser for ${result.name}`);
    log.error(result.error);
  });

  throw new Error(`${invalid.length} out of ${results.length} parsers are invalid`);
}
/**
 * CLI entry point: defines the `loghub-parser` command with logging
 * flags (-d/--debug, -v/--verbose, -s/--silent), builds a ToolingLog at
 * the requested level, and runs the parser/query generation.
 *
 * @returns the promise from commander's `parseAsync`, so callers can
 *   await completion. Previously the synchronous `parse` was used with
 *   an async action, which dropped the action's rejection and turned any
 *   failure in `run` into an unhandled promise rejection.
 */
export function cli() {
  const program = new Command('bin/kibana-setup');

  return program
    .name('loghub-parser')
    .description(
      'Generates code to extract and replace timestamps in loglines from Loghub datasets'
    )
    .option('-d, --debug', 'Debug logging', false)
    .option('-v, --verbose', 'Verbose logging', false)
    .option('-s, --silent', 'Prevent all logging', false)
    .action(async () => {
      const options = program.opts() as {
        silent: boolean;
        verbose: boolean;
        debug: boolean;
      };

      // Level precedence: silent wins, then debug, then verbose,
      // falling back to 'info'.
      const log = new ToolingLog({
        level: options.silent
          ? 'silent'
          : options.debug
          ? 'debug'
          : options.verbose
          ? 'verbose'
          : 'info',
        writeTo: process.stdout,
      });

      return run({ log });
    })
    // parseAsync awaits the async action, so failures propagate to the
    // returned promise instead of becoming unhandled rejections.
    .parseAsync(process.argv);
}