mirror of
https://github.com/elastic/kibana.git
synced 2025-04-24 17:59:23 -04:00
[ML] Module filtering (#168721)
Adds a new `tags` property to our ML Modules which contains an array of strings used to filter the modules returned when calling `/internal/ml/modules/get_module` or `/internal/ml/modules/recognize`. Adds a new kibana config setting `xpack.ml.compatibleModuleType` which will enforce a module filter. This setting supports the values `security`, `observability` or `search` and will be used by the serverless projects to ensure only modules relevant to the current project are discoverable. When matching against the `xpack.ml.compatibleModuleType` setting, modules with no tags will be returned along with modules which match the `compatibleModuleType` value. The endpoints `/internal/ml/modules/get_module` and `/internal/ml/modules/recognize` can also take a `filter` query parameter in the form of a comma separated list of values. If any of these filter values match a module's tags, the module will be returned. Modules with no tags will not be matched when using the `filter` parameter. This PR also updates the security plugin to apply a `security` filter when retrieving modules and running recognize. This will improve performance, especially for recognize which runs the queries from every module on the supplied index pattern. Examples **Running the Security serverless project.** Request: `/internal/ml/modules/get_module` Response: All modules containing `tags: ["security"]` Plus all modules with no `tags` or `tags: []` Request: `/internal/ml/modules/get_module?filter=observability` Response: An empty list **Running stateful es** Request: `/internal/ml/modules/get_module` Response: All modules Request: `/internal/ml/modules/get_module?filter=security` Response: Only modules containing `tags: ["security"]` Request: `/internal/ml/modules/get_module?filter=security,observability` Response: Modules containing `tags: ["security"]` or `tags: ["observability"]` --------- Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
parent
1f6090cb3f
commit
6a6df9d72f
37 changed files with 385 additions and 190 deletions
|
@ -15,6 +15,7 @@ import type { Module } from '../../../common/types/modules';
|
|||
import { DataRecognizer } from '.';
|
||||
import type { MlClient } from '../../lib/ml_client';
|
||||
import type { MLSavedObjectService } from '../../saved_objects';
|
||||
import { type Config, filterConfigs } from './data_recognizer';
|
||||
|
||||
const callAs = () => Promise.resolve({ body: {} });
|
||||
|
||||
|
@ -35,7 +36,8 @@ describe('ML - data recognizer', () => {
|
|||
} as unknown as SavedObjectsClientContract,
|
||||
{ find: jest.fn() } as unknown as DataViewsService,
|
||||
{} as MLSavedObjectService,
|
||||
{ headers: { authorization: '' } } as unknown as KibanaRequest
|
||||
{ headers: { authorization: '' } } as unknown as KibanaRequest,
|
||||
null
|
||||
);
|
||||
|
||||
describe('jobOverrides', () => {
|
||||
|
@ -93,5 +95,107 @@ describe('ML - data recognizer', () => {
|
|||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('should filter configs', () => {
  // Minimal Config stubs: filterConfigs only ever reads `module.tags`.
  const securityOnly = { module: { tags: ['security'] } };
  const securityAndObservability = { module: { tags: ['security', 'observability'] } };
  const securityAndLogs = { module: { tags: ['security', 'logs'] } };
  const searchOnly = { module: { tags: ['search'] } };
  // Three flavours of "untagged": two empty tag lists and one module with no `tags` key at all.
  const untagged = [{ module: { tags: [] } }, { module: { tags: [] } }, { module: {} }];

  const configs = [
    securityOnly,
    securityAndObservability,
    securityAndLogs,
    searchOnly,
    ...untagged,
  ] as unknown as Config[];

  // No compatible module type and no tag filters: everything is returned.
  expect(filterConfigs(configs, null, [])).toStrictEqual(configs);

  // Compatible module type `security`, no tag filters:
  // security-tagged modules plus every untagged module are returned.
  expect(filterConfigs(configs, 'security', [])).toStrictEqual([
    securityOnly,
    securityAndObservability,
    securityAndLogs,
    ...untagged,
  ]);

  // No compatible module type, tag filter `search`:
  // only modules carrying the search tag are returned.
  expect(filterConfigs(configs, null, ['search'])).toStrictEqual([searchOnly]);

  // Compatible module type `security`, tag filter `search`: nothing satisfies both.
  expect(filterConfigs(configs, 'security', ['search'])).toStrictEqual([]);

  // Compatible module type `observability`, tag filter `search`: nothing satisfies both.
  expect(filterConfigs(configs, 'observability', ['search'])).toStrictEqual([]);

  // Compatible module type `observability`, tag filter `security`:
  // only the module tagged with both security and observability is returned.
  expect(filterConfigs(configs, 'observability', ['security'])).toStrictEqual([
    securityAndObservability,
  ]);

  // Tag filter that matches no module's tags: nothing is returned.
  expect(filterConfigs(configs, null, ['missing'])).toStrictEqual([]);

  // Compatible module type that matches no module's tags, no tag filters:
  // only the untagged modules are returned.
  expect(filterConfigs(configs, 'missing' as any, [])).toStrictEqual(untagged);
});
|
||||
});
|
||||
});
|
||||
|
|
|
@ -15,10 +15,11 @@ import type {
|
|||
} from '@kbn/core/server';
|
||||
|
||||
import moment from 'moment';
|
||||
import { merge } from 'lodash';
|
||||
import { merge, intersection } from 'lodash';
|
||||
import type { DataViewsService } from '@kbn/data-views-plugin/common';
|
||||
import { isPopulatedObject } from '@kbn/ml-is-populated-object';
|
||||
import { isDefined } from '@kbn/ml-is-defined';
|
||||
import type { CompatibleModule } from '../../../common/constants/app';
|
||||
import type { AnalysisLimits } from '../../../common/types/anomaly_detection_jobs';
|
||||
import { getAuthorizationHeader } from '../../lib/request_authorization';
|
||||
import type { MlClient } from '../../lib/ml_client';
|
||||
|
@ -75,7 +76,7 @@ function isFileBasedModule(arg: unknown): arg is FileBasedModule {
|
|||
return isPopulatedObject(arg) && Array.isArray(arg.jobs) && arg.jobs[0]?.file !== undefined;
|
||||
}
|
||||
|
||||
interface Config {
|
||||
export interface Config {
|
||||
dirName?: string;
|
||||
module: FileBasedModule | Module;
|
||||
isSavedObject: boolean;
|
||||
|
@ -116,6 +117,7 @@ export class DataRecognizer {
|
|||
private _jobsService: ReturnType<typeof jobServiceProvider>;
|
||||
private _resultsService: ReturnType<typeof resultsServiceProvider>;
|
||||
private _calculateModelMemoryLimit: ReturnType<typeof calculateModelMemoryLimitProvider>;
|
||||
private _compatibleModuleType: CompatibleModule | null;
|
||||
|
||||
/**
|
||||
* A temporary cache of configs loaded from disk and from save object service.
|
||||
|
@ -139,7 +141,8 @@ export class DataRecognizer {
|
|||
savedObjectsClient: SavedObjectsClientContract,
|
||||
dataViewsService: DataViewsService,
|
||||
mlSavedObjectService: MLSavedObjectService,
|
||||
request: KibanaRequest
|
||||
request: KibanaRequest,
|
||||
compatibleModuleType: CompatibleModule | null
|
||||
) {
|
||||
this._client = mlClusterClient;
|
||||
this._mlClient = mlClient;
|
||||
|
@ -151,6 +154,7 @@ export class DataRecognizer {
|
|||
this._jobsService = jobServiceProvider(mlClusterClient, mlClient);
|
||||
this._resultsService = resultsServiceProvider(mlClient);
|
||||
this._calculateModelMemoryLimit = calculateModelMemoryLimitProvider(mlClusterClient, mlClient);
|
||||
this._compatibleModuleType = compatibleModuleType;
|
||||
}
|
||||
|
||||
// list all directories under the given directory
|
||||
|
@ -184,12 +188,12 @@ export class DataRecognizer {
|
|||
});
|
||||
}
|
||||
|
||||
private async _loadConfigs(): Promise<Config[]> {
|
||||
private async _loadConfigs(moduleTagFilters?: string[]): Promise<Config[]> {
|
||||
if (this._configCache !== null) {
|
||||
return this._configCache;
|
||||
}
|
||||
|
||||
const configs: Config[] = [];
|
||||
const localConfigs: Config[] = [];
|
||||
const dirs = await this._listDirs(this._modulesDir);
|
||||
await Promise.all(
|
||||
dirs.map(async (dir) => {
|
||||
|
@ -202,7 +206,7 @@ export class DataRecognizer {
|
|||
|
||||
if (file !== undefined) {
|
||||
try {
|
||||
configs.push({
|
||||
localConfigs.push({
|
||||
dirName: dir,
|
||||
module: JSON.parse(file),
|
||||
isSavedObject: false,
|
||||
|
@ -219,7 +223,11 @@ export class DataRecognizer {
|
|||
isSavedObject: true,
|
||||
}));
|
||||
|
||||
this._configCache = [...configs, ...savedObjectConfigs];
|
||||
this._configCache = filterConfigs(
|
||||
[...localConfigs, ...savedObjectConfigs],
|
||||
this._compatibleModuleType,
|
||||
moduleTagFilters ?? []
|
||||
);
|
||||
|
||||
return this._configCache;
|
||||
}
|
||||
|
@ -234,14 +242,17 @@ export class DataRecognizer {
|
|||
}
|
||||
|
||||
// get the manifest.json file for a specified id, e.g. "nginx"
|
||||
private async _findConfig(id: string) {
|
||||
const configs = await this._loadConfigs();
|
||||
private async _findConfig(id: string, moduleTagFilters?: string[]) {
|
||||
const configs = await this._loadConfigs(moduleTagFilters);
|
||||
return configs.find((i) => i.module.id === id);
|
||||
}
|
||||
|
||||
// called externally by an endpoint
|
||||
public async findMatches(indexPattern: string): Promise<RecognizeResult[]> {
|
||||
const manifestFiles = await this._loadConfigs();
|
||||
public async findMatches(
|
||||
indexPattern: string,
|
||||
moduleTagFilters?: string[]
|
||||
): Promise<RecognizeResult[]> {
|
||||
const manifestFiles = await this._loadConfigs(moduleTagFilters);
|
||||
const results: RecognizeResult[] = [];
|
||||
|
||||
await Promise.all(
|
||||
|
@ -310,8 +321,8 @@ export class DataRecognizer {
|
|||
return body.hits.total.value > 0;
|
||||
}
|
||||
|
||||
public async listModules() {
|
||||
const manifestFiles = await this._loadConfigs();
|
||||
public async listModules(moduleTagFilters?: string[]): Promise<Module[]> {
|
||||
const manifestFiles = await this._loadConfigs(moduleTagFilters);
|
||||
manifestFiles.sort((a, b) => a.module.id.localeCompare(b.module.id)); // sort as json files are read from disk and could be in any order.
|
||||
|
||||
const configs: Array<Module | FileBasedModule> = [];
|
||||
|
@ -319,7 +330,7 @@ export class DataRecognizer {
|
|||
if (config.isSavedObject) {
|
||||
configs.push(config.module);
|
||||
} else {
|
||||
configs.push(await this.getModule(config.module.id));
|
||||
configs.push(await this.getModule(config.module.id, moduleTagFilters));
|
||||
}
|
||||
}
|
||||
// casting return as Module[] so not to break external plugins who rely on this function
|
||||
|
@ -330,11 +341,11 @@ export class DataRecognizer {
|
|||
// called externally by an endpoint
|
||||
// supplying an optional prefix will add the prefix
|
||||
// to the job and datafeed configs
|
||||
public async getModule(id: string, prefix = ''): Promise<Module> {
|
||||
public async getModule(id: string, moduleTagFilters?: string[], prefix = ''): Promise<Module> {
|
||||
let module: FileBasedModule | Module | null = null;
|
||||
let dirName: string | null = null;
|
||||
|
||||
const config = await this._findConfig(id);
|
||||
const config = await this._findConfig(id, moduleTagFilters);
|
||||
if (config !== undefined) {
|
||||
module = config.module;
|
||||
dirName = config.dirName ?? null;
|
||||
|
@ -468,7 +479,7 @@ export class DataRecognizer {
|
|||
applyToAllSpaces: boolean = false
|
||||
) {
|
||||
// load the config from disk
|
||||
const moduleConfig = await this.getModule(moduleId, jobPrefix);
|
||||
const moduleConfig = await this.getModule(moduleId, undefined, jobPrefix);
|
||||
|
||||
if (indexPatternName === undefined && moduleConfig.defaultIndexPattern === undefined) {
|
||||
throw Boom.badRequest(
|
||||
|
@ -1395,7 +1406,8 @@ export function dataRecognizerFactory(
|
|||
savedObjectsClient: SavedObjectsClientContract,
|
||||
dataViewsService: DataViewsService,
|
||||
mlSavedObjectService: MLSavedObjectService,
|
||||
request: KibanaRequest
|
||||
request: KibanaRequest,
|
||||
compatibleModuleType: CompatibleModule | null
|
||||
) {
|
||||
return new DataRecognizer(
|
||||
client,
|
||||
|
@ -1403,6 +1415,50 @@ export function dataRecognizerFactory(
|
|||
savedObjectsClient,
|
||||
dataViewsService,
|
||||
mlSavedObjectService,
|
||||
request
|
||||
request,
|
||||
compatibleModuleType
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Narrows a list of module configs using two independent tag checks.
 *
 * 1. `compatibleModuleType` (from the kibana yml config): when not null, a config is kept
 *    if its module has no tags (undefined or empty) or its tags include this value.
 * 2. `moduleTagFilters` (from the endpoint's query parameter): when non-empty, a config is
 *    kept only if its module has at least one tag in common with this list; modules
 *    without tags are dropped.
 *
 * Both checks are applied in that order. When neither applies, the input array itself is
 * returned (same reference, not a copy).
 *
 * @param configs - The module config objects to filter.
 * @param compatibleModuleType - The CompatibleModule type to enforce, or null to skip this check.
 * @param moduleTagFilters - Tags to match against; an empty array skips this check.
 * @returns The module Config objects that pass every applicable check.
 */
export function filterConfigs(
  configs: Config[],
  compatibleModuleType: CompatibleModule | null,
  moduleTagFilters: string[]
) {
  const hasTagFilters = moduleTagFilters.length > 0;

  // Nothing to filter on: hand back the caller's array untouched.
  if (compatibleModuleType === null && !hasTagFilters) {
    return configs;
  }

  let remaining: Config[] = configs;

  if (compatibleModuleType !== null) {
    // Captured in a const so the non-null type is visible inside the callback.
    const requiredTag = compatibleModuleType;
    remaining = remaining.filter(({ module: { tags } }) => {
      // a module without tags is compatible with all serverless projects
      const isUntagged = tags === undefined || tags.length === 0;
      return isUntagged || tags.includes(requiredTag);
    });
  }

  if (hasTagFilters) {
    remaining = remaining.filter(({ module: { tags } }) => {
      // an explicit tag filter was requested on the endpoint,
      // so a module without tags can never match it
      const isTagged = tags !== undefined && tags.length !== 0;
      return isTagged && intersection(tags, moduleTagFilters).length > 0;
    });
  }

  return remaining;
}
|
||||
|
|
|
@ -108,5 +108,6 @@
|
|||
"file": "ml_http_access_events_timechart_ecs.json"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"tags": []
|
||||
}
|
||||
|
|
|
@ -26,5 +26,6 @@
|
|||
"file": "datafeed_apm_tx_metrics.json",
|
||||
"job_id": "apm_tx_metrics"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": ["observability"]
|
||||
}
|
||||
|
|
|
@ -70,5 +70,6 @@
|
|||
"file": "ml_auditbeat_docker_process_occurrence_ecs.json"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"tags": ["security"]
|
||||
}
|
||||
|
|
|
@ -16,5 +16,10 @@
|
|||
"file": "datafeed_log_entry_rate.json",
|
||||
"job_id": "log-entry-rate"
|
||||
}
|
||||
],
|
||||
"tags": [
|
||||
"logs",
|
||||
"observability",
|
||||
"security"
|
||||
]
|
||||
}
|
||||
|
|
|
@ -16,5 +16,10 @@
|
|||
"file": "datafeed_log_entry_categories_count.json",
|
||||
"job_id": "log-entry-categories-count"
|
||||
}
|
||||
],
|
||||
"tags": [
|
||||
"logs",
|
||||
"observability",
|
||||
"security"
|
||||
]
|
||||
}
|
||||
|
|
|
@ -48,5 +48,6 @@
|
|||
"file": "datafeed_high_mean_cpu_iowait_ecs.json",
|
||||
"job_id": "high_mean_cpu_iowait_ecs"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": ["observability"]
|
||||
}
|
||||
|
|
|
@ -34,5 +34,6 @@
|
|||
"file": "datafeed_hosts_network_out.json",
|
||||
"job_id": "hosts_network_out"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": ["observability"]
|
||||
}
|
||||
|
|
|
@ -34,5 +34,6 @@
|
|||
"file": "datafeed_k8s_network_out.json",
|
||||
"job_id": "k8s_network_out"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": ["observability"]
|
||||
}
|
||||
|
|
|
@ -108,5 +108,6 @@
|
|||
"file": "ml_http_access_events_timechart_ecs.json"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"tags": []
|
||||
}
|
||||
|
|
|
@ -24,5 +24,6 @@
|
|||
"job_id": "high_sum_total_sales"
|
||||
}
|
||||
],
|
||||
"kibana": {}
|
||||
"kibana": {},
|
||||
"tags": []
|
||||
}
|
||||
|
|
|
@ -42,5 +42,6 @@
|
|||
"job_id": "url_scanning"
|
||||
}
|
||||
],
|
||||
"kibana": {}
|
||||
"kibana": {},
|
||||
"tags": []
|
||||
}
|
||||
|
|
|
@ -83,5 +83,8 @@
|
|||
"file": "datafeed_suspicious_login_activity.json",
|
||||
"job_id": "suspicious_login_activity"
|
||||
}
|
||||
],
|
||||
"tags": [
|
||||
"security"
|
||||
]
|
||||
}
|
||||
|
|
|
@ -59,5 +59,8 @@
|
|||
"file": "datafeed_rare_error_code.json",
|
||||
"job_id": "rare_error_code"
|
||||
}
|
||||
],
|
||||
"tags": [
|
||||
"security"
|
||||
]
|
||||
}
|
||||
|
|
|
@ -177,5 +177,8 @@
|
|||
"file": "datafeed_v3_linux_anomalous_network_activity.json",
|
||||
"job_id": "v3_linux_anomalous_network_activity"
|
||||
}
|
||||
],
|
||||
"tags": [
|
||||
"security"
|
||||
]
|
||||
}
|
||||
|
|
|
@ -56,5 +56,8 @@
|
|||
"file": "datafeed_rare_destination_country.json",
|
||||
"job_id": "rare_destination_country"
|
||||
}
|
||||
],
|
||||
"tags": [
|
||||
"security"
|
||||
]
|
||||
}
|
||||
|
|
|
@ -59,5 +59,8 @@
|
|||
"file": "datafeed_packetbeat_rare_user_agent.json",
|
||||
"job_id": "packetbeat_rare_user_agent"
|
||||
}
|
||||
],
|
||||
"tags": [
|
||||
"security"
|
||||
]
|
||||
}
|
||||
|
|
|
@ -152,5 +152,8 @@
|
|||
"file": "datafeed_v3_windows_anomalous_script.json",
|
||||
"job_id": "v3_windows_anomalous_script"
|
||||
}
|
||||
],
|
||||
"tags": [
|
||||
"security"
|
||||
]
|
||||
}
|
||||
|
|
|
@ -23,5 +23,6 @@
|
|||
"file": "datafeed_high_latency_by_geo.json",
|
||||
"job_id": "high_latency_by_geo"
|
||||
}
|
||||
]
|
||||
],
|
||||
"tags": ["observability"]
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue