[9.0] [Security Assistant] Fix timeout during Knowledge Base setup (#213738) (#213973)

# Backport

This will backport the following commits from `main` to `9.0`:
- [[Security Assistant] Fix timeout during Knowledge Base setup
(#213738)](https://github.com/elastic/kibana/pull/213738)

<!--- Backport version: 9.6.6 -->

### Questions?
Please refer to the [Backport tool
documentation](https://github.com/sorenlouv/backport)

<!--BACKPORT [{"author":{"name":"Patryk
Kopyciński","email":"contact@patrykkopycinski.com"},"sourceCommit":{"committedDate":"2025-03-11T01:30:44Z","message":"[Security
Assistant] Fix timeout during Knowledge Base setup (#213738)\n\n##
Summary\n\nCluster with autoscaling for ML nodes can take couple minutes
to\nproperly allocate ML node on Cloud, so increasing timeout by
10min\nshould improve the UX and make the process more
streamlined.\n\nHowever it's still just arbitrary value, so in the
future we should\nthink about more reliable
solution","sha":"0b77522dc1db45c1fd43f83165407a5cd0899ad4","branchLabelMapping":{"^v9.1.0$":"main","^v8.19.0$":"8.x","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["bug","release_note:skip","v9.0.0","ci:cloud-deploy","ci:project-deploy-security","ci:cloud-deploy-elser","backport:version","v8.18.0","v9.1.0","v8.19.0"],"title":"[Security
Assistant] Fix timeout during Knowledge Base
setup","number":213738,"url":"https://github.com/elastic/kibana/pull/213738","mergeCommit":{"message":"[Security
Assistant] Fix timeout during Knowledge Base setup (#213738)\n\n##
Summary\n\nCluster with autoscaling for ML nodes can take couple minutes
to\nproperly allocate ML node on Cloud, so increasing timeout by
10min\nshould improve the UX and make the process more
streamlined.\n\nHowever it's still just arbitrary value, so in the
future we should\nthink about more reliable
solution","sha":"0b77522dc1db45c1fd43f83165407a5cd0899ad4"}},"sourceBranch":"main","suggestedTargetBranches":["9.0"],"targetPullRequestStates":[{"branch":"9.0","label":"v9.0.0","branchLabelMappingKey":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"},{"branch":"8.18","label":"v8.18.0","branchLabelMappingKey":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"url":"https://github.com/elastic/kibana/pull/213854","number":213854,"state":"MERGED","mergeCommit":{"sha":"fbafcd597e1d025daf3163719910cc4b61d274db","message":"[8.18]
[Security Assistant] Fix timeout during Knowledge Base setup (#213738)
(#213854)\n\n# Backport\n\nThis will backport the following commits from
`main` to `8.18`:\n- [[Security Assistant] Fix timeout during Knowledge
Base
setup\n(#213738)](https://github.com/elastic/kibana/pull/213738)\n\n\n\n###
Questions ?\nPlease refer to the [Backport
tool\ndocumentation](https://github.com/sorenlouv/backport)\n\n"}},{"branch":"main","label":"v9.1.0","branchLabelMappingKey":"^v9.1.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/213738","number":213738,"mergeCommit":{"message":"[Security
Assistant] Fix timeout during Knowledge Base setup (#213738)\n\n##
Summary\n\nCluster with autoscaling for ML nodes can take couple minutes
to\nproperly allocate ML node on Cloud, so increasing timeout by
10min\nshould improve the UX and make the process more
streamlined.\n\nHowever it's still just arbitrary value, so in the
future we should\nthink about more reliable
solution","sha":"0b77522dc1db45c1fd43f83165407a5cd0899ad4"}},{"branch":"8.x","label":"v8.19.0","branchLabelMappingKey":"^v8.19.0$","isSourceBranch":false,"url":"https://github.com/elastic/kibana/pull/213853","number":213853,"state":"MERGED","mergeCommit":{"sha":"3bfe2d5a87a091b2f0da12fef81dadd6796fc4db","message":"[8.x]
[Security Assistant] Fix timeout during Knowledge Base setup (#213738)
(#213853)\n\n# Backport\n\nThis will backport the following commits from
`main` to `8.x`:\n- [[Security Assistant] Fix timeout during Knowledge
Base
setup\n(#213738)](https://github.com/elastic/kibana/pull/213738)\n\n\n\n###
Questions ?\nPlease refer to the [Backport
tool\ndocumentation](https://github.com/sorenlouv/backport)\n\n\n\nCo-authored-by:
Patryk Kopyciński <contact@patrykkopycinski.com>"}}]}] BACKPORT-->
This commit is contained in:
Patryk Kopyciński 2025-03-11 20:40:49 +01:00 committed by GitHub
parent f1e9a5f631
commit e7eaf81477
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 50 additions and 45 deletions

View file

@ -50,7 +50,7 @@ const useUserProfile = ({ username, enabled = true }: { username: string; enable
avatar: profile?.[0].data.avatar,
};
},
enabled,
enabled: !!(enabled && username?.length),
});
};

View file

@ -332,7 +332,7 @@ describe('AIAssistantKnowledgeBaseDataClient', () => {
{ fully_defined: false, model_id: '', tags: [], input: { field_names: ['content'] } },
],
});
mockLoadSecurityLabs.mockRejectedValue(new Error('Installation error'));
(getMlNodeCount as jest.Mock).mockRejectedValue(new Error('Installation error'));
const client = new AIAssistantKnowledgeBaseDataClient(mockOptions);
await expect(client.setupKnowledgeBase({})).rejects.toThrow(

View file

@ -178,13 +178,11 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient {
if (elasticsearchInference) {
return ASSISTANT_ELSER_INFERENCE_ID;
}
} catch (error) {
this.options.logger.debug(
`Error checking if Inference endpoint ${ASSISTANT_ELSER_INFERENCE_ID} exists: ${error}`
);
} catch (_) {
/* empty */
}
// Fall back to the dedicated inference endpoint
// Fall back to the default inference endpoint
return ELASTICSEARCH_ELSER_INFERENCE_ID;
};
@ -233,7 +231,7 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient {
?.some((stats) => isReadyESS(stats) || isReadyServerless(stats));
} catch (error) {
this.options.logger.debug(
`Error checking if Inference endpoint ${ASSISTANT_ELSER_INFERENCE_ID} exists: ${error}`
`Error checking if Inference endpoint ${inferenceId} exists: ${error}`
);
return false;
}
@ -363,37 +361,39 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient {
return;
}
this.options.logger.debug('Checking if ML nodes are available...');
const mlNodesCount = await getMlNodeCount({ asInternalUser: esClient } as IScopedClusterClient);
if (mlNodesCount.count === 0 && mlNodesCount.lazyNodeCount === 0) {
throw new Error('No ML nodes available');
}
this.options.logger.debug('Starting Knowledge Base setup...');
this.options.setIsKBSetupInProgress(this.spaceId, true);
const elserId = await this.options.getElserId();
// Delete legacy ESQL knowledge base docs if they exist, and silence the error if they do not
try {
const legacyESQL = await esClient.deleteByQuery({
index: this.indexTemplateAndPattern.alias,
query: {
bool: {
must: [{ terms: { 'metadata.kbResource': ['esql', 'unknown'] } }],
},
},
});
if (legacyESQL?.total != null && legacyESQL?.total > 0) {
this.options.logger.info(
`Removed ${legacyESQL?.total} ESQL knowledge base docs from knowledge base data stream: ${this.indexTemplateAndPattern.alias}.`
);
this.options.logger.debug('Checking if ML nodes are available...');
const mlNodesCount = await getMlNodeCount({
asInternalUser: esClient,
} as IScopedClusterClient);
if (mlNodesCount.count === 0 && mlNodesCount.lazyNodeCount === 0) {
throw new Error('No ML nodes available');
}
this.options.logger.debug('Starting Knowledge Base setup...');
this.options.setIsKBSetupInProgress(this.spaceId, true);
const elserId = await this.options.getElserId();
// Delete legacy ESQL knowledge base docs if they exist, and silence the error if they do not
try {
const legacyESQL = await esClient.deleteByQuery({
index: this.indexTemplateAndPattern.alias,
query: {
bool: {
must: [{ terms: { 'metadata.kbResource': ['esql', 'unknown'] } }],
},
},
});
if (legacyESQL?.total != null && legacyESQL?.total > 0) {
this.options.logger.info(
`Removed ${legacyESQL?.total} ESQL knowledge base docs from knowledge base data stream: ${this.indexTemplateAndPattern.alias}.`
);
}
} catch (e) {
this.options.logger.info('No legacy ESQL or Security Labs knowledge base docs to delete');
}
} catch (e) {
this.options.logger.info('No legacy ESQL or Security Labs knowledge base docs to delete');
}
try {
/*
#1 Check if ELSER model is downloaded
#2 Check if inference endpoint is deployed
@ -409,7 +409,7 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient {
(await this.isModelInstalled())
? Promise.resolve()
: Promise.reject(new Error('Model not installed')),
{ minTimeout: 10000, maxTimeout: 10000, retries: 10 }
{ minTimeout: 30000, maxTimeout: 30000, retries: 20 }
);
this.options.logger.debug(`ELSER model '${elserId}' successfully installed!`);
} else {
@ -420,11 +420,11 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient {
if (!inferenceExists) {
await this.createInferenceEndpoint();
this.options.logger.error(
this.options.logger.debug(
`Inference endpoint for ELSER model '${elserId}' successfully deployed!`
);
} else {
this.options.logger.error(
this.options.logger.debug(
`Inference endpoint for ELSER model '${elserId}' is already deployed`
);
}
@ -453,7 +453,10 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient {
}
this.options.logger.debug(`Loading Security Labs KB docs...`);
await loadSecurityLabs(this, this.options.logger);
void loadSecurityLabs(this, this.options.logger)?.then(() => {
this.options.setIsKBSetupInProgress(this.spaceId, false);
});
} else {
this.options.logger.debug(`Security Labs Knowledge Base docs already loaded!`);
}
@ -473,12 +476,15 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient {
);
}
}
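// When Security Labs loading is skipped, clear the in-progress flag right away.
// When the docs are being loaded, the flag is cleared after loadSecurityLabs resolves instead.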
if (ignoreSecurityLabs) {
this.options.setIsKBSetupInProgress(this.spaceId, false);
}
} catch (e) {
this.options.setIsKBSetupInProgress(this.spaceId, false);
this.options.logger.error(`Error setting up Knowledge Base: ${e.message}`);
throw new Error(`Error setting up Knowledge Base: ${e.message}`);
} finally {
this.options.setIsKBSetupInProgress(this.spaceId, false);
}
};

View file

@ -19,7 +19,7 @@ import { ElasticAssistantPluginRouter } from '../../types';
// Since we await ELSER setup here, this could take a while (especially if ML needs to autoscale)
// Consider just returning if attempt was successful, and switch to client polling
const ROUTE_HANDLER_TIMEOUT = 10 * 60 * 1000; // 10 * 60 seconds = 10 minutes
const ROUTE_HANDLER_TIMEOUT = 20 * 60 * 1000; // 20 * 60 seconds = 20 minutes
/**
* Load Knowledge Base index, pipeline, and resources (collection of documents)

View file

@ -14,8 +14,7 @@ export default ({ getService }: FtrProviderContext) => {
const log = getService('log');
const esArchiver = getService('esArchiver');
// Failing: See https://github.com/elastic/kibana/issues/208603
describe.skip('@ess Security AI Assistant - Indices with `semantic_text` fields', () => {
describe('@ess Security AI Assistant - Indices with `semantic_text` fields', () => {
before(async () => {
await esArchiver.load('x-pack/test/functional/es_archives/security_solution/ignore_fields');
await esArchiver.load(