[Security Assistant] Fix timeout during Knowledge Base setup (#213738)

## Summary

Clusters with autoscaling for ML nodes can take a couple of minutes to
properly allocate an ML node on Cloud, so increasing the timeout by 10 minutes
should improve the UX and make the process more streamlined.

However, it's still just an arbitrary value, so in the future we should
think about a more reliable solution.
This commit is contained in:
Patryk Kopyciński 2025-03-11 02:30:44 +01:00 committed by GitHub
parent 5b6dbf2b27
commit 0b77522dc1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 50 additions and 45 deletions

View file

@ -50,7 +50,7 @@ const useUserProfile = ({ username, enabled = true }: { username: string; enable
avatar: profile?.[0].data.avatar,
};
},
enabled,
enabled: !!(enabled && username?.length),
});
};

View file

@ -332,7 +332,7 @@ describe('AIAssistantKnowledgeBaseDataClient', () => {
{ fully_defined: false, model_id: '', tags: [], input: { field_names: ['content'] } },
],
});
mockLoadSecurityLabs.mockRejectedValue(new Error('Installation error'));
(getMlNodeCount as jest.Mock).mockRejectedValue(new Error('Installation error'));
const client = new AIAssistantKnowledgeBaseDataClient(mockOptions);
await expect(client.setupKnowledgeBase({})).rejects.toThrow(

View file

@ -178,13 +178,11 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient {
if (elasticsearchInference) {
return ASSISTANT_ELSER_INFERENCE_ID;
}
} catch (error) {
this.options.logger.debug(
`Error checking if Inference endpoint ${ASSISTANT_ELSER_INFERENCE_ID} exists: ${error}`
);
} catch (_) {
/* empty */
}
// Fallback to the dedicated inference endpoint
// Fallback to the default inference endpoint
return ELASTICSEARCH_ELSER_INFERENCE_ID;
};
@ -233,7 +231,7 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient {
?.some((stats) => isReadyESS(stats) || isReadyServerless(stats));
} catch (error) {
this.options.logger.debug(
`Error checking if Inference endpoint ${ASSISTANT_ELSER_INFERENCE_ID} exists: ${error}`
`Error checking if Inference endpoint ${inferenceId} exists: ${error}`
);
return false;
}
@ -363,37 +361,39 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient {
return;
}
this.options.logger.debug('Checking if ML nodes are available...');
const mlNodesCount = await getMlNodeCount({ asInternalUser: esClient } as IScopedClusterClient);
if (mlNodesCount.count === 0 && mlNodesCount.lazyNodeCount === 0) {
throw new Error('No ML nodes available');
}
this.options.logger.debug('Starting Knowledge Base setup...');
this.options.setIsKBSetupInProgress(this.spaceId, true);
const elserId = await this.options.getElserId();
// Delete legacy ESQL knowledge base docs if they exist, and silence the error if they do not
try {
const legacyESQL = await esClient.deleteByQuery({
index: this.indexTemplateAndPattern.alias,
query: {
bool: {
must: [{ terms: { 'metadata.kbResource': ['esql', 'unknown'] } }],
},
},
});
if (legacyESQL?.total != null && legacyESQL?.total > 0) {
this.options.logger.info(
`Removed ${legacyESQL?.total} ESQL knowledge base docs from knowledge base data stream: ${this.indexTemplateAndPattern.alias}.`
);
this.options.logger.debug('Checking if ML nodes are available...');
const mlNodesCount = await getMlNodeCount({
asInternalUser: esClient,
} as IScopedClusterClient);
if (mlNodesCount.count === 0 && mlNodesCount.lazyNodeCount === 0) {
throw new Error('No ML nodes available');
}
this.options.logger.debug('Starting Knowledge Base setup...');
this.options.setIsKBSetupInProgress(this.spaceId, true);
const elserId = await this.options.getElserId();
// Delete legacy ESQL knowledge base docs if they exist, and silence the error if they do not
try {
const legacyESQL = await esClient.deleteByQuery({
index: this.indexTemplateAndPattern.alias,
query: {
bool: {
must: [{ terms: { 'metadata.kbResource': ['esql', 'unknown'] } }],
},
},
});
if (legacyESQL?.total != null && legacyESQL?.total > 0) {
this.options.logger.info(
`Removed ${legacyESQL?.total} ESQL knowledge base docs from knowledge base data stream: ${this.indexTemplateAndPattern.alias}.`
);
}
} catch (e) {
this.options.logger.info('No legacy ESQL or Security Labs knowledge base docs to delete');
}
} catch (e) {
this.options.logger.info('No legacy ESQL or Security Labs knowledge base docs to delete');
}
try {
/*
#1 Check if ELSER model is downloaded
#2 Check if inference endpoint is deployed
@ -409,7 +409,7 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient {
(await this.isModelInstalled())
? Promise.resolve()
: Promise.reject(new Error('Model not installed')),
{ minTimeout: 10000, maxTimeout: 10000, retries: 10 }
{ minTimeout: 30000, maxTimeout: 30000, retries: 20 }
);
this.options.logger.debug(`ELSER model '${elserId}' successfully installed!`);
} else {
@ -420,11 +420,11 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient {
if (!inferenceExists) {
await this.createInferenceEndpoint();
this.options.logger.error(
this.options.logger.debug(
`Inference endpoint for ELSER model '${elserId}' successfully deployed!`
);
} else {
this.options.logger.error(
this.options.logger.debug(
`Inference endpoint for ELSER model '${elserId}' is already deployed`
);
}
@ -453,7 +453,10 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient {
}
this.options.logger.debug(`Loading Security Labs KB docs...`);
await loadSecurityLabs(this, this.options.logger);
void loadSecurityLabs(this, this.options.logger)?.then(() => {
this.options.setIsKBSetupInProgress(this.spaceId, false);
});
} else {
this.options.logger.debug(`Security Labs Knowledge Base docs already loaded!`);
}
@ -473,12 +476,15 @@ export class AIAssistantKnowledgeBaseDataClient extends AIAssistantDataClient {
);
}
}
// If loading security labs, we need to wait for the docs to be loaded
if (ignoreSecurityLabs) {
this.options.setIsKBSetupInProgress(this.spaceId, false);
}
} catch (e) {
this.options.setIsKBSetupInProgress(this.spaceId, false);
this.options.logger.error(`Error setting up Knowledge Base: ${e.message}`);
throw new Error(`Error setting up Knowledge Base: ${e.message}`);
} finally {
this.options.setIsKBSetupInProgress(this.spaceId, false);
}
};

View file

@ -19,7 +19,7 @@ import { ElasticAssistantPluginRouter } from '../../types';
// Since we're awaiting on ELSER setup, this could take a bit (especially if ML needs to autoscale)
// Consider just returning if attempt was successful, and switch to client polling
const ROUTE_HANDLER_TIMEOUT = 10 * 60 * 1000; // 10 * 60 seconds = 10 minutes
const ROUTE_HANDLER_TIMEOUT = 20 * 60 * 1000; // 20 * 60 seconds = 20 minutes
/**
* Load Knowledge Base index, pipeline, and resources (collection of documents)

View file

@ -14,8 +14,7 @@ export default ({ getService }: FtrProviderContext) => {
const log = getService('log');
const esArchiver = getService('esArchiver');
// Failing: See https://github.com/elastic/kibana/issues/208603
describe.skip('@ess Security AI Assistant - Indices with `semantic_text` fields', () => {
describe('@ess Security AI Assistant - Indices with `semantic_text` fields', () => {
before(async () => {
await esArchiver.load('x-pack/test/functional/es_archives/security_solution/ignore_fields');
await esArchiver.load(