[Fleet] increased retries for cancel action, improvements on agent activity (#149845)

## Summary

Fixes https://github.com/elastic/kibana/issues/148709

Increased `retry_on_conflict` to `5` for the bulk agent update performed by
the cancel action, and now log any resulting errors at debug level.
Couldn't reproduce the conflicts locally with 5k horde agents; will try on
cloud.

Small improvements on agent activity:
- Only set `FAILED` status if all agents acked, so the action remains in
`IN_PROGRESS` status while agents are still executing. This keeps
`Cancel` accessible on a bulk upgrade, even if there are some
failures. It also makes it easier to understand that the `FAILED` status is a
finished state.
- Only set `EXPIRED` status if the action is not an upgrade. With the recent
changes, we never consider an upgrade action expired; we only consider its
rollout period to have passed.

<img width="1150" alt="image"
src="https://user-images.githubusercontent.com/90178898/215520463-82877a2e-667b-4853-82d7-5e53b231aaa5.png">



### Checklist

- [ ] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios
This commit is contained in:
Julia Bardi 2023-01-31 15:06:42 +01:00 committed by GitHub
parent 96b3ade94b
commit 05c8fe8a6d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 24 additions and 16 deletions

View file

@ -294,7 +294,7 @@ const formattedTime = (time?: string) => {
const inProgressTitle = (action: ActionStatus) => (
<FormattedMessage
id="xpack.fleet.agentActivity.inProgressTitle"
defaultMessage="{inProgressText} {nbAgents} {agents} {reassignText}{upgradeText}"
defaultMessage="{inProgressText} {nbAgents} {agents} {reassignText}{upgradeText}{failuresText}"
values={{
nbAgents:
action.nbAgentsAck >= action.nbAgentsActioned
@ -307,6 +307,7 @@ const inProgressTitle = (action: ActionStatus) => (
reassignText:
action.type === 'POLICY_REASSIGN' && action.newPolicyId ? `to ${action.newPolicyId}` : '',
upgradeText: action.type === 'UPGRADE' ? `to version ${action.version}` : '',
failuresText: action.nbAgentsFailed > 0 ? `, has ${action.nbAgentsFailed} failure(s)` : '',
}}
/>
);

View file

@ -119,14 +119,13 @@ export async function getActionStatuses(
...action,
nbAgentsAck: nbAgentsAck - errorCount,
nbAgentsFailed: errorCount,
status:
errorCount > 0
? 'FAILED'
: complete
? 'COMPLETE'
: cancelledAction
? 'CANCELLED'
: action.status,
status: cancelledAction
? 'CANCELLED'
: errorCount > 0 && complete
? 'FAILED'
: complete
? 'COMPLETE'
: action.status,
nbAgentsActioned,
cancellationTime: cancelledAction?.timestamp,
completionTime,
@ -196,7 +195,10 @@ async function _getActions(
const source = hit._source!;
if (!acc[source.action_id!]) {
const isExpired = source.expiration ? Date.parse(source.expiration) < Date.now() : false;
const isExpired =
source.expiration && source.type !== 'UPGRADE'
? Date.parse(source.expiration) < Date.now()
: false;
acc[hit._source.action_id] = {
actionId: hit._source.action_id,
nbAgentsActionCreated: 0,

View file

@ -261,6 +261,7 @@ export async function cancelAgentAction(esClient: ElasticsearchClient, actionId:
continue;
}
if (hit._source.type === 'UPGRADE') {
const errors = {};
await bulkUpdateAgents(
esClient,
hit._source.agents.map((agentId) => ({
@ -270,8 +271,13 @@ export async function cancelAgentAction(esClient: ElasticsearchClient, actionId:
upgrade_started_at: null,
},
})),
{}
errors
);
if (Object.keys(errors).length > 0) {
appContextService
.getLogger()
.debug(`Errors while bulk updating agents for cancel action: ${JSON.stringify(errors)}`);
}
}
await createAgentAction(esClient, {
id: cancelActionId,

View file

@ -477,7 +477,7 @@ export async function bulkUpdateAgents(
{
update: {
_id: agentId,
retry_on_conflict: 3,
retry_on_conflict: 5,
},
},
{

View file

@ -13289,7 +13289,7 @@
"xpack.fleet.addIntegration.installAgentStepTitle": "Ces étapes configurent et enregistrent l'agent Elastic Agent dans Fleet afin d'en centraliser la gestion tout en déployant automatiquement les mises à jour. Comme alternative à Fleet, les utilisateurs avancés peuvent exécuter des agents dans {standaloneLink}.",
"xpack.fleet.addIntegration.standaloneWarning": "La configuration des intégrations en exécutant Elastic Agent en mode autonome est une opération avancée. Si possible, nous vous conseillons d'utiliser plutôt {link}. ",
"xpack.fleet.agentActivity.completedTitle": "{nbAgents} {agents} {completedText}{offlineText}",
"xpack.fleet.agentActivity.inProgressTitle": "{inProgressText} {nbAgents} {agents} {reassignText}{upgradeText}",
"xpack.fleet.agentActivity.inProgressTitle": "{inProgressText} {nbAgents} {agents} {reassignText}{upgradeText}{failuresText}",
"xpack.fleet.agentActivityFlyout.cancelledDescription": "Annulé le {date}",
"xpack.fleet.agentActivityFlyout.cancelledTitle": "Agent {cancelledText} annulé",
"xpack.fleet.agentActivityFlyout.completedDescription": "Terminé {date}",

View file

@ -13276,7 +13276,7 @@
"xpack.fleet.addIntegration.installAgentStepTitle": "FleetでElasticエージェントを構成および登録して、自動的に更新をデプロイしたり、一元的にエージェントを管理したりします。上級者ユーザーは、Fleetの代わりに、{standaloneLink}でエージェントを実行できます。",
"xpack.fleet.addIntegration.standaloneWarning": "スタンドアロンモードでElasticエージェントを実行して統合を設定する方法は、上級者向けです。可能なかぎり、{link}を使用することをお勧めします。",
"xpack.fleet.agentActivity.completedTitle": "{nbAgents} {agents} {completedText}{offlineText}",
"xpack.fleet.agentActivity.inProgressTitle": "{inProgressText} {nbAgents} {agents} {reassignText}{upgradeText}",
"xpack.fleet.agentActivity.inProgressTitle": "{inProgressText} {nbAgents} {agents} {reassignText}{upgradeText}{failuresText}",
"xpack.fleet.agentActivityFlyout.cancelledDescription": "{date}にキャンセルされました",
"xpack.fleet.agentActivityFlyout.cancelledTitle": "エージェント{cancelledText}がキャンセルされました",
"xpack.fleet.agentActivityFlyout.completedDescription": "{date}に完了しました",

View file

@ -13293,7 +13293,7 @@
"xpack.fleet.addIntegration.installAgentStepTitle": "这些步骤将在 Fleet 中配置和注册 Elastic 代理,以便自动部署更新并集中管理该代理。作为 Fleet 的替代方案,高级用户可以在 {standaloneLink} 中运行代理。",
"xpack.fleet.addIntegration.standaloneWarning": "通过在独立模式下运行 Elastic 代理来设置集成为高级选项。如果可能,我们建议改用 {link}。",
"xpack.fleet.agentActivity.completedTitle": "{nbAgents} {agents} {completedText}{offlineText}",
"xpack.fleet.agentActivity.inProgressTitle": "{inProgressText} {nbAgents} {agents} {reassignText}{upgradeText}",
"xpack.fleet.agentActivity.inProgressTitle": "{inProgressText} {nbAgents} {agents} {reassignText}{upgradeText}{failuresText}",
"xpack.fleet.agentActivityFlyout.cancelledDescription": "已于 {date}取消",
"xpack.fleet.agentActivityFlyout.cancelledTitle": "代理 {cancelledText} 已取消",
"xpack.fleet.agentActivityFlyout.completedDescription": "完成于 {date}",

View file

@ -1027,7 +1027,6 @@ export default function (providerContext: FtrProviderContext) {
.get(`/api/fleet/agents/action_status`)
.set('kbn-xsrf', 'xxx');
const actionStatus = body.items[0];
expect(actionStatus.status).to.eql('FAILED');
expect(actionStatus.nbAgentsFailed).to.eql(1);
});