[8.7] [Fleet] Fixing update tags status reporting complete too early (#151330) (#151675)

# Backport

This will backport the following commits from `main` to `8.7`:
- [[Fleet] Fixing update tags status reporting complete too early (#151330)](https://github.com/elastic/kibana/pull/151330)

### Questions?

Please refer to the [Backport tool documentation](https://github.com/sqren/backport)

Co-authored-by: Julia Bardi <90178898+juliaElastic@users.noreply.github.com>
2025-04-24 17:59:23 -04:00 · 2023-02-21 05:39:06 -05:00 · 2023-02-21 05:39:06 -05:00 · 4fcef5cd8f
commit 4fcef5cd8f
parent 0899980f7e
2 changed files with 94 additions and 24 deletions
--- a/x-pack/plugins/fleet/server/services/agents/update_agent_tags.test.ts
+++ b/x-pack/plugins/fleet/server/services/agents/update_agent_tags.test.ts
@ -110,7 +110,7 @@ describe('update_agent_tags', () => {
    expect(agentAction?.body).toEqual(
      expect.objectContaining({
        action_id: expect.anything(),
-        agents: ['agent1'],
+        agents: [expect.any(String)],
        type: 'UPDATE_TAGS',
        total: 1,
      })
@ -120,7 +120,7 @@ describe('update_agent_tags', () => {
    const agentIds = actionResults?.body
      ?.filter((i: any) => i.agent_id)
      .map((i: any) => i.agent_id);
-    expect(agentIds).toEqual(['agent1']);
+    expect(agentIds.length).toEqual(1);
    expect(actionResults.body[1].error).not.toBeDefined();
  });

@ -142,7 +142,7 @@ describe('update_agent_tags', () => {
    expect(agentAction?.body).toEqual(
      expect.objectContaining({
        action_id: expect.anything(),
-        agents: [agentInRegularDoc._id],
+        agents: [expect.any(String)],
        type: 'UPDATE_TAGS',
        total: 1,
      })
@ -152,12 +152,23 @@ describe('update_agent_tags', () => {
  it('should write error action results when failures are returned', async () => {
    esClient.updateByQuery.mockReset();
    esClient.updateByQuery.mockResolvedValue({
-      failures: [{ cause: { reason: 'error reason' } }],
+      failures: [{ id: 'failure1', cause: { reason: 'error reason' } }],
      updated: 0,
+      total: 1,
    } as any);

    await updateAgentTags(soClient, esClient, { agentIds: ['agent1'] }, ['one'], []);

+    const agentAction = esClient.create.mock.calls[0][0] as any;
+    expect(agentAction?.body).toEqual(
+      expect.objectContaining({
+        action_id: expect.anything(),
+        agents: ['failure1'],
+        type: 'UPDATE_TAGS',
+        total: 1,
+      })
+    );
+
    const errorResults = esClient.bulk.mock.calls[0][0] as any;
    expect(errorResults.body[1].error).toEqual('error reason');
  });
@ -181,6 +192,7 @@ describe('update_agent_tags', () => {
      failures: [],
      updated: 0,
      version_conflicts: 100,
+      total: 100,
    } as any);

    await expect(
@ -198,10 +210,43 @@ describe('update_agent_tags', () => {
        }
      )
    ).rejects.toThrowError('version conflict of 100 agents');
+
+    const agentAction = esClient.create.mock.calls[0][0] as any;
+    expect(agentAction?.body.agents.length).toEqual(100);
+
    const errorResults = esClient.bulk.mock.calls[0][0] as any;
    expect(errorResults.body[1].error).toEqual('version conflict on last retry');
  });

+  it('should combine action agents from updated, failures and version conflicts on last retry', async () => {
+    esClient.updateByQuery.mockReset();
+    esClient.updateByQuery.mockResolvedValue({
+      failures: [{ id: 'failure1', cause: { reason: 'error reason' } }],
+      updated: 1,
+      version_conflicts: 1,
+      total: 3,
+    } as any);
+
+    await expect(
+      updateTagsBatch(
+        soClient,
+        esClient,
+        [{ id: 'agent1' } as Agent],
+        {},
+        {
+          tagsToAdd: ['new'],
+          tagsToRemove: [],
+          kuery: '',
+          total: 3,
+          retryCount: MAX_RETRY_COUNT,
+        }
+      )
+    ).rejects.toThrowError('version conflict of 1 agents');
+
+    const agentAction = esClient.create.mock.calls[0][0] as any;
+    expect(agentAction?.body.agents.length).toEqual(3);
+  });
+
  it('should run add tags async when actioning more agents than batch size', async () => {
    esClient.search.mockResolvedValue({
      hits: {
@ -301,7 +346,7 @@ describe('update_agent_tags', () => {

  it('should write total from total param if updateByQuery returns less results', async () => {
    esClient.updateByQuery.mockReset();
-    esClient.updateByQuery.mockResolvedValue({ failures: [], updated: 0, total: 50 } as any);
+    esClient.updateByQuery.mockResolvedValue({ failures: [], updated: 1, total: 50 } as any);

    await updateTagsBatch(
      soClient,
@ -320,7 +365,7 @@ describe('update_agent_tags', () => {
    expect(agentAction?.body).toEqual(
      expect.objectContaining({
        action_id: expect.anything(),
-        agents: ['agent1'],
+        agents: [expect.any(String)],
        type: 'UPDATE_TAGS',
        total: 100,
      })
--- a/x-pack/plugins/fleet/server/services/agents/update_agent_tags_action_runner.ts
+++ b/x-pack/plugins/fleet/server/services/agents/update_agent_tags_action_runner.ts
@ -129,56 +129,81 @@ export async function updateTagsBatch(

  appContextService.getLogger().debug(JSON.stringify(res).slice(0, 1000));

-  if (options.retryCount === undefined) {
-    // creating an action doc so that update tags  shows up in activity
-    await createAgentAction(esClient, {
-      id: actionId,
-      agents: agentIds,
-      created_at: new Date().toISOString(),
-      type: 'UPDATE_TAGS',
-      total: options.total ?? res.total,
-    });
-  }
-
  // creating unique ids to use as agentId, as we don't have all agent ids in case of action by kuery
  const getUuidArray = (count: number) => Array.from({ length: count }, () => uuidv4());

+  const updatedCount = res.updated ?? 0;
+  const updatedIds = getUuidArray(updatedCount);
+
+  const failures = res.failures ?? [];
+  const failureCount = failures.length;
+
+  const isLastRetry = options.retryCount === MAX_RETRY_COUNT;
+
+  const versionConflictCount = res.version_conflicts ?? 0;
+  const versionConflictIds = isLastRetry ? getUuidArray(versionConflictCount) : [];
+
+  // creating an action doc so that update tags shows up in activity
+  // the action doc only lists the agents that were updated, failed or, on the last retry, hit a version conflict
+  // this ensures that the action status count will be accurate
+  await createAgentAction(esClient, {
+    id: actionId,
+    agents: updatedIds
+      .concat(failures.map((failure) => failure.id))
+      .concat(isLastRetry ? versionConflictIds : []),
+    created_at: new Date().toISOString(),
+    type: 'UPDATE_TAGS',
+    total: options.total ?? res.total,
+  });
+  appContextService
+    .getLogger()
+    .debug(
+      `action doc wrote on ${
+        updatedCount + failureCount + (isLastRetry ? versionConflictCount : 0)
+      } agentIds, updated: ${updatedCount}, failed: ${failureCount}, version_conflicts: ${versionConflictCount}`
+    );
+
  // writing successful action results
-  if (res.updated ?? 0 > 0) {
+  if (updatedCount > 0) {
    await bulkCreateAgentActionResults(
      esClient,
-      agentIds.map((id) => ({
+      updatedIds.map((id) => ({
        agentId: id,
        actionId,
      }))
    );
+    appContextService.getLogger().debug(`action updated result wrote on ${updatedCount} agents`);
  }

  // writing failures from es update
-  if (res.failures && res.failures.length > 0) {
+  if (failures.length > 0) {
    await bulkCreateAgentActionResults(
      esClient,
-      res.failures.map((failure) => ({
+      failures.map((failure) => ({
        agentId: failure.id,
        actionId,
        error: failure.cause.reason,
      }))
    );
+    appContextService.getLogger().debug(`action failed result wrote on ${failureCount} agents`);
  }

-  if (res.version_conflicts ?? 0 > 0) {
+  if (versionConflictCount > 0) {
    // write out error results on last retry, so action is not stuck in progress
    if (options.retryCount === MAX_RETRY_COUNT) {
      await bulkCreateAgentActionResults(
        esClient,
-        getUuidArray(res.version_conflicts!).map((id) => ({
+        versionConflictIds.map((id) => ({
          agentId: id,
          actionId,
          error: 'version conflict on last retry',
        }))
      );
+      appContextService
+        .getLogger()
+        .debug(`action conflict result wrote on ${versionConflictCount} agents`);
    }
-    throw new Error(`version conflict of ${res.version_conflicts} agents`);
+    throw new Error(`version conflict of ${versionConflictCount} agents`);
  }

  return { actionId, updated: res.updated, took: res.took };