[8.x] [Fleet] Fix flaky agent status field (#211453) (#211764)

# Backport

This will backport the following commits from `main` to `8.x`:
- [[Fleet] Fix flaky agent status field
(#211453)](https://github.com/elastic/kibana/pull/211453)

<!--- Backport version: 9.6.6 -->

### Questions?
Please refer to the [Backport tool
documentation](https://github.com/sorenlouv/backport)

<!--BACKPORT [{"author":{"name":"Nicolas
Chaulet","email":"nicolas.chaulet@elastic.co"},"sourceCommit":{"committedDate":"2025-02-18T13:57:08Z","message":"[Fleet]
Fix flaky agent status field (#211453)\n\n## Summary\r\n\r\nResolve
https://github.com/elastic/kibana/issues/209008 \r\n\r\nIt seems with ES
> 9 that runtime field is sometimes failing, while I am\r\nnot sure why,
that PR make it more robust and should avoid
test\r\nflakyness.\r\n\r\n---------\r\n\r\nCo-authored-by: Julia Bardi
<90178898+juliaElastic@users.noreply.github.com>\r\nCo-authored-by:
Elastic Machine
<elasticmachine@users.noreply.github.com>","sha":"e6709dd78f0243c1defcfac4383dcfd2d6438368","branchLabelMapping":{"^v9.1.0$":"main","^v8.19.0$":"8.x","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:skip","Team:Fleet","backport:prev-minor","v9.1.0","backport:8.18"],"title":"[Fleet]
Fix flaky agent status
field","number":211453,"url":"https://github.com/elastic/kibana/pull/211453","mergeCommit":{"message":"[Fleet]
Fix flaky agent status field (#211453)\n\n## Summary\r\n\r\nResolve
https://github.com/elastic/kibana/issues/209008 \r\n\r\nIt seems with ES
> 9 that runtime field is sometimes failing, while I am\r\nnot sure why,
that PR make it more robust and should avoid
test\r\nflakyness.\r\n\r\n---------\r\n\r\nCo-authored-by: Julia Bardi
<90178898+juliaElastic@users.noreply.github.com>\r\nCo-authored-by:
Elastic Machine
<elasticmachine@users.noreply.github.com>","sha":"e6709dd78f0243c1defcfac4383dcfd2d6438368"}},"sourceBranch":"main","suggestedTargetBranches":[],"targetPullRequestStates":[{"branch":"main","label":"v9.1.0","branchLabelMappingKey":"^v9.1.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/211453","number":211453,"mergeCommit":{"message":"[Fleet]
Fix flaky agent status field (#211453)\n\n## Summary\r\n\r\nResolve
https://github.com/elastic/kibana/issues/209008 \r\n\r\nIt seems with ES
> 9 that runtime field is sometimes failing, while I am\r\nnot sure why,
that PR make it more robust and should avoid
test\r\nflakyness.\r\n\r\n---------\r\n\r\nCo-authored-by: Julia Bardi
<90178898+juliaElastic@users.noreply.github.com>\r\nCo-authored-by:
Elastic Machine
<elasticmachine@users.noreply.github.com>","sha":"e6709dd78f0243c1defcfac4383dcfd2d6438368"}}]}]
BACKPORT-->

Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
This commit is contained in:
Nicolas Chaulet 2025-02-20 09:35:55 -05:00 committed by GitHub
parent 0dfefe59de
commit 88aed407ff
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 17 additions and 5 deletions

View file

@ -56,7 +56,7 @@ describe('buildStatusRuntimeField', () => {
"status": Object {
"script": Object {
"lang": "painless",
"source": " long lastCheckinMillis = doc['last_checkin'].size() > 0 ? doc['last_checkin'].value.toInstant().toEpochMilli() : ( doc['enrolled_at'].size() > 0 ? doc['enrolled_at'].value.toInstant().toEpochMilli() : -1 ); if (doc['active'].size() > 0 && doc['active'].value == false) { emit('unenrolled'); } else if (lastCheckinMillis > 0 && doc['policy_id'].size() > 0 && ['policy-1'].contains(doc['policy_id'].value) && lastCheckinMillis < 1234567590123L) {emit('inactive');} else if ( lastCheckinMillis > 0 && lastCheckinMillis < 1234567590123L ) { emit('offline'); } else if ( doc['policy_revision_idx'].size() == 0 || ( doc['upgrade_started_at'].size() > 0 && doc['upgraded_at'].size() == 0 ) ) { emit('updating'); } else if (doc['last_checkin'].size() == 0) { emit('enrolling'); } else if (doc['unenrollment_started_at'].size() > 0) { emit('unenrolling'); } else if ( doc['last_checkin_status'].size() > 0 && doc['last_checkin_status'].value.toLowerCase() == 'error' ) { emit('error'); } else if ( doc['last_checkin_status'].size() > 0 && doc['last_checkin_status'].value.toLowerCase() == 'degraded' ) { emit('degraded'); } else { emit('online'); }",
"source": " long lastCheckinMillis = doc['last_checkin'].size() > 0 ? doc['last_checkin'].value.toInstant().toEpochMilli() : ( doc['enrolled_at'].size() > 0 ? doc['enrolled_at'].value.toInstant().toEpochMilli() : -1 ); if (doc['active'].size() > 0 && doc['active'].value == false) { emit('unenrolled'); } else if (lastCheckinMillis > 0 && doc.containsKey('policy_id') && doc['policy_id'].size() > 0 && ['policy-1'].contains(doc['policy_id'].value) && lastCheckinMillis < 1234567590123L) {emit('inactive');} else if ( lastCheckinMillis > 0 && lastCheckinMillis < 1234567590123L ) { emit('offline'); } else if ( doc['policy_revision_idx'].size() == 0 || ( doc['upgrade_started_at'].size() > 0 && doc['upgraded_at'].size() == 0 ) ) { emit('updating'); } else if (doc['last_checkin'].size() == 0) { emit('enrolling'); } else if (doc['unenrollment_started_at'].size() > 0) { emit('unenrolling'); } else if ( doc['last_checkin_status'].size() > 0 && doc['last_checkin_status'].value.toLowerCase() == 'error' ) { emit('error'); } else if ( doc['last_checkin_status'].size() > 0 && doc['last_checkin_status'].value.toLowerCase() == 'degraded' ) { emit('degraded'); } else { emit('online'); }",
},
"type": "keyword",
},
@ -76,7 +76,7 @@ describe('buildStatusRuntimeField', () => {
"status": Object {
"script": Object {
"lang": "painless",
"source": " long lastCheckinMillis = doc['last_checkin'].size() > 0 ? doc['last_checkin'].value.toInstant().toEpochMilli() : ( doc['enrolled_at'].size() > 0 ? doc['enrolled_at'].value.toInstant().toEpochMilli() : -1 ); if (doc['active'].size() > 0 && doc['active'].value == false) { emit('unenrolled'); } else if (lastCheckinMillis > 0 && doc['policy_id'].size() > 0 && ['policy-1','policy-2'].contains(doc['policy_id'].value) && lastCheckinMillis < 1234567590123L) {emit('inactive');} else if ( lastCheckinMillis > 0 && lastCheckinMillis < 1234567590123L ) { emit('offline'); } else if ( doc['policy_revision_idx'].size() == 0 || ( doc['upgrade_started_at'].size() > 0 && doc['upgraded_at'].size() == 0 ) ) { emit('updating'); } else if (doc['last_checkin'].size() == 0) { emit('enrolling'); } else if (doc['unenrollment_started_at'].size() > 0) { emit('unenrolling'); } else if ( doc['last_checkin_status'].size() > 0 && doc['last_checkin_status'].value.toLowerCase() == 'error' ) { emit('error'); } else if ( doc['last_checkin_status'].size() > 0 && doc['last_checkin_status'].value.toLowerCase() == 'degraded' ) { emit('degraded'); } else { emit('online'); }",
"source": " long lastCheckinMillis = doc['last_checkin'].size() > 0 ? doc['last_checkin'].value.toInstant().toEpochMilli() : ( doc['enrolled_at'].size() > 0 ? doc['enrolled_at'].value.toInstant().toEpochMilli() : -1 ); if (doc['active'].size() > 0 && doc['active'].value == false) { emit('unenrolled'); } else if (lastCheckinMillis > 0 && doc.containsKey('policy_id') && doc['policy_id'].size() > 0 && ['policy-1','policy-2'].contains(doc['policy_id'].value) && lastCheckinMillis < 1234567590123L) {emit('inactive');} else if ( lastCheckinMillis > 0 && lastCheckinMillis < 1234567590123L ) { emit('offline'); } else if ( doc['policy_revision_idx'].size() == 0 || ( doc['upgrade_started_at'].size() > 0 && doc['upgraded_at'].size() == 0 ) ) { emit('updating'); } else if (doc['last_checkin'].size() == 0) { emit('enrolling'); } else if (doc['unenrollment_started_at'].size() > 0) { emit('unenrolling'); } else if ( doc['last_checkin_status'].size() > 0 && doc['last_checkin_status'].value.toLowerCase() == 'error' ) { emit('error'); } else if ( doc['last_checkin_status'].size() > 0 && doc['last_checkin_status'].value.toLowerCase() == 'degraded' ) { emit('degraded'); } else { emit('online'); }",
},
"type": "keyword",
},
@ -124,7 +124,7 @@ describe('buildStatusRuntimeField', () => {
"status": Object {
"script": Object {
"lang": "painless",
"source": " long lastCheckinMillis = doc['last_checkin'].size() > 0 ? doc['last_checkin'].value.toInstant().toEpochMilli() : ( doc['enrolled_at'].size() > 0 ? doc['enrolled_at'].value.toInstant().toEpochMilli() : -1 ); if (doc['active'].size() > 0 && doc['active'].value == false) { emit('unenrolled'); } else if (lastCheckinMillis > 0 && doc['policy_id'].size() > 0 && ['policy-1','policy-2'].contains(doc['policy_id'].value) && lastCheckinMillis < 1234567590123L || ['policy-3'].contains(doc['policy_id'].value) && lastCheckinMillis < 1234567490123L) {emit('inactive');} else if ( lastCheckinMillis > 0 && lastCheckinMillis < 1234567590123L ) { emit('offline'); } else if ( doc['policy_revision_idx'].size() == 0 || ( doc['upgrade_started_at'].size() > 0 && doc['upgraded_at'].size() == 0 ) ) { emit('updating'); } else if (doc['last_checkin'].size() == 0) { emit('enrolling'); } else if (doc['unenrollment_started_at'].size() > 0) { emit('unenrolling'); } else if ( doc['last_checkin_status'].size() > 0 && doc['last_checkin_status'].value.toLowerCase() == 'error' ) { emit('error'); } else if ( doc['last_checkin_status'].size() > 0 && doc['last_checkin_status'].value.toLowerCase() == 'degraded' ) { emit('degraded'); } else { emit('online'); }",
"source": " long lastCheckinMillis = doc['last_checkin'].size() > 0 ? doc['last_checkin'].value.toInstant().toEpochMilli() : ( doc['enrolled_at'].size() > 0 ? doc['enrolled_at'].value.toInstant().toEpochMilli() : -1 ); if (doc['active'].size() > 0 && doc['active'].value == false) { emit('unenrolled'); } else if (lastCheckinMillis > 0 && doc.containsKey('policy_id') && doc['policy_id'].size() > 0 && ['policy-1','policy-2'].contains(doc['policy_id'].value) && lastCheckinMillis < 1234567590123L || ['policy-3'].contains(doc['policy_id'].value) && lastCheckinMillis < 1234567490123L) {emit('inactive');} else if ( lastCheckinMillis > 0 && lastCheckinMillis < 1234567590123L ) { emit('offline'); } else if ( doc['policy_revision_idx'].size() == 0 || ( doc['upgrade_started_at'].size() > 0 && doc['upgraded_at'].size() == 0 ) ) { emit('updating'); } else if (doc['last_checkin'].size() == 0) { emit('enrolling'); } else if (doc['unenrollment_started_at'].size() > 0) { emit('unenrolling'); } else if ( doc['last_checkin_status'].size() > 0 && doc['last_checkin_status'].value.toLowerCase() == 'error' ) { emit('error'); } else if ( doc['last_checkin_status'].size() > 0 && doc['last_checkin_status'].value.toLowerCase() == 'degraded' ) { emit('degraded'); } else { emit('online'); }",
},
"type": "keyword",
},

View file

@ -26,9 +26,17 @@ const _buildInactiveCondition = (opts: {
inactivityTimeouts: InactivityTimeouts;
maxAgentPoliciesWithInactivityTimeout: number;
field: (path: string) => string;
fieldPath: (path: string) => string;
logger?: Logger;
}): string | null => {
const { now, inactivityTimeouts, maxAgentPoliciesWithInactivityTimeout, field, logger } = opts;
const {
now,
inactivityTimeouts,
maxAgentPoliciesWithInactivityTimeout,
field,
fieldPath,
logger,
} = opts;
// if there are no policies with inactivity timeouts, then no agents are inactive
if (inactivityTimeouts.length === 0) {
return null;
@ -70,7 +78,9 @@ const _buildInactiveCondition = (opts: {
})
.join(' || ');
return `lastCheckinMillis > 0 && ${field('policy_id')}.size() > 0 && ${policyClauses}`;
return `lastCheckinMillis > 0 && doc.containsKey(${fieldPath('policy_id')}) && ${field(
'policy_id'
)}.size() > 0 && ${policyClauses}`;
};
function _buildSource(
@ -81,12 +91,14 @@ function _buildSource(
) {
const normalizedPrefix = pathPrefix ? `${pathPrefix}${pathPrefix.endsWith('.') ? '' : '.'}` : '';
const field = (path: string) => `doc['${normalizedPrefix + path}']`;
const fieldPath = (path: string) => `'${normalizedPrefix + path}'`;
const now = Date.now();
const agentIsInactiveCondition = _buildInactiveCondition({
now,
inactivityTimeouts,
maxAgentPoliciesWithInactivityTimeout,
field,
fieldPath,
logger,
});