[Fleet] Enforce 10 min cooldown for agent upgrade (#168606)

## Summary

Closes https://github.com/elastic/kibana/issues/168233

This PR adds a check based on the `agent.upgraded_at` field and the time
a request to upgrade is issued. If the request is issued sooner than 10
minutes after the last upgrade, it is rejected, even if `force: true` is
passed:
- `POST agents/{agentId}/upgrade` will fail with 429 (and a `retry-after` header)
- agents included in `POST agents/bulk_upgrade` will not be upgraded

### Checklist

- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios

---------

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
Co-authored-by: Kyle Pollich <kyle.pollich@elastic.co>
This commit is contained in:
Jill Guyonnet 2023-10-18 20:34:33 +02:00 committed by GitHub
parent 2a5c5db783
commit 4fffedd4bb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 335 additions and 20 deletions

View file

@ -18,7 +18,7 @@ export { isPackageLimited, doesAgentPolicyAlreadyIncludePackage } from './limite
export { isValidNamespace, INVALID_NAMESPACE_CHARACTERS } from './is_valid_namespace';
export { isDiffPathProtocol } from './is_diff_path_protocol';
export { LicenseService } from './license';
export { isAgentUpgradeable } from './is_agent_upgradeable';
export * from './is_agent_upgradeable';
export {
isAgentRequestDiagnosticsSupported,
MINIMUM_DIAGNOSTICS_AGENT_VERSION,

View file

@ -7,7 +7,7 @@
import type { Agent } from '../types/models/agent';
import { isAgentUpgradeable } from './is_agent_upgradeable';
import { getRecentUpgradeInfoForAgent, isAgentUpgradeable } from './is_agent_upgradeable';
const getAgent = ({
version,
@ -15,14 +15,14 @@ const getAgent = ({
unenrolling = false,
unenrolled = false,
updating = false,
upgraded = false,
minutesSinceUpgrade,
}: {
version: string;
upgradeable?: boolean;
unenrolling?: boolean;
unenrolled?: boolean;
updating?: boolean;
upgraded?: boolean;
minutesSinceUpgrade?: number;
}): Agent => {
const agent: Agent = {
id: 'de9006e1-54a7-4320-b24e-927e6fe518a8',
@ -101,8 +101,8 @@ const getAgent = ({
if (updating) {
agent.upgrade_started_at = new Date(Date.now()).toISOString();
}
if (upgraded) {
agent.upgraded_at = new Date(Date.now()).toISOString();
if (minutesSinceUpgrade) {
agent.upgraded_at = new Date(Date.now() - minutesSinceUpgrade * 6e4).toISOString();
}
return agent;
};
@ -176,9 +176,42 @@ describe('Fleet - isAgentUpgradeable', () => {
isAgentUpgradeable(getAgent({ version: '7.9.0', upgradeable: true, updating: true }), '8.0.0')
).toBe(false);
});
it('returns true if agent was recently upgraded', () => {
it('returns false if the agent reports upgradeable but was upgraded less than 10 minutes ago', () => {
expect(
isAgentUpgradeable(getAgent({ version: '7.9.0', upgradeable: true, upgraded: true }), '8.0.0')
isAgentUpgradeable(
getAgent({ version: '7.9.0', upgradeable: true, minutesSinceUpgrade: 9 }),
'8.0.0'
)
).toBe(false);
});
it('returns true if agent reports upgradeable and was upgraded more than 10 minutes ago', () => {
expect(
isAgentUpgradeable(
getAgent({ version: '7.9.0', upgradeable: true, minutesSinceUpgrade: 11 }),
'8.0.0'
)
).toBe(true);
});
});
// Unit tests for the upgrade cooldown helper. The suite is named after the
// function it exercises, and every case checks the `hasBeenUpgradedRecently`
// flag derived from the agent's `upgraded_at` timestamp.
describe('getRecentUpgradeInfoForAgent', () => {
  it('returns true if the agent was upgraded less than 10 minutes ago', () => {
    expect(
      getRecentUpgradeInfoForAgent(getAgent({ version: '7.9.0', minutesSinceUpgrade: 9 }))
        .hasBeenUpgradedRecently
    ).toBe(true);
  });

  it('returns false if the agent was upgraded more than 10 minutes ago', () => {
    expect(
      getRecentUpgradeInfoForAgent(getAgent({ version: '7.9.0', minutesSinceUpgrade: 11 }))
        .hasBeenUpgradedRecently
    ).toBe(false);
  });

  // `getAgent` only sets `upgraded_at` when `minutesSinceUpgrade` is provided.
  it('returns false if the agent does not have an upgraded_at field', () => {
    expect(
      getRecentUpgradeInfoForAgent(getAgent({ version: '7.9.0' })).hasBeenUpgradedRecently
    ).toBe(false);
  });
});

View file

@ -11,6 +11,8 @@ import semverGt from 'semver/functions/gt';
import type { Agent } from '../types';
export const AGENT_UPGRADE_COOLDOWN_IN_MIN = 10;
export function isAgentUpgradeable(
agent: Agent,
latestAgentVersion: string,
@ -32,6 +34,10 @@ export function isAgentUpgradeable(
if (agent.upgrade_started_at && !agent.upgraded_at) {
return false;
}
// check that the agent has not been upgraded more recently than the monitoring period
if (getRecentUpgradeInfoForAgent(agent).hasBeenUpgradedRecently) {
return false;
}
if (versionToUpgrade !== undefined) {
return isNotDowngrade(agentVersion, versionToUpgrade);
}
@ -56,3 +62,21 @@ const isNotDowngrade = (agentVersion: string, versionToUpgrade: string) => {
return semverGt(versionToUpgradeNumber, agentVersionNumber);
};
/**
 * Reports whether an agent is still inside the post-upgrade cooldown window
 * (`AGENT_UPGRADE_COOLDOWN_IN_MIN` minutes after `agent.upgraded_at`).
 *
 * @param agent The agent whose `upgraded_at` timestamp is inspected.
 * @returns `hasBeenUpgradedRecently` is true while the cooldown has not yet
 *   elapsed. `timeToWaitMs` is the number of milliseconds left in the cooldown,
 *   and is `0` whenever `hasBeenUpgradedRecently` is false (no `upgraded_at`,
 *   an unparseable `upgraded_at`, or a cooldown that has already elapsed).
 */
export function getRecentUpgradeInfoForAgent(agent: Agent): {
  hasBeenUpgradedRecently: boolean;
  timeToWaitMs: number;
} {
  if (!agent.upgraded_at) {
    return { hasBeenUpgradedRecently: false, timeToWaitMs: 0 };
  }

  const upgradedAtMs = Date.parse(agent.upgraded_at);
  // Date.parse yields NaN for strings it cannot interpret; without this guard
  // the NaN would propagate into `timeToWaitMs`.
  if (Number.isNaN(upgradedAtMs)) {
    return { hasBeenUpgradedRecently: false, timeToWaitMs: 0 };
  }

  const cooldownMs = AGENT_UPGRADE_COOLDOWN_IN_MIN * 6e4;
  const elapsedSinceUpgradeMs = Date.now() - upgradedAtMs;
  const hasBeenUpgradedRecently = elapsedSinceUpgradeMs < cooldownMs;
  // Never report a negative wait: once the cooldown is over there is nothing to wait for.
  const timeToWaitMs = hasBeenUpgradedRecently ? cooldownMs - elapsedSinceUpgradeMs : 0;

  return { hasBeenUpgradedRecently, timeToWaitMs };
}

View file

@ -27,6 +27,8 @@ import type { EuiComboBoxOptionOption } from '@elastic/eui';
import semverGt from 'semver/functions/gt';
import semverLt from 'semver/functions/lt';
import { AGENT_UPGRADE_COOLDOWN_IN_MIN } from '../../../../../../../common/services';
import { getMinVersion } from '../../../../../../../common/services/get_min_max_version';
import {
AGENT_UPDATING_TIMEOUT_HOURS,
@ -361,14 +363,32 @@ export const AgentUpgradeAgentModal: React.FunctionComponent<AgentUpgradeAgentMo
defaultMessage="No selected agents are eligible for an upgrade. Please select one or more eligible agents."
/>
) : isSingleAgent ? (
<FormattedMessage
id="xpack.fleet.upgradeAgents.upgradeSingleDescription"
defaultMessage="This action will upgrade the agent running on '{hostName}' to version {version}. This action can not be undone. Are you sure you wish to continue?"
values={{
hostName: ((agents[0] as Agent).local_metadata.host as any).hostname,
version: getVersion(selectedVersion),
}}
/>
<>
<p>
<FormattedMessage
id="xpack.fleet.upgradeAgents.upgradeSingleDescription"
defaultMessage="This action will upgrade the agent running on '{hostName}' to version {version}. This action can not be undone. Are you sure you wish to continue?"
values={{
hostName: ((agents[0] as Agent).local_metadata.host as any).hostname,
version: getVersion(selectedVersion),
}}
/>
</p>
{isUpdating && (
<p>
<em>
<FormattedMessage
id="xpack.fleet.upgradeAgents.upgradeSingleTimeout"
// TODO: Add link to docs regarding agent upgrade cooldowns
defaultMessage="Note that you may only restart an upgrade every {minutes} minutes to ensure that the upgrade will not be rolled back."
values={{
minutes: AGENT_UPGRADE_COOLDOWN_IN_MIN,
}}
/>
</em>
</p>
)}
</>
) : (
<FormattedMessage
id="xpack.fleet.upgradeAgents.upgradeMultipleDescription"

View file

@ -13,12 +13,18 @@ import semverGt from 'semver/functions/gt';
import semverMajor from 'semver/functions/major';
import semverMinor from 'semver/functions/minor';
import moment from 'moment';
import type { PostAgentUpgradeResponse } from '../../../common/types';
import type { PostAgentUpgradeRequestSchema, PostBulkAgentUpgradeRequestSchema } from '../../types';
import * as AgentService from '../../services/agents';
import { appContextService } from '../../services';
import { defaultFleetErrorHandler } from '../../errors';
import { isAgentUpgradeable } from '../../../common/services';
import {
getRecentUpgradeInfoForAgent,
isAgentUpgradeable,
AGENT_UPGRADE_COOLDOWN_IN_MIN,
} from '../../../common/services';
import { getMaxVersion } from '../../../common/services/get_min_max_version';
import { getAgentById } from '../../services/agents';
import type { Agent } from '../../types';
@ -67,6 +73,24 @@ export const postAgentUpgradeHandler: RequestHandler<
}
}
const { hasBeenUpgradedRecently, timeToWaitMs } = getRecentUpgradeInfoForAgent(agent);
const timeToWaitString = moment
.utc(moment.duration(timeToWaitMs).asMilliseconds())
.format('mm[m]ss[s]');
if (hasBeenUpgradedRecently) {
return response.customError({
statusCode: 429,
body: {
message: `agent ${request.params.agentId} was upgraded less than ${AGENT_UPGRADE_COOLDOWN_IN_MIN} minutes ago. Please wait ${timeToWaitString} before trying again to ensure the upgrade will not be rolled back.`,
},
headers: {
// retry-after expects seconds
'retry-after': Math.ceil(timeToWaitMs / 1000).toString(),
},
});
}
if (agent.unenrollment_started_at || agent.unenrolled_at) {
return response.customError({
statusCode: 400,

View file

@ -46,6 +46,7 @@ describe('sendUpgradeAgentsActions (plural)', () => {
const docs = (calledWith as estypes.BulkRequest)?.body
?.filter((i: any) => i.doc)
.map((i: any) => i.doc);
expect(ids).toEqual(idsToAction);
for (const doc of docs!) {
expect(doc).toHaveProperty('upgrade_started_at');

View file

@ -10,7 +10,7 @@ import type { SavedObjectsClientContract, ElasticsearchClient } from '@kbn/core/
import { v4 as uuidv4 } from 'uuid';
import moment from 'moment';
import { isAgentUpgradeable } from '../../../common/services';
import { getRecentUpgradeInfoForAgent, isAgentUpgradeable } from '../../../common/services';
import type { Agent } from '../../types';
@ -76,9 +76,10 @@ export async function upgradeBatch(
const latestAgentVersion = await getLatestAvailableVersion();
const upgradeableResults = await Promise.allSettled(
agentsToCheckUpgradeable.map(async (agent) => {
// Filter out agents currently unenrolling, unenrolled, or not upgradeable b/c of version check
// Filter out agents currently unenrolling, unenrolled, recently upgraded or not upgradeable b/c of version check
const isNotAllowed =
!options.force && !isAgentUpgradeable(agent, latestAgentVersion, options.version);
getRecentUpgradeInfoForAgent(agent).hasBeenUpgradedRecently ||
(!options.force && !isAgentUpgradeable(agent, latestAgentVersion, options.version));
if (isNotAllowed) {
throw new FleetError(`Agent ${agent.id} is not upgradeable`);
}

View file

@ -147,6 +147,7 @@ export default function (providerContext: FtrProviderContext) {
})
.expect(400);
});
it('should respond 200 if upgrading agent with version the same as snapshot version and force flag is passed', async () => {
const fleetServerVersionSnapshot = makeSnapshotVersion(fleetServerVersion);
await es.update({
@ -170,6 +171,7 @@ export default function (providerContext: FtrProviderContext) {
})
.expect(200);
});
it('should respond 200 if upgrading agent with version less than kibana snapshot version', async () => {
const fleetServerVersionSnapshot = makeSnapshotVersion(fleetServerVersion);
@ -191,6 +193,7 @@ export default function (providerContext: FtrProviderContext) {
})
.expect(200);
});
it('should respond 200 if trying to upgrade with source_uri set', async () => {
await es.update({
id: 'agent1',
@ -219,6 +222,7 @@ export default function (providerContext: FtrProviderContext) {
const action: any = actionsRes.hits.hits[0]._source;
expect(action.data.sourceURI).contain('http://path/to/download');
});
it('should respond 400 if trying to upgrade to a version that does not match installed kibana version', async () => {
const kibanaVersion = await kibanaServer.version.get();
const higherVersion = semver.inc(kibanaVersion, 'patch');
@ -230,6 +234,7 @@ export default function (providerContext: FtrProviderContext) {
})
.expect(400);
});
it('should respond 400 if trying to downgrade version', async () => {
await es.update({
id: 'agent1',
@ -249,6 +254,7 @@ export default function (providerContext: FtrProviderContext) {
})
.expect(400);
});
it('should respond 400 if trying to upgrade an agent that is unenrolling', async () => {
await supertest.post(`/api/fleet/agents/agent1/unenroll`).set('kbn-xsrf', 'xxx').send({
revoke: true,
@ -261,6 +267,7 @@ export default function (providerContext: FtrProviderContext) {
})
.expect(400);
});
it('should respond 400 if trying to upgrade an agent that is unenrolled', async () => {
await es.update({
id: 'agent1',
@ -344,6 +351,98 @@ export default function (providerContext: FtrProviderContext) {
})
.expect(403);
});
it('should respond 429 if trying to upgrade a recently upgraded agent', async () => {
  // Mark agent1 as upgraded 9 minutes ago, i.e. still inside the 10 minute cooldown.
  const agentDoc = {
    upgraded_at: new Date(Date.now() - 9 * 6e4).toISOString(),
    local_metadata: { elastic: { agent: { upgradeable: true, version: '0.0.0' } } },
  };
  await es.update({
    id: 'agent1',
    refresh: 'wait_for',
    index: AGENTS_INDEX,
    body: { doc: agentDoc },
  });

  const res = await supertest
    .post(`/api/fleet/agents/agent1/upgrade`)
    .set('kbn-xsrf', 'xxx')
    .send({ version: fleetServerVersion })
    .expect(429);

  expect(res.body.message).to.contain('was upgraded less than 10 minutes ago');
  // The remaining wait depends on how long the test takes to run, so only its format is checked.
  expect(res.body.message).to.match(/please wait \d{2}m\d{2}s/i);
  expect(res.header['retry-after']).to.match(/^\d+$/);
});
it('should respond 429 if trying to upgrade a recently upgraded agent with force flag', async () => {
  // agent1 was upgraded 9 minutes ago; the request below also passes `force: true`.
  const agentDoc = {
    upgraded_at: new Date(Date.now() - 9 * 6e4).toISOString(),
    local_metadata: { elastic: { agent: { upgradeable: true, version: '0.0.0' } } },
  };
  await es.update({
    id: 'agent1',
    refresh: 'wait_for',
    index: AGENTS_INDEX,
    body: { doc: agentDoc },
  });

  const upgradeRequest = { version: fleetServerVersion, force: true };
  await supertest
    .post(`/api/fleet/agents/agent1/upgrade`)
    .set('kbn-xsrf', 'xxx')
    .send(upgradeRequest)
    .expect(429);
});
it('should respond 200 if trying to upgrade an agent that was upgraded more than 10 minutes ago', async () => {
  // agent1 was upgraded 11 minutes ago, i.e. just outside the 10 minute cooldown.
  // `upgraded_at` must sit at the top level of the agent document (as in the 429
  // tests) and be an ISO string; nested under `local_metadata` it would never set
  // the agent's `upgraded_at` and the cooldown path would not be exercised.
  await es.update({
    id: 'agent1',
    refresh: 'wait_for',
    index: AGENTS_INDEX,
    body: {
      doc: {
        upgraded_at: new Date(Date.now() - 11 * 6e4).toISOString(),
        local_metadata: {
          elastic: {
            agent: {
              upgradeable: true,
              version: '0.0.0',
            },
          },
        },
      },
    },
  });

  await supertest
    .post(`/api/fleet/agents/agent1/upgrade`)
    .set('kbn-xsrf', 'xxx')
    .send({
      version: fleetServerVersion,
    })
    .expect(200);
});
});
describe('multiple agents', () => {
@ -397,6 +496,7 @@ export default function (providerContext: FtrProviderContext) {
},
});
});
it('should respond 200 to bulk upgrade upgradeable agents and update the agent SOs', async () => {
await es.update({
id: 'agent1',
@ -483,6 +583,7 @@ export default function (providerContext: FtrProviderContext) {
expect(action.agents).contain('agent1');
expect(action.agents).contain('agent2');
});
it('should create a .fleet-actions document with the agents, version, and start_time if start_time passed', async () => {
await es.update({
id: 'agent1',
@ -675,6 +776,7 @@ export default function (providerContext: FtrProviderContext) {
expect(typeof agent1data.body.item.upgrade_started_at).to.be('undefined');
expect(typeof agent2data.body.item.upgrade_started_at).to.be('string');
});
it('should not upgrade an unenrolled agent during bulk_upgrade', async () => {
await es.update({
id: 'agent1',
@ -713,6 +815,7 @@ export default function (providerContext: FtrProviderContext) {
expect(typeof agent1data.body.item.upgrade_started_at).to.be('undefined');
expect(typeof agent2data.body.item.upgrade_started_at).to.be('string');
});
it('should not upgrade a non-upgradeable agent during bulk_upgrade', async () => {
const kibanaVersion = await kibanaServer.version.get();
await es.update({
@ -765,6 +868,112 @@ export default function (providerContext: FtrProviderContext) {
expect(typeof agent2data.body.item.upgrade_started_at).to.be('undefined');
expect(typeof agent3data.body.item.upgrade_started_at).to.be('undefined');
});
it('should not upgrade a recently upgraded agent during bulk_upgrade', async () => {
  // Stamps the given agent document with an `upgraded_at` that lies `minutes` in the past.
  const markUpgradedMinutesAgo = (agentId: string, minutes: number) =>
    es.update({
      id: agentId,
      refresh: 'wait_for',
      index: AGENTS_INDEX,
      body: {
        doc: {
          upgraded_at: new Date(Date.now() - minutes * 6e4).toISOString(),
          local_metadata: { elastic: { agent: { upgradeable: true, version: '0.0.0' } } },
        },
      },
    });

  // agent1 is past the 10 minute cooldown, agent2 is still inside it.
  await markUpgradedMinutesAgo('agent1', 11);
  await markUpgradedMinutesAgo('agent2', 9);

  await supertest
    .post(`/api/fleet/agents/bulk_upgrade`)
    .set('kbn-xsrf', 'xxx')
    .send({ agents: ['agent1', 'agent2'], version: fleetServerVersion });

  const fetchAgent = (agentId: string) =>
    supertest.get(`/api/fleet/agents/${agentId}`).set('kbn-xsrf', 'xxx');
  const [first, second] = await Promise.all([fetchAgent('agent1'), fetchAgent('agent2')]);

  // Only agent1 should have had an upgrade started.
  expect(typeof first.body.item.upgrade_started_at).to.be('string');
  expect(typeof second.body.item.upgrade_started_at).to.be('undefined');
});
it('should not upgrade a recently upgraded agent during bulk_upgrade even with force flag', async () => {
  // Stamps the given agent document with an `upgraded_at` that lies `minutes` in the past.
  const markUpgradedMinutesAgo = (agentId: string, minutes: number) =>
    es.update({
      id: agentId,
      refresh: 'wait_for',
      index: AGENTS_INDEX,
      body: {
        doc: {
          upgraded_at: new Date(Date.now() - minutes * 6e4).toISOString(),
          local_metadata: { elastic: { agent: { upgradeable: true, version: '0.0.0' } } },
        },
      },
    });

  // agent1 is past the 10 minute cooldown, agent2 is still inside it.
  await markUpgradedMinutesAgo('agent1', 11);
  await markUpgradedMinutesAgo('agent2', 9);

  // Same request as the non-forced case, but with `force: true`.
  await supertest
    .post(`/api/fleet/agents/bulk_upgrade`)
    .set('kbn-xsrf', 'xxx')
    .send({ agents: ['agent1', 'agent2'], version: fleetServerVersion, force: true });

  const fetchAgent = (agentId: string) =>
    supertest.get(`/api/fleet/agents/${agentId}`).set('kbn-xsrf', 'xxx');
  const [first, second] = await Promise.all([fetchAgent('agent1'), fetchAgent('agent2')]);

  // Only agent1 should have had an upgrade started.
  expect(typeof first.body.item.upgrade_started_at).to.be('string');
  expect(typeof second.body.item.upgrade_started_at).to.be('undefined');
});
it('should upgrade a non upgradeable agent during bulk_upgrade with force flag', async () => {
await es.update({
id: 'agent1',
@ -817,6 +1026,7 @@ export default function (providerContext: FtrProviderContext) {
expect(typeof agent2data.body.item.upgrade_started_at).to.be('string');
expect(typeof agent3data.body.item.upgrade_started_at).to.be('string');
});
it('should respond 400 if trying to bulk upgrade to a version that is higher than the latest installed kibana version', async () => {
const kibanaVersion = await kibanaServer.version.get();
const higherVersion = semver.inc(kibanaVersion, 'patch');
@ -851,6 +1061,7 @@ export default function (providerContext: FtrProviderContext) {
})
.expect(400);
});
it('should respond 400 if trying to bulk upgrade to a version that is higher than the latest fleet server version', async () => {
const higherVersion = semver.inc(fleetServerVersion, 'patch');
await es.update({
@ -884,6 +1095,7 @@ export default function (providerContext: FtrProviderContext) {
})
.expect(400);
});
it('should prevent any agent to downgrade', async () => {
await es.update({
id: 'agent1',