mirror of
https://github.com/elastic/kibana.git
synced 2025-04-24 09:48:58 -04:00
[Session cleanup] Update session index cleanup to check for missing shards (#205744)
Closes https://github.com/elastic/kibana/issues/205146

## Summary

We run a session cleanup task that opens a point-in-time (PIT) query to delete older sessions from the session index. We've noticed that this task fails quite often with the same error, `no_shard_available_action_exception`. On investigation, it's possible for the point-in-time query to be opened when there are no shards available for that index. This PR fixes that by checking whether the PIT query fails with a 503 and bailing out if it does, allowing the task to be retried on the next run of the task manager. We allow up to 10 consecutive failures of the cleanup task before logging an error.

### Testing

Unfortunately, there's no reliable way to simulate missing shards locally. I've added a new integration test config here:

```
node scripts/functional_tests_server.js --config x-pack/test/security_api_integration/session_shard_missing.config.ts
```

This overrides the ES function to return 503 when opening the PIT query and then asserts on the result reported by the task manager.

### Release note

Updates the session cleanup mechanism to account for potentially missing shards in the session index.

### Checklist

Check the PR satisfies following conditions. Reviewers should verify this PR satisfies this list as well.

- [x] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios
- [x] [Flaky Test Runner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was used on any tests changed: [kibana-flaky-test-suite-runner#7836](https://buildkite.com/elastic/kibana-flaky-test-suite-runner/builds/7836)
- [ ] The PR description includes the appropriate Release Notes section, and the correct `release_note:*` label is applied per the [guidelines](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)

---------

Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
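To make the mechanism concrete, here is a rough TypeScript sketch of the counter-in-task-state pattern described above. It is a simplified illustration, not the actual Kibana code: `runCleanup`, `CleanupState`, `openPit`, and `MAX_SHARD_MISSING_RETRIES` are made-up names, and the real logic lives in `SessionIndex.cleanUp` (see the diff below).

```typescript
import { errors } from '@elastic/elasticsearch';

const MAX_SHARD_MISSING_RETRIES = 10;

interface CleanupState {
  shardMissingCounter: number;
}

// Sketch of the "bail and let Task Manager retry" flow described above.
async function runCleanup(
  previousState: CleanupState,
  openPit: () => Promise<void>,
  log: { warn: (msg: string) => void; error: (msg: string) => void }
): Promise<{ state: CleanupState; error?: string }> {
  try {
    // Open the point-in-time query and delete outdated sessions (elided here).
    await openPit();
    return { state: { shardMissingCounter: 0 } };
  } catch (err) {
    // A 503 mentioning no_shard_available_action_exception means the session
    // index currently has no available shards; skip this run instead of failing.
    const shardsMissing =
      err instanceof errors.ResponseError &&
      err.statusCode === 503 &&
      err.message.includes('no_shard_available_action_exception');

    if (!shardsMissing) {
      throw err;
    }

    const shardMissingCounter = previousState.shardMissingCounter + 1;
    if (shardMissingCounter < MAX_SHARD_MISSING_RETRIES) {
      log.warn(`Shards missing, skipping session cleanup (failure #${shardMissingCounter}).`);
      // Returning only the updated state lets the task be retried on the next scheduled run.
      return { state: { shardMissingCounter } };
    }

    // After 10 consecutive failures, surface an error and reset the counter.
    const message = `Session cleanup failed ${shardMissingCounter} times because shards are missing.`;
    log.error(message);
    return { state: { shardMissingCounter: 0 }, error: message };
  }
}
```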
This commit is contained in:
parent 2dc3eeb443
commit e0886ba73f

9 changed files with 455 additions and 39 deletions
@@ -328,6 +328,7 @@ enabled:
   - x-pack/test/security_api_integration/chips.config.ts
   - x-pack/test/security_api_integration/features.config.ts
   - x-pack/test/security_api_integration/session_idle.config.ts
+  - x-pack/test/security_api_integration/session_shard_missing.config.ts
   - x-pack/test/security_api_integration/session_invalidate.config.ts
   - x-pack/test/security_api_integration/session_lifespan.config.ts
   - x-pack/test/security_api_integration/session_concurrent_limit.config.ts
@@ -17,6 +17,7 @@ import type {
 import { elasticsearchServiceMock, loggingSystemMock } from '@kbn/core/server/mocks';
 import type { AuditLogger } from '@kbn/security-plugin-types-server';
+import { type RunContext, TaskStatus } from '@kbn/task-manager-plugin/server';

 import {
   getSessionIndexSettings,
@@ -39,6 +40,22 @@ describe('Session index', () => {
   const aliasName = '.kibana_some_tenant_security_session';
   const indexTemplateName = '.kibana_some_tenant_security_session_index_template_1';

+  const mockRunContext: RunContext = {
+    taskInstance: {
+      id: 'TASK_ID',
+      taskType: 'TASK_TYPE',
+      params: {},
+      state: {},
+      scheduledAt: new Date(),
+      attempts: 0,
+      retryAt: new Date(),
+      ownerId: 'OWNER_ID',
+      startedAt: new Date(),
+      runAt: new Date(),
+      status: TaskStatus.Idle,
+    },
+  };
+
   const createSessionIndexOptions = (
     config: Record<string, any> = { session: { idleTimeout: null, lifespan: null } }
   ) => ({
@@ -428,6 +445,7 @@ describe('Session index', () => {
       _source: { usernameHash: 'USERNAME_HASH', provider: { name: 'basic1', type: 'basic' } },
       sort: [0],
     };

     beforeEach(() => {
       mockElasticsearchClient.openPointInTime.mockResponse({
         id: 'PIT_ID',
@@ -449,7 +467,7 @@ describe('Session index', () => {
       );
       mockElasticsearchClient.search.mockRejectedValue(failureReason);

-      await expect(sessionIndex.cleanUp()).rejects.toBe(failureReason);
+      await expect(sessionIndex.cleanUp(mockRunContext)).rejects.toBe(failureReason);
       expect(mockElasticsearchClient.openPointInTime).toHaveBeenCalledTimes(1);
       expect(mockElasticsearchClient.search).toHaveBeenCalledTimes(1);
       expect(mockElasticsearchClient.bulk).not.toHaveBeenCalled();
@@ -463,7 +481,7 @@ describe('Session index', () => {
       );
       mockElasticsearchClient.bulk.mockRejectedValue(failureReason);

-      await expect(sessionIndex.cleanUp()).rejects.toBe(failureReason);
+      await expect(sessionIndex.cleanUp(mockRunContext)).rejects.toBe(failureReason);
       expect(mockElasticsearchClient.openPointInTime).toHaveBeenCalledTimes(1);
       expect(mockElasticsearchClient.search).toHaveBeenCalledTimes(1);
       expect(mockElasticsearchClient.bulk).toHaveBeenCalledTimes(1);
@@ -476,7 +494,7 @@ describe('Session index', () => {
       );
       mockElasticsearchClient.indices.refresh.mockRejectedValue(failureReason);

-      await sessionIndex.cleanUp();
+      await sessionIndex.cleanUp(mockRunContext);
       expect(mockElasticsearchClient.openPointInTime).toHaveBeenCalledTimes(1);
       expect(mockElasticsearchClient.search).toHaveBeenCalledTimes(1);
       expect(mockElasticsearchClient.bulk).toHaveBeenCalledTimes(1);
@@ -500,7 +518,7 @@ describe('Session index', () => {
         };
       });

-      await sessionIndex.cleanUp();
+      await sessionIndex.cleanUp(mockRunContext);
       expect(mockElasticsearchClient.openPointInTime).toHaveBeenCalledTimes(2);
       expect(mockElasticsearchClient.openPointInTime).toHaveBeenNthCalledWith(
         1,
@@ -529,7 +547,7 @@ describe('Session index', () => {
     });

     it('when neither `lifespan` nor `idleTimeout` is configured', async () => {
-      await sessionIndex.cleanUp();
+      await sessionIndex.cleanUp(mockRunContext);

       expect(mockElasticsearchClient.openPointInTime).toHaveBeenCalledTimes(1);
       expect(mockElasticsearchClient.search).toHaveBeenCalledTimes(1);
@@ -611,7 +629,7 @@ describe('Session index', () => {
         auditLogger,
       });

-      await sessionIndex.cleanUp();
+      await sessionIndex.cleanUp(mockRunContext);

       expect(mockElasticsearchClient.openPointInTime).toHaveBeenCalledTimes(1);
       expect(mockElasticsearchClient.search).toHaveBeenCalledTimes(1);
@@ -705,7 +723,7 @@ describe('Session index', () => {
         auditLogger,
       });

-      await sessionIndex.cleanUp();
+      await sessionIndex.cleanUp(mockRunContext);

       expect(mockElasticsearchClient.openPointInTime).toHaveBeenCalledTimes(1);
       expect(mockElasticsearchClient.search).toHaveBeenCalledTimes(1);
@@ -793,7 +811,7 @@ describe('Session index', () => {
         auditLogger,
       });

-      await sessionIndex.cleanUp();
+      await sessionIndex.cleanUp(mockRunContext);

       expect(mockElasticsearchClient.openPointInTime).toHaveBeenCalledTimes(1);
       expect(mockElasticsearchClient.search).toHaveBeenCalledTimes(1);
@@ -906,7 +924,7 @@ describe('Session index', () => {
         auditLogger,
       });

-      await sessionIndex.cleanUp();
+      await sessionIndex.cleanUp(mockRunContext);

       expect(mockElasticsearchClient.openPointInTime).toHaveBeenCalledTimes(1);
       expect(mockElasticsearchClient.search).toHaveBeenCalledTimes(1);
@@ -1030,7 +1048,7 @@ describe('Session index', () => {
         } as SearchResponse);
       }

-      await sessionIndex.cleanUp();
+      await sessionIndex.cleanUp(mockRunContext);

       expect(mockElasticsearchClient.openPointInTime).toHaveBeenCalledTimes(1);
       expect(mockElasticsearchClient.search).toHaveBeenCalledTimes(2);
@@ -1044,7 +1062,7 @@ describe('Session index', () => {
         hits: { hits: new Array(10_000).fill(sessionValue, 0) },
       } as SearchResponse);

-      await sessionIndex.cleanUp();
+      await sessionIndex.cleanUp(mockRunContext);

       expect(mockElasticsearchClient.openPointInTime).toHaveBeenCalledTimes(1);
       expect(mockElasticsearchClient.search).toHaveBeenCalledTimes(10);
@@ -1054,7 +1072,7 @@ describe('Session index', () => {
     });

     it('should log audit event', async () => {
-      await sessionIndex.cleanUp();
+      await sessionIndex.cleanUp(mockRunContext);

       expect(auditLogger.log).toHaveBeenCalledWith(
         expect.objectContaining({
@@ -1063,6 +1081,54 @@ describe('Session index', () => {
       );
     });

+    it('should fail silently if shards are missing', async () => {
+      const failureReason = new errors.ResponseError({
+        statusCode: 503,
+        body: {
+          error: {
+            type: 'search_phase_execution_exception Root causes: no_shard_available_action_exception',
+          },
+        },
+        warnings: null,
+        meta: {} as any,
+      });
+
+      mockElasticsearchClient.openPointInTime.mockRejectedValue(failureReason);
+
+      const runResult = await sessionIndex.cleanUp(mockRunContext);
+
+      expect(runResult?.state).toBeTruthy();
+      expect(runResult?.state.shardMissingCounter).toBe(1);
+    });
+
+    it('should throw error if shards are missing for more than 10 tries', async () => {
+      const failureReason = new errors.ResponseError({
+        statusCode: 503,
+        body: {
+          error: {
+            type: 'search_phase_execution_exception Root causes: no_shard_available_action_exception',
+          },
+        },
+        warnings: null,
+        meta: {} as any,
+      });
+
+      mockElasticsearchClient.openPointInTime.mockRejectedValue(failureReason);
+
+      const runContext = {
+        taskInstance: {
+          ...mockRunContext.taskInstance,
+          state: { shardMissingCounter: 9 },
+        },
+      };
+
+      await expect(sessionIndex.cleanUp(runContext)).resolves.toEqual({
+        error:
+          'Failed to clean up sessions: Shards for session index are missing. Cleanup routine has failed 10 times. {"error":{"type":"search_phase_execution_exception Root causes: no_shard_available_action_exception"}}',
+        state: { shardMissingCounter: 0 },
+      });
+    });
+
     describe('concurrent session limit', () => {
       const expectedSearchParameters = () => ({
         index: '.kibana_some_tenant_security_session',
@@ -1137,7 +1203,7 @@ describe('Session index', () => {
     it('when concurrent session limit is not configured', async () => {
       sessionIndex = new SessionIndex(createSessionIndexOptions());

-      await sessionIndex.cleanUp();
+      await sessionIndex.cleanUp(mockRunContext);

       // Only search call for the invalid sessions (use `pit` as marker, since concurrent session limit cleanup
       // routine doesn't rely on PIT).
@@ -1155,7 +1221,7 @@ describe('Session index', () => {
         aggregations: { sessions_grouped_by_user: { sum_other_doc_count: 1 } },
       } as unknown as SearchResponse);

-      await sessionIndex.cleanUp();
+      await sessionIndex.cleanUp(mockRunContext);

       // Only search call for the invalid sessions (use `pit` as marker, since concurrent session limit cleanup
       // routine doesn't rely on PIT).
@@ -1182,7 +1248,7 @@ describe('Session index', () => {
         responses: [{ status: 200, hits: { hits: [{ _id: 'some-id' }, { _id: 'some-id-2' }] } }],
       } as MsearchMultiSearchResult);

-      await sessionIndex.cleanUp();
+      await sessionIndex.cleanUp(mockRunContext);

       // Only search call for the invalid sessions (use `pit` as marker, since concurrent session limit cleanup
       // routine doesn't rely on PIT).
@@ -1234,7 +1300,7 @@ describe('Session index', () => {
         ],
       } as MsearchMultiSearchResult);

-      await sessionIndex.cleanUp();
+      await sessionIndex.cleanUp(mockRunContext);

       // Only search call for the invalid sessions (use `pit` as marker, since concurrent session limit cleanup
       // routine doesn't rely on PIT).
@@ -1296,7 +1362,7 @@ describe('Session index', () => {
         ],
       } as MsearchMultiSearchResult);

-      await sessionIndex.cleanUp();
+      await sessionIndex.cleanUp(mockRunContext);

       expect(mockElasticsearchClient.bulk).toHaveBeenCalledTimes(2);
       expect(mockElasticsearchClient.bulk).toHaveBeenNthCalledWith(
@@ -1347,7 +1413,7 @@ describe('Session index', () => {
         ],
       } as MsearchMultiSearchResult);

-      await sessionIndex.cleanUp();
+      await sessionIndex.cleanUp(mockRunContext);

       expect(auditLogger.log).toHaveBeenCalledTimes(2);
       expect(auditLogger.log).toHaveBeenCalledWith(
@@ -5,6 +5,7 @@
  * 2.0.
  */

+import { errors } from '@elastic/elasticsearch';
 import type {
   AggregateName,
   AggregationsMultiTermsAggregate,
@@ -19,6 +20,7 @@ import semver from 'semver';

 import type { ElasticsearchClient, Logger } from '@kbn/core/server';
 import type { AuditLogger } from '@kbn/security-plugin-types-server';
+import type { RunContext } from '@kbn/task-manager-plugin/server';

 import type { AuthenticationProvider } from '../../common';
 import { sessionCleanupConcurrentLimitEvent, sessionCleanupEvent } from '../audit';
@@ -475,12 +477,15 @@ export class SessionIndex {
   /**
    * Trigger a removal of any outdated session values.
    */
-  async cleanUp() {
+  async cleanUp(taskManagerRunContext: RunContext) {
+    const { taskInstance } = taskManagerRunContext;
     const { auditLogger, logger } = this.options;
     logger.debug('Running cleanup routine.');

     let error: Error | undefined;
     let indexNeedsRefresh = false;
+    let shardMissingCounter = taskInstance.state?.shardMissingCounter ?? 0;

     try {
       for await (const sessionValues of this.getSessionValuesInBatches()) {
         const operations = sessionValues.map(({ _id, _source }) => {
@@ -492,8 +497,37 @@
         indexNeedsRefresh = (await this.bulkDeleteSessions(operations)) || indexNeedsRefresh;
       }
     } catch (err) {
-      logger.error(`Failed to clean up sessions: ${err.message}`);
-      error = err;
+      if (
+        err instanceof errors.ResponseError &&
+        err.statusCode === 503 &&
+        err.message.includes('no_shard_available_action_exception')
+      ) {
+        shardMissingCounter++;
+        if (shardMissingCounter < 10) {
+          logger.warn(
+            `No shards found for session index, skipping session cleanup. This operation has failed ${shardMissingCounter} time(s)`
+          );
+          return {
+            state: {
+              shardMissingCounter,
+            },
+          };
+        }
+
+        const errorMessage = `Failed to clean up sessions: Shards for session index are missing. Cleanup routine has failed ${shardMissingCounter} times. ${getDetailedErrorMessage(
+          err
+        )}`;
+        logger.error(errorMessage);
+        return {
+          error: errorMessage,
+          state: {
+            shardMissingCounter: 0,
+          },
+        };
+      } else {
+        logger.error(`Failed to clean up sessions: ${err.message}`);
+        error = err;
+      }
     }

     // Only refresh the index if we have actually deleted one or more sessions. The index will auto-refresh eventually anyway, this just
@@ -545,6 +579,11 @@
     }

     logger.debug('Cleanup routine successfully completed.');
+    return {
+      state: {
+        shardMissingCounter: 0,
+      },
+    };
   }

   /**
@@ -839,29 +878,30 @@
       });
     }

-    let { body: openPitResponse, statusCode } =
-      await this.options.elasticsearchClient.openPointInTime(
-        {
-          index: this.aliasName,
-          keep_alive: SESSION_INDEX_CLEANUP_KEEP_ALIVE,
-          allow_partial_search_results: true,
-        },
-        { ignore: [404], meta: true }
-      );
-
-    if (statusCode === 404) {
-      await this.ensureSessionIndexExists();
-      ({ body: openPitResponse, statusCode } =
-        await this.options.elasticsearchClient.openPointInTime(
-          {
-            index: this.aliasName,
-            keep_alive: SESSION_INDEX_CLEANUP_KEEP_ALIVE,
-            allow_partial_search_results: true,
-          },
-          { meta: true }
-        ));
+    let response = await this.options.elasticsearchClient.openPointInTime(
+      {
+        index: this.aliasName,
+        keep_alive: SESSION_INDEX_CLEANUP_KEEP_ALIVE,
+        allow_partial_search_results: true,
+      },
+      { ignore: [404], meta: true }
+    );
+
+    if (response.statusCode === 404) {
+      await this.ensureSessionIndexExists();
+      response = await this.options.elasticsearchClient.openPointInTime(
+        {
+          index: this.aliasName,
+          keep_alive: SESSION_INDEX_CLEANUP_KEEP_ALIVE,
+          allow_partial_search_results: true,
+        },
+        { meta: true }
+      );
+    } else if (response.statusCode === 503) {
+      throw new errors.ResponseError(response);
     }

+    const openPitResponse = response.body;
     try {
       let searchAfter: SortResults | undefined;
       for (let i = 0; i < SESSION_INDEX_CLEANUP_BATCH_LIMIT; i++) {
@@ -30,7 +30,11 @@ const mockSessionIndexInitialize = jest.spyOn(SessionIndex.prototype, 'initializ
 mockSessionIndexInitialize.mockResolvedValue();

 const mockSessionIndexCleanUp = jest.spyOn(SessionIndex.prototype, 'cleanUp');
-mockSessionIndexCleanUp.mockResolvedValue();
+mockSessionIndexCleanUp.mockResolvedValue({
+  state: {
+    shardMissingCounter: 0,
+  },
+});

 describe('SessionManagementService', () => {
   let service: SessionManagementService;
@@ -71,7 +71,7 @@ export class SessionManagementService {
     taskManager.registerTaskDefinitions({
       [SESSION_INDEX_CLEANUP_TASK_NAME]: {
         title: 'Cleanup expired or invalid user sessions',
-        createTaskRunner: () => ({ run: () => this.sessionIndex.cleanUp() }),
+        createTaskRunner: (context) => ({ run: () => this.sessionIndex.cleanUp(context) }),
       },
     });
   }
@@ -0,0 +1,68 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { resolve } from 'path';

import type { FtrConfigProviderContext } from '@kbn/test';

import { services } from './services';

// the default export of config files must be a config provider
// that returns an object with the projects config values
export default async function ({ readConfigFile }: FtrConfigProviderContext) {
  const xPackAPITestsConfig = await readConfigFile(require.resolve('../api_integration/config.ts'));

  const kibanaPort = xPackAPITestsConfig.get('servers.kibana.port');
  const idpPath = require.resolve('@kbn/security-api-integration-helpers/saml/idp_metadata.xml');

  const testEndpointsPlugin = resolve(__dirname, '../security_functional/plugins/test_endpoints');

  return {
    testFiles: [resolve(__dirname, './tests/session_shard_missing')],
    services,
    servers: xPackAPITestsConfig.get('servers'),
    esTestCluster: {
      ...xPackAPITestsConfig.get('esTestCluster'),
      serverArgs: [
        ...xPackAPITestsConfig.get('esTestCluster.serverArgs'),
        'xpack.security.authc.token.enabled=true',
        'xpack.security.authc.token.timeout=15s',
        'xpack.security.authc.realms.saml.saml1.order=0',
        `xpack.security.authc.realms.saml.saml1.idp.metadata.path=${idpPath}`,
        'xpack.security.authc.realms.saml.saml1.idp.entity_id=http://www.elastic.co/saml1',
        `xpack.security.authc.realms.saml.saml1.sp.entity_id=http://localhost:${kibanaPort}`,
        `xpack.security.authc.realms.saml.saml1.sp.logout=http://localhost:${kibanaPort}/logout`,
        `xpack.security.authc.realms.saml.saml1.sp.acs=http://localhost:${kibanaPort}/api/security/saml/callback`,
        'xpack.security.authc.realms.saml.saml1.attributes.principal=urn:oid:0.0.7',
      ],
    },

    kbnTestServer: {
      ...xPackAPITestsConfig.get('kbnTestServer'),
      serverArgs: [
        ...xPackAPITestsConfig.get('kbnTestServer.serverArgs'),
        `--plugin-path=${testEndpointsPlugin}`,
        '--xpack.security.session.idleTimeout=10s',
        '--xpack.security.session.cleanupInterval=20s',
        `--xpack.security.authc.providers=${JSON.stringify({
          basic: { basic1: { order: 0 } },
          saml: {
            saml_fallback: { order: 1, realm: 'saml1' },
            saml_override: { order: 2, realm: 'saml1', session: { idleTimeout: '2m' } },
            saml_disable: { order: 3, realm: 'saml1', session: { idleTimeout: 0 } },
          },
        })}`,
        // Exclude Uptime tasks to not interfere (additional ES load) with the session cleanup task.
        `--xpack.task_manager.unsafe.exclude_task_types=${JSON.stringify(['UPTIME:*'])}`,
      ],
    },

    junit: {
      reportName: 'X-Pack Security API Integration Tests (Session Idle Timeout)',
    },
  };
}
@@ -0,0 +1,14 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import type { FtrProviderContext } from '../../ftr_provider_context';

export default function ({ loadTestFile }: FtrProviderContext) {
  describe('security APIs - Session Index Shard Missing', function () {
    loadTestFile(require.resolve('./shard_missing'));
  });
}
@@ -0,0 +1,169 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

import { setTimeout as setTimeoutAsync } from 'timers/promises';

import expect from '@kbn/expect';
import { adminTestUser } from '@kbn/test';

import type { FtrProviderContext } from '../../ftr_provider_context';

export default function ({ getService }: FtrProviderContext) {
  const supertest = getService('supertestWithoutAuth');
  const esSupertest = getService('esSupertest');
  const es = getService('es');
  const esDeleteAllIndices = getService('esDeleteAllIndices');
  const retry = getService('retry');
  const log = getService('log');

  const { username: basicUsername, password: basicPassword } = adminTestUser;

  async function getNumberOfSessionDocuments() {
    await es.indices.refresh({ index: '.kibana_security_session*' });
    return (
      // @ts-expect-error doesn't handle total as number
      (await es.search({ index: '.kibana_security_session*' })).hits.total.value as number
    );
  }

  async function runCleanupTaskSoon() {
    // In most cases, an error would mean the task is currently running so let's run it again
    await retry.tryForTime(30000, async () => {
      await supertest
        .post('/session/_run_cleanup')
        .set('kbn-xsrf', 'xxx')
        .auth(adminTestUser.username, adminTestUser.password)
        .send()
        .expect(200);
    });
  }

  async function addESDebugLoggingSettings() {
    const addLogging = {
      persistent: {
        'logger.org.elasticsearch.xpack.security.authc': 'debug',
      },
    };
    await esSupertest.put('/_cluster/settings').send(addLogging).expect(200);
  }

  async function simulatePointInTimeFailure(simulateOpenPointInTimeFailure: boolean) {
    await supertest
      .post('/simulate_point_in_time_failure')
      .send({ simulateOpenPointInTimeFailure })
      .expect(200);
  }

  async function getCleanupTaskStatus() {
    log.debug('Attempting to get task status');
    const response = await supertest.get('/cleanup_task_status').expect(200);
    const { state } = response.body;
    return state;
  }

  async function resetCleanupTask() {
    log.debug('Resetting cleanup task state to 0');
    await runCleanupTaskSoon();
    let shardMissingCounter = -1;
    while (shardMissingCounter !== 0) {
      await setTimeoutAsync(5000);

      const state = await getCleanupTaskStatus();
      log.debug(`Task status: ${JSON.stringify(state)}`);
      shardMissingCounter = state.shardMissingCounter ?? 0;
    }
    await simulatePointInTimeFailure(false);
    log.debug('Cleanup task reset');
  }

  describe('Session index shard missing', () => {
    beforeEach(async () => {
      await es.cluster.health({ index: '.kibana_security_session*', wait_for_status: 'green' });
      await addESDebugLoggingSettings();
      await esDeleteAllIndices('.kibana_security_session*');
    });

    afterEach(async () => {
      await simulatePointInTimeFailure(false);
    });

    it('quietly fails if shards are unavailable', async function () {
      this.timeout(100000);

      await resetCleanupTask();
      await simulatePointInTimeFailure(true);

      log.debug(`Log in as ${basicUsername} using ${basicPassword} password.`);
      await supertest
        .post('/internal/security/login')
        .set('kbn-xsrf', 'xxx')
        .send({
          providerType: 'basic',
          providerName: 'basic1',
          currentURL: '/',
          params: { username: basicUsername, password: basicPassword },
        })
        .expect(200);

      await runCleanupTaskSoon();

      log.debug('Waiting for cleanup job to run...');

      await setTimeoutAsync(5000);
      await retry.tryForTime(20000, async () => {
        // Session does not clean up but the cleanup task has not failed either
        expect(await getNumberOfSessionDocuments()).to.be(1);
      });

      await simulatePointInTimeFailure(false);
    });

    it('fails if shards are unavailable more than 10 times', async function () {
      this.timeout(600000);

      await resetCleanupTask();

      await simulatePointInTimeFailure(true);

      await supertest
        .post('/internal/security/login')
        .set('kbn-xsrf', 'xxx')
        .send({
          providerType: 'basic',
          providerName: 'basic1',
          currentURL: '/',
          params: { username: basicUsername, password: basicPassword },
        })
        .expect(200);

      let shardMissingCounter = 0;
      while (shardMissingCounter < 9) {
        log.debug('Waiting for cleanup job to run...');
        const currentCounter = shardMissingCounter;
        await runCleanupTaskSoon();

        while (shardMissingCounter <= currentCounter) {
          log.debug(
            `current counter: ${currentCounter}, shard missing counter: ${shardMissingCounter}`
          );
          await setTimeoutAsync(5000);
          const state = await getCleanupTaskStatus();
          shardMissingCounter = state.shardMissingCounter ?? 0;
        }
      }
      if (shardMissingCounter === 9) {
        log.debug('Shard missing counter reached 10, attempting next failure and expecting reset');
        await runCleanupTaskSoon();
        await setTimeoutAsync(5000);
        const state = await getCleanupTaskStatus();
        expect(state.shardMissingCounter).to.be(0);
      }

      await simulatePointInTimeFailure(false);
    });
  });
}
@@ -5,7 +5,7 @@
  * 2.0.
  */

-import { errors } from '@elastic/elasticsearch';
+import { type DiagnosticResult, errors } from '@elastic/elasticsearch';

 import { schema } from '@kbn/config-schema';
 import type { CoreSetup, CoreStart, PluginInitializerContext } from '@kbn/core/server';
@@ -290,4 +290,58 @@ export function initRoutes(
       return response.ok();
     }
   );
+
+  router.post(
+    {
+      path: '/simulate_point_in_time_failure',
+      validate: { body: schema.object({ simulateOpenPointInTimeFailure: schema.boolean() }) },
+      options: { authRequired: false, xsrfRequired: false },
+    },
+    async (context, request, response) => {
+      const esClient = (await context.core).elasticsearch.client.asInternalUser;
+      const originalOpenPointInTime = esClient.openPointInTime;
+
+      if (request.body.simulateOpenPointInTimeFailure) {
+        // @ts-expect-error
+        esClient.openPointInTime = async function (params, options) {
+          const { index } = params;
+          if (index.includes('kibana_security_session')) {
+            return {
+              statusCode: 503,
+              meta: {},
+              body: {
+                error: {
+                  type: 'no_shard_available_action_exception',
+                  reason: 'no shard available for [open]',
+                },
+              },
+            };
+            return {
+              statusCode: 503,
+              message: 'no_shard_available_action_exception',
+            } as unknown as DiagnosticResult;
+          }
+          return originalOpenPointInTime.call(this, params, options);
+        };
+      } else {
+        esClient.openPointInTime = originalOpenPointInTime;
+      }
+
+      return response.ok();
+    }
+  );
+
+  router.get(
+    {
+      path: '/cleanup_task_status',
+      validate: false,
+      options: { authRequired: false },
+    },
+    async (context, request, response) => {
+      const [, { taskManager }] = await core.getStartServices();
+      const res = await taskManager.get(SESSION_INDEX_CLEANUP_TASK_NAME);
+      const { attempts, state, status } = res;
+      return response.ok({ body: { attempts, state, status } });
+    }
+  );
 }