[kbn-test] retry 5xx in saml callback (#208977)

## Summary

When we run Scout tests in parallel, we call SAML authentication in
parallel too and since by default `.security-profile-8` index does not
exist, we periodically getting 503 response:

```
 proc [kibana] [2025-01-29T11:13:10.420+01:00][ERROR][plugins.security.user-profile] 
Failed to activate user profile: {"error":{"root_cause":[{"type":"unavailable_shards_exception","reason":
"at least one search shard for the index [.security-profile-8] is unavailable"}],
"type":"unavailable_shards_exception","reason":"at least one search shard
for the index [.security-profile-8] is unavailable"},"status":503}. {"service":{"node":
{"roles":["background_tasks","ui"]}}}
```

The solution is to retry the SAML callback assuming that index will be
created and the issue will be solved.
We agreed with Kibana-Security to retry only **5xx** errors, because for
**4xx** we most likely have to start the authentication from the start.

For reviews: it is not 100% reproducible, so I added unit tests to
verify the retry logic is working only for 5xx requests. Please let me
know if I miss something

Retry was verified locally, you might be seeing this logs output:

```
 proc [kibana] [2025-01-30T18:40:41.348+01:00][ERROR][plugins.security.user-profile] Failed to activate user profile:
{"error":{"root_cause":[{"type":"unavailable_shards_exception","reason":"at least one search shard for the index
[.security-profile-8] is unavailable"}],"type":"unavailable_shards_exception","reason":"at least one search shard
for the index [.security-profile-8] is unavailable"},"status":503}. {"service":{"node":{"roles":["background_tasks","ui"]}}}
 proc [kibana] [2025-01-30T18:40:41.349+01:00][ERROR][plugins.security.authentication] Login attempt with "saml"
provider failed due to unexpected error: {"error":{"root_cause":[{"type":"unavailable_shards_exception","reason":
"at least one search shard for the index [.security-profile-8] is unavailable"}],"type":"unavailable_shards_exception",
"reason":"at least one search shard for the index [.security-profile-8] is unavailable"},"status":503}
{"service":{"node":{"roles":["background_tasks","ui"]}}}
 proc [kibana] [2025-01-30T18:40:41.349+01:00][ERROR][http] 500 Server Error {"http":{"response":{"status_code":500},"request":{"method":"post","path":"/api/security/saml/callback"}},"error":
{"message":"unavailable_shards_exception\n\tRoot causes:\n\t\tunavailable_shards_exception: at least one
search shard for the index [.security-profile-8] is
    ERROR [scout] SAML callback failed: expected 302, got 500
    Waiting 939 ms before the next attempt
 proc [playwright]
 info [o.e.c.r.a.AllocationService] [scout] current.health="GREEN" message="Cluster health status changed
from [YELLOW] to [GREEN] (reason: [shards started [[.security-profile-8][0]]])."
previous.health="YELLOW" reason="shards started [[.security-profile-8][0]]"
```

To reproduce: 
```
node scripts/scout.js run-tests --stateful --config x-pack/platform/plugins/private/discover_enhanced/ui_tests/parallel.playwright.config.ts
```

---------

Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
Dzmitry Lemechko 2025-02-06 18:39:47 +01:00 committed by GitHub
parent 228f83fde3
commit 2b5bbf8f86
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 198 additions and 218 deletions

View file

@ -49,6 +49,10 @@ export class KibanaUrl {
return url.href;
}
domain() {
return this.#baseUrl.hostname;
}
/**
* Get the URL for an app
* @param appName name of the app to get the URL for

View file

@ -19,7 +19,7 @@ import type {
} from './worker';
import {
scoutPageParallelFixture,
browserAuthParallelFixture,
browserAuthFixture,
pageObjectsParallelFixture,
validateTagsFixture,
} from './test';
@ -30,7 +30,7 @@ export const scoutParallelFixtures = mergeTests(
coreWorkerFixtures,
scoutSpaceParallelFixture,
// test scope fixtures
browserAuthParallelFixture,
browserAuthFixture,
scoutPageParallelFixture,
pageObjectsParallelFixture,
validateTagsFixture

View file

@ -7,6 +7,9 @@
* License v3.0 only", or the "Server Side Public License, v 1".
*/
import { PROJECT_DEFAULT_ROLES } from '../../../../common';
import { coreWorkerFixtures } from '../../worker';
export type LoginFunction = (role: string) => Promise<void>;
export interface BrowserAuthFixture {
@ -27,5 +30,40 @@ export interface BrowserAuthFixture {
loginAsPrivilegedUser: () => Promise<void>;
}
export { browserAuthParallelFixture } from './parallel';
export { browserAuthFixture } from './single_thread';
/**
* The "browserAuth" fixture simplifies the process of logging into Kibana with
* different roles during tests. It uses the "samlAuth" fixture to create an authentication session
* for the specified role and the "context" fixture to update the cookie with the role-scoped session.
*/
export const browserAuthFixture = coreWorkerFixtures.extend<{ browserAuth: BrowserAuthFixture }>({
browserAuth: async ({ log, context, samlAuth, config, kbnUrl }, use) => {
const setSessionCookie = async (cookieValue: string) => {
await context.clearCookies();
await context.addCookies([
{
name: 'sid',
value: cookieValue,
path: '/',
domain: kbnUrl.domain(),
},
]);
};
const loginAs: LoginFunction = async (role) => {
const cookie = await samlAuth.getInteractiveUserSessionCookieWithRoleScope(role);
await setSessionCookie(cookie);
};
const loginAsAdmin = () => loginAs('admin');
const loginAsViewer = () => loginAs('viewer');
const loginAsPrivilegedUser = () => {
const roleName = config.serverless
? PROJECT_DEFAULT_ROLES.get(config.projectType!)!
: 'editor';
return loginAs(roleName);
};
log.serviceLoaded('browserAuth');
await use({ loginAsAdmin, loginAsViewer, loginAsPrivilegedUser });
},
});

View file

@ -1,55 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
import { BrowserAuthFixture, LoginFunction } from '.';
import { PROJECT_DEFAULT_ROLES } from '../../../../common';
import { coreWorkerFixtures } from '../../worker';
import { ScoutSpaceParallelFixture } from '../../worker/scout_space';
/**
* The "browserAuth" fixture simplifies the process of logging into Kibana with
* different roles during tests. It uses the "samlAuth" fixture to create an authentication session
* for the specified role and the "context" fixture to update the cookie with the role-scoped session.
*/
export const browserAuthParallelFixture = coreWorkerFixtures.extend<
{ browserAuth: BrowserAuthFixture },
{ scoutSpace: ScoutSpaceParallelFixture }
>({
browserAuth: async ({ log, context, samlAuth, config, scoutSpace }, use) => {
const setSessionCookie = async (cookieValue: string) => {
await context.clearCookies();
await context.addCookies([
{
name: 'sid',
value: cookieValue,
path: '/',
domain: 'localhost',
},
]);
};
const loginAs: LoginFunction = async (role) => {
const spaceId = scoutSpace.id;
const cookie = await samlAuth.getInteractiveUserSessionCookieWithRoleScope(role, { spaceId });
await setSessionCookie(cookie);
};
const loginAsAdmin = () => loginAs('admin');
const loginAsViewer = () => loginAs('viewer');
const loginAsPrivilegedUser = () => {
const roleName = config.serverless
? PROJECT_DEFAULT_ROLES.get(config.projectType!)!
: 'editor';
return loginAs(roleName);
};
log.serviceLoaded(`browserAuth:${scoutSpace.id}`);
await use({ loginAsAdmin, loginAsViewer, loginAsPrivilegedUser });
},
});

View file

@ -1,50 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
import { PROJECT_DEFAULT_ROLES } from '../../../../common';
import { coreWorkerFixtures } from '../../worker';
import { BrowserAuthFixture, LoginFunction } from '.';
/**
* The "browserAuth" fixture simplifies the process of logging into Kibana with
* different roles during tests. It uses the "samlAuth" fixture to create an authentication session
* for the specified role and the "context" fixture to update the cookie with the role-scoped session.
*/
export const browserAuthFixture = coreWorkerFixtures.extend<{ browserAuth: BrowserAuthFixture }>({
browserAuth: async ({ log, context, samlAuth, config }, use) => {
const setSessionCookie = async (cookieValue: string) => {
await context.clearCookies();
await context.addCookies([
{
name: 'sid',
value: cookieValue,
path: '/',
domain: 'localhost',
},
]);
};
const loginAs: LoginFunction = async (role) => {
const cookie = await samlAuth.getInteractiveUserSessionCookieWithRoleScope(role);
await setSessionCookie(cookie);
};
const loginAsAdmin = () => loginAs('admin');
const loginAsViewer = () => loginAs('viewer');
const loginAsPrivilegedUser = () => {
const roleName = config.serverless
? PROJECT_DEFAULT_ROLES.get(config.projectType!)!
: 'editor';
return loginAs(roleName);
};
log.serviceLoaded('browserAuth');
await use({ loginAsAdmin, loginAsViewer, loginAsPrivilegedUser });
},
});

View file

@ -7,7 +7,7 @@
* License v3.0 only", or the "Server Side Public License, v 1".
*/
export { browserAuthFixture, browserAuthParallelFixture } from './browser_auth';
export { browserAuthFixture } from './browser_auth';
export type { BrowserAuthFixture } from './browser_auth';
export { scoutPageFixture, scoutPageParallelFixture } from './scout_page';
export type { ScoutPage } from './scout_page';

View file

@ -8,7 +8,7 @@
*/
import { ToolingLog } from '@kbn/tooling-log';
import axios, { AxiosRequestConfig } from 'axios';
import axios from 'axios';
jest.mock('axios');
import {
@ -21,33 +21,23 @@ import {
const axiosRequestMock = jest.spyOn(axios, 'request');
const axiosGetMock = jest.spyOn(axios, 'get');
const log = new ToolingLog();
const mockRequestOnce = (mockedPath: string, response: any) => {
axiosRequestMock.mockImplementationOnce((config: AxiosRequestConfig) => {
if (config.url?.endsWith(mockedPath)) {
return Promise.resolve(response);
}
return Promise.reject(new Error(`Unexpected URL: ${config.url}`));
});
};
const mockGetOnce = (mockedUrl: string, response: any) => {
axiosGetMock.mockImplementationOnce((url: string) => {
if (url === mockedUrl) {
return Promise.resolve(response);
}
return Promise.reject(new Error(`Unexpected URL`));
});
};
jest.mock('timers/promises', () => ({
setTimeout: jest.fn(() => Promise.resolve()),
}));
describe('saml_auth', () => {
const log = new ToolingLog();
describe('createCloudSession', () => {
afterEach(() => {
axiosRequestMock.mockClear();
jest.clearAllMocks();
});
test('returns token value', async () => {
mockRequestOnce('/api/v1/saas/auth/_login', { data: { token: 'mocked_token' }, status: 200 });
axiosRequestMock.mockResolvedValueOnce({
data: { token: 'mocked_token' },
status: 200,
});
const sessionToken = await createCloudSession({
hostname: 'cloud',
@ -60,21 +50,13 @@ describe('saml_auth', () => {
});
test('retries until response has the token value', async () => {
let callCount = 0;
axiosRequestMock.mockImplementation((config: AxiosRequestConfig) => {
if (config.url?.endsWith('/api/v1/saas/auth/_login')) {
callCount += 1;
if (callCount !== 3) {
return Promise.resolve({ data: { message: 'no token' }, status: 503 });
} else {
return Promise.resolve({
data: { token: 'mocked_token' },
status: 200,
});
}
}
return Promise.reject(new Error(`Unexpected URL: ${config.url}`));
});
axiosRequestMock
.mockResolvedValueOnce({ data: { message: 'no token' }, status: 503 })
.mockResolvedValueOnce({ data: { message: 'no token' }, status: 503 })
.mockResolvedValueOnce({
data: { token: 'mocked_token' },
status: 200,
});
const sessionToken = await createCloudSession(
{
@ -94,12 +76,7 @@ describe('saml_auth', () => {
});
test('retries and throws error when response code is not 200', async () => {
axiosRequestMock.mockImplementation((config: AxiosRequestConfig) => {
if (config.url?.endsWith('/api/v1/saas/auth/_login')) {
return Promise.resolve({ data: { message: 'no token' }, status: 503 });
}
return Promise.reject(new Error(`Unexpected URL: ${config.url}`));
});
axiosRequestMock.mockResolvedValue({ data: { message: 'no token' }, status: 503 });
await expect(
createCloudSession(
@ -121,14 +98,9 @@ describe('saml_auth', () => {
});
test('retries and throws error when response has no token value', async () => {
axiosRequestMock.mockImplementation((config: AxiosRequestConfig) => {
if (config.url?.endsWith('/api/v1/saas/auth/_login')) {
return Promise.resolve({
data: { user_id: 1234, okta_session_id: 5678, authenticated: false },
status: 200,
});
}
return Promise.reject(new Error(`Unexpected URL: ${config.url}`));
axiosRequestMock.mockResolvedValue({
data: { user_id: 1234, okta_session_id: 5678, authenticated: false },
status: 200,
});
await expect(
@ -151,6 +123,8 @@ describe('saml_auth', () => {
});
test(`throws error when retry 'attemptsCount' is below 1`, async () => {
axiosRequestMock.mockResolvedValue({ data: { message: 'no token' }, status: 503 });
await expect(
createCloudSession(
{
@ -170,14 +144,9 @@ describe('saml_auth', () => {
});
test(`should fail without retry when response has 'mfa_required: true'`, async () => {
axiosRequestMock.mockImplementation((config: AxiosRequestConfig) => {
if (config.url?.endsWith('/api/v1/saas/auth/_login')) {
return Promise.resolve({
data: { user_id: 12345, authenticated: false, mfa_required: true },
status: 200,
});
}
return Promise.reject(new Error(`Unexpected URL: ${config.url}`));
axiosRequestMock.mockResolvedValue({
data: { user_id: 12345, authenticated: false, mfa_required: true },
status: 200,
});
await expect(
@ -202,10 +171,11 @@ describe('saml_auth', () => {
describe('createSAMLRequest', () => {
afterEach(() => {
axiosRequestMock.mockClear();
jest.clearAllMocks();
});
test('returns { location, sid }', async () => {
mockRequestOnce('/internal/security/login', {
axiosRequestMock.mockResolvedValue({
data: {
location: 'https://cloud.test/saml?SAMLRequest=fVLLbtswEPwVgXe9K6%2F',
},
@ -222,7 +192,7 @@ describe('saml_auth', () => {
});
test(`throws error when response has no 'set-cookie' header`, async () => {
mockRequestOnce('/internal/security/login', {
axiosRequestMock.mockResolvedValue({
data: {
location: 'https://cloud.test/saml?SAMLRequest=fVLLbtswEPwVgXe9K6%2F',
},
@ -235,7 +205,7 @@ describe('saml_auth', () => {
});
test('throws error when location is not a valid url', async () => {
mockRequestOnce('/internal/security/login', {
axiosRequestMock.mockResolvedValue({
data: {
location: 'http/.test',
},
@ -251,7 +221,7 @@ describe('saml_auth', () => {
test('throws error when response has no location', async () => {
const data = { error: 'mocked error' };
mockRequestOnce('/internal/security/login', {
axiosRequestMock.mockResolvedValue({
data,
headers: {
'set-cookie': [`sid=Fe26.2**1234567890; Secure; HttpOnly; Path=/`],
@ -266,8 +236,9 @@ describe('saml_auth', () => {
describe('createSAMLResponse', () => {
afterEach(() => {
axiosGetMock.mockClear();
jest.clearAllMocks();
});
const location = 'https://cloud.test/saml?SAMLRequest=fVLLbtswEPwVgXe9K6%2F';
const createSAMLResponseParams = {
location,
@ -278,7 +249,7 @@ describe('saml_auth', () => {
};
test('returns valid saml response', async () => {
mockGetOnce(location, {
axiosGetMock.mockResolvedValueOnce({
data: `<!DOCTYPE html><html lang="en"><head><title>Test</title></head><body><input type="hidden" name="SAMLResponse" value="PD94bWluc2U+"></body></html>`,
});
@ -287,7 +258,7 @@ describe('saml_auth', () => {
});
test('throws error when failed to parse SAML response value', async () => {
mockGetOnce(location, {
axiosGetMock.mockResolvedValueOnce({
data: `<!DOCTYPE html><html lang="en"><head><title>Test</title></head><body></body></html>`,
});
@ -301,40 +272,77 @@ https://kbn.test.co in the same window.`);
});
describe('finishSAMLHandshake', () => {
afterEach(() => {
axiosRequestMock.mockClear();
});
const params = {
samlResponse: 'mockSAMLResponse',
kbnHost: 'https://kbn.test.co',
sid: 'Fe26.2**1234567890',
log,
};
const cookieStr = 'mocked_cookie';
test('returns valid cookie', async () => {
mockRequestOnce('/api/security/saml/callback', {
const retryCount = 3;
beforeEach(() => {
jest.clearAllMocks();
});
it('should return cookie on 302 response', async () => {
axiosRequestMock.mockResolvedValue({
status: 302,
headers: {
'set-cookie': [`sid=${cookieStr}; Secure; HttpOnly; Path=/`],
},
});
const response = await finishSAMLHandshake({
kbnHost: 'https://kbn.test.co',
samlResponse: 'PD94bWluc2U+',
sid: 'Fe26.2**1234567890',
log,
});
const response = await finishSAMLHandshake(params);
expect(response.key).toEqual('sid');
expect(response.value).toEqual(cookieStr);
expect(axiosRequestMock).toHaveBeenCalledTimes(1);
});
test(`throws error when response has no 'set-cookie' header`, async () => {
mockRequestOnce('/api/security/saml/callback', { headers: {} });
it('should throw an error on 4xx response without retrying', async () => {
axiosRequestMock.mockResolvedValue({ status: 401 });
await expect(
finishSAMLHandshake({
kbnHost: 'https://kbn.test.co',
samlResponse: 'PD94bWluc2U+',
sid: 'Fe26.2**1234567890',
log,
})
).rejects.toThrow(
/Failed to get cookie from SAML callback response: no 'set-cookie' header, response.data:/
await expect(finishSAMLHandshake(params)).rejects.toThrow(
'SAML callback failed: expected 302, got 401'
);
expect(axiosRequestMock).toHaveBeenCalledTimes(1);
});
it('should retry on 5xx response and succeed on 302 response', async () => {
axiosRequestMock
.mockResolvedValueOnce({ status: 503 }) // First attempt fails (5xx), retrying
.mockResolvedValueOnce({
status: 302,
headers: {
'set-cookie': [`sid=${cookieStr}; Secure; HttpOnly; Path=/`],
},
}); // Second attempt succeeds
const response = await finishSAMLHandshake(params);
expect(response.key).toEqual('sid');
expect(response.value).toEqual(cookieStr);
expect(axiosRequestMock).toHaveBeenCalledTimes(2);
});
it('should retry on 5xx response and fail after max attempts', async () => {
const attemptsCount = retryCount + 1;
axiosRequestMock.mockResolvedValue({ status: 503 });
await expect(finishSAMLHandshake(params)).rejects.toThrow(
`Retry failed after ${attemptsCount} attempts: SAML callback failed: expected 302, got 503`
);
expect(axiosRequestMock).toHaveBeenCalledTimes(attemptsCount);
});
it('should stop retrying if a later response is 4xx', async () => {
axiosRequestMock
.mockResolvedValueOnce({ status: 503 }) // First attempt fails (5xx), retrying
.mockResolvedValueOnce({ status: 400 }); // Second attempt gets a 4xx (stop retrying)
await expect(finishSAMLHandshake(params)).rejects.toThrow(
'SAML callback failed: expected 302, got 400'
);
expect(axiosRequestMock).toHaveBeenCalledTimes(2);
});
});
});

View file

@ -7,6 +7,7 @@
* License v3.0 only", or the "Server Side Public License, v 1".
*/
import { setTimeout as delay } from 'timers/promises';
import { createSAMLResponse as createMockedSAMLResponse } from '@kbn/mock-idp-utils';
import { ToolingLog } from '@kbn/tooling-log';
import axios, { AxiosResponse } from 'axios';
@ -14,12 +15,14 @@ import util from 'util';
import * as cheerio from 'cheerio';
import { Cookie, parse as parseCookie } from 'tough-cookie';
import Url from 'url';
import { randomInt } from 'crypto';
import { isValidHostname, isValidUrl } from './helper';
import {
CloudSamlSessionParams,
CreateSamlSessionParams,
LocalSamlSessionParams,
RetryParams,
SAMLCallbackParams,
SAMLResponseValueParams,
UserProfile,
} from './types';
@ -160,7 +163,7 @@ export const createCloudSession = async (
if (--attemptsLeft > 0) {
// log only error message
log.error(`${ex.message}\nWaiting ${retryParams.attemptDelay} ms before the next attempt`);
await new Promise((resolve) => setTimeout(resolve, retryParams.attemptDelay));
await delay(retryParams.attemptDelay);
} else {
log.error(
`Failed to create the new cloud session with ${retryParams.attemptsCount} attempts`
@ -258,12 +261,8 @@ export const finishSAMLHandshake = async ({
samlResponse,
sid,
log,
}: {
kbnHost: string;
samlResponse: string;
sid?: string;
log: ToolingLog;
}) => {
maxRetryCount = 3,
}: SAMLCallbackParams) => {
const encodedResponse = encodeURIComponent(samlResponse);
const url = kbnHost + '/api/security/saml/callback';
const request = {
@ -279,20 +278,48 @@ export const finishSAMLHandshake = async ({
};
let authResponse: AxiosResponse;
try {
authResponse = await axios.request(request);
} catch (ex) {
log.error('Failed to call SAML callback');
cleanException(url, ex);
// Logging the `Cookie: sid=xxxx` header is safe here since its an intermediate, non-authenticated cookie that cannot be reused if leaked.
log.error(`Request sent: ${util.inspect(request)}`);
throw ex;
let attemptsLeft = maxRetryCount + 1;
while (attemptsLeft > 0) {
try {
authResponse = await axios.request(request);
// SAML callback should return 302
if (authResponse.status === 302) {
return getCookieFromResponseHeaders(
authResponse,
'Failed to get cookie from SAML callback response'
);
}
throw new Error(`SAML callback failed: expected 302, got ${authResponse.status}`, {
cause: {
status: authResponse.status, // use response status to retry on 5xx errors
},
});
} catch (ex) {
cleanException(kbnHost, ex);
// retry for 5xx errors
if (ex?.cause?.status >= 500) {
if (--attemptsLeft > 0) {
// randomize delay to avoid retrying API call in parallel workers concurrently
const attemptDelay = randomInt(500, 2_500);
// log only error message
log.error(`${ex.message}\nWaiting ${attemptDelay} ms before the next attempt`);
await delay(attemptDelay);
} else {
throw new Error(`Retry failed after ${maxRetryCount + 1} attempts: ${ex.message}`);
}
} else {
// exit for non 5xx errors
// Logging the `Cookie: sid=xxxx` header is safe here since its an intermediate, non-authenticated cookie that cannot be reused if leaked.
log.error(`Request sent: ${util.inspect(request)}`);
throw ex;
}
}
}
return getCookieFromResponseHeaders(
authResponse,
'Failed to get cookie from SAML callback response'
);
// should never be reached
throw new Error(`Failed to complete SAML handshake callback`);
};
export const getSecurityProfile = async ({

View file

@ -41,6 +41,14 @@ export interface SAMLResponseValueParams {
log: ToolingLog;
}
export interface SAMLCallbackParams {
kbnHost: string;
samlResponse: string;
sid?: string;
log: ToolingLog;
maxRetryCount?: number;
}
export interface User {
readonly email: string;
readonly password: string;