Reduce noisy logs about claiming on all partitions (#199405)

Resolves https://github.com/elastic/response-ops-team/issues/257

In this PR, I'm throttling the `Background task node
"${taskPartitioner.getPodName()}" has no assigned partitions, claiming
against all partitions` warning logs to once per minute. I'm also adding
an info log whenever the node has assigned partitions and a warning
message was previously logged.

## To verify

1. Startup Kibana with a fresh Elasticsearch instance
2. Notice the warning log followed by the info log
3. Apply the following diff
```
diff --git a/x-pack/plugins/task_manager/server/kibana_discovery_service/kibana_discovery_service.ts b/x-pack/plugins/task_manager/server/kibana_discovery_service/kibana_discovery_service.ts
index 1c4fcb00981..df6bf6ca377 100644
--- a/x-pack/plugins/task_manager/server/kibana_discovery_service/kibana_discovery_service.ts
+++ b/x-pack/plugins/task_manager/server/kibana_discovery_service/kibana_discovery_service.ts
@@ -63,7 +63,7 @@ export class KibanaDiscoveryService {
       const lastSeenDate = new Date();
       const lastSeen = lastSeenDate.toISOString();
       try {
-        await this.upsertCurrentNode({ id: this.currentNode, lastSeen });
+        // await this.upsertCurrentNode({ id: this.currentNode, lastSeen });
         if (!this.started) {
           this.logger.info('Kibana Discovery Service has been started');
           this.started = true;
```
4. Notice the warning log message happening every minute
This commit is contained in:
Mike Côté 2024-11-08 12:57:34 -05:00 committed by GitHub
parent eff0f2620b
commit 32b942edfb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 140 additions and 1 deletions

View file

@ -11,6 +11,7 @@ import { v4 as uuidv4 } from 'uuid';
import { filter, take } from 'rxjs';
import { CLAIM_STRATEGY_MGET, DEFAULT_KIBANAS_PER_PARTITION } from '../config';
import { NO_ASSIGNED_PARTITIONS_WARNING_INTERVAL } from './strategy_mget';
import {
TaskStatus,
@ -2260,6 +2261,129 @@ describe('TaskClaiming', () => {
}
`);
});
test(`it should log warning on interval when the node has no assigned partitions`, async () => {
// Reset the warning timer by advancing more
fakeTimer.tick(NO_ASSIGNED_PARTITIONS_WARNING_INTERVAL);
jest.spyOn(taskPartitioner, 'getPartitions').mockResolvedValue([]);
const taskManagerId = uuidv4();
const definitions = new TaskTypeDictionary(mockLogger());
definitions.registerTaskDefinitions({
foo: {
title: 'foo',
createTaskRunner: jest.fn(),
},
bar: {
title: 'bar',
createTaskRunner: jest.fn(),
},
});
await testClaimAvailableTasks({
storeOpts: {
taskManagerId,
definitions,
},
taskClaimingOpts: {},
claimingOpts: {
claimOwnershipUntil: new Date(),
},
});
expect(taskManagerLogger.warn).toHaveBeenCalledWith(
'Background task node "test" has no assigned partitions, claiming against all partitions',
{ tags: ['taskClaiming', 'claimAvailableTasksMget'] }
);
taskManagerLogger.warn.mockReset();
fakeTimer.tick(NO_ASSIGNED_PARTITIONS_WARNING_INTERVAL - 500);
await testClaimAvailableTasks({
storeOpts: {
taskManagerId,
definitions,
},
taskClaimingOpts: {},
claimingOpts: {
claimOwnershipUntil: new Date(),
},
});
expect(taskManagerLogger.warn).not.toHaveBeenCalled();
fakeTimer.tick(500);
await testClaimAvailableTasks({
storeOpts: {
taskManagerId,
definitions,
},
taskClaimingOpts: {},
claimingOpts: {
claimOwnershipUntil: new Date(),
},
});
expect(taskManagerLogger.warn).toHaveBeenCalledWith(
'Background task node "test" has no assigned partitions, claiming against all partitions',
{ tags: ['taskClaiming', 'claimAvailableTasksMget'] }
);
});
test(`it should log a message after the node no longer has no assigned partitions`, async () => {
// Reset the warning timer by advancing more
fakeTimer.tick(NO_ASSIGNED_PARTITIONS_WARNING_INTERVAL);
jest.spyOn(taskPartitioner, 'getPartitions').mockResolvedValue([]);
const taskManagerId = uuidv4();
const definitions = new TaskTypeDictionary(mockLogger());
definitions.registerTaskDefinitions({
foo: {
title: 'foo',
createTaskRunner: jest.fn(),
},
bar: {
title: 'bar',
createTaskRunner: jest.fn(),
},
});
await testClaimAvailableTasks({
storeOpts: {
taskManagerId,
definitions,
},
taskClaimingOpts: {},
claimingOpts: {
claimOwnershipUntil: new Date(),
},
});
expect(taskManagerLogger.warn).toHaveBeenCalledWith(
'Background task node "test" has no assigned partitions, claiming against all partitions',
{ tags: ['taskClaiming', 'claimAvailableTasksMget'] }
);
taskManagerLogger.warn.mockReset();
jest.spyOn(taskPartitioner, 'getPartitions').mockResolvedValue([1, 2, 3]);
fakeTimer.tick(500);
await testClaimAvailableTasks({
storeOpts: {
taskManagerId,
definitions,
},
taskClaimingOpts: {},
claimingOpts: {
claimOwnershipUntil: new Date(),
},
});
expect(taskManagerLogger.warn).not.toHaveBeenCalled();
expect(taskManagerLogger.info).toHaveBeenCalledWith(
`Background task node "${taskPartitioner.getPodName()}" now claiming with assigned partitions`,
{ tags: ['taskClaiming', 'claimAvailableTasksMget'] }
);
});
});
describe('task events', () => {

View file

@ -300,6 +300,9 @@ interface SearchAvailableTasksResponse {
versionMap: Map<string, ConcreteTaskInstanceVersion>;
}
let lastPartitionWarningLog: number | undefined;
export const NO_ASSIGNED_PARTITIONS_WARNING_INTERVAL = 60000;
async function searchAvailableTasks({
definitions,
taskTypes,
@ -320,10 +323,22 @@ async function searchAvailableTasks({
definitions,
});
const partitions = await taskPartitioner.getPartitions();
if (partitions.length === 0) {
if (
partitions.length === 0 &&
(lastPartitionWarningLog == null ||
lastPartitionWarningLog <= Date.now() - NO_ASSIGNED_PARTITIONS_WARNING_INTERVAL)
) {
logger.warn(
`Background task node "${taskPartitioner.getPodName()}" has no assigned partitions, claiming against all partitions`
);
lastPartitionWarningLog = Date.now();
}
if (partitions.length !== 0 && lastPartitionWarningLog) {
lastPartitionWarningLog = undefined;
logger.info(
`Background task node "${taskPartitioner.getPodName()}" now claiming with assigned partitions`
);
}
const sort: NonNullable<SearchOpts['sort']> = getClaimSort(definitions);