[8.x] [DOCS] Add minimal task manager health APIs (#213862) (#215987)

# Backport

This will backport the following commits from `main` to `8.x`:
- [[DOCS] Add minimal task manager health APIs
(#213862)](https://github.com/elastic/kibana/pull/213862)

<!--- Backport version: 9.6.4 -->

### Questions ?
Please refer to the [Backport tool
documentation](https://github.com/sorenlouv/backport)

<!--BACKPORT [{"author":{"name":"Lisa
Cawley","email":"lcawley@elastic.co"},"sourceCommit":{"committedDate":"2025-03-26T00:04:59Z","message":"[DOCS]
Add minimal task manager health APIs
(#213862)","sha":"c9bfa082a074a8bd1937acc5dc57d82ace306496","branchLabelMapping":{"^v9.1.0$":"main","^v8.19.0$":"8.x","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:skip","Feature:Task
Manager","Team:ResponseOps","docs","backport:version","v9.1.0","v8.19.0","v8.18.1","v9.0.1"],"title":"[DOCS]
Add minimal task manager health
APIs","number":213862,"url":"https://github.com/elastic/kibana/pull/213862","mergeCommit":{"message":"[DOCS]
Add minimal task manager health APIs
(#213862)","sha":"c9bfa082a074a8bd1937acc5dc57d82ace306496"}},"sourceBranch":"main","suggestedTargetBranches":["8.x","8.18","9.0"],"targetPullRequestStates":[{"branch":"main","label":"v9.1.0","branchLabelMappingKey":"^v9.1.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/213862","number":213862,"mergeCommit":{"message":"[DOCS]
Add minimal task manager health APIs
(#213862)","sha":"c9bfa082a074a8bd1937acc5dc57d82ace306496"}},{"branch":"8.x","label":"v8.19.0","branchLabelMappingKey":"^v8.19.0$","isSourceBranch":false,"state":"NOT_CREATED"},{"branch":"8.18","label":"v8.18.1","branchLabelMappingKey":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"},{"branch":"9.0","label":"v9.0.1","branchLabelMappingKey":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"}]}]
BACKPORT-->

---------

Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
Lisa Cawley 2025-03-26 01:49:05 -07:00 committed by GitHub
parent f417bd1ec0
commit 3be9169960
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 6253 additions and 0 deletions

View file

@ -151,6 +151,11 @@ tags:
x-displayName: System
description: |
Get information about the system status, resource usage, and installed plugins.
- externalDocs:
description: Task manager
url: https://www.elastic.co/guide/en/kibana/current/task-manager-production-considerations.html
name: task manager
x-displayName: Task manager
paths:
/api/actions/connector_types:
get:
@ -40310,6 +40315,24 @@ paths:
tags:
- streams
x-state: Technical Preview
/api/task_manager/_health:
get:
description: |
Get the health status of the Kibana task manager.
operationId: task-manager-health
responses:
'200':
content:
application/json:
examples:
taskManagerHealthResponse1:
$ref: '#/components/examples/Task_manager_health_Serverless_APIs_health_200response_serverless'
schema:
$ref: '#/components/schemas/Task_manager_health_Serverless_APIs_health_response_serverless'
description: Indicates a successful call
summary: Get the task manager health
tags:
- task manager
/api/timeline:
delete:
description: Delete one or more Timelines or Timeline templates.
@ -43036,6 +43059,305 @@ components:
icon: indexPatternApp
title: Kibana Sample Data Logs
type: index-pattern
Task_manager_health_Serverless_APIs_health_200response_serverless:
description: A successful response from `GET /api/task_manager/_health`.
value: |-
{
"id": "b44483e1-3ba2-4f28-93d0-1d96c69c32c1",
"timestamp": "2025-03-21T21:49:50.409Z",
"status": "OK",
"last_update": "2025-03-21T21:48:53.996Z",
"stats": {
"configuration": {
"timestamp": "2025-03-21T21:47:51.663Z",
"value": {
"request_capacity": 1000,
"monitored_aggregated_stats_refresh_rate": 60000,
"monitored_stats_running_average_window": 50,
"monitored_task_execution_thresholds": {
"custom": {},
"default": {
"error_threshold": 90,
"warn_threshold": 80
}
},
"claim_strategy": "mget",
"poll_interval": 500,
"capacity": {
"config": 10,
"as_workers": 10,
"as_cost": 20
}
},
"status": "OK"
},
"workload": {
"timestamp": "2025-03-21T21:48:53.996Z",
"value": {
"count": 21,
"cost": 42,
"task_types": {
"Fleet-Metrics-Task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"Fleet-Usage-Logger": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"Fleet-Usage-Sender": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"ML:saved-objects-sync": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"actions:connector_usage_reporting": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"actions_telemetry": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"alerting_health_check": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"alerting_telemetry": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"alerts_invalidate_api_keys": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"cases-telemetry-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"dashboard_telemetry": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:automatic-agent-upgrade-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:check-deleted-files-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:delete-unenrolled-agents-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:sync-integrations-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:unenroll-inactive-agents-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:upgrade-agentless-deployments-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"logs-data-telemetry": {
"count": 1,
"cost": 2,
"status": {
"running": 1
}
},
"session_cleanup": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"task_manager:delete_inactive_background_task_nodes": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"task_manager:mark_removed_tasks_as_unrecognized": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
}
},
"non_recurring": 1,
"non_recurring_cost": 2,
"schedule": [
[
"1m",
2
],
[
"5m",
2
],
[
"10m",
1
],
[
"15m",
1
],
[
"30m",
1
],
[
"1h",
5
],
[
"3600s",
1
],
[
"60m",
1
],
[
"720m",
1
],
[
"1d",
4
],
[
"1440m",
1
]
],
"overdue": 0,
"overdue_cost": 0,
"overdue_non_recurring": 0,
"estimated_schedule_density": [
0,
0,
1,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"capacity_requirements": {
"per_minute": 2,
"per_hour": 43,
"per_day": 7
}
},
"status": "OK"
}
}
}
get_connector_types_generativeai_response:
summary: A list of connector types for the `generativeAI` feature.
value:
@ -57570,6 +57892,33 @@ components:
$ref: '#/components/schemas/SLOs_time_window'
title: Update SLO request
type: object
Task_manager_health_Serverless_APIs_configuration:
description: |
This object summarizes the current configuration of Task Manager. This includes dynamic configurations that change over time, such as `poll_interval` and `max_workers`, which can adjust in reaction to changing load on the system.
type: object
Task_manager_health_Serverless_APIs_health_response_serverless:
title: Task health response properties
type: object
properties:
id:
type: string
last_update:
type: string
stats:
type: object
properties:
configuration:
$ref: '#/components/schemas/Task_manager_health_Serverless_APIs_configuration'
workload:
$ref: '#/components/schemas/Task_manager_health_Serverless_APIs_workload'
status:
type: string
timestamp:
type: string
Task_manager_health_Serverless_APIs_workload:
description: |
This object summarizes the work load across the cluster, including the tasks in the system, their types, and current status.
type: object
bedrock_config:
title: Connector request properties for an Amazon Bedrock connector
description: Defines properties for connectors when type is `.bedrock`.

File diff suppressed because it is too large Load diff

View file

@ -25,6 +25,7 @@ const { REPO_ROOT } = require('@kbn/repo-info');
`${REPO_ROOT}/x-pack/platform/plugins/shared/security/docs/openapi/user_session_apis.yaml`,
`${REPO_ROOT}/src/platform/plugins/shared/share/docs/openapi/short_url_apis.yaml`,
`${REPO_ROOT}/x-pack/platform/plugins/private/logstash/docs/openapi/logstash_apis.yaml`,
`${REPO_ROOT}/x-pack/platform/plugins/shared/task_manager/docs/openapi/bundled.yaml`,
// Observability Solution
`${REPO_ROOT}/x-pack/solutions/observability/plugins/apm/docs/openapi/apm/bundled.yaml`,

View file

@ -0,0 +1,21 @@
# OpenAPI (Experimental)
The current self-contained spec file can be used for online tools like those found at https://openapi.tools/. This spec is experimental and may be incomplete or change later.
A guide to the OpenAPI specification can be found at [https://swagger.io/docs/specification/about/](https://swagger.io/docs/specification/about/).
## The `openapi` folder
* `entrypoint*.yaml` are the overview files that pull together all the components and examples.
* `components`: Reusable components
## Tools
Generate the `bundled` files by running the following commands:
```bash
npx @redocly/cli bundle entrypoint.yaml --output bundled.yaml --ext yaml
npx @redocly/cli bundle entrypoint_serverless.yaml --output bundled_serverless.yaml --ext yaml
```
These files are joined with the rest of the Kibana APIs per `oas_docs/README.md`.

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,364 @@
openapi: 3.0.3
info:
title: Task manager health Serverless APIs
description: Kibana APIs for the task manager feature
version: 1.0.0
license:
name: Elastic License 2.0
url: https://www.elastic.co/licensing/elastic-license
servers:
- url: /
tags:
- name: task manager
x-displayName: Task manager
externalDocs:
url: https://www.elastic.co/guide/en/kibana/current/task-manager-production-considerations.html
description: Task manager
paths:
/api/task_manager/_health:
get:
summary: Get the task manager health
description: |
Get the health status of the Kibana task manager.
operationId: task-manager-health
tags:
- task manager
responses:
'200':
description: Indicates a successful call
content:
application/json:
schema:
$ref: '#/components/schemas/health_response_serverless'
examples:
taskManagerHealthResponse1:
$ref: '#/components/examples/health_200response_serverless'
components:
schemas:
configuration:
type: object
description: |
This object summarizes the current configuration of Task Manager. This includes dynamic configurations that change over time, such as `poll_interval` and `max_workers`, which can adjust in reaction to changing load on the system.
workload:
type: object
description: |
This object summarizes the work load across the cluster, including the tasks in the system, their types, and current status.
health_response_serverless:
title: Task health response properties
type: object
properties:
id:
type: string
last_update:
type: string
stats:
type: object
properties:
configuration:
$ref: '#/components/schemas/configuration'
workload:
$ref: '#/components/schemas/workload'
status:
type: string
timestamp:
type: string
examples:
health_200response_serverless:
description: A successful response from `GET /api/task_manager/_health`.
value: |-
{
"id": "b44483e1-3ba2-4f28-93d0-1d96c69c32c1",
"timestamp": "2025-03-21T21:49:50.409Z",
"status": "OK",
"last_update": "2025-03-21T21:48:53.996Z",
"stats": {
"configuration": {
"timestamp": "2025-03-21T21:47:51.663Z",
"value": {
"request_capacity": 1000,
"monitored_aggregated_stats_refresh_rate": 60000,
"monitored_stats_running_average_window": 50,
"monitored_task_execution_thresholds": {
"custom": {},
"default": {
"error_threshold": 90,
"warn_threshold": 80
}
},
"claim_strategy": "mget",
"poll_interval": 500,
"capacity": {
"config": 10,
"as_workers": 10,
"as_cost": 20
}
},
"status": "OK"
},
"workload": {
"timestamp": "2025-03-21T21:48:53.996Z",
"value": {
"count": 21,
"cost": 42,
"task_types": {
"Fleet-Metrics-Task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"Fleet-Usage-Logger": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"Fleet-Usage-Sender": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"ML:saved-objects-sync": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"actions:connector_usage_reporting": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"actions_telemetry": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"alerting_health_check": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"alerting_telemetry": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"alerts_invalidate_api_keys": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"cases-telemetry-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"dashboard_telemetry": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:automatic-agent-upgrade-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:check-deleted-files-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:delete-unenrolled-agents-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:sync-integrations-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:unenroll-inactive-agents-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:upgrade-agentless-deployments-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"logs-data-telemetry": {
"count": 1,
"cost": 2,
"status": {
"running": 1
}
},
"session_cleanup": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"task_manager:delete_inactive_background_task_nodes": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"task_manager:mark_removed_tasks_as_unrecognized": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
}
},
"non_recurring": 1,
"non_recurring_cost": 2,
"schedule": [
[
"1m",
2
],
[
"5m",
2
],
[
"10m",
1
],
[
"15m",
1
],
[
"30m",
1
],
[
"1h",
5
],
[
"3600s",
1
],
[
"60m",
1
],
[
"720m",
1
],
[
"1d",
4
],
[
"1440m",
1
]
],
"overdue": 0,
"overdue_cost": 0,
"overdue_non_recurring": 0,
"estimated_schedule_density": [
0,
0,
1,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"capacity_requirements": {
"per_minute": 2,
"per_hour": 43,
"per_day": 7
}
},
"status": "OK"
}
}
}

View file

@ -0,0 +1,299 @@
# summary:
description: A successful response from `GET /api/task_manager/_health`.
value: |-
{
"id": "b44483e1-3ba2-4f28-93d0-1d96c69c32c1",
"timestamp": "2025-03-21T21:49:50.409Z",
"status": "OK",
"last_update": "2025-03-21T21:48:53.996Z",
"stats": {
"configuration": {
"timestamp": "2025-03-21T21:47:51.663Z",
"value": {
"request_capacity": 1000,
"monitored_aggregated_stats_refresh_rate": 60000,
"monitored_stats_running_average_window": 50,
"monitored_task_execution_thresholds": {
"custom": {},
"default": {
"error_threshold": 90,
"warn_threshold": 80
}
},
"claim_strategy": "mget",
"poll_interval": 500,
"capacity": {
"config": 10,
"as_workers": 10,
"as_cost": 20
}
},
"status": "OK"
},
"workload": {
"timestamp": "2025-03-21T21:48:53.996Z",
"value": {
"count": 21,
"cost": 42,
"task_types": {
"Fleet-Metrics-Task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"Fleet-Usage-Logger": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"Fleet-Usage-Sender": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"ML:saved-objects-sync": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"actions:connector_usage_reporting": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"actions_telemetry": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"alerting_health_check": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"alerting_telemetry": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"alerts_invalidate_api_keys": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"cases-telemetry-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"dashboard_telemetry": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:automatic-agent-upgrade-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:check-deleted-files-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:delete-unenrolled-agents-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:sync-integrations-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:unenroll-inactive-agents-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:upgrade-agentless-deployments-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"logs-data-telemetry": {
"count": 1,
"cost": 2,
"status": {
"running": 1
}
},
"session_cleanup": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"task_manager:delete_inactive_background_task_nodes": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"task_manager:mark_removed_tasks_as_unrecognized": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
}
},
"non_recurring": 1,
"non_recurring_cost": 2,
"schedule": [
[
"1m",
2
],
[
"5m",
2
],
[
"10m",
1
],
[
"15m",
1
],
[
"30m",
1
],
[
"1h",
5
],
[
"3600s",
1
],
[
"60m",
1
],
[
"720m",
1
],
[
"1d",
4
],
[
"1440m",
1
]
],
"overdue": 0,
"overdue_cost": 0,
"overdue_non_recurring": 0,
"estimated_schedule_density": [
0,
0,
1,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"capacity_requirements": {
"per_minute": 2,
"per_hour": 43,
"per_day": 7
}
},
"status": "OK"
}
}
}

View file

@ -0,0 +1,4 @@
# Schema for the `configuration` entry under `stats` in the task manager
# health response. It is declared as a plain object; no properties are
# enumerated here.
description: >
  This object summarizes the current configuration of Task Manager.
  This includes dynamic configurations that change over time,
  such as `poll_interval` and `max_workers`,
  which can adjust in reaction to changing load on the system.
type: object

View file

@ -0,0 +1,27 @@
# Response body schema for `GET /api/task_manager/_health`.
# The `stats` object groups the individual health sections; `configuration`
# and `workload` are defined in sibling files and pulled in by relative $ref.
title: Task health response properties
type: object
properties:
  id:
    type: string
  last_update:
    type: string
    # Example responses carry RFC 3339 values such as 2025-03-21T21:48:53.996Z.
    format: date-time
  stats:
    type: object
    properties:
      capacity_estimation:
        type: object
        description: >
          This object provides a rough estimate about the sufficiency of Task Manager's capacity.
          These are estimates based on historical data and should not be used as predictions.
      configuration:
        $ref: 'configuration.yaml'
      runtime:
        type: object
        description: >
          This object tracks runtime performance of Task Manager, covering task drift, worker load, and stats broken down by type, including duration and run results.
      workload:
        $ref: 'workload.yaml'
  status:
    type: string
  timestamp:
    type: string
    # Example responses carry RFC 3339 values such as 2025-03-21T21:49:50.409Z.
    format: date-time

View file

@ -0,0 +1,18 @@
# Response body schema for `GET /api/task_manager/_health` in the serverless
# API set. Only the `configuration` and `workload` sections are listed under
# `stats`; both are defined in sibling files and pulled in by relative $ref.
title: Task health response properties
type: object
properties:
  id:
    type: string
  last_update:
    type: string
    # Example responses carry RFC 3339 values such as 2025-03-21T21:48:53.996Z.
    format: date-time
  stats:
    type: object
    properties:
      configuration:
        $ref: 'configuration.yaml'
      workload:
        $ref: 'workload.yaml'
  status:
    type: string
  timestamp:
    type: string
    # Example responses carry RFC 3339 values such as 2025-03-21T21:49:50.409Z.
    format: date-time

View file

@ -0,0 +1,3 @@
# Schema for the `workload` entry under `stats` in the task manager health
# response. It is declared as a plain object; no properties are enumerated
# here.
description: >
  This object summarizes the work load across the cluster,
  including the tasks in the system, their types, and current status.
type: object

View file

@ -0,0 +1,36 @@
# Entry point for the task manager health API specification.
# The response schema and example are kept in the `components` folder and
# pulled in by relative $ref.
openapi: '3.0.3'
info:
  title: Task manager health APIs
  description: Kibana APIs for the task manager feature
  version: '1.0.0'
  license:
    name: Elastic License 2.0
    url: https://www.elastic.co/licensing/elastic-license
servers:
  - url: /
tags:
  - name: task manager
    x-displayName: Task manager
    externalDocs:
      description: Task manager
      url: https://www.elastic.co/guide/en/kibana/current/task-manager-production-considerations.html
paths:
  /api/task_manager/_health:
    get:
      operationId: task-manager-health
      summary: Get the task manager health
      description: |
        Get the health status of the Kibana task manager.
      tags:
        - task manager
      responses:
        '200':
          description: Indicates a successful call
          content:
            application/json:
              schema:
                $ref: 'components/schemas/health_response.yaml'
              examples:
                taskManagerHealthResponse1:
                  $ref: 'components/examples/health_200response.yaml'

View file

@ -0,0 +1,36 @@
# Entry point for the serverless task manager health API specification.
# The response schema and example are kept in the `components` folder and
# pulled in by relative $ref.
openapi: '3.0.3'
info:
  title: Task manager health Serverless APIs
  description: Kibana APIs for the task manager feature
  version: '1.0.0'
  license:
    name: Elastic License 2.0
    url: https://www.elastic.co/licensing/elastic-license
servers:
  - url: /
tags:
  - name: task manager
    x-displayName: Task manager
    externalDocs:
      description: Task manager
      url: https://www.elastic.co/guide/en/kibana/current/task-manager-production-considerations.html
paths:
  /api/task_manager/_health:
    get:
      operationId: task-manager-health
      summary: Get the task manager health
      description: |
        Get the health status of the Kibana task manager.
      tags:
        - task manager
      responses:
        '200':
          description: Indicates a successful call
          content:
            application/json:
              schema:
                $ref: 'components/schemas/health_response_serverless.yaml'
              examples:
                taskManagerHealthResponse1:
                  $ref: 'components/examples/health_200response_serverless.yaml'

File diff suppressed because it is too large Load diff