[9.0] [DOCS] Add minimal task manager health APIs (#213862) (#215986)

# Backport

This will backport the following commits from `main` to `9.0`:
- [[DOCS] Add minimal task manager health APIs
(#213862)](https://github.com/elastic/kibana/pull/213862)

<!--- Backport version: 9.6.4 -->

### Questions ?
Please refer to the [Backport tool
documentation](https://github.com/sorenlouv/backport)

<!--BACKPORT [{"author":{"name":"Lisa
Cawley","email":"lcawley@elastic.co"},"sourceCommit":{"committedDate":"2025-03-26T00:04:59Z","message":"[DOCS]
Add minimal task manager health APIs
(#213862)","sha":"c9bfa082a074a8bd1937acc5dc57d82ace306496","branchLabelMapping":{"^v9.1.0$":"main","^v8.19.0$":"8.x","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:skip","Feature:Task
Manager","Team:ResponseOps","docs","backport:version","v9.1.0","v8.19.0","v8.18.1","v9.0.1"],"title":"[DOCS]
Add minimal task manager health
APIs","number":213862,"url":"https://github.com/elastic/kibana/pull/213862","mergeCommit":{"message":"[DOCS]
Add minimal task manager health APIs
(#213862)","sha":"c9bfa082a074a8bd1937acc5dc57d82ace306496"}},"sourceBranch":"main","suggestedTargetBranches":["8.x","8.18","9.0"],"targetPullRequestStates":[{"branch":"main","label":"v9.1.0","branchLabelMappingKey":"^v9.1.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/213862","number":213862,"mergeCommit":{"message":"[DOCS]
Add minimal task manager health APIs
(#213862)","sha":"c9bfa082a074a8bd1937acc5dc57d82ace306496"}},{"branch":"8.x","label":"v8.19.0","branchLabelMappingKey":"^v8.19.0$","isSourceBranch":false,"state":"NOT_CREATED"},{"branch":"8.18","label":"v8.18.1","branchLabelMappingKey":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"},{"branch":"9.0","label":"v9.0.1","branchLabelMappingKey":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"}]}]
BACKPORT-->

---------

Co-authored-by: kibanamachine <42973632+kibanamachine@users.noreply.github.com>
This commit is contained in:
Lisa Cawley 2025-03-26 01:48:35 -07:00 committed by GitHub
parent 009545dc8e
commit 07725fcc21
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 8885 additions and 1 deletions

View file

@ -28,7 +28,7 @@ merge-api-docs-stateful: ## Merge only kibana.yaml
.PHONY: api-docs-lint
api-docs-lint: ## Run redocly API docs linter
@npx @redocly/cli lint "output/*.yaml" --config "linters/redocly.yaml" --format stylish --max-problems 500
@npx @redocly/cli lint "output/kibana.yaml" --config "linters/redocly.yaml" --format stylish --max-problems 500
.PHONY: api-docs-lint-stateful
api-docs-lint-stateful: ## Run redocly API docs linter on kibana.yaml

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -25,6 +25,7 @@ const { REPO_ROOT } = require('@kbn/repo-info');
`${REPO_ROOT}/x-pack/platform/plugins/shared/security/docs/openapi/user_session_apis.yaml`,
`${REPO_ROOT}/src/platform/plugins/shared/share/docs/openapi/short_url_apis.yaml`,
`${REPO_ROOT}/x-pack/platform/plugins/private/logstash/docs/openapi/logstash_apis.yaml`,
`${REPO_ROOT}/x-pack/platform/plugins/shared/task_manager/docs/openapi/bundled.yaml`,
// Observability Solution
`${REPO_ROOT}/x-pack/solutions/observability/plugins/apm/docs/openapi/apm/bundled.yaml`,

View file

@ -18,6 +18,7 @@ const { REPO_ROOT } = require('@kbn/repo-info');
`${REPO_ROOT}/src/platform/plugins/shared/data_views/docs/openapi/bundled.yaml`,
`${REPO_ROOT}/x-pack/platform/plugins/shared/ml/common/openapi/ml_apis_serverless.yaml`,
`${REPO_ROOT}/src/core/packages/saved-objects/docs/openapi/bundled_serverless.yaml`,
`${REPO_ROOT}/x-pack/platform/plugins/shared/task_manager/docs/openapi/bundled_serverless.yaml`,
// Observability Solution
`${REPO_ROOT}/x-pack/solutions/observability/plugins/apm/docs/openapi/apm/bundled.yaml`,

View file

@ -0,0 +1,21 @@
# OpenAPI (Experimental)
The current self-contained spec file can be used with online tools like those found at https://openapi.tools/. This spec is experimental and may be incomplete or change later.
A guide to the OpenAPI Specification can be found at [https://swagger.io/docs/specification/about/](https://swagger.io/docs/specification/about/).
## The `openapi` folder
* `entrypoint*.yaml` are the overview files that pull together all the components and examples.
* `components`: Reusable components
## Tools
Generate the `bundled` files by running the following commands:
```bash
npx @redocly/cli bundle entrypoint.yaml --output bundled.yaml --ext yaml
npx @redocly/cli bundle entrypoint_serverless.yaml --output bundled_serverless.yaml --ext yaml
```
These files are joined with the rest of the Kibana APIs per `oas_docs/README.md`.

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,364 @@
openapi: 3.0.3
info:
title: Task manager health Serverless APIs
description: Kibana APIs for the task manager feature
version: 1.0.0
license:
name: Elastic License 2.0
url: https://www.elastic.co/licensing/elastic-license
servers:
- url: /
tags:
- name: task manager
x-displayName: Task manager
externalDocs:
url: https://www.elastic.co/guide/en/kibana/current/task-manager-production-considerations.html
description: Task manager
paths:
/api/task_manager/_health:
get:
summary: Get the task manager health
description: |
Get the health status of the Kibana task manager.
operationId: task-manager-health
tags:
- task manager
responses:
'200':
description: Indicates a successful call
content:
application/json:
schema:
$ref: '#/components/schemas/health_response_serverless'
examples:
taskManagerHealthResponse1:
$ref: '#/components/examples/health_200response_serverless'
components:
schemas:
configuration:
type: object
description: |
This object summarizes the current configuration of Task Manager. This includes dynamic configurations that change over time, such as `poll_interval` and `max_workers`, which can adjust in reaction to changing load on the system.
workload:
type: object
description: |
This object summarizes the workload across the cluster, including the tasks in the system, their types, and current status.
health_response_serverless:
title: Task health response properties
type: object
properties:
id:
type: string
last_update:
type: string
stats:
type: object
properties:
configuration:
$ref: '#/components/schemas/configuration'
workload:
$ref: '#/components/schemas/workload'
status:
type: string
timestamp:
type: string
examples:
health_200response_serverless:
description: A successful response from `GET /api/task_manager/_health`.
value: |-
{
"id": "b44483e1-3ba2-4f28-93d0-1d96c69c32c1",
"timestamp": "2025-03-21T21:49:50.409Z",
"status": "OK",
"last_update": "2025-03-21T21:48:53.996Z",
"stats": {
"configuration": {
"timestamp": "2025-03-21T21:47:51.663Z",
"value": {
"request_capacity": 1000,
"monitored_aggregated_stats_refresh_rate": 60000,
"monitored_stats_running_average_window": 50,
"monitored_task_execution_thresholds": {
"custom": {},
"default": {
"error_threshold": 90,
"warn_threshold": 80
}
},
"claim_strategy": "mget",
"poll_interval": 500,
"capacity": {
"config": 10,
"as_workers": 10,
"as_cost": 20
}
},
"status": "OK"
},
"workload": {
"timestamp": "2025-03-21T21:48:53.996Z",
"value": {
"count": 21,
"cost": 42,
"task_types": {
"Fleet-Metrics-Task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"Fleet-Usage-Logger": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"Fleet-Usage-Sender": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"ML:saved-objects-sync": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"actions:connector_usage_reporting": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"actions_telemetry": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"alerting_health_check": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"alerting_telemetry": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"alerts_invalidate_api_keys": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"cases-telemetry-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"dashboard_telemetry": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:automatic-agent-upgrade-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:check-deleted-files-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:delete-unenrolled-agents-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:sync-integrations-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:unenroll-inactive-agents-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:upgrade-agentless-deployments-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"logs-data-telemetry": {
"count": 1,
"cost": 2,
"status": {
"running": 1
}
},
"session_cleanup": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"task_manager:delete_inactive_background_task_nodes": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"task_manager:mark_removed_tasks_as_unrecognized": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
}
},
"non_recurring": 1,
"non_recurring_cost": 2,
"schedule": [
[
"1m",
2
],
[
"5m",
2
],
[
"10m",
1
],
[
"15m",
1
],
[
"30m",
1
],
[
"1h",
5
],
[
"3600s",
1
],
[
"60m",
1
],
[
"720m",
1
],
[
"1d",
4
],
[
"1440m",
1
]
],
"overdue": 0,
"overdue_cost": 0,
"overdue_non_recurring": 0,
"estimated_schedule_density": [
0,
0,
1,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"capacity_requirements": {
"per_minute": 2,
"per_hour": 43,
"per_day": 7
}
},
"status": "OK"
}
}
}

View file

@ -0,0 +1,299 @@
# summary:
description: A successful response from `GET /api/task_manager/_health`.
value: |-
{
"id": "b44483e1-3ba2-4f28-93d0-1d96c69c32c1",
"timestamp": "2025-03-21T21:49:50.409Z",
"status": "OK",
"last_update": "2025-03-21T21:48:53.996Z",
"stats": {
"configuration": {
"timestamp": "2025-03-21T21:47:51.663Z",
"value": {
"request_capacity": 1000,
"monitored_aggregated_stats_refresh_rate": 60000,
"monitored_stats_running_average_window": 50,
"monitored_task_execution_thresholds": {
"custom": {},
"default": {
"error_threshold": 90,
"warn_threshold": 80
}
},
"claim_strategy": "mget",
"poll_interval": 500,
"capacity": {
"config": 10,
"as_workers": 10,
"as_cost": 20
}
},
"status": "OK"
},
"workload": {
"timestamp": "2025-03-21T21:48:53.996Z",
"value": {
"count": 21,
"cost": 42,
"task_types": {
"Fleet-Metrics-Task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"Fleet-Usage-Logger": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"Fleet-Usage-Sender": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"ML:saved-objects-sync": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"actions:connector_usage_reporting": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"actions_telemetry": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"alerting_health_check": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"alerting_telemetry": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"alerts_invalidate_api_keys": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"cases-telemetry-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"dashboard_telemetry": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:automatic-agent-upgrade-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:check-deleted-files-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:delete-unenrolled-agents-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:sync-integrations-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:unenroll-inactive-agents-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"fleet:upgrade-agentless-deployments-task": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"logs-data-telemetry": {
"count": 1,
"cost": 2,
"status": {
"running": 1
}
},
"session_cleanup": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"task_manager:delete_inactive_background_task_nodes": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
},
"task_manager:mark_removed_tasks_as_unrecognized": {
"count": 1,
"cost": 2,
"status": {
"idle": 1
}
}
},
"non_recurring": 1,
"non_recurring_cost": 2,
"schedule": [
[
"1m",
2
],
[
"5m",
2
],
[
"10m",
1
],
[
"15m",
1
],
[
"30m",
1
],
[
"1h",
5
],
[
"3600s",
1
],
[
"60m",
1
],
[
"720m",
1
],
[
"1d",
4
],
[
"1440m",
1
]
],
"overdue": 0,
"overdue_cost": 0,
"overdue_non_recurring": 0,
"estimated_schedule_density": [
0,
0,
1,
0,
0,
0,
0,
1,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
],
"capacity_requirements": {
"per_minute": 2,
"per_hour": 43,
"per_day": 7
}
},
"status": "OK"
}
}
}

View file

@ -0,0 +1,4 @@
type: object
description: >
This object summarizes the current configuration of Task Manager.
This includes dynamic configurations that change over time, such as `poll_interval` and `max_workers`, which can adjust in reaction to changing load on the system.

View file

@ -0,0 +1,27 @@
title: Task health response properties
type: object
properties:
id:
type: string
last_update:
type: string
stats:
type: object
properties:
capacity_estimation:
type: object
description: >
This object provides a rough estimate about the sufficiency of its capacity.
These are estimates based on historical data and should not be used as predictions.
configuration:
$ref: 'configuration.yaml'
runtime:
type: object
description: >
This object tracks runtime performance of Task Manager, tracking task drift, worker load, and stats broken down by type, including duration and run results.
workload:
$ref: 'workload.yaml'
status:
type: string
timestamp:
type: string

View file

@ -0,0 +1,18 @@
title: Task health response properties
type: object
properties:
id:
type: string
last_update:
type: string
stats:
type: object
properties:
configuration:
$ref: 'configuration.yaml'
workload:
$ref: 'workload.yaml'
status:
type: string
timestamp:
type: string

View file

@ -0,0 +1,3 @@
type: object
description: >
This object summarizes the workload across the cluster, including the tasks in the system, their types, and current status.

View file

@ -0,0 +1,36 @@
openapi: 3.0.3
info:
title: Task manager health APIs
description: Kibana APIs for the task manager feature
version: 1.0.0
license:
name: Elastic License 2.0
url: https://www.elastic.co/licensing/elastic-license
tags:
- name: task manager
x-displayName: Task manager
externalDocs:
url: >-
https://www.elastic.co/guide/en/kibana/current/task-manager-production-considerations.html
description: Task manager
servers:
- url: /
paths:
/api/task_manager/_health:
get:
summary: Get the task manager health
description: |
Get the health status of the Kibana task manager.
operationId: task-manager-health
tags:
- task manager
responses:
'200':
description: Indicates a successful call
content:
application/json:
schema:
$ref: 'components/schemas/health_response.yaml'
examples:
taskManagerHealthResponse1:
$ref: 'components/examples/health_200response.yaml'

View file

@ -0,0 +1,36 @@
openapi: 3.0.3
info:
title: Task manager health Serverless APIs
description: Kibana APIs for the task manager feature
version: 1.0.0
license:
name: Elastic License 2.0
url: https://www.elastic.co/licensing/elastic-license
tags:
- name: task manager
x-displayName: Task manager
externalDocs:
url: >-
https://www.elastic.co/guide/en/kibana/current/task-manager-production-considerations.html
description: Task manager
servers:
- url: /
paths:
/api/task_manager/_health:
get:
summary: Get the task manager health
description: |
Get the health status of the Kibana task manager.
operationId: task-manager-health
tags:
- task manager
responses:
'200':
description: Indicates a successful call
content:
application/json:
schema:
$ref: 'components/schemas/health_response_serverless.yaml'
examples:
taskManagerHealthResponse1:
$ref: 'components/examples/health_200response_serverless.yaml'

File diff suppressed because it is too large Load diff