From 6b452c79ae2947e91c613dbc1417cc60f1aba157 Mon Sep 17 00:00:00 2001 From: Mykola Harmash Date: Thu, 26 Jun 2025 15:28:54 +0200 Subject: [PATCH] [Oblt Onboarding] Add more errors logging for Auto Detect flow (#222948) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes https://github.com/elastic/kibana/issues/218433 This change adds more descriptive error messages inside telemetry events in the auto detect onboarding flow. The extended error message will be shown to the user as well. * New telemetry event for the case when the user is not running the script as `root` * If Agent fails to download, both the original error from `curl` and the download URL will be logged * If Agent fails to extract, error message from `tar` command will be logged * If Agent fails to install, error message from `elastic-agent install` command will be logged * Error response from the integrations install endpoint will be logged Example telemetry event: ![CleanShot 2025-06-06 at 11 29 20@2x](https://github.com/user-attachments/assets/700a3d21-8d1d-43e0-b084-198da7871dec) Example terminal output: ![CleanShot 2025-06-05 at 15 10 38@2x](https://github.com/user-attachments/assets/702e641e-ab4d-4820-8f4d-e551dde435e8) ## How to test In order to simulate errors for all scenarios, you are going to need to modify the `auto_detect.sh` script to make it intentionally fail, like adding an extra character to the EA download URL. You can modify it directly in the source if you're running Kibana locally, or you can modify it after it was downloaded and before running it. To test just one use-case without the script manipulations, you can modify the EA download URL inside the code snippet provided by Kibana. 
**Whatever way you choose, still go through the flow and make sure the successful path works as expected.** You can check the telemetry events emitted from the flow on [the staging telemetry cluster](https://telemetry-v2-staging.elastic.dev/s/observability-ux/app/discover#/?_g=(filters:!(),refreshInterval:(pause:!t,value:60000),time:(from:now-24h%2Fh,to:now))&_a=(columns:!(),dataSource:(dataViewId:b4befdd3-9a15-40c2-be4d-3ac1084d6182,type:dataView),filters:!(),interval:auto,query:(language:kuery,query:'event_type%20:%20%22observability_onboarding%22'),sort:!(!('@timestamp',desc)))), but keep in mind that events are delivered there with a delay of a couple of hours 😢 --------- Co-authored-by: Joe Reuter --- .../public/assets/auto_detect.sh | 45 ++++++++++--------- .../server/routes/flow/route.ts | 10 ++++- .../onboarding/update_progress.ts | 5 ++- 3 files changed, 35 insertions(+), 25 deletions(-) diff --git a/x-pack/solutions/observability/plugins/observability_onboarding/public/assets/auto_detect.sh b/x-pack/solutions/observability/plugins/observability_onboarding/public/assets/auto_detect.sh index d9b41180cf91..a80e76108110 100755 --- a/x-pack/solutions/observability/plugins/observability_onboarding/public/assets/auto_detect.sh +++ b/x-pack/solutions/observability/plugins/observability_onboarding/public/assets/auto_detect.sh @@ -1,7 +1,7 @@ #!/bin/bash fail() { - printf "%s\n" "$@" >&2 + printf "\n\033[33m%s\e[0m\n\e[2m%s\e[0m\n" "$1" "$2" >&2 exit 1 } @@ -46,11 +46,6 @@ ensure_argument() { fi } -if [ "$EUID" -ne 0 ]; then - echo "Error: This script must be run as root." 
- help -fi - # Parse command line arguments for i in "$@"; do case $i in @@ -97,13 +92,14 @@ update_step_progress() { local PAYLOAD=${4:-} local data="" - MESSAGE=$(echo "$MESSAGE" | sed 's/"/\\"/g') + MESSAGE=$(printf "%s" "$MESSAGE" | base64 --wrap=0) if [ -z "$PAYLOAD" ]; then data="{\"status\":\"${STATUS}\", \"message\":\"${MESSAGE}\"}" else data="{\"status\":\"${STATUS}\", \"message\":\"${MESSAGE}\", \"payload\":${PAYLOAD}}" fi + curl --request POST \ --url "${kibana_api_endpoint}/internal/observability_onboarding/flow/${onboarding_flow_id}/step/${STEPNAME}" \ --header "Authorization: ApiKey ${install_api_key_encoded}" \ @@ -117,6 +113,12 @@ update_step_progress() { --fail } +if [ "$EUID" -ne 0 ]; then + echo "Error: This script must be run as root." + update_step_progress "logs-detect" "danger" "The user running the script doesn't have root privileges." + help +fi + update_step_progress "logs-detect" "initialize" known_integrations_list_string="" @@ -159,38 +161,39 @@ elastic_agent_artifact_name="elastic-agent-${elastic_agent_version}-${os}-${arch download_elastic_agent() { local download_url="https://artifacts.elastic.co/downloads/beats/elastic-agent/${elastic_agent_artifact_name}.tar.gz" rm -rf "./${elastic_agent_artifact_name}" "./${elastic_agent_artifact_name}.tar.gz" - curl -L -O "$download_url" --silent --fail + agent_download_result=$(curl -L -O "$download_url" --silent --show-error --fail 2>&1) + local download_exit_code=$? - if [ "$?" -eq 0 ]; then + if [ $download_exit_code -eq 0 ]; then printf "\e[32;1m✓\e[0m %s\n" "Elastic Agent downloaded to $(pwd)/$elastic_agent_artifact_name.tar.gz" update_step_progress "ea-download" "complete" else - update_step_progress "ea-download" "danger" "Failed to download Elastic Agent, see script output for error." - fail "Failed to download Elastic Agent" + update_step_progress "ea-download" "danger" "Failed to download Elastic Agent. 
Curl error: $agent_download_result.\nURL: $download_url" + fail "Failed to download Elastic Agent" "$agent_download_result" fi } extract_elastic_agent() { - tar -xzf "${elastic_agent_artifact_name}.tar.gz" + agent_extract_result=$(tar -xzf "${elastic_agent_artifact_name}.tar.gz" 2>&1) if [ "$?" -eq 0 ]; then printf "\e[32;1m✓\e[0m %s\n" "Archive extracted" update_step_progress "ea-extract" "complete" else - update_step_progress "ea-extract" "danger" "Failed to extract Elastic Agent, see script output for error." - fail "Failed to extract Elastic Agent" + update_step_progress "ea-extract" "danger" "Failed to extract Elastic Agent. Tar Error: $agent_extract_result" + fail "Failed to extract Elastic Agent" "$agent_extract_result" fi } install_elastic_agent() { - "./${elastic_agent_artifact_name}/elastic-agent" install -f -n >/dev/null + agent_install_result=$("./${elastic_agent_artifact_name}/elastic-agent" install -f -n 2>&1) if [ "$?" -eq 0 ]; then printf "\e[32;1m✓\e[0m %s\n" "Elastic Agent installed to $(dirname "$elastic_agent_config_path")" update_step_progress "ea-install" "complete" else - update_step_progress "ea-install" "danger" "Failed to install Elastic Agent, see script output for error." - fail "Failed to install Elastic Agent" + update_step_progress "ea-install" "danger" "Failed to install Elastic Agent. 
Elastic Agent install error: $agent_install_result" + fail "Failed to install Elastic Agent" "$agent_install_result" fi } @@ -292,7 +295,7 @@ install_integrations() { install_integrations_api_body_string+="$integration_name\tcustom\t$item\n" done - curl --request POST \ + install_integrations_result=$(curl --request POST \ --url "$kibana_api_endpoint/internal/observability_onboarding/flow/$onboarding_flow_id/integrations/install" \ --header "Authorization: ApiKey $install_api_key_encoded" \ --header "Content-Type: text/tab-separated-values" \ @@ -303,13 +306,13 @@ install_integrations() { --silent \ --show-error \ --fail \ - --output "$elastic_agent_tmp_config_path" + --output "$elastic_agent_tmp_config_path" 2>&1) if [ "$?" -eq 0 ]; then printf "\n\e[32;1m✓\e[0m %s\n" "Integrations installed" else - update_step_progress "install-integrations" "danger" "Failed to install integrations" - fail "Failed to install integrations" + update_step_progress "install-integrations" "danger" "Failed to install integrations.\nCurl error: $install_integrations_result.\nIntegrations: $install_integrations_api_body_string" + fail "Failed to install integrations" "$install_integrations_result" fi } diff --git a/x-pack/solutions/observability/plugins/observability_onboarding/server/routes/flow/route.ts b/x-pack/solutions/observability/plugins/observability_onboarding/server/routes/flow/route.ts index 054e0f683c32..d8b27f867f45 100644 --- a/x-pack/solutions/observability/plugins/observability_onboarding/server/routes/flow/route.ts +++ b/x-pack/solutions/observability/plugins/observability_onboarding/server/routes/flow/route.ts @@ -61,6 +61,12 @@ const stepProgressUpdateRoute = createObservabilityOnboardingServerRoute({ core, } = resources; + /** + * Message is base64 encoded as it might include arbitrary error messages + * from user's terminal containing special characters that would otherwise + * break the request. + */ + const decodedMessage = Buffer.from(message ?? 
'', 'base64').toString('utf-8'); const coreStart = await core.start(); const savedObjectsClient = coreStart.savedObjects.createInternalRepository(); @@ -88,7 +94,7 @@ const stepProgressUpdateRoute = createObservabilityOnboardingServerRoute({ ...observabilityOnboardingState.progress, [name]: { status, - message, + message: decodedMessage, payload, }, }, @@ -100,7 +106,7 @@ const stepProgressUpdateRoute = createObservabilityOnboardingServerRoute({ flow_id: id, step: name, step_status: status, - step_message: message, + step_message: decodedMessage, payload, }); diff --git a/x-pack/test/api_integration/deployment_agnostic/apis/observability/onboarding/update_progress.ts b/x-pack/test/api_integration/deployment_agnostic/apis/observability/onboarding/update_progress.ts index 5bc1b5f3bf20..90b3775930dd 100644 --- a/x-pack/test/api_integration/deployment_agnostic/apis/observability/onboarding/update_progress.ts +++ b/x-pack/test/api_integration/deployment_agnostic/apis/observability/onboarding/update_progress.ts @@ -93,10 +93,11 @@ export default function ({ getService }: DeploymentAgnosticFtrProviderContext) { }); it('updates step status with message', async () => { + const message = 'Download failed'; const step = { name: 'ea-download', status: 'danger', - message: 'Download failed', + message: Buffer.from(message, 'utf8').toString('base64'), }; const response = await adminClientWithAPIKey .post(`/internal/observability_onboarding/flow/${onboardingId}/step/${step.name}`) @@ -114,7 +115,7 @@ export default function ({ getService }: DeploymentAgnosticFtrProviderContext) { const stepProgress = savedState.attributes.progress?.[step.name]; expect(stepProgress).to.have.property('status', step.status); - expect(stepProgress).to.have.property('message', step.message); + expect(stepProgress).to.have.property('message', message); }); }); });