Add pipeline to run scalability tests from APM traces (#139548)

* add scalability pipeline to run scenarios from APM traces * update runner description * remove retry-all-errors curl flag * add flag for curl to ignore connection refused * override ES_HOST for gatling runner * exclude config from regular CI, remove bail flag * fix uploaded scalability traces * fix text and remove space * add validation in config * use functions instead of scripts * renaming var in loop * add step timeout * define functions before call * use trap for stopping ES * fix path for artifacts extraction * update serverArgs * add pre-build step * add pre-build step * use default pre-build step * delete step * print BUILDKITE_PIPELINE_SLUG * disable telemetry * remove log * enable telemetry * add step to upload test results * move trap after pid * upload test reports to gcs * fix script * Revert "fix script" This reverts commit 1c6bc3f45c. * Revert "upload test reports to gcs" This reverts commit c957a31c32.
2025-04-24 01:38:56 -04:00 · 2022-09-05 18:30:44 +02:00 · 2022-09-05 18:30:44 +02:00 · f0fe485e7d
commit f0fe485e7d
parent e461ad5cc3
6 changed files with 300 additions and 2 deletions
--- a/.buildkite/ftr_configs.yml
+++ b/.buildkite/ftr_configs.yml
@@ -58,6 +58,9 @@ disabled:
  - x-pack/test/screenshot_creation/config.ts
  - x-pack/test/fleet_packages/config.ts

+  # Scalability testing config that we run in its own pipeline
+  - x-pack/test/performance/scalability/config.ts
+
 defaultQueue: 'n2-4-spot'
 enabled:
  - test/accessibility/config.ts
--- a/.buildkite/pipelines/scalability/daily.yml
+++ b/.buildkite/pipelines/scalability/daily.yml
@@ -0,0 +1,23 @@
+# Scalability pipeline: run the pre-build lifecycle script, then the
+# scalability benchmark, then the post-build lifecycle script.
+steps:
+  - label: ':male-mechanic::skin-tone-2: Pre-Build'
+    command: .buildkite/scripts/lifecycle/pre_build.sh
+    agents:
+      queue: kibana-default
+    timeout_in_minutes: 10
+
+  # Do not start the tests until Pre-Build has finished.
+  - wait
+
+  # The benchmark runs on its own agent queue and is capped at 90 minutes.
+  - label: ':kibana: Scalability Tests'
+    command: .buildkite/scripts/steps/scalability/benchmarking.sh
+    agents:
+      queue: kb-static-scalability
+    timeout_in_minutes: 90
+
+  # Wait for the tests, but continue to Post-Build even if they failed.
+  - wait: ~
+    continue_on_failure: true
+
+  - label: ':male_superhero::skin-tone-2: Post-Build'
+    command: .buildkite/scripts/lifecycle/post_build.sh
+    agents:
+      queue: kibana-default
+    timeout_in_minutes: 10
--- a/.buildkite/scripts/steps/functional/scalability_dataset_extraction.sh
+++ b/.buildkite/scripts/steps/functional/scalability_dataset_extraction.sh
@@ -51,6 +51,6 @@ cd -

 echo "--- Promoting '${BUILD_ID}' dataset to LATEST"
 cd "${OUTPUT_DIR}/.."
-echo "${BUILD_ID}" > LATEST
-gsutil cp LATEST "${GCS_BUCKET}"
+echo "${BUILD_ID}" > latest
+gsutil cp latest "${GCS_BUCKET}"
 cd -
--- a/.buildkite/scripts/steps/scalability/benchmarking.sh
+++ b/.buildkite/scripts/steps/scalability/benchmarking.sh
@@ -0,0 +1,122 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+source .buildkite/scripts/common/util.sh
+
+#.buildkite/scripts/bootstrap.sh
+echo "--- yarn kbn reset && yarn kbn bootstrap"
+yarn kbn reset && yarn kbn bootstrap
+
+GCS_BUCKET="gs://kibana-performance/scalability-tests"
+GCS_ARTIFACTS_REL="gcs_artifacts"
+GCS_ARTIFACTS_DIR="${WORKSPACE}/${GCS_ARTIFACTS_REL}"
+KIBANA_LOAD_TESTING_DIR="${KIBANA_DIR}/kibana-load-testing"
+
+# These tests are running on static workers so we must delete previous build, load runner and scalability artifacts
+rm -rf "${KIBANA_BUILD_LOCATION}"
+rm -rf "${KIBANA_LOAD_TESTING_DIR}"
+rm -rf "${GCS_ARTIFACTS_DIR}"
+
+download_artifacts() {
+  mkdir -p "${GCS_ARTIFACTS_DIR}"
+
+  gsutil cp "$GCS_BUCKET/latest" "${GCS_ARTIFACTS_DIR}/"
+  HASH=`cat ${GCS_ARTIFACTS_DIR}/latest`
+  gsutil cp -r "$GCS_BUCKET/$HASH" "${GCS_ARTIFACTS_DIR}/"
+
+  export LATEST_RUN_ARTIFACTS_DIR="${GCS_ARTIFACTS_DIR}/${HASH}"
+
+  echo "Unzip kibana build, plugins and scalability traces"
+  cd "$WORKSPACE"
+  mkdir -p "$KIBANA_BUILD_LOCATION"
+  tar -xzf "${LATEST_RUN_ARTIFACTS_DIR}/kibana-default.tar.gz" -C "$KIBANA_BUILD_LOCATION" --strip=1
+
+  cd "$KIBANA_DIR"
+  tar -xzf "${LATEST_RUN_ARTIFACTS_DIR}/kibana-default-plugins.tar.gz"
+  tar -xzf "${LATEST_RUN_ARTIFACTS_DIR}/scalability_traces.tar.gz"
+}
+
+# Fetches the 'main' branch of elastic/kibana-load-testing into
+# KIBANA_LOAD_TESTING_DIR, compiles it with Maven and exports
+# KIBANA_LOAD_TESTING_GIT_COMMIT and GATLING_PROJECT_PATH.
+# Leaves the working directory at KIBANA_LOAD_TESTING_DIR.
+checkout_and_compile_load_runner() {
+  # Keep these as separate commands: in 'mkdir && cd' a failing mkdir is
+  # exempt from 'set -e', and the 'git reset --hard' below would then run in
+  # whatever directory the script was in before.
+  mkdir -p "${KIBANA_LOAD_TESTING_DIR}"
+  cd "${KIBANA_LOAD_TESTING_DIR}"
+
+  if [[ ! -d .git ]]; then
+    git init
+    git remote add origin https://github.com/elastic/kibana-load-testing.git
+  fi
+  git fetch origin --depth 1 "main"
+  git reset --hard FETCH_HEAD
+
+  KIBANA_LOAD_TESTING_GIT_COMMIT="$(git rev-parse HEAD)"
+  export KIBANA_LOAD_TESTING_GIT_COMMIT
+
+  mvn -q test-compile
+  echo "Set 'GATLING_PROJECT_PATH' env var for ScalabilityTestRunner"
+  # Assign and export separately so a failing command substitution is not
+  # masked by the exit status of 'export'.
+  GATLING_PROJECT_PATH="$(pwd)"
+  export GATLING_PROJECT_PATH
+}
+
+upload_test_results() {
+  cd "${KIBANA_DIR}"
+  echo "--- Archive Gatling reports and upload as build artifacts"
+  tar -czf "scalability_test_report.tar.gz" --exclude=simulation.log -C kibana-load-testing/target gatling
+  buildkite-agent artifact upload "scalability_test_report.tar.gz"
+  cd "${LATEST_RUN_ARTIFACTS_DIR}"
+  echo "Upload scalability traces as build artifacts"
+  buildkite-agent artifact upload "scalability_traces.tar.gz"
+}
+
+# Main flow: fetch artifacts, build the load runner, start Elasticsearch once,
+# then run the scalability FTR config for every journey file and upload results.
+echo "--- Download the latest artifacts from single user performance pipeline"
+download_artifacts
+
+echo "--- Clone kibana-load-testing repo and compile project"
+checkout_and_compile_load_runner
+
+echo "--- Run Scalability Tests with Elasticsearch started only once and Kibana restart before each journey"
+cd "$KIBANA_DIR"
+# Start Elasticsearch in the background; its pid is captured right below
+node scripts/es snapshot&
+
+esPid=$!
+# Set trap on EXIT to stop Elasticsearch process
+# (double quotes: $esPid is expanded now, when the trap is registered)
+trap "kill -9 $esPid" EXIT
+
+# unset env vars defined in other parts of CI for automatic APM collection of
+# Kibana. We manage APM config in our FTR config and performance service, and
+# APM treats config in the ENV with a very high precedence.
+unset ELASTIC_APM_ENVIRONMENT
+unset ELASTIC_APM_TRANSACTION_SAMPLE_RATE
+unset ELASTIC_APM_SERVER_URL
+unset ELASTIC_APM_SECRET_TOKEN
+unset ELASTIC_APM_ACTIVE
+unset ELASTIC_APM_CONTEXT_PROPAGATION_ONLY
+unset ELASTIC_APM_GLOBAL_LABELS
+unset ELASTIC_APM_MAX_QUEUE_SIZE
+unset ELASTIC_APM_METRICS_INTERVAL
+unset ELASTIC_APM_CAPTURE_SPAN_STACK_TRACES
+unset ELASTIC_APM_BREAKDOWN_METRICS
+
+
+# Elasticsearch was already started above, so the test run must not start its own
+export TEST_ES_DISABLE_STARTUP=true
+ES_HOST="localhost:9200"
+export TEST_ES_URL="http://elastic:changeme@${ES_HOST}"
+# Overriding Gatling default configuration
+export ES_URL="http://${ES_HOST}"
+
+# Polls the ES cluster health endpoint, retrying up to 120 times with a 1s delay
+# (connection refused included), until at least one node is up and the status is yellow
+curl --retry 120 \
+  --retry-delay 1 \
+  --retry-connrefused \
+  -I -XGET "${TEST_ES_URL}/_cluster/health?wait_for_nodes=>=1&wait_for_status=yellow"
+
+# Re-enable APM after the unset block above; config.ts forwards this value to Kibana
+export ELASTIC_APM_ACTIVE=true
+
+# One functional_tests run per journey file; the path is handed to the FTR
+# config through SCALABILITY_JOURNEY_PATH
+for journey in scalability_traces/server/*; do
+    export SCALABILITY_JOURNEY_PATH="$KIBANA_DIR/$journey"
+    echo "--- Run scalability file: $SCALABILITY_JOURNEY_PATH"
+    node scripts/functional_tests \
+      --config x-pack/test/performance/scalability/config.ts \
+      --kibana-install-dir "$KIBANA_BUILD_LOCATION" \
+      --debug
+done
+
+echo "--- Upload test results"
+upload_test_results
--- a/x-pack/test/performance/scalability/config.ts
+++ b/x-pack/test/performance/scalability/config.ts
@@ -0,0 +1,109 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { FtrConfigProviderContext } from '@kbn/test';
+import fs from 'fs';
+import path from 'path';
+import { REPO_ROOT } from '@kbn/utils';
+import { createFlagError } from '@kbn/dev-cli-errors';
+import { serializeApmGlobalLabels } from '../utils';
+import { ScalabilityTestRunner } from './runner';
+import { FtrProviderContext } from '../ftr_provider_context';
+
+// These "secret" values are intentionally written in the source.
+const APM_SERVER_URL = 'https://142fea2d3047486e925eb8b223559cae.apm.europe-west1.gcp.cloud.es.io';
+const APM_PUBLIC_TOKEN = 'pWFFEym07AKBBhUE2i';
+// Optional toggles: any non-empty value adds the matching Kibana source arg in the config below.
+const AGGS_SHARD_DELAY = process.env.LOAD_TESTING_SHARD_DELAY;
+const DISABLE_PLUGINS = process.env.LOAD_TESTING_DISABLE_PLUGINS;
+// Path to the journey json file to run; the config provider throws when it is not set.
+const scalabilityJsonPath = process.env.SCALABILITY_JOURNEY_PATH;
+// Root of the kibana-load-testing checkout; falls back to a sibling directory of the repo root.
+const gatlingProjectRootPath: string =
+  process.env.GATLING_PROJECT_PATH || path.resolve(REPO_ROOT, '../kibana-load-testing');
+
+/**
+ * Type guard: true when `value` is an object carrying a non-empty string `journeyName`.
+ */
+const isScalabilityJourney = (value: unknown): value is ScalabilityJourney => {
+  if (typeof value !== 'object' || value === null) {
+    return false;
+  }
+  const { journeyName } = value as { journeyName?: unknown };
+  return typeof journeyName === 'string' && journeyName.length > 0;
+};
+
+/**
+ * Reads and parses the scalability journey json file.
+ *
+ * @param filePath path to the journey file, must have the `.json` extension
+ * @returns the parsed journey
+ * @throws flag error when the extension is not `.json`, the file does not exist,
+ * the content cannot be read or parsed, or `journeyName` is missing
+ */
+const readScalabilityJourney = (filePath: string): ScalabilityJourney => {
+  if (path.extname(filePath) !== '.json') {
+    throw createFlagError(`Path to scalability journey json is non-json file: '${filePath}'`);
+  }
+
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
+  } catch (error: unknown) {
+    // Narrow before touching `code`: the caught value is not guaranteed to be an errno error
+    const isMissingFile =
+      error instanceof Error && (error as NodeJS.ErrnoException).code === 'ENOENT';
+    if (isMissingFile) {
+      throw createFlagError(`Path to scalability journey json is invalid: ${filePath}`);
+    }
+    throw createFlagError(`Invalid JSON provided: '${filePath}', ${error}`);
+  }
+
+  // Validate outside the try block so this error is not re-wrapped as a JSON syntax problem.
+  // Without the check a missing name ends up as 'journeyName=undefined' in Kibana args and APM labels.
+  if (!isScalabilityJourney(parsed)) {
+    throw createFlagError(
+      `Invalid scalability journey json, 'journeyName' must be a non-empty string: '${filePath}'`
+    );
+  }
+  return parsed;
+};
+
+/** Part of the scalability journey json file that this config reads. */
+interface ScalabilityJourney {
+  /** Journey name, used for the telemetry label and the APM global labels. */
+  journeyName: string;
+}
+
+/**
+ * FTR config provider for scalability testing.
+ *
+ * Extends the base journeys config: replaces the test runner with
+ * ScalabilityTestRunner, and adjusts the Elasticsearch and Kibana server
+ * settings (APM env, telemetry label, shutdown delay).
+ *
+ * Throws a flag error when the load testing project directory does not exist,
+ * or when 'SCALABILITY_JOURNEY_PATH' is unset or points to an invalid journey file.
+ */
+export default async function ({ readConfigFile }: FtrConfigProviderContext) {
+  const performanceConfig = await readConfigFile(require.resolve('../journeys/base.config.ts'));
+
+  // Validate the inputs up front so a misconfigured run fails before any server starts
+  if (!fs.existsSync(gatlingProjectRootPath)) {
+    throw createFlagError(
+      `Incorrect path to load testing project: '${gatlingProjectRootPath}'\n
+    Clone 'elastic/kibana-load-testing' and set path using 'GATLING_PROJECT_PATH' env var`
+    );
+  }
+
+  if (!scalabilityJsonPath) {
+    throw createFlagError(
+      `Set path to scalability journey json using 'SCALABILITY_JOURNEY_PATH' env var`
+    );
+  }
+  const scalabilityJourney = readScalabilityJourney(scalabilityJsonPath);
+
+  // Base config labels plus the journey file name and journey name
+  const apmGlobalLabels = {
+    ...performanceConfig.get('kbnTestServer').env.ELASTIC_APM_GLOBAL_LABELS,
+    journeyFilePath: path.basename(scalabilityJsonPath),
+    journeyName: scalabilityJourney.journeyName,
+  };
+
+  return {
+    // Base settings first; the keys below override them
+    ...performanceConfig.getAll(),
+
+    testRunner: (context: FtrProviderContext) =>
+      ScalabilityTestRunner(context, scalabilityJsonPath, gatlingProjectRootPath),
+
+    esTestCluster: {
+      ...performanceConfig.get('esTestCluster'),
+      serverArgs: [...performanceConfig.get('esTestCluster.serverArgs')],
+      // JVM heap options for Elasticsearch: min and max both 8g
+      esJavaOpts: '-Xms8g -Xmx8g',
+    },
+
+    kbnTestServer: {
+      ...performanceConfig.get('kbnTestServer'),
+      sourceArgs: [
+        ...performanceConfig.get('kbnTestServer.sourceArgs'),
+        '--no-base-path',
+        '--env.name=development',
+        // Optional flags, added only when the matching LOAD_TESTING_* env var is non-empty
+        ...(!!AGGS_SHARD_DELAY ? ['--data.search.aggs.shardDelay.enabled=true'] : []),
+        ...(!!DISABLE_PLUGINS ? ['--plugins.initialize=false'] : []),
+      ],
+      serverArgs: [
+        ...performanceConfig.get('kbnTestServer.serverArgs'),
+        `--telemetry.labels.journeyName=${scalabilityJourney.journeyName}`,
+      ],
+      // APM agent settings passed to Kibana; this object replaces the base config's 'env'
+      env: {
+        ELASTIC_APM_ACTIVE: process.env.ELASTIC_APM_ACTIVE,
+        ELASTIC_APM_CONTEXT_PROPAGATION_ONLY: 'false',
+        ELASTIC_APM_ENVIRONMENT: process.env.CI ? 'ci' : 'development',
+        ELASTIC_APM_TRANSACTION_SAMPLE_RATE: '1.0',
+        ELASTIC_APM_SERVER_URL: APM_SERVER_URL,
+        ELASTIC_APM_SECRET_TOKEN: APM_PUBLIC_TOKEN,
+        ELASTIC_APM_BREAKDOWN_METRICS: false,
+        ELASTIC_APM_CAPTURE_SPAN_STACK_TRACES: false,
+        ELASTIC_APM_METRICS_INTERVAL: '80s',
+        ELASTIC_APM_MAX_QUEUE_SIZE: 20480,
+        ELASTIC_APM_GLOBAL_LABELS: serializeApmGlobalLabels(apmGlobalLabels),
+      },
+      // delay shutdown to ensure that APM can report the data it collects during test execution
+      delayShutdown: 90_000,
+    },
+  };
+}
--- a/x-pack/test/performance/scalability/runner.ts
+++ b/x-pack/test/performance/scalability/runner.ts
@@ -0,0 +1,41 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import { withProcRunner } from '@kbn/dev-proc-runner';
+import { FtrProviderContext } from '../ftr_provider_context';
+
+/**
+ * Runs the Gatling load simulation through Maven and waits for it to finish.
+ *
+ * @param context FTR provider context, only the 'log' service is used
+ * @param scalabilityJsonPath path to the journey json file, passed to Gatling as 'journeyPath'
+ * @param gatlingProjectRootPath root of the kibana-load-testing repo, used as the working directory
+ */
+export async function ScalabilityTestRunner(
+  { getService }: FtrProviderContext,
+  scalabilityJsonPath: string,
+  gatlingProjectRootPath: string
+) {
+  const log = getService('log');
+
+  // Maven invocation: quiet mode, generic journey simulation, journey file as a system property
+  const mavenArgs = [
+    'gatling:test',
+    '-q',
+    '-Dgatling.simulationClass=org.kibanaLoadTest.simulation.generic.GenericJourney',
+    `-DjourneyPath=${scalabilityJsonPath}`,
+  ];
+
+  log.info(`Running scalability test with json file: '${scalabilityJsonPath}'`);
+
+  await withProcRunner(log, async (procRunner) => {
+    await procRunner.run('gatling: test', {
+      cmd: 'mvn',
+      args: mavenArgs,
+      cwd: gatlingProjectRootPath,
+      // the child process gets a copy of the current environment
+      env: { ...process.env },
+      wait: true,
+    });
+  });
+}