From abaa9379fd9a3414be65bcf5287fbd0ea415f8b0 Mon Sep 17 00:00:00 2001 From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com> Date: Thu, 20 Mar 2025 05:34:53 -0700 Subject: [PATCH] tests: make integration split quantity configurable (#17219) (#17368) * tests: make integration split quantity configurable Refactors shared splitter bash function to take a list of files on stdin and split into a configurable number of partitions, emitting only those from the currently-selected partition to stdout. Also refactors the only caller in the integration_tests launcher script to accept an optional partition_count parameter (defaulting to `2` for backward-compatibility), to provide the list of specs to the function's stdin, and to output relevant information about the quantity of partition splits and which was selected. * ci: run integration tests in 3 parts (cherry picked from commit 3e0f488df29c3602a6890a071f34f05e34463cb2) Co-authored-by: Rye Biesemeyer --- .buildkite/aarch64_pipeline.yml | 47 ++++++++--- .buildkite/pull_request_pipeline.yml | 61 ++++++++++++--- .../jdk-matrix-tests/generate-steps.py | 50 +++++------- ci/get-test-half.sh | 27 ------- ci/integration_tests.sh | 20 +++-- ci/partition-files.lib.sh | 78 +++++++++++++++++++ 6 files changed, 192 insertions(+), 91 deletions(-) delete mode 100644 ci/get-test-half.sh create mode 100755 ci/partition-files.lib.sh diff --git a/.buildkite/aarch64_pipeline.yml b/.buildkite/aarch64_pipeline.yml index fbbe1260f..8c66116fe 100644 --- a/.buildkite/aarch64_pipeline.yml +++ b/.buildkite/aarch64_pipeline.yml @@ -35,48 +35,71 @@ steps: automatic: - limit: 3 - - label: ":lab_coat: Integration Tests / part 1" - key: "integration-tests-part-1" + - label: ":lab_coat: Integration Tests / part 1-of-3" + key: "integration-tests-part-1-of-3" command: | set -euo pipefail source .buildkite/scripts/common/vm-agent.sh - ci/integration_tests.sh split 0 + ci/integration_tests.sh split 0 3 retry: automatic: - limit: 3 - - label: 
":lab_coat: Integration Tests / part 2" - key: "integration-tests-part-2" + - label: ":lab_coat: Integration Tests / part 2-of-3" + key: "integration-tests-part-2-of-3" command: | set -euo pipefail source .buildkite/scripts/common/vm-agent.sh - ci/integration_tests.sh split 1 + ci/integration_tests.sh split 1 3 retry: automatic: - limit: 3 - - label: ":lab_coat: IT Persistent Queues / part 1" - key: "integration-tests-qa-part-1" + - label: ":lab_coat: Integration Tests / part 3-of-3" + key: "integration-tests-part-3-of-3" + command: | + set -euo pipefail + + source .buildkite/scripts/common/vm-agent.sh + ci/integration_tests.sh split 2 3 + retry: + automatic: + - limit: 3 + + - label: ":lab_coat: IT Persistent Queues / part 1-of-3" + key: "integration-tests-qa-part-1-of-3" command: | set -euo pipefail source .buildkite/scripts/common/vm-agent.sh export FEATURE_FLAG=persistent_queues - ci/integration_tests.sh split 0 + ci/integration_tests.sh split 0 3 retry: automatic: - limit: 3 - - label: ":lab_coat: IT Persistent Queues / part 2" - key: "integration-tests-qa-part-2" + - label: ":lab_coat: IT Persistent Queues / part 2-of-3" + key: "integration-tests-qa-part-2-of-3" command: | set -euo pipefail source .buildkite/scripts/common/vm-agent.sh export FEATURE_FLAG=persistent_queues - ci/integration_tests.sh split 1 + ci/integration_tests.sh split 1 3 + retry: + automatic: + - limit: 3 + + - label: ":lab_coat: IT Persistent Queues / part 3-of-3" + key: "integration-tests-qa-part-3-of-3" + command: | + set -euo pipefail + + source .buildkite/scripts/common/vm-agent.sh + export FEATURE_FLAG=persistent_queues + ci/integration_tests.sh split 2 3 retry: automatic: - limit: 3 diff --git a/.buildkite/pull_request_pipeline.yml b/.buildkite/pull_request_pipeline.yml index 9209144da..6cf3ed356 100644 --- a/.buildkite/pull_request_pipeline.yml +++ b/.buildkite/pull_request_pipeline.yml @@ -79,8 +79,8 @@ steps: manual: allowed: true - - label: ":lab_coat: Integration Tests / part 
1" - key: "integration-tests-part-1" + - label: ":lab_coat: Integration Tests / part 1-of-3" + key: "integration-tests-part-1-of-3" agents: image: "docker.elastic.co/ci-agent-images/platform-ingest/buildkite-agent-logstash-ci-no-root" cpu: "8" @@ -95,10 +95,10 @@ steps: set -euo pipefail source .buildkite/scripts/common/container-agent.sh - ci/integration_tests.sh split 0 + ci/integration_tests.sh split 0 3 - - label: ":lab_coat: Integration Tests / part 2" - key: "integration-tests-part-2" + - label: ":lab_coat: Integration Tests / part 2-of-3" + key: "integration-tests-part-2-of-3" agents: image: "docker.elastic.co/ci-agent-images/platform-ingest/buildkite-agent-logstash-ci-no-root" cpu: "8" @@ -113,10 +113,28 @@ steps: set -euo pipefail source .buildkite/scripts/common/container-agent.sh - ci/integration_tests.sh split 1 + ci/integration_tests.sh split 1 3 - - label: ":lab_coat: IT Persistent Queues / part 1" - key: "integration-tests-qa-part-1" + - label: ":lab_coat: Integration Tests / part 3-of-3" + key: "integration-tests-part-3-of-3" + agents: + image: "docker.elastic.co/ci-agent-images/platform-ingest/buildkite-agent-logstash-ci-no-root" + cpu: "8" + memory: "16Gi" + ephemeralStorage: "100Gi" + # Run as a non-root user + imageUID: "1002" + retry: + automatic: + - limit: 3 + command: | + set -euo pipefail + + source .buildkite/scripts/common/container-agent.sh + ci/integration_tests.sh split 2 3 + + - label: ":lab_coat: IT Persistent Queues / part 1-of-3" + key: "integration-tests-qa-part-1-of-3" agents: image: "docker.elastic.co/ci-agent-images/platform-ingest/buildkite-agent-logstash-ci-no-root" cpu: "8" @@ -132,10 +150,10 @@ steps: source .buildkite/scripts/common/container-agent.sh export FEATURE_FLAG=persistent_queues - ci/integration_tests.sh split 0 + ci/integration_tests.sh split 0 3 - - label: ":lab_coat: IT Persistent Queues / part 2" - key: "integration-tests-qa-part-2" + - label: ":lab_coat: IT Persistent Queues / part 2-of-3" + key: 
"integration-tests-qa-part-2-of-3" agents: image: "docker.elastic.co/ci-agent-images/platform-ingest/buildkite-agent-logstash-ci-no-root" cpu: "8" @@ -151,7 +169,26 @@ steps: source .buildkite/scripts/common/container-agent.sh export FEATURE_FLAG=persistent_queues - ci/integration_tests.sh split 1 + ci/integration_tests.sh split 1 3 + + - label: ":lab_coat: IT Persistent Queues / part 3-of-3" + key: "integration-tests-qa-part-3-of-3" + agents: + image: "docker.elastic.co/ci-agent-images/platform-ingest/buildkite-agent-logstash-ci-no-root" + cpu: "8" + memory: "16Gi" + ephemeralStorage: "100Gi" + # Run as non root (logstash) user. UID is hardcoded in image. + imageUID: "1002" + retry: + automatic: + - limit: 3 + command: | + set -euo pipefail + + source .buildkite/scripts/common/container-agent.sh + export FEATURE_FLAG=persistent_queues + ci/integration_tests.sh split 2 3 - label: ":lab_coat: x-pack unit tests" key: "x-pack-unit-tests" diff --git a/.buildkite/scripts/jdk-matrix-tests/generate-steps.py b/.buildkite/scripts/jdk-matrix-tests/generate-steps.py index 681272b85..b948f0740 100644 --- a/.buildkite/scripts/jdk-matrix-tests/generate-steps.py +++ b/.buildkite/scripts/jdk-matrix-tests/generate-steps.py @@ -177,17 +177,15 @@ class LinuxJobs(Jobs): super().__init__(os=os, jdk=jdk, group_key=group_key, agent=agent) def all_jobs(self) -> list[typing.Callable[[], JobRetValues]]: - return [ - self.init_annotation, - self.java_unit_test, - self.ruby_unit_test, - self.integration_tests_part_1, - self.integration_tests_part_2, - self.pq_integration_tests_part_1, - self.pq_integration_tests_part_2, - self.x_pack_unit_tests, - self.x_pack_integration, - ] + jobs=list() + jobs.append(self.init_annotation) + jobs.append(self.java_unit_test) + jobs.append(self.ruby_unit_test) + jobs.extend(self.integration_test_parts(3)) + jobs.extend(self.pq_integration_test_parts(3)) + jobs.append(self.x_pack_unit_tests) + jobs.append(self.x_pack_integration) + return jobs def 
prepare_shell(self) -> str: jdk_dir = f"/opt/buildkite-agent/.java/{self.jdk}" @@ -259,17 +257,14 @@ ci/unit_tests.sh ruby retry=copy.deepcopy(ENABLED_RETRIES), ) - def integration_tests_part_1(self) -> JobRetValues: - return self.integration_tests(part=1) + def integration_test_parts(self, parts) -> list[JobRetValues]: + return list(map(lambda idx: integration_tests(self, idx+1, parts), range(parts)) - def integration_tests_part_2(self) -> JobRetValues: - return self.integration_tests(part=2) - - def integration_tests(self, part: int) -> JobRetValues: - step_name_human = f"Integration Tests - {part}" - step_key = f"{self.group_key}-integration-tests-{part}" + def integration_tests(self, part: int, parts: int) -> JobRetValues: + step_name_human = f"Integration Tests - {part}/{parts}" + step_key = f"{self.group_key}-integration-tests-{part}-of-{parts}" test_command = f""" -ci/integration_tests.sh split {part-1} +ci/integration_tests.sh split {part-1} {parts} """ return JobRetValues( @@ -281,18 +276,15 @@ ci/integration_tests.sh split {part-1} retry=copy.deepcopy(ENABLED_RETRIES), ) - def pq_integration_tests_part_1(self) -> JobRetValues: - return self.pq_integration_tests(part=1) + def pq_integration_test_parts(self, parts) -> list[JobRetValues]: + return list(map(lambda idx: pq_integration_tests(self, idx+1, parts), range(parts)) - def pq_integration_tests_part_2(self) -> JobRetValues: - return self.pq_integration_tests(part=2) - - def pq_integration_tests(self, part: int) -> JobRetValues: - step_name_human = f"IT Persistent Queues - {part}" - step_key = f"{self.group_key}-it-persistent-queues-{part}" + def pq_integration_tests(self, part: int, parts: int) -> JobRetValues: + step_name_human = f"IT Persistent Queues - {part}/{parts}" + step_key = f"{self.group_key}-it-persistent-queues-{part}-of-{parts}" test_command = f""" export FEATURE_FLAG=persistent_queues -ci/integration_tests.sh split {part-1} +ci/integration_tests.sh split {part-1} {parts} """ return 
JobRetValues( diff --git a/ci/get-test-half.sh b/ci/get-test-half.sh deleted file mode 100644 index 147722540..000000000 --- a/ci/get-test-half.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash - -# get_test_half returns either the first or second half of integration tests -# Usage: get_test_half -# half_number: 0 for first half, 1 for second half -get_test_half() { - local half_number=$1 - # Ensure only spec files go to stdout - pushd qa/integration >/dev/null 2>&1 - - # Collect all spec files - local glob1=(specs/*spec.rb) - local glob2=(specs/**/*spec.rb) - local all_specs=("${glob1[@]}" "${glob2[@]}") - - # Calculate the split point - local split_point=$((${#all_specs[@]} / 2)) - - # Get the requested half (:: is "up to", : is "from") - if [[ $half_number -eq 0 ]]; then - local specs="${all_specs[@]::$split_point}" - else - local specs="${all_specs[@]:$split_point}" - fi - popd >/dev/null 2>&1 - echo "$specs" -} \ No newline at end of file diff --git a/ci/integration_tests.sh b/ci/integration_tests.sh index 43573341f..318660bc9 100755 --- a/ci/integration_tests.sh +++ b/ci/integration_tests.sh @@ -10,9 +10,6 @@ export GRADLE_OPTS="-Xmx2g -Dorg.gradle.jvmargs=-Xmx2g -Dorg.gradle.daemon=false export SPEC_OPTS="--order rand --format documentation" export CI=true -# Source shared function for splitting integration tests -source "$(dirname "${BASH_SOURCE[0]}")/get-test-half.sh" - if [ -n "$BUILD_JAVA_HOME" ]; then GRADLE_OPTS="$GRADLE_OPTS -Dorg.gradle.java.home=$BUILD_JAVA_HOME" fi @@ -22,14 +19,15 @@ if [[ $1 = "setup" ]]; then exit 0 elif [[ $1 == "split" ]]; then - if [[ $2 =~ ^[01]$ ]]; then - specs=$(get_test_half "$2") - echo "Running half $2 of integration specs: $specs" - ./gradlew runIntegrationTests -PrubyIntegrationSpecs="$specs" --console=plain - else - echo "Error, must specify 0 or 1 after the split. 
For example ci/integration_tests.sh split 0" - exit 1 - fi + # Source shared function for splitting integration tests + source "$(dirname "${BASH_SOURCE[0]}")/partition-files.lib.sh" + + index="${2:?index}" + count="${3:-2}" + specs=($(cd qa/integration; partition_files "${index}" "${count}" < <(find specs -name '*_spec.rb') )) + + echo "Running integration tests partition[${index}] of ${count}: ${specs[*]}" + ./gradlew runIntegrationTests -PrubyIntegrationSpecs="${specs[*]}" --console=plain elif [[ ! -z $@ ]]; then echo "Running integration tests 'rspec $@'" diff --git a/ci/partition-files.lib.sh b/ci/partition-files.lib.sh new file mode 100755 index 000000000..c974921da --- /dev/null +++ b/ci/partition-files.lib.sh @@ -0,0 +1,78 @@ +#!/bin/bash + +# partition_files returns a consistent partition of the filenames given on stdin +# Usage: partition_files < <(ls files) +# partition_index: the zero-based index of the partition to select `[0,partition_count)` +# partition_count: the number of partitions `[2,#files]` +partition_files() ( + set -e + + local files + # ensure files is consistently sorted and distinct + IFS=$'\n' read -ra files -d '' <<<"$(cat - | sort | uniq)" || true + + local partition_index="${1:?}" + local partition_count="${2:?}" + + _error () { >&2 echo "ERROR: ${1:-UNSPECIFIED}"; exit 1; } + + # safeguard against nonsense invocations + if (( ${#files[@]} < 2 )); then + _error "#files(${#files[@]}) must be at least 2 in order to partition" + elif ( ! [[ "${partition_count}" =~ ^[0-9]+$ ]] ) || (( partition_count < 2 )) || (( partition_count > ${#files[@]})); then + _error "partition_count(${partition_count}) must be a number that is at least 2 and not greater than #files(${#files[@]})" + elif ( ! 
[[ "${partition_index}" =~ ^[0-9]+$ ]] ) || (( partition_index < 0 )) || (( partition_index >= $partition_count )) ; then + _error "partition_index(${partition_index}) must be a number that is greater 0 and less than partition_count(${partition_count})" + fi + + # round-robin emit those in our selected partition + for index in "${!files[@]}"; do + partition="$(( index % partition_count ))" + if (( partition == partition_index )); then + echo "${files[$index]}" + fi + done +) + +if [[ "$0" == "${BASH_SOURCE[0]}" ]]; then + if [[ "$1" == "test" ]]; then + status=0 + + SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + file_list="$( cd "${SCRIPT_DIR}"; find . -type f )" + + # for any legal partitioning into N partitions, we ensure that + # the combined output of `partition_files I N` where `I` is all numbers in + # the range `[0,N)` produces no repeats and no omissions, even if the + # input list is not consistently ordered. + for n in $(seq 2 $(wc -l <<<"${file_list}")); do + result="" + for i in $(seq 0 $(( n - 1 ))); do + for file in $(partition_files $i $n <<<"$( shuf <<<"${file_list}" )"); do + result+="${file}"$'\n' + done + done + + repeated="$( uniq --repeated <<<"$( sort <<<"${result}" )" )" + if (( $(printf "${repeated}" | wc -l) > 0 )); then + status=1 + echo "[n=${n}]FAIL(repeated):"$'\n'"${repeated}" + fi + + missing=$( comm -23 <(sort <<<"${file_list}") <( sort <<<"${result}" ) ) + if (( $(printf "${missing}" | wc -l) > 0 )); then + status=1 + echo "[n=${n}]FAIL(omitted):"$'\n'"${missing}" + fi + done + + if (( status > 0 )); then + echo "There were failures. The input list was:" + echo "${file_list}" + fi + + exit "${status}" + else + partition_files $@ + fi +fi \ No newline at end of file