tests: make integration split quantity configurable (#17219)

* tests: make integration split quantity configurable

Refactors shared splitter bash function to take a list of files on stdin
and split into a configurable number of partitions, emitting only those from
the currently-selected partition to stdout.

Also refactors the only caller in the integration_tests launcher script to
accept an optional partition_count parameter (defaulting to `2` for backward-
compatibility), to provide the list of specs to the function's stdin, and to
output relevant information about the quantity of partition splits and which
was selected.

* ci: run integration tests in 3 parts
This commit is contained in:
Rye Biesemeyer 2025-03-19 16:37:27 -07:00 committed by GitHub
parent 7683983168
commit 3e0f488df2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 192 additions and 91 deletions

View file

@ -35,48 +35,71 @@ steps:
automatic:
- limit: 3
- label: ":lab_coat: Integration Tests / part 1"
key: "integration-tests-part-1"
- label: ":lab_coat: Integration Tests / part 1-of-3"
key: "integration-tests-part-1-of-3"
command: |
set -euo pipefail
source .buildkite/scripts/common/vm-agent.sh
ci/integration_tests.sh split 0
ci/integration_tests.sh split 0 3
retry:
automatic:
- limit: 3
- label: ":lab_coat: Integration Tests / part 2"
key: "integration-tests-part-2"
- label: ":lab_coat: Integration Tests / part 2-of-3"
key: "integration-tests-part-2-of-3"
command: |
set -euo pipefail
source .buildkite/scripts/common/vm-agent.sh
ci/integration_tests.sh split 1
ci/integration_tests.sh split 1 3
retry:
automatic:
- limit: 3
- label: ":lab_coat: IT Persistent Queues / part 1"
key: "integration-tests-qa-part-1"
- label: ":lab_coat: Integration Tests / part 3-of-3"
key: "integration-tests-part-3-of-3"
command: |
set -euo pipefail
source .buildkite/scripts/common/vm-agent.sh
ci/integration_tests.sh split 2 3
retry:
automatic:
- limit: 3
- label: ":lab_coat: IT Persistent Queues / part 1-of-3"
key: "integration-tests-qa-part-1-of-3"
command: |
set -euo pipefail
source .buildkite/scripts/common/vm-agent.sh
export FEATURE_FLAG=persistent_queues
ci/integration_tests.sh split 0
ci/integration_tests.sh split 0 3
retry:
automatic:
- limit: 3
- label: ":lab_coat: IT Persistent Queues / part 2"
key: "integration-tests-qa-part-2"
- label: ":lab_coat: IT Persistent Queues / part 2-of-3"
key: "integration-tests-qa-part-2-of-3"
command: |
set -euo pipefail
source .buildkite/scripts/common/vm-agent.sh
export FEATURE_FLAG=persistent_queues
ci/integration_tests.sh split 1
ci/integration_tests.sh split 1 3
retry:
automatic:
- limit: 3
- label: ":lab_coat: IT Persistent Queues / part 3-of-3"
key: "integration-tests-qa-part-3-of-3"
command: |
set -euo pipefail
source .buildkite/scripts/common/vm-agent.sh
export FEATURE_FLAG=persistent_queues
ci/integration_tests.sh split 2 3
retry:
automatic:
- limit: 3

View file

@ -81,8 +81,8 @@ steps:
manual:
allowed: true
- label: ":lab_coat: Integration Tests / part 1"
key: "integration-tests-part-1"
- label: ":lab_coat: Integration Tests / part 1-of-3"
key: "integration-tests-part-1-of-3"
agents:
image: "docker.elastic.co/ci-agent-images/platform-ingest/buildkite-agent-logstash-ci-no-root"
cpu: "8"
@ -97,10 +97,10 @@ steps:
set -euo pipefail
source .buildkite/scripts/common/container-agent.sh
ci/integration_tests.sh split 0
ci/integration_tests.sh split 0 3
- label: ":lab_coat: Integration Tests / part 2"
key: "integration-tests-part-2"
- label: ":lab_coat: Integration Tests / part 2-of-3"
key: "integration-tests-part-2-of-3"
agents:
image: "docker.elastic.co/ci-agent-images/platform-ingest/buildkite-agent-logstash-ci-no-root"
cpu: "8"
@ -115,10 +115,28 @@ steps:
set -euo pipefail
source .buildkite/scripts/common/container-agent.sh
ci/integration_tests.sh split 1
ci/integration_tests.sh split 1 3
- label: ":lab_coat: IT Persistent Queues / part 1"
key: "integration-tests-qa-part-1"
- label: ":lab_coat: Integration Tests / part 3-of-3"
key: "integration-tests-part-3-of-3"
agents:
image: "docker.elastic.co/ci-agent-images/platform-ingest/buildkite-agent-logstash-ci-no-root"
cpu: "8"
memory: "16Gi"
ephemeralStorage: "100Gi"
# Run as a non-root user
imageUID: "1002"
retry:
automatic:
- limit: 3
command: |
set -euo pipefail
source .buildkite/scripts/common/container-agent.sh
ci/integration_tests.sh split 2 3
- label: ":lab_coat: IT Persistent Queues / part 1-of-3"
key: "integration-tests-qa-part-1-of-3"
agents:
image: "docker.elastic.co/ci-agent-images/platform-ingest/buildkite-agent-logstash-ci-no-root"
cpu: "8"
@ -134,10 +152,10 @@ steps:
source .buildkite/scripts/common/container-agent.sh
export FEATURE_FLAG=persistent_queues
ci/integration_tests.sh split 0
ci/integration_tests.sh split 0 3
- label: ":lab_coat: IT Persistent Queues / part 2"
key: "integration-tests-qa-part-2"
- label: ":lab_coat: IT Persistent Queues / part 2-of-3"
key: "integration-tests-qa-part-2-of-3"
agents:
image: "docker.elastic.co/ci-agent-images/platform-ingest/buildkite-agent-logstash-ci-no-root"
cpu: "8"
@ -153,7 +171,26 @@ steps:
source .buildkite/scripts/common/container-agent.sh
export FEATURE_FLAG=persistent_queues
ci/integration_tests.sh split 1
ci/integration_tests.sh split 1 3
- label: ":lab_coat: IT Persistent Queues / part 3-of-3"
key: "integration-tests-qa-part-3-of-3"
agents:
image: "docker.elastic.co/ci-agent-images/platform-ingest/buildkite-agent-logstash-ci-no-root"
cpu: "8"
memory: "16Gi"
ephemeralStorage: "100Gi"
# Run as non root (logstash) user. UID is hardcoded in image.
imageUID: "1002"
retry:
automatic:
- limit: 3
command: |
set -euo pipefail
source .buildkite/scripts/common/container-agent.sh
export FEATURE_FLAG=persistent_queues
ci/integration_tests.sh split 2 3
- label: ":lab_coat: x-pack unit tests"
key: "x-pack-unit-tests"

View file

@ -177,17 +177,15 @@ class LinuxJobs(Jobs):
super().__init__(os=os, jdk=jdk, group_key=group_key, agent=agent)
def all_jobs(self) -> list[typing.Callable[[], JobRetValues]]:
return [
self.init_annotation,
self.java_unit_test,
self.ruby_unit_test,
self.integration_tests_part_1,
self.integration_tests_part_2,
self.pq_integration_tests_part_1,
self.pq_integration_tests_part_2,
self.x_pack_unit_tests,
self.x_pack_integration,
]
jobs=list()
jobs.append(self.init_annotation)
jobs.append(self.java_unit_test)
jobs.append(self.ruby_unit_test)
jobs.extend(self.integration_test_parts(3))
jobs.extend(self.pq_integration_test_parts(3))
jobs.append(self.x_pack_unit_tests)
jobs.append(self.x_pack_integration)
return jobs
def prepare_shell(self) -> str:
jdk_dir = f"/opt/buildkite-agent/.java/{self.jdk}"
@ -259,17 +257,14 @@ ci/unit_tests.sh ruby
retry=copy.deepcopy(ENABLED_RETRIES),
)
def integration_tests_part_1(self) -> JobRetValues:
return self.integration_tests(part=1)
def integration_test_parts(self, parts) -> "list[typing.Callable[[], JobRetValues]]":
    """Build one deferred integration-test job per partition.

    Returns `parts` zero-argument callables; calling the N-th one (1-based)
    evaluates `self.integration_tests(N, parts)`. Callables are returned,
    rather than already-built job values, because `all_jobs` is annotated as
    returning callables and extends its list with this result.
    NOTE(review): assumes the consumer of `all_jobs()` invokes each entry to
    obtain its JobRetValues - confirm against the caller.
    """
    def deferred(part: int):
        # bind `part` per job so every callable keeps its own partition number
        return lambda: self.integration_tests(part, parts)

    return [deferred(idx + 1) for idx in range(parts)]
def integration_tests_part_2(self) -> JobRetValues:
return self.integration_tests(part=2)
def integration_tests(self, part: int) -> JobRetValues:
step_name_human = f"Integration Tests - {part}"
step_key = f"{self.group_key}-integration-tests-{part}"
def integration_tests(self, part: int, parts: int) -> JobRetValues:
step_name_human = f"Integration Tests - {part}/{parts}"
step_key = f"{self.group_key}-integration-tests-{part}-of-{parts}"
test_command = f"""
ci/integration_tests.sh split {part-1}
ci/integration_tests.sh split {part-1} {parts}
"""
return JobRetValues(
@ -281,18 +276,15 @@ ci/integration_tests.sh split {part-1}
retry=copy.deepcopy(ENABLED_RETRIES),
)
def pq_integration_tests_part_1(self) -> JobRetValues:
return self.pq_integration_tests(part=1)
def pq_integration_test_parts(self, parts) -> "list[typing.Callable[[], JobRetValues]]":
    """Build one deferred persistent-queue integration-test job per partition.

    Returns `parts` zero-argument callables; calling the N-th one (1-based)
    evaluates `self.pq_integration_tests(N, parts)`. Callables are returned,
    rather than already-built job values, because `all_jobs` is annotated as
    returning callables and extends its list with this result.
    NOTE(review): assumes the consumer of `all_jobs()` invokes each entry to
    obtain its JobRetValues - confirm against the caller.
    """
    def deferred(part: int):
        # bind `part` per job so every callable keeps its own partition number
        return lambda: self.pq_integration_tests(part, parts)

    return [deferred(idx + 1) for idx in range(parts)]
def pq_integration_tests_part_2(self) -> JobRetValues:
return self.pq_integration_tests(part=2)
def pq_integration_tests(self, part: int) -> JobRetValues:
step_name_human = f"IT Persistent Queues - {part}"
step_key = f"{self.group_key}-it-persistent-queues-{part}"
def pq_integration_tests(self, part: int, parts: int) -> JobRetValues:
step_name_human = f"IT Persistent Queues - {part}/{parts}"
step_key = f"{self.group_key}-it-persistent-queues-{part}-of-{parts}"
test_command = f"""
export FEATURE_FLAG=persistent_queues
ci/integration_tests.sh split {part-1}
ci/integration_tests.sh split {part-1} {parts}
"""
return JobRetValues(

View file

@ -1,27 +0,0 @@
#!/bin/bash
# get_test_half prints one half of the integration spec files found under
# qa/integration as a single space-separated line on stdout.
# Usage: get_test_half <half_number>
#   half_number: 0 selects the first half, anything else selects the remainder
#                (which holds the extra spec when the total is odd)
get_test_half() {
  local half_number=$1
  local -a first_level nested every_spec
  local midpoint chosen

  # The globs are resolved relative to qa/integration; pushd/popd are silenced
  # so that nothing but the spec list reaches stdout
  pushd qa/integration >/dev/null 2>&1
  first_level=(specs/*spec.rb)
  nested=(specs/**/*spec.rb)
  popd >/dev/null 2>&1

  # Top-level specs first, then the nested ones
  every_spec=("${first_level[@]}" "${nested[@]}")

  # Integer division: the first half is never the larger one
  midpoint=$(( ${#every_spec[@]} / 2 ))

  # Slice the requested half; the array is flattened to one space-joined string
  if [[ $half_number -eq 0 ]]; then
    chosen="${every_spec[@]:0:midpoint}"
  else
    chosen="${every_spec[@]:midpoint}"
  fi

  echo "$chosen"
}

View file

@ -10,9 +10,6 @@ export GRADLE_OPTS="-Xmx2g -Dorg.gradle.jvmargs=-Xmx2g -Dorg.gradle.daemon=false
export SPEC_OPTS="--order rand --format documentation"
export CI=true
# Source shared function for splitting integration tests
source "$(dirname "${BASH_SOURCE[0]}")/get-test-half.sh"
if [ -n "$BUILD_JAVA_HOME" ]; then
GRADLE_OPTS="$GRADLE_OPTS -Dorg.gradle.java.home=$BUILD_JAVA_HOME"
fi
@ -22,14 +19,15 @@ if [[ $1 = "setup" ]]; then
exit 0
elif [[ $1 == "split" ]]; then
if [[ $2 =~ ^[01]$ ]]; then
specs=$(get_test_half "$2")
echo "Running half $2 of integration specs: $specs"
./gradlew runIntegrationTests -PrubyIntegrationSpecs="$specs" --console=plain
else
echo "Error, must specify 0 or 1 after the split. For example ci/integration_tests.sh split 0"
exit 1
fi
# Source shared function for splitting integration tests
source "$(dirname "${BASH_SOURCE[0]}")/partition-files.lib.sh"
index="${2:?index}"
count="${3:-2}"
specs=($(cd qa/integration; partition_files "${index}" "${count}" < <(find specs -name '*_spec.rb') ))
echo "Running integration tests partition[${index}] of ${count}: ${specs[*]}"
./gradlew runIntegrationTests -PrubyIntegrationSpecs="${specs[*]}" --console=plain
elif [[ ! -z $@ ]]; then
echo "Running integration tests 'rspec $@'"

78
ci/partition-files.lib.sh Executable file
View file

@ -0,0 +1,78 @@
#!/bin/bash
# partition_files emits one consistent partition of the filenames given on stdin
# Usage: partition_files <partition_index> [partition_count=2] < <(ls files)
#   partition_index: the zero-based index of the partition to select `[0,partition_count)`
#   partition_count: the number of partitions `[2,#files]`; defaults to 2 when omitted
# Outputs: the selected filenames on stdout, one per line; diagnostics on stderr
# Returns: 0 on success, non-zero when the arguments or the input list are unusable
# The body runs in a subshell, so neither `set -e` nor the helper defined
# inside leak into the caller.
partition_files() (
  set -e

  local partition_index="${1:?partition_index is required}"
  local partition_count="${2:-2}"
  local -a files=()
  local position

  _error () { >&2 echo "ERROR: ${1:-UNSPECIFIED}"; exit 1; }

  # ensure files is consistently sorted and distinct; the C locale is forced so
  # that every machine computes the very same order (and therefore the very
  # same partitions) regardless of its locale settings.
  # `read -d ''` reports failure at end-of-input even though the array has been
  # filled, hence the `|| true`.
  IFS=$'\n' read -r -d '' -a files <<<"$(LC_ALL=C sort -u)" || true

  # safeguard against nonsense invocations; `10#` forces base 10 so that
  # zero-padded values such as `08` are not rejected as invalid octal
  if (( ${#files[@]} < 2 )); then
    _error "#files(${#files[@]}) must be at least 2 in order to partition"
  fi
  if ! [[ "${partition_count}" =~ ^[0-9]+$ ]] || (( 10#${partition_count} < 2 )) || (( 10#${partition_count} > ${#files[@]} )); then
    _error "partition_count(${partition_count}) must be a number that is at least 2 and not greater than #files(${#files[@]})"
  fi
  if ! [[ "${partition_index}" =~ ^[0-9]+$ ]] || (( 10#${partition_index} >= 10#${partition_count} )); then
    _error "partition_index(${partition_index}) must be a number that is at least 0 and less than partition_count(${partition_count})"
  fi
  partition_index=$(( 10#${partition_index} ))
  partition_count=$(( 10#${partition_count} ))

  # round-robin emit those in our selected partition
  for position in "${!files[@]}"; do
    if (( position % partition_count == partition_index )); then
      printf '%s\n' "${files[position]}"
    fi
  done
)
# Only when this file is executed directly (not sourced):
#   `test`          runs a self-check of partition_files, using every file found
#                   under this script's directory as the input list
#   anything else   is forwarded as arguments to partition_files, which reads
#                   the list of files from stdin
if [[ "$0" == "${BASH_SOURCE[0]}" ]]; then
  if [[ "$1" == "test" ]]; then
    status=0

    SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
    file_list="$( cd "${SCRIPT_DIR}"; find . -type f )"

    # for any legal partitioning into N partitions, we ensure that
    # the combined output of `partition_files I N` where `I` is all numbers in
    # the range `[0,N)` produces no repeats and no omissions, even if the
    # input list is not consistently ordered.
    for n in $(seq 2 "$(wc -l <<<"${file_list}")"); do
      result=""
      for i in $(seq 0 $(( n - 1 ))); do
        # append the partition as whole lines so that names containing
        # whitespace are not split into several entries
        result+="$(partition_files "${i}" "${n}" <<<"$( shuf <<<"${file_list}" )")"$'\n'
      done

      # A non-empty string means at least one offending file. Counting lines
      # with `wc -l` would miss the single-file case, because the command
      # substitution strips the trailing newline.
      repeated="$( uniq --repeated <<<"$( sort <<<"${result}" )" )"
      if [[ -n "${repeated}" ]]; then
        status=1
        echo "[n=${n}]FAIL(repeated):"$'\n'"${repeated}"
      fi

      missing=$( comm -23 <(sort <<<"${file_list}") <( sort <<<"${result}" ) )
      if [[ -n "${missing}" ]]; then
        status=1
        echo "[n=${n}]FAIL(omitted):"$'\n'"${missing}"
      fi
    done

    if (( status > 0 )); then
      echo "There were failures. The input list was:"
      echo "${file_list}"
    fi

    exit "${status}"
  else
    partition_files "$@"
  fi
fi