logstash/ci/partition-files.lib.sh
mergify[bot] f679ecb374
tests: make integration split quantity configurable (#17219) (#17369)
* tests: make integration split quantity configurable

Refactors shared splitter bash function to take a list of files on stdin
and split into a configurable number of partitions, emitting only those from
the currently-selected partition to stdout.

Also refactors the only caller in the integration_tests launcher script to
accept an optional partition_count parameter (defaulting to `2` for backward-
compatibility), to provide the list of specs to the function's stdin, and to
output relevant information about the quantity of partition splits and which
was selected.

* ci: run integration tests in 3 parts

(cherry picked from commit 3e0f488df2)

Co-authored-by: Rye Biesemeyer <yaauie@users.noreply.github.com>
2025-03-20 05:34:51 -07:00

78 lines
No EOL
2.9 KiB
Bash
Executable file

#!/bin/bash
# partition_files returns a consistent partition of the filenames given on stdin
# Usage: partition_files <partition_index> <partition_count=2> < <(ls files)
# partition_index: the zero-based index of the partition to select `[0,partition_count)`
# partition_count: the number of partitions `[2,#files]`
partition_files() (
set -e
local files
# ensure files is consistently sorted and distinct
IFS=$'\n' read -ra files -d '' <<<"$(cat - | sort | uniq)" || true
local partition_index="${1:?}"
local partition_count="${2:?}"
_error () { >&2 echo "ERROR: ${1:-UNSPECIFIED}"; exit 1; }
# safeguard against nonsense invocations
if (( ${#files[@]} < 2 )); then
_error "#files(${#files[@]}) must be at least 2 in order to partition"
elif ( ! [[ "${partition_count}" =~ ^[0-9]+$ ]] ) || (( partition_count < 2 )) || (( partition_count > ${#files[@]})); then
_error "partition_count(${partition_count}) must be a number that is at least 2 and not greater than #files(${#files[@]})"
elif ( ! [[ "${partition_index}" =~ ^[0-9]+$ ]] ) || (( partition_index < 0 )) || (( partition_index >= $partition_count )) ; then
_error "partition_index(${partition_index}) must be a number that is greater 0 and less than partition_count(${partition_count})"
fi
# round-robbin emit those in our selected partition
for index in "${!files[@]}"; do
partition="$(( index % partition_count ))"
if (( partition == partition_index )); then
echo "${files[$index]}"
fi
done
)
if [[ "$0" == "${BASH_SOURCE[0]}" ]]; then
if [[ "$1" == "test" ]]; then
status=0
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
file_list="$( cd "${SCRIPT_DIR}"; find . -type f )"
# for any legal partitioning into N partitions, we ensure that
# the combined output of `partition_files I N` where `I` is all numbers in
# the range `[0,N)` produces no repeats and no omissions, even if the
# input list is not consistently ordered.
for n in $(seq 2 $(wc -l <<<"${file_list}")); do
result=""
for i in $(seq 0 $(( n - 1 ))); do
for file in $(partition_files $i $n <<<"$( shuf <<<"${file_list}" )"); do
result+="${file}"$'\n'
done
done
repeated="$( uniq --repeated <<<"$( sort <<<"${result}" )" )"
if (( $(printf "${repeated}" | wc -l) > 0 )); then
status=1
echo "[n=${n}]FAIL(repeated):"$'\n'"${repeated}"
fi
missing=$( comm -23 <(sort <<<"${file_list}") <( sort <<<"${result}" ) )
if (( $(printf "${missing}" | wc -l) > 0 )); then
status=1
echo "[n=${n}]FAIL(omitted):"$'\n'"${missing}"
fi
done
if (( status > 0 )); then
echo "There were failures. The input list was:"
echo "${file_list}"
fi
exit "${status}"
else
partition_files $@
fi
fi