From 6afd3ecc58c232cdbb82fda9f279d32d6cf12244 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Mon, 10 Mar 2025 12:12:42 +0100 Subject: [PATCH] Avoid reading unnecessary dimension values when downsampling (#124451) Read dimension values once per tsid/bucket docid range instead of for each document being processed. The dimension value within a bucket-interval docid range is always to same and this avoids unnecessary reads. Latency of downsampling the tsdb track index into a 1 hour interval downsample index drop by ~16% (running on my local machine). --- docs/changelog/124451.yaml | 5 +++ .../downsample/DimensionFieldProducer.java | 36 ++++++++++++++----- 2 files changed, 32 insertions(+), 9 deletions(-) create mode 100644 docs/changelog/124451.yaml diff --git a/docs/changelog/124451.yaml b/docs/changelog/124451.yaml new file mode 100644 index 000000000000..15e3ea2a626f --- /dev/null +++ b/docs/changelog/124451.yaml @@ -0,0 +1,5 @@ +pr: 124451 +summary: Improve downsample performance by avoiding to read unnecessary dimension values when downsampling. +area: Downsampling +type: bug +issues: [] diff --git a/x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/DimensionFieldProducer.java b/x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/DimensionFieldProducer.java index 69493e6de442..43fee5b2505b 100644 --- a/x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/DimensionFieldProducer.java +++ b/x-pack/plugin/downsample/src/main/java/org/elasticsearch/xpack/downsample/DimensionFieldProducer.java @@ -44,16 +44,29 @@ public class DimensionFieldProducer extends AbstractDownsampleFieldProducer { isEmpty = true; } - void collect(final Object value) { + void collectOnce(final Object value) { + assert isEmpty; Objects.requireNonNull(value); - if (isEmpty) { - this.value = value; - this.isEmpty = false; - return; - } - if (value.equals(this.value) == false) { - throw new IllegalArgumentException("Dimension value changed without tsid change [" + value + "] != [" + this.value + "]"); + this.value = value; + this.isEmpty = false; + } + + /** + * This is an expensive check, that slows down downsampling significantly. + * Given that index is sorted by tsid as primary key, this shouldn't really happen. + */ + boolean validate(FormattedDocValues docValues, int docId) throws IOException { + if (docValues.advanceExact(docId)) { + int docValueCount = docValues.docValueCount(); + for (int i = 0; i < docValueCount; i++) { + var value = docValues.nextValue(); + if (value.equals(this.value) == false) { + assert false : "Dimension value changed without tsid change [" + value + "] != [" + this.value + "]"; + } + } } + + return true; } } @@ -69,12 +82,17 @@ public class DimensionFieldProducer extends AbstractDownsampleFieldProducer { @Override public void collect(FormattedDocValues docValues, int docId) throws IOException { + if (dimension.isEmpty == false) { + assert dimension.validate(docValues, docId); + return; + } + if (docValues.advanceExact(docId) == false) { return; } int docValueCount = docValues.docValueCount(); for (int i = 0; i < docValueCount; i++) { - this.dimension.collect(docValues.nextValue()); + this.dimension.collectOnce(docValues.nextValue()); } }