From 7ebb09b9f3095de1fcadcebf6a995bd13e99dc5b Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 21 Sep 2022 11:17:11 -0400 Subject: [PATCH] Speed `getIntLE` from `BytesReference` (#90147) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This speeds up `getIntLE` from `BytesReference` which we'll be using in the upcoming dense representations for aggregations. Here's the performance: ``` (type) Mode Cnt Before Error After Error Units array avgt 7 1.036 ± 0.062 0.261 ± 0.022 ns/op paged_bytes_array avgt 7 5.189 ± 0.172 5.317 ± 0.196 ns/op composite_256kb avgt 7 30.792 ± 0.834 11.240 ± 0.387 ns/op composite_262344b avgt 7 32.503 ± 1.017 11.155 ± 0.358 ns/op composite_1mb avgt 7 25.189 ± 0.449 8.379 ± 0.193 ns/op ``` The `array` method is how we'll use slices that don't span the edges of a netty buffer. The `paged_bytes_array` method doesn't really change and represents the default for internal stuff. I'll bet we could make it faster too, but I don't know that we use it in the hot path. The `composite_` method is how we'll be reading large slabs from the netty byte buffer. We could probably do better if we relied on the sizes of the buffers being even, but we don't presently do that in the composite bytes array. The different sizes following `composite` show that the performance is dominated by the number of slabs in the composite buffer. `1mb` looks like the largest buffer netty uses. `256kb` is the smallest. The wild number of bytes intentionally doesn't line the int up on sensible values. I don't think we'll use sizes like that but it looks like the performance doesn't make a huge difference. We're dominated by the buffer choice. --- benchmarks/README.md | 21 ++-- .../common/util/IntArrayBenchmark.java | 98 +++++++++++++++++++ .../common/bytes/BytesArray.java | 6 ++ .../common/bytes/CompositeBytesReference.java | 12 +++ .../common/bytes/BytesArrayTests.java | 15 +++ .../bytes/CompositeBytesReferenceTests.java | 14 +++ 6 files changed, 156 insertions(+), 10 deletions(-) create mode 100644 benchmarks/src/main/java/org/elasticsearch/benchmark/common/util/IntArrayBenchmark.java diff --git a/benchmarks/README.md b/benchmarks/README.md index 1e6cc42c8371..d7b324acfef8 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -81,19 +81,20 @@ To get realistic results, you should exercise care when running benchmarks. Here NOTE: Linux only. Sorry Mac and Windows. -Disassembling is fun! Maybe not always useful, but always fun! Generally, you'll want to install `perf` and FCML's `hsdis`. -`perf` is generally available via `apg-get install perf` or `pacman -S perf`. FCML is a little more involved. This worked +Disassembling is fun! Maybe not always useful, but always fun! Generally, you'll want to install `perf` and the JDK's `hsdis`. +`perf` is generally available via `apg-get install perf` or `pacman -S perf`. `hsdis` you'll want to compile from source. is a little more involved. This worked on 2020-08-01: ``` -wget https://github.com/swojtasiak/fcml-lib/releases/download/v1.2.2/fcml-1.2.2.tar.gz -tar xf fcml* -cd fcml* -./configure -make -cd example/hsdis -make -sudo cp .libs/libhsdis.so.0.0.0 /usr/lib/jvm/java-14-adoptopenjdk/lib/hsdis-amd64.so +git clone git@github.com:openjdk/jdk.git +cd jdk +git checkout jdk-17-ga +cd src/utils/hsdis +# Get a known good binutils +wget https://ftp.gnu.org/gnu/binutils/binutils-2.35.tar.gz +tar xf binutils-2.35.tar.gz +make BINUTILS=binutils-2.35 ARCH=amd64 +sudo cp build/linux-amd64/hsdis-amd64.so /usr/lib/jvm/java-17-openjdk/lib/server/ ``` If you want to disassemble a single method do something like this: diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/common/util/IntArrayBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/common/util/IntArrayBenchmark.java new file mode 100644 index 000000000000..973155f69eff --- /dev/null +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/common/util/IntArrayBenchmark.java @@ -0,0 +1,98 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.benchmark.common.util; + +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.bytes.CompositeBytesReference; +import org.elasticsearch.common.bytes.PagedBytesReference; +import org.elasticsearch.common.bytes.ReleasableBytesReference; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.IntArray; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OperationsPerInvocation; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; + +@Warmup(iterations = 5) +@Measurement(iterations = 7) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@State(Scope.Thread) +@Fork(1) +@OperationsPerInvocation(2621440) +public class IntArrayBenchmark { + static final int SIZE = IntArrayBenchmark.class.getAnnotation(OperationsPerInvocation.class).value(); + + @Param({ "array", "paged_bytes_array", "composite_256kb", "composite_262344b", "composite_1mb" }) + private String type; + + private IntArray read; + + @Setup + public void init() throws IOException { + IntArray ints = BigArrays.NON_RECYCLING_INSTANCE.newIntArray(SIZE); + for (int i = 0; i < SIZE; i++) { + ints.set(i, i); + } + BytesStreamOutput out = new BytesStreamOutput(); + ints.writeTo(out); + read = IntArray.readFrom(new ReleasableBytesReference(bytesImpl(out.bytes()), () -> {}).streamInput()); + } + + private BytesReference bytesImpl(BytesReference bytes) { + if (type.equals("array")) { + return new BytesArray(bytes.toBytesRef()); + } + if (type.equals("paged_bytes_array")) { + if (bytes instanceof PagedBytesReference == false) { + throw new AssertionError("expected PagedBytesReference but saw [" + bytes.getClass() + "]"); + } + return bytes; + } + if (type.startsWith("composite_")) { + int size = Math.toIntExact(ByteSizeValue.parseBytesSizeValue(type.substring("composite_".length()), "type").getBytes()); + List references = new ArrayList<>(); + for (int from = 0; from < bytes.length(); from += size) { + int sliceSize = Math.min(size, bytes.length() - from); + references.add(new BytesArray(bytes.slice(from, Math.min(from + size, sliceSize)).toBytesRef())); + } + BytesReference ref = CompositeBytesReference.of(references.toArray(BytesReference[]::new)); + if (ref instanceof CompositeBytesReference == false) { + throw new AssertionError("expected CompositeBytesReference but saw [" + bytes.getClass() + "]"); + } + return ref; + } + throw new IllegalArgumentException("unsupported [type] " + type); + } + + @Benchmark + public long read() { + int res = 0; + for (int i = 0; i < SIZE; i++) { + res = res ^ read.get(i); + } + return res; + } +} diff --git a/server/src/main/java/org/elasticsearch/common/bytes/BytesArray.java b/server/src/main/java/org/elasticsearch/common/bytes/BytesArray.java index 094bf681efc3..afd814a0bdb5 100644 --- a/server/src/main/java/org/elasticsearch/common/bytes/BytesArray.java +++ b/server/src/main/java/org/elasticsearch/common/bytes/BytesArray.java @@ -10,6 +10,7 @@ package org.elasticsearch.common.bytes; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.util.ByteUtils; import java.io.IOException; import java.io.OutputStream; @@ -130,4 +131,9 @@ public final class BytesArray extends AbstractBytesReference { public void writeTo(OutputStream os) throws IOException { os.write(bytes, offset, length); } + + @Override + public int getIntLE(int index) { + return ByteUtils.readIntLE(bytes, offset + index); + } } diff --git a/server/src/main/java/org/elasticsearch/common/bytes/CompositeBytesReference.java b/server/src/main/java/org/elasticsearch/common/bytes/CompositeBytesReference.java index 52ca397eb8d2..0f1f26809189 100644 --- a/server/src/main/java/org/elasticsearch/common/bytes/CompositeBytesReference.java +++ b/server/src/main/java/org/elasticsearch/common/bytes/CompositeBytesReference.java @@ -225,4 +225,16 @@ public final class CompositeBytesReference extends AbstractBytesReference { public long ramBytesUsed() { return ramBytesUsed; } + + @Override + public int getIntLE(int index) { + int i = getOffsetIndex(index); + int idx = index - offsets[i]; + int end = idx + 4; + BytesReference wholeIntLivesHere = references[i]; + if (end <= wholeIntLivesHere.length()) { + return wholeIntLivesHere.getIntLE(idx); + } + return super.getIntLE(index); + } } diff --git a/server/src/test/java/org/elasticsearch/common/bytes/BytesArrayTests.java b/server/src/test/java/org/elasticsearch/common/bytes/BytesArrayTests.java index 9bd01b9be3a5..8dc32978282f 100644 --- a/server/src/test/java/org/elasticsearch/common/bytes/BytesArrayTests.java +++ b/server/src/test/java/org/elasticsearch/common/bytes/BytesArrayTests.java @@ -12,6 +12,8 @@ import org.hamcrest.Matchers; import java.io.IOException; +import static org.hamcrest.Matchers.equalTo; + public class BytesArrayTests extends AbstractBytesReferenceTestCase { @Override @@ -55,4 +57,17 @@ public class BytesArrayTests extends AbstractBytesReferenceTestCase { BytesArray pbr = (BytesArray) newBytesReferenceWithOffsetOfZero(length); assertEquals(0, pbr.arrayOffset()); } + + public void testGetIntLE() { + BytesReference ref = new BytesArray(new byte[] { 0x00, 0x12, 0x10, 0x12, 0x00, 0x01 }, 1, 5); + assertThat(ref.getIntLE(0), equalTo(0x00121012)); + assertThat(ref.getIntLE(1), equalTo(0x01001210)); + Exception e = expectThrows(ArrayIndexOutOfBoundsException.class, () -> ref.getIntLE(2)); + assertThat(e.getMessage(), equalTo("Index 3 out of bounds for length 3")); + /* + * Wait. 3!? The array has length 6. Well, the var handle stuff + * for arrays just subtracts three - because that's one more than + * the number of bytes in an int. Get it? I'm not sure I do either.... + */ + } } diff --git a/server/src/test/java/org/elasticsearch/common/bytes/CompositeBytesReferenceTests.java b/server/src/test/java/org/elasticsearch/common/bytes/CompositeBytesReferenceTests.java index 4c871fe2d513..efb3f1d30bdc 100644 --- a/server/src/test/java/org/elasticsearch/common/bytes/CompositeBytesReferenceTests.java +++ b/server/src/test/java/org/elasticsearch/common/bytes/CompositeBytesReferenceTests.java @@ -149,4 +149,18 @@ public class CompositeBytesReferenceTests extends AbstractBytesReferenceTestCase equalTo("CompositeBytesReference cannot hold more than 2GB") ); } + + public void testGetIntLE() { + BytesReference[] refs = new BytesReference[] { + new BytesArray(new byte[] { 0x12, 0x10, 0x12, 0x00 }), + new BytesArray(new byte[] { 0x01, 0x02, 0x03, 0x04 }) }; + BytesReference comp = CompositeBytesReference.of(refs); + assertThat(comp.getIntLE(0), equalTo(0x00121012)); + assertThat(comp.getIntLE(1), equalTo(0x01001210)); + assertThat(comp.getIntLE(2), equalTo(0x02010012)); + assertThat(comp.getIntLE(3), equalTo(0x03020100)); + assertThat(comp.getIntLE(4), equalTo(0x04030201)); + Exception e = expectThrows(ArrayIndexOutOfBoundsException.class, () -> comp.getIntLE(5)); + assertThat(e.getMessage(), equalTo("Index 4 out of bounds for length 4")); + } }