Speed up getIntLE from BytesReference (#90147)

This speeds up `getIntLE` from `BytesReference` which we'll be using in
the upcoming dense representations for aggregations. Here's the
performance:

```
           (type)  Mode  Cnt   Before  Error  After    Error  Units
            array  avgt    7   1.036 ± 0.062   0.261 ± 0.022  ns/op
paged_bytes_array  avgt    7   5.189 ± 0.172   5.317 ± 0.196  ns/op
  composite_256kb  avgt    7  30.792 ± 0.834  11.240 ± 0.387  ns/op
composite_262344b  avgt    7  32.503 ± 1.017  11.155 ± 0.358  ns/op
    composite_1mb  avgt    7  25.189 ± 0.449   8.379 ± 0.193  ns/op
```

The `array` method is how we'll use slices that don't span the edges of
a netty buffer. The `paged_bytes_array` method doesn't really change and
represents the default for internal stuff. I'll bet we could make it
faster too, but I don't know that we use it in the hot path. The
`composite_<size>` method is how we'll be reading large slabs from the
netty byte buffer. We could probably do better if we relied on the sizes
of the buffers being even, but we don't presently do that in the
composite bytes array. The different sizes following `composite` show
that the performance is dominated by the number of slabs in the
composite buffer. `1mb` looks like the largest buffer netty uses.
`256kb` is the smallest. The odd `262344b` size intentionally doesn't
line the ints up on sensible values. I don't think we'll use sizes like
that, but it looks like it doesn't make a huge difference to performance.
We're dominated by the buffer choice.
This commit is contained in:
Nik Everett 2022-09-21 11:17:11 -04:00 committed by GitHub
parent 87880d86aa
commit 7ebb09b9f3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 156 additions and 10 deletions

View file

@ -81,19 +81,20 @@ To get realistic results, you should exercise care when running benchmarks. Here
NOTE: Linux only. Sorry Mac and Windows. NOTE: Linux only. Sorry Mac and Windows.
Disassembling is fun! Maybe not always useful, but always fun! Generally, you'll want to install `perf` and FCML's `hsdis`. Disassembling is fun! Maybe not always useful, but always fun! Generally, you'll want to install `perf` and the JDK's `hsdis`.
`perf` is generally available via `apt-get install perf` or `pacman -S perf`. FCML is a little more involved. This worked `perf` is generally available via `apt-get install perf` or `pacman -S perf`. `hsdis` you'll want to compile from source, which is a little more involved. This worked
on 2020-08-01: on 2020-08-01:
``` ```
wget https://github.com/swojtasiak/fcml-lib/releases/download/v1.2.2/fcml-1.2.2.tar.gz git clone git@github.com:openjdk/jdk.git
tar xf fcml* cd jdk
cd fcml* git checkout jdk-17-ga
./configure cd src/utils/hsdis
make # Get a known good binutils
cd example/hsdis wget https://ftp.gnu.org/gnu/binutils/binutils-2.35.tar.gz
make tar xf binutils-2.35.tar.gz
sudo cp .libs/libhsdis.so.0.0.0 /usr/lib/jvm/java-14-adoptopenjdk/lib/hsdis-amd64.so make BINUTILS=binutils-2.35 ARCH=amd64
sudo cp build/linux-amd64/hsdis-amd64.so /usr/lib/jvm/java-17-openjdk/lib/server/
``` ```
If you want to disassemble a single method do something like this: If you want to disassemble a single method do something like this:

View file

@ -0,0 +1,98 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.benchmark.common.util;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.bytes.CompositeBytesReference;
import org.elasticsearch.common.bytes.PagedBytesReference;
import org.elasticsearch.common.bytes.ReleasableBytesReference;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.IntArray;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OperationsPerInvocation;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
/**
 * JMH benchmark that reads every element of an {@link IntArray} which was
 * deserialized from a {@link BytesReference}. The {@code type} parameter picks
 * which {@link BytesReference} implementation backs the serialized bytes:
 * <ul>
 *   <li>{@code array} - a single {@link BytesArray}</li>
 *   <li>{@code paged_bytes_array} - the {@link PagedBytesReference} produced by
 *       {@link BytesStreamOutput#bytes()}</li>
 *   <li>{@code composite_<size>} - a {@link CompositeBytesReference} whose
 *       components are {@link BytesArray} slices of at most {@code <size>} bytes</li>
 * </ul>
 * Each benchmark invocation performs {@link #SIZE} reads, which is why the class
 * is annotated with a matching {@link OperationsPerInvocation}.
 */
@Warmup(iterations = 5)
@Measurement(iterations = 7)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Thread)
@Fork(1)
@OperationsPerInvocation(2621440)
public class IntArrayBenchmark {
    /** Number of ints in the array; read from the annotation so the two can never drift apart. */
    static final int SIZE = IntArrayBenchmark.class.getAnnotation(OperationsPerInvocation.class).value();

    /** Prefix of the {@code type} values that select a composite reference; the remainder is a byte size. */
    private static final String COMPOSITE_PREFIX = "composite_";

    @Param({ "array", "paged_bytes_array", "composite_256kb", "composite_262344b", "composite_1mb" })
    private String type;

    /** The array under test, deserialized from the reference chosen by {@link #type}. */
    private IntArray read;

    /**
     * Builds an {@link IntArray} holding {@code 0..SIZE-1}, serializes it, rewraps the
     * serialized bytes in the configured {@link BytesReference} implementation and
     * deserializes {@link #read} from that.
     *
     * @throws IOException if serializing or deserializing the array fails
     */
    @Setup
    public void init() throws IOException {
        IntArray ints = BigArrays.NON_RECYCLING_INSTANCE.newIntArray(SIZE);
        for (int i = 0; i < SIZE; i++) {
            ints.set(i, i);
        }
        BytesStreamOutput out = new BytesStreamOutput();
        ints.writeTo(out);
        read = IntArray.readFrom(new ReleasableBytesReference(bytesImpl(out.bytes()), () -> {}).streamInput());
    }

    /**
     * Rewraps {@code bytes} in the {@link BytesReference} implementation named by {@link #type}.
     *
     * @param bytes the serialized array
     * @return a reference with the same content backed by the requested implementation
     * @throws AssertionError if the resulting reference is not of the implementation the benchmark means to measure
     * @throws IllegalArgumentException if {@link #type} is not a supported value
     */
    private BytesReference bytesImpl(BytesReference bytes) {
        if (type.equals("array")) {
            return new BytesArray(bytes.toBytesRef());
        }
        if (type.equals("paged_bytes_array")) {
            if (bytes instanceof PagedBytesReference == false) {
                throw new AssertionError("expected PagedBytesReference but saw [" + bytes.getClass() + "]");
            }
            return bytes;
        }
        if (type.startsWith(COMPOSITE_PREFIX)) {
            int size = Math.toIntExact(ByteSizeValue.parseBytesSizeValue(type.substring(COMPOSITE_PREFIX.length()), "type").getBytes());
            List<BytesReference> references = new ArrayList<>();
            for (int from = 0; from < bytes.length(); from += size) {
                // Every slice is `size` bytes long except, possibly, the last one.
                int sliceSize = Math.min(size, bytes.length() - from);
                references.add(new BytesArray(bytes.slice(from, sliceSize).toBytesRef()));
            }
            BytesReference ref = CompositeBytesReference.of(references.toArray(BytesReference[]::new));
            if (ref instanceof CompositeBytesReference == false) {
                // Report the class of the reference we actually built, not of the input.
                throw new AssertionError("expected CompositeBytesReference but saw [" + ref.getClass() + "]");
            }
            return ref;
        }
        throw new IllegalArgumentException("unsupported [type] " + type);
    }

    /**
     * Reads all {@link #SIZE} values and folds them together with xor so the JIT
     * cannot discard the reads.
     *
     * @return the xor of every value in the array
     */
    @Benchmark
    public long read() {
        int res = 0;
        for (int i = 0; i < SIZE; i++) {
            res = res ^ read.get(i);
        }
        return res;
    }
}

View file

@ -10,6 +10,7 @@ package org.elasticsearch.common.bytes;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.util.ByteUtils;
import java.io.IOException; import java.io.IOException;
import java.io.OutputStream; import java.io.OutputStream;
@ -130,4 +131,9 @@ public final class BytesArray extends AbstractBytesReference {
public void writeTo(OutputStream os) throws IOException { public void writeTo(OutputStream os) throws IOException {
os.write(bytes, offset, length); os.write(bytes, offset, length);
} }
/**
 * Reads a little-endian {@code int} directly out of the backing array.
 *
 * @param index position of the first of the four bytes, relative to the start of this reference
 * @return the int assembled from the four bytes starting at {@code index}
 */
@Override
public int getIntLE(int index) {
    // Translate the reference-relative index into a position in the backing array.
    final int position = offset + index;
    return ByteUtils.readIntLE(bytes, position);
}
} }

View file

@ -225,4 +225,16 @@ public final class CompositeBytesReference extends AbstractBytesReference {
public long ramBytesUsed() { public long ramBytesUsed() {
return ramBytesUsed; return ramBytesUsed;
} }
/**
 * Reads a little-endian {@code int} starting at {@code index}.
 * <p>
 * When all four bytes sit inside a single component reference the read is
 * delegated to that component. Otherwise the int straddles two or more
 * components and the inherited implementation is used.
 *
 * @param index position of the first of the four bytes, relative to the start of this reference
 * @return the int assembled from the four bytes starting at {@code index}
 */
@Override
public int getIntLE(int index) {
    final int slot = getOffsetIndex(index);
    // Position of the first byte relative to the start of the component that holds it.
    final int relative = index - offsets[slot];
    final BytesReference component = references[slot];
    if (relative + 4 > component.length()) {
        // The int runs past the end of this component.
        return super.getIntLE(index);
    }
    return component.getIntLE(relative);
}
} }

View file

@ -12,6 +12,8 @@ import org.hamcrest.Matchers;
import java.io.IOException; import java.io.IOException;
import static org.hamcrest.Matchers.equalTo;
public class BytesArrayTests extends AbstractBytesReferenceTestCase { public class BytesArrayTests extends AbstractBytesReferenceTestCase {
@Override @Override
@ -55,4 +57,17 @@ public class BytesArrayTests extends AbstractBytesReferenceTestCase {
BytesArray pbr = (BytesArray) newBytesReferenceWithOffsetOfZero(length); BytesArray pbr = (BytesArray) newBytesReferenceWithOffsetOfZero(length);
assertEquals(0, pbr.arrayOffset()); assertEquals(0, pbr.arrayOffset());
} }
/**
 * Checks little-endian int reads from a {@link BytesArray} that starts at a
 * non-zero offset into its backing array, including the failure when the
 * four bytes would run off the end of that array.
 */
public void testGetIntLE() {
    byte[] backing = new byte[] { 0x00, 0x12, 0x10, 0x12, 0x00, 0x01 };
    // Skip the first byte so that reads have to account for the offset.
    BytesReference ref = new BytesArray(backing, 1, 5);
    assertThat(ref.getIntLE(0), equalTo(0x00121012));
    assertThat(ref.getIntLE(1), equalTo(0x01001210));
    Exception tooFar = expectThrows(ArrayIndexOutOfBoundsException.class, () -> ref.getIntLE(2));
    /*
     * "Index 3" is the position in the backing array: offset 1 plus index 2.
     * "length 3" is not the array's length (6). The bounds check appears to be
     * made against the array length minus three - one less than the number of
     * bytes in an int - which is the number of positions at which a whole int
     * can start.
     */
    assertThat(tooFar.getMessage(), equalTo("Index 3 out of bounds for length 3"));
}
} }

View file

@ -149,4 +149,18 @@ public class CompositeBytesReferenceTests extends AbstractBytesReferenceTestCase
equalTo("CompositeBytesReference cannot hold more than 2GB") equalTo("CompositeBytesReference cannot hold more than 2GB")
); );
} }
/**
 * Checks little-endian int reads from a two-component composite: reads that
 * fit inside one component, reads that straddle the boundary between the two,
 * and a read that runs off the end.
 */
public void testGetIntLE() {
    BytesReference first = new BytesArray(new byte[] { 0x12, 0x10, 0x12, 0x00 });
    BytesReference second = new BytesArray(new byte[] { 0x01, 0x02, 0x03, 0x04 });
    BytesReference comp = CompositeBytesReference.of(new BytesReference[] { first, second });
    // Expected value for each start index: 0 and 4 sit in one component, 1 to 3 straddle both.
    int[] expected = new int[] { 0x00121012, 0x01001210, 0x02010012, 0x03020100, 0x04030201 };
    for (int index = 0; index < expected.length; index++) {
        assertThat(comp.getIntLE(index), equalTo(expected[index]));
    }
    Exception pastEnd = expectThrows(ArrayIndexOutOfBoundsException.class, () -> comp.getIntLE(5));
    assertThat(pastEnd.getMessage(), equalTo("Index 4 out of bounds for length 4"));
}
} }