mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-06-28 01:22:26 -04:00
Speed getIntLE
from BytesReference
(#90147)
This speeds up `getIntLE` from `BytesReference` which we'll be using in the upcoming dense representations for aggregations. Here's the performance: ``` (type) Mode Cnt Before Error After Error Units array avgt 7 1.036 ± 0.062 0.261 ± 0.022 ns/op paged_bytes_array avgt 7 5.189 ± 0.172 5.317 ± 0.196 ns/op composite_256kb avgt 7 30.792 ± 0.834 11.240 ± 0.387 ns/op composite_262344b avgt 7 32.503 ± 1.017 11.155 ± 0.358 ns/op composite_1mb avgt 7 25.189 ± 0.449 8.379 ± 0.193 ns/op ``` The `array` method is how we'll use slices that don't span the edges of a netty buffer. The `paged_bytes_array` method doesn't really change and represents the default for internal stuff. I'll bet we could make it faster too, but I don't know that we use it in the hot path. The `composite_<size>` method is how we'll be reading large slabs from the netty byte buffer. We could probably do better if we relied on the sizes of the buffers being even, but we don't presently do that in the composite bytes array. The different sizes following `composite` show that the performance is dominated by the number of slabs in the composite buffer. `1mb` looks like the largest buffer netty uses. `256kb` is the smallest. The wild number of bytes intentionally doesn't line the int up on sensible values. I don't think we'll use sizes like that but it looks like the performance doesn't make a huge difference. We're dominated by the buffer choice.
This commit is contained in:
parent
87880d86aa
commit
7ebb09b9f3
6 changed files with 156 additions and 10 deletions
|
@ -81,19 +81,20 @@ To get realistic results, you should exercise care when running benchmarks. Here
|
|||
|
||||
NOTE: Linux only. Sorry Mac and Windows.
|
||||
|
||||
Disassembling is fun! Maybe not always useful, but always fun! Generally, you'll want to install `perf` and FCML's `hsdis`.
|
||||
`perf` is generally available via `apt-get install perf` or `pacman -S perf`. FCML is a little more involved. This worked
|
||||
Disassembling is fun! Maybe not always useful, but always fun! Generally, you'll want to install `perf` and the JDK's `hsdis`.
|
||||
`perf` is generally available via `apt-get install perf` or `pacman -S perf`. `hsdis` is a little more involved: you'll want to compile it from source. This worked
|
||||
on 2020-08-01:
|
||||
|
||||
```
|
||||
wget https://github.com/swojtasiak/fcml-lib/releases/download/v1.2.2/fcml-1.2.2.tar.gz
|
||||
tar xf fcml*
|
||||
cd fcml*
|
||||
./configure
|
||||
make
|
||||
cd example/hsdis
|
||||
make
|
||||
sudo cp .libs/libhsdis.so.0.0.0 /usr/lib/jvm/java-14-adoptopenjdk/lib/hsdis-amd64.so
|
||||
git clone git@github.com:openjdk/jdk.git
|
||||
cd jdk
|
||||
git checkout jdk-17-ga
|
||||
cd src/utils/hsdis
|
||||
# Get a known good binutils
|
||||
wget https://ftp.gnu.org/gnu/binutils/binutils-2.35.tar.gz
|
||||
tar xf binutils-2.35.tar.gz
|
||||
make BINUTILS=binutils-2.35 ARCH=amd64
|
||||
sudo cp build/linux-amd64/hsdis-amd64.so /usr/lib/jvm/java-17-openjdk/lib/server/
|
||||
```
|
||||
|
||||
If you want to disassemble a single method do something like this:
|
||||
|
|
|
@ -0,0 +1,98 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the Elastic License
|
||||
* 2.0 and the Server Side Public License, v 1; you may not use this file except
|
||||
* in compliance with, at your election, the Elastic License 2.0 or the Server
|
||||
* Side Public License, v 1.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.benchmark.common.util;
|
||||
|
||||
import org.elasticsearch.common.bytes.BytesArray;
|
||||
import org.elasticsearch.common.bytes.BytesReference;
|
||||
import org.elasticsearch.common.bytes.CompositeBytesReference;
|
||||
import org.elasticsearch.common.bytes.PagedBytesReference;
|
||||
import org.elasticsearch.common.bytes.ReleasableBytesReference;
|
||||
import org.elasticsearch.common.io.stream.BytesStreamOutput;
|
||||
import org.elasticsearch.common.unit.ByteSizeValue;
|
||||
import org.elasticsearch.common.util.BigArrays;
|
||||
import org.elasticsearch.common.util.IntArray;
|
||||
import org.openjdk.jmh.annotations.Benchmark;
|
||||
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||
import org.openjdk.jmh.annotations.Fork;
|
||||
import org.openjdk.jmh.annotations.Measurement;
|
||||
import org.openjdk.jmh.annotations.Mode;
|
||||
import org.openjdk.jmh.annotations.OperationsPerInvocation;
|
||||
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||
import org.openjdk.jmh.annotations.Param;
|
||||
import org.openjdk.jmh.annotations.Scope;
|
||||
import org.openjdk.jmh.annotations.Setup;
|
||||
import org.openjdk.jmh.annotations.State;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
@Warmup(iterations = 5)
|
||||
@Measurement(iterations = 7)
|
||||
@BenchmarkMode(Mode.AverageTime)
|
||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||
@State(Scope.Thread)
|
||||
@Fork(1)
|
||||
@OperationsPerInvocation(2621440)
|
||||
public class IntArrayBenchmark {
|
||||
static final int SIZE = IntArrayBenchmark.class.getAnnotation(OperationsPerInvocation.class).value();
|
||||
|
||||
@Param({ "array", "paged_bytes_array", "composite_256kb", "composite_262344b", "composite_1mb" })
|
||||
private String type;
|
||||
|
||||
private IntArray read;
|
||||
|
||||
@Setup
|
||||
public void init() throws IOException {
|
||||
IntArray ints = BigArrays.NON_RECYCLING_INSTANCE.newIntArray(SIZE);
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
ints.set(i, i);
|
||||
}
|
||||
BytesStreamOutput out = new BytesStreamOutput();
|
||||
ints.writeTo(out);
|
||||
read = IntArray.readFrom(new ReleasableBytesReference(bytesImpl(out.bytes()), () -> {}).streamInput());
|
||||
}
|
||||
|
||||
private BytesReference bytesImpl(BytesReference bytes) {
|
||||
if (type.equals("array")) {
|
||||
return new BytesArray(bytes.toBytesRef());
|
||||
}
|
||||
if (type.equals("paged_bytes_array")) {
|
||||
if (bytes instanceof PagedBytesReference == false) {
|
||||
throw new AssertionError("expected PagedBytesReference but saw [" + bytes.getClass() + "]");
|
||||
}
|
||||
return bytes;
|
||||
}
|
||||
if (type.startsWith("composite_")) {
|
||||
int size = Math.toIntExact(ByteSizeValue.parseBytesSizeValue(type.substring("composite_".length()), "type").getBytes());
|
||||
List<BytesReference> references = new ArrayList<>();
|
||||
for (int from = 0; from < bytes.length(); from += size) {
|
||||
int sliceSize = Math.min(size, bytes.length() - from);
|
||||
references.add(new BytesArray(bytes.slice(from, Math.min(from + size, sliceSize)).toBytesRef()));
|
||||
}
|
||||
BytesReference ref = CompositeBytesReference.of(references.toArray(BytesReference[]::new));
|
||||
if (ref instanceof CompositeBytesReference == false) {
|
||||
throw new AssertionError("expected CompositeBytesReference but saw [" + bytes.getClass() + "]");
|
||||
}
|
||||
return ref;
|
||||
}
|
||||
throw new IllegalArgumentException("unsupported [type] " + type);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public long read() {
|
||||
int res = 0;
|
||||
for (int i = 0; i < SIZE; i++) {
|
||||
res = res ^ read.get(i);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
}
|
|
@ -10,6 +10,7 @@ package org.elasticsearch.common.bytes;
|
|||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.util.ByteUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
|
@ -130,4 +131,9 @@ public final class BytesArray extends AbstractBytesReference {
|
|||
public void writeTo(OutputStream os) throws IOException {
|
||||
os.write(bytes, offset, length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getIntLE(int index) {
|
||||
return ByteUtils.readIntLE(bytes, offset + index);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -225,4 +225,16 @@ public final class CompositeBytesReference extends AbstractBytesReference {
|
|||
public long ramBytesUsed() {
|
||||
return ramBytesUsed;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getIntLE(int index) {
|
||||
int i = getOffsetIndex(index);
|
||||
int idx = index - offsets[i];
|
||||
int end = idx + 4;
|
||||
BytesReference wholeIntLivesHere = references[i];
|
||||
if (end <= wholeIntLivesHere.length()) {
|
||||
return wholeIntLivesHere.getIntLE(idx);
|
||||
}
|
||||
return super.getIntLE(index);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -12,6 +12,8 @@ import org.hamcrest.Matchers;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
|
||||
public class BytesArrayTests extends AbstractBytesReferenceTestCase {
|
||||
|
||||
@Override
|
||||
|
@ -55,4 +57,17 @@ public class BytesArrayTests extends AbstractBytesReferenceTestCase {
|
|||
BytesArray pbr = (BytesArray) newBytesReferenceWithOffsetOfZero(length);
|
||||
assertEquals(0, pbr.arrayOffset());
|
||||
}
|
||||
|
||||
public void testGetIntLE() {
|
||||
BytesReference ref = new BytesArray(new byte[] { 0x00, 0x12, 0x10, 0x12, 0x00, 0x01 }, 1, 5);
|
||||
assertThat(ref.getIntLE(0), equalTo(0x00121012));
|
||||
assertThat(ref.getIntLE(1), equalTo(0x01001210));
|
||||
Exception e = expectThrows(ArrayIndexOutOfBoundsException.class, () -> ref.getIntLE(2));
|
||||
assertThat(e.getMessage(), equalTo("Index 3 out of bounds for length 3"));
|
||||
/*
|
||||
* Wait. 3!? The array has length 6. Well, the var handle stuff
|
||||
* for arrays just subtracts three - because that's one more than
|
||||
* the number of bytes in an int. Get it? I'm not sure I do either....
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
|
|
@ -149,4 +149,18 @@ public class CompositeBytesReferenceTests extends AbstractBytesReferenceTestCase
|
|||
equalTo("CompositeBytesReference cannot hold more than 2GB")
|
||||
);
|
||||
}
|
||||
|
||||
public void testGetIntLE() {
|
||||
BytesReference[] refs = new BytesReference[] {
|
||||
new BytesArray(new byte[] { 0x12, 0x10, 0x12, 0x00 }),
|
||||
new BytesArray(new byte[] { 0x01, 0x02, 0x03, 0x04 }) };
|
||||
BytesReference comp = CompositeBytesReference.of(refs);
|
||||
assertThat(comp.getIntLE(0), equalTo(0x00121012));
|
||||
assertThat(comp.getIntLE(1), equalTo(0x01001210));
|
||||
assertThat(comp.getIntLE(2), equalTo(0x02010012));
|
||||
assertThat(comp.getIntLE(3), equalTo(0x03020100));
|
||||
assertThat(comp.getIntLE(4), equalTo(0x04030201));
|
||||
Exception e = expectThrows(ArrayIndexOutOfBoundsException.class, () -> comp.getIntLE(5));
|
||||
assertThat(e.getMessage(), equalTo("Index 4 out of bounds for length 4"));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue