diff --git a/benchmarks/README.md b/benchmarks/README.md index 0cf95a2e81b9..af72d16d2ad4 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -82,19 +82,21 @@ To get realistic results, you should exercise care when running benchmarks. Here NOTE: Linux only. Sorry Mac and Windows. Disassembling is fun! Maybe not always useful, but always fun! Generally, you'll want to install `perf` and the JDK's `hsdis`. -`perf` is generally available via `apg-get install perf` or `pacman -S perf`. `hsdis` you'll want to compile from source. is a little more involved. This worked +`perf` is generally available via `apt-get install perf` or `pacman -S perf linux-tools`. `hsdis` is a little more involved: you'll want to compile it from source. This worked on 2020-08-01: ``` git clone git@github.com:openjdk/jdk.git cd jdk -git checkout jdk-17-ga -cd src/utils/hsdis +git checkout jdk-24-ga # Get a known good binutils wget https://ftp.gnu.org/gnu/binutils/binutils-2.35.tar.gz tar xf binutils-2.35.tar.gz -make BINUTILS=binutils-2.35 ARCH=amd64 -sudo cp build/linux-amd64/hsdis-amd64.so /usr/lib/jvm/java-17-openjdk/lib/server/ +bash configure --with-hsdis=binutils --with-binutils-src=binutils-2.35 \ + --with-boot-jdk=~/.gradle/jdks/oracle_corporation-24-amd64-linux.2 +make build-hsdis +cp ./build/linux-x86_64-server-release/jdk/lib/hsdis-amd64.so \ + ~/.gradle/jdks/oracle_corporation-24-amd64-linux.2/lib/hsdis.so ``` If you want to disassemble a single method do something like this: @@ -105,6 +107,30 @@ gradlew -p benchmarks run --args ' MemoryStatsBenchmark -jvmArgs "-XX:+UnlockDia If you want `perf` to find the hot methods for you, then do add `-prof perfasm`. +NOTE: `perfasm` will need more access: +``` +sudo bash +echo -1 > /proc/sys/kernel/perf_event_paranoid +exit +``` + +If you get warnings like: +``` +The perf event count is suspiciously low (0). 
+``` +then check if you are bumping into [this](https://man.archlinux.org/man/perf-stat.1.en#INTEL_HYBRID_SUPPORT) +by running: +``` +perf stat -B dd if=/dev/zero of=/dev/null count=1000000 +``` + +If you see lines like: +``` + 765019980 cpu_atom/cycles/ # 1.728 GHz (0.60%) + 2258845959 cpu_core/cycles/ # 5.103 GHz (99.18%) +``` +then `perf` is just not going to work for you. + ## Async Profiler Note: Linux and Mac only. Sorry Windows. diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/ParseIpBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/ParseIpBenchmark.java new file mode 100644 index 000000000000..73e3986cc165 --- /dev/null +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/ParseIpBenchmark.java @@ -0,0 +1,61 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.benchmark.compute.operator; + +import org.apache.lucene.document.InetAddressPoint; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.breaker.NoopCircuitBreaker; +import org.elasticsearch.common.network.InetAddresses; +import org.elasticsearch.compute.operator.BreakingBytesRefBuilder; +import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ParseIp; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; + +import java.net.InetAddress; +import java.util.concurrent.TimeUnit; + +@Warmup(iterations = 5) +@Measurement(iterations = 7) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@State(Scope.Thread) +@Fork(1) +public class ParseIpBenchmark { + private final BytesRef ip = new BytesRef("192.168.0.1"); + private final BreakingBytesRefBuilder scratch = ParseIp.buildScratch(new NoopCircuitBreaker("request")); + + @Benchmark + public BytesRef leadingZerosRejected() { + return ParseIp.leadingZerosRejected(ip, scratch); + } + + @Benchmark + public BytesRef leadingZerosAreDecimal() { + return ParseIp.leadingZerosAreDecimal(ip, scratch); + } + + @Benchmark + public BytesRef leadingZerosAreOctal() { + return ParseIp.leadingZerosAreOctal(ip, scratch); + } + + @Benchmark + public BytesRef original() { + InetAddress inetAddress = InetAddresses.forString(ip.utf8ToString()); + return new BytesRef(InetAddressPoint.encode(inetAddress)); + } +} diff --git a/docs/changelog/126338.yaml b/docs/changelog/126338.yaml new file mode 100644 index 000000000000..b37086dd7495 --- /dev/null +++ b/docs/changelog/126338.yaml @@ -0,0 +1,5 @@ +pr: 126338 
+summary: Speed up TO_IP +area: ES|QL +type: enhancement +issues: [] diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ParseIpLeadingZerosAreDecimalEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ParseIpLeadingZerosAreDecimalEvaluator.java new file mode 100644 index 000000000000..b4f732dab404 --- /dev/null +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ParseIpLeadingZerosAreDecimalEvaluator.java @@ -0,0 +1,149 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.xpack.esql.expression.function.scalar.convert; + +import java.lang.IllegalArgumentException; +import java.lang.Override; +import java.lang.String; +import java.util.function.Function; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.BytesRefVector; +import org.elasticsearch.compute.data.Vector; +import org.elasticsearch.compute.operator.BreakingBytesRefBuilder; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.xpack.esql.core.tree.Source; + +/** + * {@link EvalOperator.ExpressionEvaluator} implementation for {@link ParseIp}. + * This class is generated. Edit {@code ConvertEvaluatorImplementer} instead. 
/**
 * {@link EvalOperator.ExpressionEvaluator} implementation for {@link ParseIp}.
 * This class is generated. Edit {@code ConvertEvaluatorImplementer} instead.
 * <p>
 * Applies {@link ParseIp#leadingZerosAreDecimal} to every value produced by the
 * wrapped {@code string} evaluator. A value that fails with
 * {@link IllegalArgumentException} is handed to {@code registerException} and
 * does not appear in the output.
 */
public final class ParseIpLeadingZerosAreDecimalEvaluator extends AbstractConvertFunction.AbstractEvaluator {
  // Evaluator that produces the input strings; returned from next() and closed in close().
  private final EvalOperator.ExpressionEvaluator string;

  // Output buffer handed to ParseIp on every call; owned by this evaluator and closed in close().
  private final BreakingBytesRefBuilder scratch;

  public ParseIpLeadingZerosAreDecimalEvaluator(Source source,
      EvalOperator.ExpressionEvaluator string, BreakingBytesRefBuilder scratch,
      DriverContext driverContext) {
    super(driverContext, source);
    this.string = string;
    this.scratch = scratch;
  }

  /** Returns the evaluator whose output this one converts. */
  @Override
  public EvalOperator.ExpressionEvaluator next() {
    return string;
  }

  /**
   * Converts a vector (one value per position). A constant vector is converted
   * once, from position 0, into a constant block; otherwise each position is
   * converted individually and a failed conversion becomes a null.
   */
  @Override
  public Block evalVector(Vector v) {
    BytesRefVector vector = (BytesRefVector) v;
    int positionCount = v.getPositionCount();
    // Reused holder for reading input values out of the vector.
    BytesRef scratchPad = new BytesRef();
    if (vector.isConstant()) {
      try {
        return driverContext.blockFactory().newConstantBytesRefBlockWith(evalValue(vector, 0, scratchPad), positionCount);
      } catch (IllegalArgumentException  e) {
        // The single constant value is invalid, so every position is null.
        registerException(e);
        return driverContext.blockFactory().newConstantNullBlock(positionCount);
      }
    }
    try (BytesRefBlock.Builder builder = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) {
      for (int p = 0; p < positionCount; p++) {
        try {
          // NOTE(review): for v4 input the returned BytesRef looks like a view over
          // this.scratch; presumably appendBytesRef copies it - confirm.
          builder.appendBytesRef(evalValue(vector, p, scratchPad));
        } catch (IllegalArgumentException  e) {
          registerException(e);
          builder.appendNull();
        }
      }
      return builder.build();
    }
  }

  /** Reads the value at {@code index} from the vector and parses it. */
  private BytesRef evalValue(BytesRefVector container, int index, BytesRef scratchPad) {
    BytesRef value = container.getBytesRef(index, scratchPad);
    return ParseIp.leadingZerosAreDecimal(value, this.scratch);
  }

  /**
   * Converts a block, where a position may hold zero, one or many values.
   * Values that fail to convert are dropped; a position that ends up with no
   * converted values becomes null.
   */
  @Override
  public Block evalBlock(Block b) {
    BytesRefBlock block = (BytesRefBlock) b;
    int positionCount = block.getPositionCount();
    try (BytesRefBlock.Builder builder = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) {
      BytesRef scratchPad = new BytesRef();
      for (int p = 0; p < positionCount; p++) {
        int valueCount = block.getValueCount(p);
        int start = block.getFirstValueIndex(p);
        int end = start + valueCount;
        boolean positionOpened = false;
        boolean valuesAppended = false;
        for (int i = start; i < end; i++) {
          try {
            // Convert first: if this throws, nothing has been written to the builder yet.
            BytesRef value = evalValue(block, i, scratchPad);
            // Open a multi-value entry lazily, only once a value of a multi-valued
            // position has actually converted.
            if (positionOpened == false && valueCount > 1) {
              builder.beginPositionEntry();
              positionOpened = true;
            }
            builder.appendBytesRef(value);
            valuesAppended = true;
          } catch (IllegalArgumentException  e) {
            registerException(e);
          }
        }
        if (valuesAppended == false) {
          // Either the position was empty or every value failed to convert.
          builder.appendNull();
        } else if (positionOpened) {
          builder.endPositionEntry();
        }
      }
      return builder.build();
    }
  }

  /** Reads the value at {@code index} from the block and parses it. */
  private BytesRef evalValue(BytesRefBlock container, int index, BytesRef scratchPad) {
    BytesRef value = container.getBytesRef(index, scratchPad);
    return ParseIp.leadingZerosAreDecimal(value, this.scratch);
  }

  // The scratch buffer is deliberately not part of the string form.
  @Override
  public String toString() {
    return "ParseIpLeadingZerosAreDecimalEvaluator[" + "string=" + string + "]";
  }

  /** Closes both the wrapped evaluator and the scratch buffer. */
  @Override
  public void close() {
    Releasables.closeExpectNoException(string, scratch);
  }

  /** Builds one evaluator, with its own scratch buffer, per {@link DriverContext}. */
  public static class Factory implements EvalOperator.ExpressionEvaluator.Factory {
    private final Source source;

    private final EvalOperator.ExpressionEvaluator.Factory string;

    // NOTE(review): type arguments appear to be missing here; get() passes
    // scratch.apply(context) where a BreakingBytesRefBuilder is required, so this
    // is presumably Function<DriverContext, BreakingBytesRefBuilder> - confirm
    // against the generator output.
    private final Function scratch;

    public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory string,
        Function scratch) {
      this.source = source;
      this.string = string;
      this.scratch = scratch;
    }

    @Override
    public ParseIpLeadingZerosAreDecimalEvaluator get(DriverContext context) {
      return new ParseIpLeadingZerosAreDecimalEvaluator(source, string.get(context), scratch.apply(context), context);
    }

    // Same text as the evaluator's own toString().
    @Override
    public String toString() {
      return "ParseIpLeadingZerosAreDecimalEvaluator[" + "string=" + string + "]";
    }
  }
}
b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ParseIpLeadingZerosAreOctalEvaluator.java similarity index 74% rename from x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToIPFromStringEvaluator.java rename to x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ParseIpLeadingZerosAreOctalEvaluator.java index 0463f20d4b65..009cdfa0a202 100644 --- a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToIPFromStringEvaluator.java +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ParseIpLeadingZerosAreOctalEvaluator.java @@ -7,32 +7,38 @@ package org.elasticsearch.xpack.esql.expression.function.scalar.convert; import java.lang.IllegalArgumentException; import java.lang.Override; import java.lang.String; +import java.util.function.Function; import org.apache.lucene.util.BytesRef; import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BytesRefBlock; import org.elasticsearch.compute.data.BytesRefVector; import org.elasticsearch.compute.data.Vector; +import org.elasticsearch.compute.operator.BreakingBytesRefBuilder; import org.elasticsearch.compute.operator.DriverContext; import org.elasticsearch.compute.operator.EvalOperator; import org.elasticsearch.core.Releasables; import org.elasticsearch.xpack.esql.core.tree.Source; /** - * {@link EvalOperator.ExpressionEvaluator} implementation for {@link ToIP}. + * {@link EvalOperator.ExpressionEvaluator} implementation for {@link ParseIp}. * This class is generated. Edit {@code ConvertEvaluatorImplementer} instead. 
*/ -public final class ToIPFromStringEvaluator extends AbstractConvertFunction.AbstractEvaluator { - private final EvalOperator.ExpressionEvaluator asString; +public final class ParseIpLeadingZerosAreOctalEvaluator extends AbstractConvertFunction.AbstractEvaluator { + private final EvalOperator.ExpressionEvaluator string; - public ToIPFromStringEvaluator(Source source, EvalOperator.ExpressionEvaluator asString, + private final BreakingBytesRefBuilder scratch; + + public ParseIpLeadingZerosAreOctalEvaluator(Source source, + EvalOperator.ExpressionEvaluator string, BreakingBytesRefBuilder scratch, DriverContext driverContext) { super(driverContext, source); - this.asString = asString; + this.string = string; + this.scratch = scratch; } @Override public EvalOperator.ExpressionEvaluator next() { - return asString; + return string; } @Override @@ -63,7 +69,7 @@ public final class ToIPFromStringEvaluator extends AbstractConvertFunction.Abstr private BytesRef evalValue(BytesRefVector container, int index, BytesRef scratchPad) { BytesRef value = container.getBytesRef(index, scratchPad); - return ToIP.fromKeyword(value); + return ParseIp.leadingZerosAreOctal(value, this.scratch); } @Override @@ -103,37 +109,41 @@ public final class ToIPFromStringEvaluator extends AbstractConvertFunction.Abstr private BytesRef evalValue(BytesRefBlock container, int index, BytesRef scratchPad) { BytesRef value = container.getBytesRef(index, scratchPad); - return ToIP.fromKeyword(value); + return ParseIp.leadingZerosAreOctal(value, this.scratch); } @Override public String toString() { - return "ToIPFromStringEvaluator[" + "asString=" + asString + "]"; + return "ParseIpLeadingZerosAreOctalEvaluator[" + "string=" + string + "]"; } @Override public void close() { - Releasables.closeExpectNoException(asString); + Releasables.closeExpectNoException(string, scratch); } public static class Factory implements EvalOperator.ExpressionEvaluator.Factory { private final Source source; - private final 
EvalOperator.ExpressionEvaluator.Factory asString; + private final EvalOperator.ExpressionEvaluator.Factory string; - public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory asString) { + private final Function scratch; + + public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory string, + Function scratch) { this.source = source; - this.asString = asString; + this.string = string; + this.scratch = scratch; } @Override - public ToIPFromStringEvaluator get(DriverContext context) { - return new ToIPFromStringEvaluator(source, asString.get(context), context); + public ParseIpLeadingZerosAreOctalEvaluator get(DriverContext context) { + return new ParseIpLeadingZerosAreOctalEvaluator(source, string.get(context), scratch.apply(context), context); } @Override public String toString() { - return "ToIPFromStringEvaluator[" + "asString=" + asString + "]"; + return "ParseIpLeadingZerosAreOctalEvaluator[" + "string=" + string + "]"; } } } diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ParseIpLeadingZerosRejectedEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ParseIpLeadingZerosRejectedEvaluator.java new file mode 100644 index 000000000000..f826ec26e8e9 --- /dev/null +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ParseIpLeadingZerosRejectedEvaluator.java @@ -0,0 +1,149 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. 
package org.elasticsearch.xpack.esql.expression.function.scalar.convert;

import java.lang.IllegalArgumentException;
import java.lang.Override;
import java.lang.String;
import java.util.function.Function;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BytesRefBlock;
import org.elasticsearch.compute.data.BytesRefVector;
import org.elasticsearch.compute.data.Vector;
import org.elasticsearch.compute.operator.BreakingBytesRefBuilder;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.compute.operator.EvalOperator;
import org.elasticsearch.core.Releasables;
import org.elasticsearch.xpack.esql.core.tree.Source;

/**
 * {@link EvalOperator.ExpressionEvaluator} implementation for {@link ParseIp}.
 * This class is generated. Edit {@code ConvertEvaluatorImplementer} instead.
 * <p>
 * Applies {@link ParseIp#leadingZerosRejected} to every value produced by the
 * wrapped {@code string} evaluator. A value that fails with
 * {@link IllegalArgumentException} is handed to {@code registerException} and
 * does not appear in the output.
 */
public final class ParseIpLeadingZerosRejectedEvaluator extends AbstractConvertFunction.AbstractEvaluator {
  // Evaluator that produces the input strings; returned from next() and closed in close().
  private final EvalOperator.ExpressionEvaluator string;

  // Output buffer handed to ParseIp on every call; owned by this evaluator and closed in close().
  private final BreakingBytesRefBuilder scratch;

  public ParseIpLeadingZerosRejectedEvaluator(Source source,
      EvalOperator.ExpressionEvaluator string, BreakingBytesRefBuilder scratch,
      DriverContext driverContext) {
    super(driverContext, source);
    this.string = string;
    this.scratch = scratch;
  }

  /** Returns the evaluator whose output this one converts. */
  @Override
  public EvalOperator.ExpressionEvaluator next() {
    return string;
  }

  /**
   * Converts a vector (one value per position). A constant vector is converted
   * once, from position 0, into a constant block; otherwise each position is
   * converted individually and a failed conversion becomes a null.
   */
  @Override
  public Block evalVector(Vector v) {
    BytesRefVector vector = (BytesRefVector) v;
    int positionCount = v.getPositionCount();
    // Reused holder for reading input values out of the vector.
    BytesRef scratchPad = new BytesRef();
    if (vector.isConstant()) {
      try {
        return driverContext.blockFactory().newConstantBytesRefBlockWith(evalValue(vector, 0, scratchPad), positionCount);
      } catch (IllegalArgumentException  e) {
        // The single constant value is invalid, so every position is null.
        registerException(e);
        return driverContext.blockFactory().newConstantNullBlock(positionCount);
      }
    }
    try (BytesRefBlock.Builder builder = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) {
      for (int p = 0; p < positionCount; p++) {
        try {
          // NOTE(review): for v4 input the returned BytesRef looks like a view over
          // this.scratch; presumably appendBytesRef copies it - confirm.
          builder.appendBytesRef(evalValue(vector, p, scratchPad));
        } catch (IllegalArgumentException  e) {
          registerException(e);
          builder.appendNull();
        }
      }
      return builder.build();
    }
  }

  /** Reads the value at {@code index} from the vector and parses it. */
  private BytesRef evalValue(BytesRefVector container, int index, BytesRef scratchPad) {
    BytesRef value = container.getBytesRef(index, scratchPad);
    return ParseIp.leadingZerosRejected(value, this.scratch);
  }

  /**
   * Converts a block, where a position may hold zero, one or many values.
   * Values that fail to convert are dropped; a position that ends up with no
   * converted values becomes null.
   */
  @Override
  public Block evalBlock(Block b) {
    BytesRefBlock block = (BytesRefBlock) b;
    int positionCount = block.getPositionCount();
    try (BytesRefBlock.Builder builder = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) {
      BytesRef scratchPad = new BytesRef();
      for (int p = 0; p < positionCount; p++) {
        int valueCount = block.getValueCount(p);
        int start = block.getFirstValueIndex(p);
        int end = start + valueCount;
        boolean positionOpened = false;
        boolean valuesAppended = false;
        for (int i = start; i < end; i++) {
          try {
            // Convert first: if this throws, nothing has been written to the builder yet.
            BytesRef value = evalValue(block, i, scratchPad);
            // Open a multi-value entry lazily, only once a value of a multi-valued
            // position has actually converted.
            if (positionOpened == false && valueCount > 1) {
              builder.beginPositionEntry();
              positionOpened = true;
            }
            builder.appendBytesRef(value);
            valuesAppended = true;
          } catch (IllegalArgumentException  e) {
            registerException(e);
          }
        }
        if (valuesAppended == false) {
          // Either the position was empty or every value failed to convert.
          builder.appendNull();
        } else if (positionOpened) {
          builder.endPositionEntry();
        }
      }
      return builder.build();
    }
  }

  /** Reads the value at {@code index} from the block and parses it. */
  private BytesRef evalValue(BytesRefBlock container, int index, BytesRef scratchPad) {
    BytesRef value = container.getBytesRef(index, scratchPad);
    return ParseIp.leadingZerosRejected(value, this.scratch);
  }

  // The scratch buffer is deliberately not part of the string form.
  @Override
  public String toString() {
    return "ParseIpLeadingZerosRejectedEvaluator[" + "string=" + string + "]";
  }

  /** Closes both the wrapped evaluator and the scratch buffer. */
  @Override
  public void close() {
    Releasables.closeExpectNoException(string, scratch);
  }

  /** Builds one evaluator, with its own scratch buffer, per {@link DriverContext}. */
  public static class Factory implements EvalOperator.ExpressionEvaluator.Factory {
    private final Source source;

    private final EvalOperator.ExpressionEvaluator.Factory string;

    // NOTE(review): type arguments appear to be missing here; get() passes
    // scratch.apply(context) where a BreakingBytesRefBuilder is required, so this
    // is presumably Function<DriverContext, BreakingBytesRefBuilder> - confirm
    // against the generator output.
    private final Function scratch;

    public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory string,
        Function scratch) {
      this.source = source;
      this.string = string;
      this.scratch = scratch;
    }

    @Override
    public ParseIpLeadingZerosRejectedEvaluator get(DriverContext context) {
      return new ParseIpLeadingZerosRejectedEvaluator(source, string.get(context), scratch.apply(context), context);
    }

    // Same text as the evaluator's own toString().
    @Override
    public String toString() {
      return "ParseIpLeadingZerosRejectedEvaluator[" + "string=" + string + "]";
    }
  }
}
+ */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.convert; + +import org.apache.lucene.document.InetAddressPoint; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.breaker.CircuitBreaker; +import org.elasticsearch.common.network.InetAddresses; +import org.elasticsearch.compute.ann.ConvertEvaluator; +import org.elasticsearch.compute.ann.Fixed; +import org.elasticsearch.compute.operator.BreakingBytesRefBuilder; +import org.elasticsearch.compute.operator.EvalOperator; + +import java.net.InetAddress; + +/** + * Fast IP parsing suitable for embedding in an {@link EvalOperator.ExpressionEvaluator} + * because they don't allocate memory on every run. Instead, it converts directly from + * utf-8 encoded strings into {@link InetAddressPoint} encoded ips. + *

+ * This contains three parsing methods to handle the three ways ipv4 addresses + * have historically handled leading 0s, namely, {@link #leadingZerosRejected reject} them, + * treat them as {@link #leadingZerosAreDecimal decimal} numbers, and treat them as + * {@link #leadingZerosAreOctal} numbers. + *

+ *

+ * Note: We say "directly from utf-8" but, really, all of the digits in an ip are + * in the traditional 7-bit ascii range where utf-8 overlaps. So we just treat everything + * as 7-bit ascii. Anything that isn't in the range is an invalid ip anyway. Much love + * for the designers of utf-8 for making it this way. + *

+ */ +public class ParseIp { + private static final byte[] IPV4_PREFIX = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1 }; + + static final AbstractConvertFunction.BuildFactory FROM_KEYWORD_LEADING_ZEROS_REJECTED = (source, field) -> { + return new ParseIpLeadingZerosRejectedEvaluator.Factory(source, field, driverContext -> buildScratch(driverContext.breaker())); + }; + + public static BreakingBytesRefBuilder buildScratch(CircuitBreaker breaker) { + BreakingBytesRefBuilder scratch = new BreakingBytesRefBuilder(breaker, "to_ip", 16); + scratch.setLength(InetAddressPoint.BYTES); + return scratch; + } + + /** + * Parse an IP address, rejecting v4 addresses with leading 0s. This aligns + * exactly with {@link InetAddresses#forString(String)}. + *
    + *
  • 192.168.1.1 : valid
  • + *
  • 192.168.0.1 : valid
  • + *
  • 192.168.01.1 : invalid
  • + *
+ * @param scratch A "scratch" memory space build by {@link #buildScratch} + */ + @ConvertEvaluator(extraName = "LeadingZerosRejected", warnExceptions = { IllegalArgumentException.class }) + public static BytesRef leadingZerosRejected( + BytesRef string, + @Fixed(includeInToString = false, scope = Fixed.Scope.THREAD_LOCAL) BreakingBytesRefBuilder scratch + ) { + /* + * If this is an ipv6 address then delegate to InetAddresses.forString + * because we don't have anything nice for parsing those. + */ + int end = string.offset + string.length; + if (isV6(string, end)) { + InetAddress inetAddress = InetAddresses.forString(string.utf8ToString()); + return new BytesRef(InetAddressPoint.encode(inetAddress)); + } + + System.arraycopy(IPV4_PREFIX, 0, scratch.bytes(), 0, IPV4_PREFIX.length); + int offset = string.offset; + for (int dest = IPV4_PREFIX.length; dest < InetAddressPoint.BYTES; dest++) { + if (offset >= end) { + throw invalid(string); + } + if (string.bytes[offset] == '0') { + // Lone zeros are just 0, but a 0 with numbers after it are invalid + offset++; + if (offset == end || string.bytes[offset] == '.') { + scratch.bytes()[dest] = (byte) 0; + offset++; + continue; + } + throw invalid(string); + } + int v = digit(string, offset++); + while (offset < end && string.bytes[offset] != '.') { + v = v * 10 + digit(string, offset++); + } + offset++; + if (v > 255) { + throw invalid(string); + } + scratch.bytes()[dest] = (byte) v; + } + return scratch.bytesRefView(); + } + + /** + * Parse an IP address, interpreting v4 addresses with leading 0s as + * decimal numbers. + *
    + *
  • 192.168.1.1 : valid
  • + *
  • 192.168.0.1 : valid
  • + *
  • 192.168.01.1 : valid
  • + *
  • 192.168.09.1 : valid
  • + *
  • 192.168.010.1 : valid
  • + *
+ * @param scratch A "scratch" memory space build by {@link #buildScratch} + */ + @ConvertEvaluator(extraName = "LeadingZerosAreDecimal", warnExceptions = { IllegalArgumentException.class }) + public static BytesRef leadingZerosAreDecimal( + BytesRef string, + @Fixed(includeInToString = false, scope = Fixed.Scope.THREAD_LOCAL) BreakingBytesRefBuilder scratch + ) { + /* + * If this is an ipv6 address then delegate to InetAddresses.forString + * because we don't have anything nice for parsing those. + */ + int end = string.offset + string.length; + if (isV6(string, end)) { + InetAddress inetAddress = InetAddresses.forString(string.utf8ToString()); + return new BytesRef(InetAddressPoint.encode(inetAddress)); + } + + System.arraycopy(IPV4_PREFIX, 0, scratch.bytes(), 0, IPV4_PREFIX.length); + int offset = string.offset; + for (int dest = IPV4_PREFIX.length; dest < InetAddressPoint.BYTES; dest++) { + if (offset >= end) { + throw invalid(string); + } + int v = digit(string, offset++); + while (offset < end && string.bytes[offset] != '.') { + v = v * 10 + digit(string, offset++); + } + offset++; + if (v > 255) { + throw invalid(string); + } + scratch.bytes()[dest] = (byte) v; + } + return scratch.bytesRefView(); + } + + /** + * Parse an IP address, interpreting v4 addresses with leading 0s as + * octal numbers. + *
    + *
  • 192.168.1.1 : valid
  • + *
  • 192.168.0.1 : valid
  • + *
  • 192.168.01.1 : valid
  • + *
  • 192.168.09.1 : invalid
  • + *
  • 192.168.010.1 : valid but would print as 192.168.8.1
  • + *
+ * @param scratch A "scratch" memory space build by {@link #buildScratch} + */ + @ConvertEvaluator(extraName = "LeadingZerosAreOctal", warnExceptions = { IllegalArgumentException.class }) + public static BytesRef leadingZerosAreOctal( + BytesRef string, + @Fixed(includeInToString = false, scope = Fixed.Scope.THREAD_LOCAL) BreakingBytesRefBuilder scratch + ) { + /* + * If this is an ipv6 address then delegate to InetAddresses.forString + * because we don't have anything nice for parsing those. + */ + int end = string.offset + string.length; + if (isV6(string, end)) { + InetAddress inetAddress = InetAddresses.forString(string.utf8ToString()); + return new BytesRef(InetAddressPoint.encode(inetAddress)); + } + + System.arraycopy(IPV4_PREFIX, 0, scratch.bytes(), 0, IPV4_PREFIX.length); + int offset = string.offset; + for (int dest = IPV4_PREFIX.length; dest < InetAddressPoint.BYTES; dest++) { + if (offset >= end) { + throw invalid(string); + } + int v; + if (string.bytes[offset] == '0') { + // Octal + offset++; + v = 0; + while (offset < end && string.bytes[offset] != '.') { + v = v * 8 + octalDigit(string, offset++); + } + offset++; + } else { + // Decimal + v = digit(string, offset++); + while (offset < end && string.bytes[offset] != '.') { + v = v * 10 + digit(string, offset++); + } + offset++; + } + scratch.bytes()[dest] = (byte) v; + } + return scratch.bytesRefView(); + } + + private static int digit(BytesRef string, int offset) { + if (string.bytes[offset] < '0' && '9' < string.bytes[offset]) { + throw invalid(string); + } + return string.bytes[offset] - '0'; + } + + private static int octalDigit(BytesRef string, int offset) { + if (string.bytes[offset] < '0' && '7' < string.bytes[offset]) { + throw invalid(string); + } + return string.bytes[offset] - '0'; + } + + private static IllegalArgumentException invalid(BytesRef string) { + return new IllegalArgumentException("'" + string.utf8ToString() + "' is not an IP string literal."); + } + + private static boolean 
isV6(BytesRef string, int end) { + for (int i = string.offset; i < end; i++) { + if (string.bytes[i] == ':') { + return true; + } + } + return false; + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToIP.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToIP.java index a3ea6948739c..77f26269b1c7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToIP.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToIP.java @@ -7,10 +7,8 @@ package org.elasticsearch.xpack.esql.expression.function.scalar.convert; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.compute.ann.ConvertEvaluator; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; @@ -26,15 +24,15 @@ import java.util.Map; import static org.elasticsearch.xpack.esql.core.type.DataType.IP; import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD; import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; -import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.stringToIP; +import static org.elasticsearch.xpack.esql.expression.function.scalar.convert.ParseIp.FROM_KEYWORD_LEADING_ZEROS_REJECTED; public class ToIP extends AbstractConvertFunction { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "ToIP", ToIP::new); private static final Map EVALUATORS = Map.ofEntries( Map.entry(IP, (source, field) -> field), - Map.entry(KEYWORD, ToIPFromStringEvaluator.Factory::new), - Map.entry(TEXT, ToIPFromStringEvaluator.Factory::new) + Map.entry(KEYWORD, 
FROM_KEYWORD_LEADING_ZEROS_REJECTED), + Map.entry(TEXT, FROM_KEYWORD_LEADING_ZEROS_REJECTED) ); @FunctionInfo( @@ -90,9 +88,4 @@ public class ToIP extends AbstractConvertFunction { protected NodeInfo info() { return NodeInfo.create(this, ToIP::new, field()); } - - @ConvertEvaluator(extraName = "FromString", warnExceptions = { IllegalArgumentException.class }) - static BytesRef fromKeyword(BytesRef asString) { - return stringToIP(asString); - } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ParseIpTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ParseIpTests.java new file mode 100644 index 000000000000..1a7a78a880db --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ParseIpTests.java @@ -0,0 +1,206 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.convert; + +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.apache.lucene.document.InetAddressPoint; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.breaker.NoopCircuitBreaker; +import org.elasticsearch.common.network.InetAddresses; +import org.elasticsearch.common.network.NetworkAddress; +import org.elasticsearch.compute.operator.BreakingBytesRefBuilder; +import org.elasticsearch.test.ESTestCase; + +import java.net.InetAddress; +import java.util.List; +import java.util.function.BiFunction; +import java.util.function.Supplier; + +import static org.hamcrest.Matchers.equalTo; + +public class ParseIpTests extends ESTestCase { + @ParametersFactory(argumentFormatting = "%s") + public static Iterable parameters() { + List strs = List.of( + new TestCase("192.168.1.1", true, true, true), + new TestCase("192.168.0.1", true, true, true), + new TestCase("255.255.255.255", true, true, true), + new TestCase("1.1.1.1", true, true, true), + new TestCase("0.0.0.0", true, true, true), + + new TestCase("192.168.01.1", false, true, true), + new TestCase("192.168.0255.1", false, true, true), + + new TestCase("1", false, false, false), + new TestCase("0", false, false, false), + new TestCase("255.1", false, false, false), + new TestCase("255.0", false, false, false), + new TestCase("255.255.1", false, false, false), + new TestCase("255.255.0", false, false, false), + new TestCase(new Supplier<>() { + @Override + public String get() { + return NetworkAddress.format(randomIp(true)); + } + + @Override + public String toString() { + return "v4"; + } + }, true, true, true), + new TestCase(new Supplier<>() { + @Override + public String get() { + return NetworkAddress.format(randomIp(false)); + } + + @Override + public String toString() { + return "v6"; + } + }, true, true, true) + ); + return strs.stream().map(s -> new Object[] { s }).toList(); + } + + 
private record TestCase( + Supplier str, + boolean validLeadingZerosRejected, + boolean validLeadingZerosAreDecimal, + boolean validLeadingZerosAreOctal + ) { + TestCase(String str, boolean validLeadingZerosRejected, boolean validLeadingZerosAreDecimal, boolean validLeadingZerosAreOctal) { + this(new Supplier<>() { + @Override + public String get() { + return str; + } + + @Override + public String toString() { + return str; + } + }, validLeadingZerosRejected, validLeadingZerosAreDecimal, validLeadingZerosAreOctal); + } + } + + private final TestCase testCase; + private final String str; + + public ParseIpTests(TestCase testCase) { + this.testCase = testCase; + this.str = testCase.str.get(); + } + + public void testLeadingZerosRejecting() { + if (testCase.validLeadingZerosRejected) { + InetAddress inetAddress = InetAddresses.forString(str); + BytesRef expected = new BytesRef(InetAddressPoint.encode(inetAddress)); + success(ParseIp::leadingZerosRejected, expected); + } else { + failure(ParseIp::leadingZerosRejected); + } + } + + public void testLeadingZerosAreDecimal() { + if (testCase.validLeadingZerosAreDecimal) { + InetAddress inetAddress = InetAddresses.forString(leadingZerosAreDecimalToIp(str)); + BytesRef expected = new BytesRef(InetAddressPoint.encode(inetAddress)); + success(ParseIp::leadingZerosAreDecimal, expected); + } else { + failure(ParseIp::leadingZerosAreDecimal); + } + } + + public void testLeadingZerosAreOctal() { + if (testCase.validLeadingZerosAreOctal) { + InetAddress inetAddress = InetAddresses.forString(leadingZerosAreOctalToIp(str)); + BytesRef expected = new BytesRef(InetAddressPoint.encode(inetAddress)); + success(ParseIp::leadingZerosAreOctal, expected); + } else { + failure(ParseIp::leadingZerosAreOctal); + } + } + + private void success(BiFunction fn, BytesRef expected) { + try (BreakingBytesRefBuilder scratch = ParseIp.buildScratch(new NoopCircuitBreaker("request"))) { + assertThat(fn.apply(new BytesRef(str), scratch), 
equalTo(expected)); + } + } + + private void failure(BiFunction fn) { + try (BreakingBytesRefBuilder scratch = ParseIp.buildScratch(new NoopCircuitBreaker("request"))) { + Exception thrown = expectThrows(IllegalArgumentException.class, () -> fn.apply(new BytesRef(str), scratch)); + assertThat(thrown.getMessage(), equalTo("'" + str + "' is not an IP string literal.")); + } + } + + public static String leadingZerosAreDecimalToIp(String ip) { + if (ip.contains(":")) { + // v6 ip, don't change it. + return ip; + } + StringBuilder b = new StringBuilder(); + boolean lastWasBreak = true; + boolean lastWasZero = false; + for (int i = 0; i < ip.length(); i++) { + char c = ip.charAt(i); + if (lastWasBreak && c == '0') { + lastWasZero = true; + continue; + } + if (c == '.') { + if (lastWasZero) { + b.append('0'); + } + lastWasBreak = true; + } else { + lastWasBreak = false; + } + lastWasZero = false; + b.append(c); + } + if (lastWasZero) { + b.append('0'); + } + return b.toString(); + } + + public static String leadingZerosAreOctalToIp(String ip) { + if (ip.contains(":")) { + // v6 ip, don't change it. 
+ return ip; + } + StringBuilder b = new StringBuilder(); + boolean lastWasBreak = true; + boolean octalMode = false; + int current = 0; + for (int i = 0; i < ip.length(); i++) { + char c = ip.charAt(i); + if (lastWasBreak && c == '0') { + octalMode = true; + continue; + } + if (c == '.') { + lastWasBreak = true; + b.append(current).append('.'); + current = 0; + continue; + } + lastWasBreak = false; + if (octalMode) { + current = current * 8 + (c - '0'); + } else { + current = current * 10 + (c - '0'); + } + } + b.append(current); + return b.toString(); + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToIPTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToIPTests.java index 6f9c5bbfd07c..e666d7c6defe 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToIPTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/convert/ToIPTests.java @@ -34,7 +34,7 @@ public class ToIPTests extends AbstractScalarFunctionTestCase { @ParametersFactory public static Iterable parameters() { String read = "Attribute[channel=0]"; - String stringEvaluator = "ToIPFromStringEvaluator[asString=" + read + "]"; + String stringEvaluator = "ParseIpLeadingZerosRejectedEvaluator[string=" + read + "]"; List suppliers = new ArrayList<>(); // convert from IP to IP