ESQL: Speed up TO_IP (#126338)

Speed up the TO_IP method by converting directly from utf-8 encoded
strings to the ip encoding. Previously we did:
```
utf-8 -> String -> INetAddress -> ip encoding
```

In a step towards solving #125460 this creates three IP parsing
functions, one that rejects leading zeros, one that interprets leading
zeros as decimal numbers, and one that interprets leading zeros as octal
numbers. IPs have historically been parsed in all three of those ways.

This plugs the "rejects leading zeros" parser into `TO_IP` because
that's the behavior it had before.

Here is the performance:
```
Benchmark               Score    Error  Units
leadingZerosAreDecimal  14.007 ± 0.093  ns/op
leadingZerosAreOctal    15.020 ± 0.373  ns/op
leadingZerosRejected    14.176 ± 3.861  ns/op
original                32.950 ± 1.062  ns/op
```

So this takes roughly 43% of the time of what we had (about 14.2 ns/op versus 33.0 ns/op), which is about 2.3x faster.
This commit is contained in:
Nik Everett 2025-04-07 09:34:53 -04:00 committed by GitHub
parent 72066ea49f
commit 7e1e45eaa4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 863 additions and 32 deletions

View file

@ -82,19 +82,21 @@ To get realistic results, you should exercise care when running benchmarks. Here
NOTE: Linux only. Sorry Mac and Windows.
Disassembling is fun! Maybe not always useful, but always fun! Generally, you'll want to install `perf` and the JDK's `hsdis`.
`perf` is generally available via `apg-get install perf` or `pacman -S perf`. `hsdis` you'll want to compile from source. is a little more involved. This worked
`perf` is generally available via `apt-get install perf` or `pacman -S perf linux-tools`. `hsdis` you'll want to compile from source, which is a little more involved. This worked
on 2020-08-01:
```
git clone git@github.com:openjdk/jdk.git
cd jdk
git checkout jdk-17-ga
cd src/utils/hsdis
git checkout jdk-24-ga
# Get a known good binutils
wget https://ftp.gnu.org/gnu/binutils/binutils-2.35.tar.gz
tar xf binutils-2.35.tar.gz
make BINUTILS=binutils-2.35 ARCH=amd64
sudo cp build/linux-amd64/hsdis-amd64.so /usr/lib/jvm/java-17-openjdk/lib/server/
bash configure --with-hsdis=binutils --with-binutils-src=binutils-2.35 \
--with-boot-jdk=~/.gradle/jdks/oracle_corporation-24-amd64-linux.2
make build-hsdis
cp ./build/linux-x86_64-server-release/jdk/lib/hsdis-amd64.so \
~/.gradle/jdks/oracle_corporation-24-amd64-linux.2/lib/hsdis.so
```
If you want to disassemble a single method do something like this:
@ -105,6 +107,30 @@ gradlew -p benchmarks run --args ' MemoryStatsBenchmark -jvmArgs "-XX:+UnlockDia
If you want `perf` to find the hot methods for you, then do add `-prof perfasm`.
NOTE: `perfasm` will need more access:
```
sudo bash
echo -1 > /proc/sys/kernel/perf_event_paranoid
exit
```
If you get warnings like:
```
The perf event count is suspiciously low (0).
```
then check if you are bumping into [this](https://man.archlinux.org/man/perf-stat.1.en#INTEL_HYBRID_SUPPORT)
by running:
```
perf stat -B dd if=/dev/zero of=/dev/null count=1000000
```
If you see lines like:
```
765019980 cpu_atom/cycles/ # 1.728 GHz (0.60%)
2258845959 cpu_core/cycles/ # 5.103 GHz (99.18%)
```
then `perf` is just not going to work for you.
## Async Profiler
Note: Linux and Mac only. Sorry Windows.

View file

@ -0,0 +1,61 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.benchmark.compute.operator;
import org.apache.lucene.document.InetAddressPoint;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.breaker.NoopCircuitBreaker;
import org.elasticsearch.common.network.InetAddresses;
import org.elasticsearch.compute.operator.BreakingBytesRefBuilder;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ParseIp;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import java.net.InetAddress;
import java.util.concurrent.TimeUnit;
/**
 * Micro-benchmark comparing the three {@link ParseIp} parsing strategies
 * against the older {@code utf-8 -> String -> InetAddress -> ip encoding}
 * conversion, all on the same fixed IPv4 input.
 */
@Warmup(iterations = 5)
@Measurement(iterations = 7)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Thread)
@Fork(1)
public class ParseIpBenchmark {
    /** The utf-8 encoded address every benchmark method parses. */
    private final BytesRef input = new BytesRef("192.168.0.1");

    /** Scratch buffer handed to the {@link ParseIp} parsers on every invocation. */
    private final BreakingBytesRefBuilder buffer = ParseIp.buildScratch(new NoopCircuitBreaker("request"));

    /** Parser that rejects IPv4 octets with leading zeros. */
    @Benchmark
    public BytesRef leadingZerosRejected() {
        return ParseIp.leadingZerosRejected(input, buffer);
    }

    /** Parser that reads IPv4 octets with leading zeros as decimal. */
    @Benchmark
    public BytesRef leadingZerosAreDecimal() {
        return ParseIp.leadingZerosAreDecimal(input, buffer);
    }

    /** Parser that reads IPv4 octets with leading zeros as octal. */
    @Benchmark
    public BytesRef leadingZerosAreOctal() {
        return ParseIp.leadingZerosAreOctal(input, buffer);
    }

    /** Baseline: decode to a {@link String}, parse to an {@link InetAddress}, then encode. */
    @Benchmark
    public BytesRef original() {
        String decoded = input.utf8ToString();
        InetAddress address = InetAddresses.forString(decoded);
        byte[] encoded = InetAddressPoint.encode(address);
        return new BytesRef(encoded);
    }
}

View file

@ -0,0 +1,5 @@
pr: 126338
summary: Speed up TO_IP
area: ES|QL
type: enhancement
issues: []

View file

@ -0,0 +1,149 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License
// 2.0; you may not use this file except in compliance with the Elastic License
// 2.0.
package org.elasticsearch.xpack.esql.expression.function.scalar.convert;
import java.lang.IllegalArgumentException;
import java.lang.Override;
import java.lang.String;
import java.util.function.Function;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BytesRefBlock;
import org.elasticsearch.compute.data.BytesRefVector;
import org.elasticsearch.compute.data.Vector;
import org.elasticsearch.compute.operator.BreakingBytesRefBuilder;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.compute.operator.EvalOperator;
import org.elasticsearch.core.Releasables;
import org.elasticsearch.xpack.esql.core.tree.Source;
/**
 * {@link EvalOperator.ExpressionEvaluator} implementation for {@link ParseIp}.
 * This class is generated. Edit {@code ConvertEvaluatorImplementer} instead.
 * <p>
 * Every input value is converted by calling {@link ParseIp#leadingZerosAreDecimal}
 * with the {@code scratch} builder held by this evaluator. An
 * {@link IllegalArgumentException} thrown for a value is passed to
 * {@code registerException} and that value is dropped (a {@code null} is emitted
 * when nothing remains at a position).
 * </p>
 */
public final class ParseIpLeadingZerosAreDecimalEvaluator extends AbstractConvertFunction.AbstractEvaluator {
// Evaluator this one wraps; returned by next() and closed in close().
private final EvalOperator.ExpressionEvaluator string;
// Scratch builder forwarded to every ParseIp.leadingZerosAreDecimal call; closed in close().
private final BreakingBytesRefBuilder scratch;
/**
 * @param source passed to the superclass constructor
 * @param string the wrapped evaluator
 * @param scratch scratch builder handed to the parser for every value
 * @param driverContext passed to the superclass constructor; also used here to obtain the block factory
 */
public ParseIpLeadingZerosAreDecimalEvaluator(Source source,
EvalOperator.ExpressionEvaluator string, BreakingBytesRefBuilder scratch,
DriverContext driverContext) {
super(driverContext, source);
this.string = string;
this.scratch = scratch;
}
/** Returns the wrapped evaluator. */
@Override
public EvalOperator.ExpressionEvaluator next() {
return string;
}
/**
 * Converts a {@link BytesRefVector}, one value per position.
 * A constant vector is converted once (position 0) and returned as a constant block.
 */
@Override
public Block evalVector(Vector v) {
BytesRefVector vector = (BytesRefVector) v;
int positionCount = v.getPositionCount();
// Reused holder passed to getBytesRef when reading each input value.
BytesRef scratchPad = new BytesRef();
if (vector.isConstant()) {
try {
// NOTE(review): the parser may return a view over the shared `scratch` bytes;
// confirm newConstantBytesRefBlockWith copies the value rather than retaining the reference.
return driverContext.blockFactory().newConstantBytesRefBlockWith(evalValue(vector, 0, scratchPad), positionCount);
} catch (IllegalArgumentException e) {
// The single constant value failed to parse: every position becomes null.
registerException(e);
return driverContext.blockFactory().newConstantNullBlock(positionCount);
}
}
try (BytesRefBlock.Builder builder = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) {
for (int p = 0; p < positionCount; p++) {
try {
builder.appendBytesRef(evalValue(vector, p, scratchPad));
} catch (IllegalArgumentException e) {
// A value that fails to parse becomes null at its position.
registerException(e);
builder.appendNull();
}
}
return builder.build();
}
}
// Reads the value at `index` from the vector and parses it with the shared scratch.
private BytesRef evalValue(BytesRefVector container, int index, BytesRef scratchPad) {
BytesRef value = container.getBytesRef(index, scratchPad);
return ParseIp.leadingZerosAreDecimal(value, this.scratch);
}
/**
 * Converts a {@link BytesRefBlock}, where a position may hold zero, one or several values.
 * Values that fail to parse are skipped; a position with no surviving value becomes null.
 */
@Override
public Block evalBlock(Block b) {
BytesRefBlock block = (BytesRefBlock) b;
int positionCount = block.getPositionCount();
try (BytesRefBlock.Builder builder = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) {
BytesRef scratchPad = new BytesRef();
for (int p = 0; p < positionCount; p++) {
int valueCount = block.getValueCount(p);
int start = block.getFirstValueIndex(p);
int end = start + valueCount;
// positionOpened: a position entry was begun and must be ended below.
boolean positionOpened = false;
// valuesAppended: at least one value at this position parsed successfully.
boolean valuesAppended = false;
for (int i = start; i < end; i++) {
try {
BytesRef value = evalValue(block, i, scratchPad);
// The position entry is opened lazily, on the first successful value,
// and only when the input position holds more than one value.
if (positionOpened == false && valueCount > 1) {
builder.beginPositionEntry();
positionOpened = true;
}
builder.appendBytesRef(value);
valuesAppended = true;
} catch (IllegalArgumentException e) {
// Skip just this value; the remaining values at the position are still processed.
registerException(e);
}
}
if (valuesAppended == false) {
builder.appendNull();
} else if (positionOpened) {
builder.endPositionEntry();
}
}
return builder.build();
}
}
// Reads the value at `index` from the block and parses it with the shared scratch.
private BytesRef evalValue(BytesRefBlock container, int index, BytesRef scratchPad) {
BytesRef value = container.getBytesRef(index, scratchPad);
return ParseIp.leadingZerosAreDecimal(value, this.scratch);
}
// The scratch builder is intentionally absent from the description.
@Override
public String toString() {
return "ParseIpLeadingZerosAreDecimalEvaluator[" + "string=" + string + "]";
}
/** Closes both the wrapped evaluator and the scratch builder. */
@Override
public void close() {
Releasables.closeExpectNoException(string, scratch);
}
/**
 * Factory that builds one evaluator per {@link DriverContext}, creating the wrapped
 * evaluator and the scratch builder from that context.
 */
public static class Factory implements EvalOperator.ExpressionEvaluator.Factory {
private final Source source;
private final EvalOperator.ExpressionEvaluator.Factory string;
// Creates the scratch builder for a given DriverContext.
private final Function<DriverContext, BreakingBytesRefBuilder> scratch;
public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory string,
Function<DriverContext, BreakingBytesRefBuilder> scratch) {
this.source = source;
this.string = string;
this.scratch = scratch;
}
@Override
public ParseIpLeadingZerosAreDecimalEvaluator get(DriverContext context) {
return new ParseIpLeadingZerosAreDecimalEvaluator(source, string.get(context), scratch.apply(context), context);
}
@Override
public String toString() {
return "ParseIpLeadingZerosAreDecimalEvaluator[" + "string=" + string + "]";
}
}
}

View file

@ -7,32 +7,38 @@ package org.elasticsearch.xpack.esql.expression.function.scalar.convert;
import java.lang.IllegalArgumentException;
import java.lang.Override;
import java.lang.String;
import java.util.function.Function;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BytesRefBlock;
import org.elasticsearch.compute.data.BytesRefVector;
import org.elasticsearch.compute.data.Vector;
import org.elasticsearch.compute.operator.BreakingBytesRefBuilder;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.compute.operator.EvalOperator;
import org.elasticsearch.core.Releasables;
import org.elasticsearch.xpack.esql.core.tree.Source;
/**
* {@link EvalOperator.ExpressionEvaluator} implementation for {@link ToIP}.
* {@link EvalOperator.ExpressionEvaluator} implementation for {@link ParseIp}.
* This class is generated. Edit {@code ConvertEvaluatorImplementer} instead.
*/
public final class ToIPFromStringEvaluator extends AbstractConvertFunction.AbstractEvaluator {
private final EvalOperator.ExpressionEvaluator asString;
public final class ParseIpLeadingZerosAreOctalEvaluator extends AbstractConvertFunction.AbstractEvaluator {
private final EvalOperator.ExpressionEvaluator string;
public ToIPFromStringEvaluator(Source source, EvalOperator.ExpressionEvaluator asString,
private final BreakingBytesRefBuilder scratch;
public ParseIpLeadingZerosAreOctalEvaluator(Source source,
EvalOperator.ExpressionEvaluator string, BreakingBytesRefBuilder scratch,
DriverContext driverContext) {
super(driverContext, source);
this.asString = asString;
this.string = string;
this.scratch = scratch;
}
@Override
public EvalOperator.ExpressionEvaluator next() {
return asString;
return string;
}
@Override
@ -63,7 +69,7 @@ public final class ToIPFromStringEvaluator extends AbstractConvertFunction.Abstr
private BytesRef evalValue(BytesRefVector container, int index, BytesRef scratchPad) {
BytesRef value = container.getBytesRef(index, scratchPad);
return ToIP.fromKeyword(value);
return ParseIp.leadingZerosAreOctal(value, this.scratch);
}
@Override
@ -103,37 +109,41 @@ public final class ToIPFromStringEvaluator extends AbstractConvertFunction.Abstr
private BytesRef evalValue(BytesRefBlock container, int index, BytesRef scratchPad) {
BytesRef value = container.getBytesRef(index, scratchPad);
return ToIP.fromKeyword(value);
return ParseIp.leadingZerosAreOctal(value, this.scratch);
}
@Override
public String toString() {
return "ToIPFromStringEvaluator[" + "asString=" + asString + "]";
return "ParseIpLeadingZerosAreOctalEvaluator[" + "string=" + string + "]";
}
@Override
public void close() {
Releasables.closeExpectNoException(asString);
Releasables.closeExpectNoException(string, scratch);
}
public static class Factory implements EvalOperator.ExpressionEvaluator.Factory {
private final Source source;
private final EvalOperator.ExpressionEvaluator.Factory asString;
private final EvalOperator.ExpressionEvaluator.Factory string;
public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory asString) {
private final Function<DriverContext, BreakingBytesRefBuilder> scratch;
public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory string,
Function<DriverContext, BreakingBytesRefBuilder> scratch) {
this.source = source;
this.asString = asString;
this.string = string;
this.scratch = scratch;
}
@Override
public ToIPFromStringEvaluator get(DriverContext context) {
return new ToIPFromStringEvaluator(source, asString.get(context), context);
public ParseIpLeadingZerosAreOctalEvaluator get(DriverContext context) {
return new ParseIpLeadingZerosAreOctalEvaluator(source, string.get(context), scratch.apply(context), context);
}
@Override
public String toString() {
return "ToIPFromStringEvaluator[" + "asString=" + asString + "]";
return "ParseIpLeadingZerosAreOctalEvaluator[" + "string=" + string + "]";
}
}
}

View file

@ -0,0 +1,149 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License
// 2.0; you may not use this file except in compliance with the Elastic License
// 2.0.
package org.elasticsearch.xpack.esql.expression.function.scalar.convert;
import java.lang.IllegalArgumentException;
import java.lang.Override;
import java.lang.String;
import java.util.function.Function;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BytesRefBlock;
import org.elasticsearch.compute.data.BytesRefVector;
import org.elasticsearch.compute.data.Vector;
import org.elasticsearch.compute.operator.BreakingBytesRefBuilder;
import org.elasticsearch.compute.operator.DriverContext;
import org.elasticsearch.compute.operator.EvalOperator;
import org.elasticsearch.core.Releasables;
import org.elasticsearch.xpack.esql.core.tree.Source;
/**
 * {@link EvalOperator.ExpressionEvaluator} implementation for {@link ParseIp}.
 * This class is generated. Edit {@code ConvertEvaluatorImplementer} instead.
 * <p>
 * Every input value is converted by calling {@link ParseIp#leadingZerosRejected}
 * with the {@code scratch} builder held by this evaluator. An
 * {@link IllegalArgumentException} thrown for a value is passed to
 * {@code registerException} and that value is dropped (a {@code null} is emitted
 * when nothing remains at a position).
 * </p>
 */
public final class ParseIpLeadingZerosRejectedEvaluator extends AbstractConvertFunction.AbstractEvaluator {
// Evaluator this one wraps; returned by next() and closed in close().
private final EvalOperator.ExpressionEvaluator string;
// Scratch builder forwarded to every ParseIp.leadingZerosRejected call; closed in close().
private final BreakingBytesRefBuilder scratch;
/**
 * @param source passed to the superclass constructor
 * @param string the wrapped evaluator
 * @param scratch scratch builder handed to the parser for every value
 * @param driverContext passed to the superclass constructor; also used here to obtain the block factory
 */
public ParseIpLeadingZerosRejectedEvaluator(Source source,
EvalOperator.ExpressionEvaluator string, BreakingBytesRefBuilder scratch,
DriverContext driverContext) {
super(driverContext, source);
this.string = string;
this.scratch = scratch;
}
/** Returns the wrapped evaluator. */
@Override
public EvalOperator.ExpressionEvaluator next() {
return string;
}
/**
 * Converts a {@link BytesRefVector}, one value per position.
 * A constant vector is converted once (position 0) and returned as a constant block.
 */
@Override
public Block evalVector(Vector v) {
BytesRefVector vector = (BytesRefVector) v;
int positionCount = v.getPositionCount();
// Reused holder passed to getBytesRef when reading each input value.
BytesRef scratchPad = new BytesRef();
if (vector.isConstant()) {
try {
// NOTE(review): the parser may return a view over the shared `scratch` bytes;
// confirm newConstantBytesRefBlockWith copies the value rather than retaining the reference.
return driverContext.blockFactory().newConstantBytesRefBlockWith(evalValue(vector, 0, scratchPad), positionCount);
} catch (IllegalArgumentException e) {
// The single constant value failed to parse: every position becomes null.
registerException(e);
return driverContext.blockFactory().newConstantNullBlock(positionCount);
}
}
try (BytesRefBlock.Builder builder = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) {
for (int p = 0; p < positionCount; p++) {
try {
builder.appendBytesRef(evalValue(vector, p, scratchPad));
} catch (IllegalArgumentException e) {
// A value that fails to parse becomes null at its position.
registerException(e);
builder.appendNull();
}
}
return builder.build();
}
}
// Reads the value at `index` from the vector and parses it with the shared scratch.
private BytesRef evalValue(BytesRefVector container, int index, BytesRef scratchPad) {
BytesRef value = container.getBytesRef(index, scratchPad);
return ParseIp.leadingZerosRejected(value, this.scratch);
}
/**
 * Converts a {@link BytesRefBlock}, where a position may hold zero, one or several values.
 * Values that fail to parse are skipped; a position with no surviving value becomes null.
 */
@Override
public Block evalBlock(Block b) {
BytesRefBlock block = (BytesRefBlock) b;
int positionCount = block.getPositionCount();
try (BytesRefBlock.Builder builder = driverContext.blockFactory().newBytesRefBlockBuilder(positionCount)) {
BytesRef scratchPad = new BytesRef();
for (int p = 0; p < positionCount; p++) {
int valueCount = block.getValueCount(p);
int start = block.getFirstValueIndex(p);
int end = start + valueCount;
// positionOpened: a position entry was begun and must be ended below.
boolean positionOpened = false;
// valuesAppended: at least one value at this position parsed successfully.
boolean valuesAppended = false;
for (int i = start; i < end; i++) {
try {
BytesRef value = evalValue(block, i, scratchPad);
// The position entry is opened lazily, on the first successful value,
// and only when the input position holds more than one value.
if (positionOpened == false && valueCount > 1) {
builder.beginPositionEntry();
positionOpened = true;
}
builder.appendBytesRef(value);
valuesAppended = true;
} catch (IllegalArgumentException e) {
// Skip just this value; the remaining values at the position are still processed.
registerException(e);
}
}
if (valuesAppended == false) {
builder.appendNull();
} else if (positionOpened) {
builder.endPositionEntry();
}
}
return builder.build();
}
}
// Reads the value at `index` from the block and parses it with the shared scratch.
private BytesRef evalValue(BytesRefBlock container, int index, BytesRef scratchPad) {
BytesRef value = container.getBytesRef(index, scratchPad);
return ParseIp.leadingZerosRejected(value, this.scratch);
}
// The scratch builder is intentionally absent from the description.
@Override
public String toString() {
return "ParseIpLeadingZerosRejectedEvaluator[" + "string=" + string + "]";
}
/** Closes both the wrapped evaluator and the scratch builder. */
@Override
public void close() {
Releasables.closeExpectNoException(string, scratch);
}
/**
 * Factory that builds one evaluator per {@link DriverContext}, creating the wrapped
 * evaluator and the scratch builder from that context.
 */
public static class Factory implements EvalOperator.ExpressionEvaluator.Factory {
private final Source source;
private final EvalOperator.ExpressionEvaluator.Factory string;
// Creates the scratch builder for a given DriverContext.
private final Function<DriverContext, BreakingBytesRefBuilder> scratch;
public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory string,
Function<DriverContext, BreakingBytesRefBuilder> scratch) {
this.source = source;
this.string = string;
this.scratch = scratch;
}
@Override
public ParseIpLeadingZerosRejectedEvaluator get(DriverContext context) {
return new ParseIpLeadingZerosRejectedEvaluator(source, string.get(context), scratch.apply(context), context);
}
@Override
public String toString() {
return "ParseIpLeadingZerosRejectedEvaluator[" + "string=" + string + "]";
}
}
}

View file

@ -0,0 +1,232 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.esql.expression.function.scalar.convert;
import org.apache.lucene.document.InetAddressPoint;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.breaker.CircuitBreaker;
import org.elasticsearch.common.network.InetAddresses;
import org.elasticsearch.compute.ann.ConvertEvaluator;
import org.elasticsearch.compute.ann.Fixed;
import org.elasticsearch.compute.operator.BreakingBytesRefBuilder;
import org.elasticsearch.compute.operator.EvalOperator;
import java.net.InetAddress;
/**
 * Fast IP parsing suitable for embedding in an {@link EvalOperator.ExpressionEvaluator}.
 * IPv4 addresses are converted directly from utf-8 encoded strings into
 * {@link InetAddressPoint} encoded ips, writing into a caller supplied scratch
 * buffer. Strings containing a {@code ':'} are treated as IPv6 and delegated to
 * {@link InetAddresses#forString(String)}, which goes through a {@link String}
 * and an {@link InetAddress}.
 * <p>
 *     This contains three parsing methods to handle the three ways ipv4 addresses
 *     have historically handled leading 0s, namely, {@link #leadingZerosRejected reject} them,
 *     treat them as {@link #leadingZerosAreDecimal decimal} numbers, and treat them as
 *     {@link #leadingZerosAreOctal octal} numbers.
 * </p>
 * <p>
 *     All three require exactly four dot separated octets, each in the range
 *     {@code 0..255}, made only of digits valid for their radix, with nothing
 *     before the first or after the last octet. Anything else fails with an
 *     {@link IllegalArgumentException}.
 * </p>
 * <p>
 *     Note: We say "directly from utf-8" but, really, all of the digits in an ip are
 *     in the traditional 7-bit ascii range where utf-8 overlaps. So we just treat everything
 *     as 7-bit ascii. Anything that isn't in the range is an invalid ip anyway. Much love
 *     for the designers of utf-8 for making it this way.
 * </p>
 */
public class ParseIp {
    /** Leading 12 bytes written before the four IPv4 octets in the 16 byte encoded form. */
    private static final byte[] IPV4_PREFIX = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1 };

    /** Largest value a single IPv4 octet may hold. */
    private static final int MAX_OCTET = 255;

    static final AbstractConvertFunction.BuildFactory FROM_KEYWORD_LEADING_ZEROS_REJECTED = (source, field) -> {
        return new ParseIpLeadingZerosRejectedEvaluator.Factory(source, field, driverContext -> buildScratch(driverContext.breaker()));
    };

    /**
     * Build the "scratch" buffer the parsing methods write IPv4 results into.
     * Its length is set to {@link InetAddressPoint#BYTES}.
     */
    public static BreakingBytesRefBuilder buildScratch(CircuitBreaker breaker) {
        BreakingBytesRefBuilder scratch = new BreakingBytesRefBuilder(breaker, "to_ip", 16);
        scratch.setLength(InetAddressPoint.BYTES);
        return scratch;
    }

    /**
     * Parse an IP address, rejecting v4 addresses with leading 0s. This aligns
     * exactly with {@link InetAddresses#forString(String)}.
     * <ul>
     *     <li>192.168.1.1 : valid</li>
     *     <li>192.168.0.1 : valid</li>
     *     <li>192.168.01.1 : invalid</li>
     * </ul>
     * @param string utf-8 encoded text of the address
     * @param scratch A "scratch" memory space built by {@link #buildScratch}
     * @return the encoded ip; for v4 input this is a view of {@code scratch}
     * @throws IllegalArgumentException if {@code string} is not a valid ip
     */
    @ConvertEvaluator(extraName = "LeadingZerosRejected", warnExceptions = { IllegalArgumentException.class })
    public static BytesRef leadingZerosRejected(
        BytesRef string,
        @Fixed(includeInToString = false, scope = Fixed.Scope.THREAD_LOCAL) BreakingBytesRefBuilder scratch
    ) {
        /*
         * If this is an ipv6 address then delegate to InetAddresses.forString
         * because we don't have anything nice for parsing those.
         */
        int end = string.offset + string.length;
        if (isV6(string, end)) {
            return parseV6(string);
        }
        System.arraycopy(IPV4_PREFIX, 0, scratch.bytes(), 0, IPV4_PREFIX.length);
        int offset = string.offset;
        for (int dest = IPV4_PREFIX.length; dest < InetAddressPoint.BYTES; dest++) {
            if (offset >= end) {
                throw invalid(string);
            }
            if (string.bytes[offset] == '0') {
                // Lone zeros are just 0, but a 0 with numbers after it are invalid
                offset++;
                if (offset == end || string.bytes[offset] == '.') {
                    scratch.bytes()[dest] = (byte) 0;
                    offset++;
                    continue;
                }
                throw invalid(string);
            }
            int v = digit(string, offset++);
            while (offset < end && string.bytes[offset] != '.') {
                v = v * 10 + digit(string, offset++);
                // Checking per digit keeps v small so a long run of digits can't overflow.
                if (v > MAX_OCTET) {
                    throw invalid(string);
                }
            }
            // Skip the separator, or step one past the end after the final octet.
            offset++;
            scratch.bytes()[dest] = (byte) v;
        }
        requireFullyConsumed(string, offset, end);
        return scratch.bytesRefView();
    }

    /**
     * Parse an IP address, interpreting v4 addresses with leading 0s as
     * <strong>decimal</strong> numbers.
     * <ul>
     *     <li>192.168.1.1 : valid</li>
     *     <li>192.168.0.1 : valid</li>
     *     <li>192.168.01.1 : valid</li>
     *     <li>192.168.09.1 : valid</li>
     *     <li>192.168.010.1 : valid</li>
     * </ul>
     * @param string utf-8 encoded text of the address
     * @param scratch A "scratch" memory space built by {@link #buildScratch}
     * @return the encoded ip; for v4 input this is a view of {@code scratch}
     * @throws IllegalArgumentException if {@code string} is not a valid ip
     */
    @ConvertEvaluator(extraName = "LeadingZerosAreDecimal", warnExceptions = { IllegalArgumentException.class })
    public static BytesRef leadingZerosAreDecimal(
        BytesRef string,
        @Fixed(includeInToString = false, scope = Fixed.Scope.THREAD_LOCAL) BreakingBytesRefBuilder scratch
    ) {
        /*
         * If this is an ipv6 address then delegate to InetAddresses.forString
         * because we don't have anything nice for parsing those.
         */
        int end = string.offset + string.length;
        if (isV6(string, end)) {
            return parseV6(string);
        }
        System.arraycopy(IPV4_PREFIX, 0, scratch.bytes(), 0, IPV4_PREFIX.length);
        int offset = string.offset;
        for (int dest = IPV4_PREFIX.length; dest < InetAddressPoint.BYTES; dest++) {
            if (offset >= end) {
                throw invalid(string);
            }
            int v = digit(string, offset++);
            while (offset < end && string.bytes[offset] != '.') {
                v = v * 10 + digit(string, offset++);
                // Checking per digit keeps v small so a long run of digits can't overflow.
                if (v > MAX_OCTET) {
                    throw invalid(string);
                }
            }
            // Skip the separator, or step one past the end after the final octet.
            offset++;
            scratch.bytes()[dest] = (byte) v;
        }
        requireFullyConsumed(string, offset, end);
        return scratch.bytesRefView();
    }

    /**
     * Parse an IP address, interpreting v4 addresses with leading 0s as
     * <strong>octal</strong> numbers.
     * <ul>
     *     <li>192.168.1.1 : valid</li>
     *     <li>192.168.0.1 : valid</li>
     *     <li>192.168.01.1 : valid</li>
     *     <li>192.168.09.1 : invalid</li>
     *     <li>192.168.010.1 : valid but would print as 192.168.8.1</li>
     *     <li>192.168.0400.1 : invalid because octal 400 is 256</li>
     * </ul>
     * @param string utf-8 encoded text of the address
     * @param scratch A "scratch" memory space built by {@link #buildScratch}
     * @return the encoded ip; for v4 input this is a view of {@code scratch}
     * @throws IllegalArgumentException if {@code string} is not a valid ip
     */
    @ConvertEvaluator(extraName = "LeadingZerosAreOctal", warnExceptions = { IllegalArgumentException.class })
    public static BytesRef leadingZerosAreOctal(
        BytesRef string,
        @Fixed(includeInToString = false, scope = Fixed.Scope.THREAD_LOCAL) BreakingBytesRefBuilder scratch
    ) {
        /*
         * If this is an ipv6 address then delegate to InetAddresses.forString
         * because we don't have anything nice for parsing those.
         */
        int end = string.offset + string.length;
        if (isV6(string, end)) {
            return parseV6(string);
        }
        System.arraycopy(IPV4_PREFIX, 0, scratch.bytes(), 0, IPV4_PREFIX.length);
        int offset = string.offset;
        for (int dest = IPV4_PREFIX.length; dest < InetAddressPoint.BYTES; dest++) {
            if (offset >= end) {
                throw invalid(string);
            }
            int v;
            if (string.bytes[offset] == '0') {
                // Octal
                offset++;
                v = 0;
                while (offset < end && string.bytes[offset] != '.') {
                    v = v * 8 + octalDigit(string, offset++);
                    // Without this the cast to byte below would silently truncate.
                    if (v > MAX_OCTET) {
                        throw invalid(string);
                    }
                }
                offset++;
            } else {
                // Decimal
                v = digit(string, offset++);
                while (offset < end && string.bytes[offset] != '.') {
                    v = v * 10 + digit(string, offset++);
                    if (v > MAX_OCTET) {
                        throw invalid(string);
                    }
                }
                offset++;
            }
            scratch.bytes()[dest] = (byte) v;
        }
        requireFullyConsumed(string, offset, end);
        return scratch.bytesRefView();
    }

    /** Parse via {@link InetAddresses#forString(String)}; used for anything that looks like ipv6. */
    private static BytesRef parseV6(BytesRef string) {
        InetAddress inetAddress = InetAddresses.forString(string.utf8ToString());
        return new BytesRef(InetAddressPoint.encode(inetAddress));
    }

    /**
     * Reject trailing input after the fourth octet. Every octet ends by stepping
     * over one more byte (the separator), so {@code offset} lands on exactly
     * {@code end + 1} only when the text stopped right after the last octet.
     * Anything lower means a trailing {@code '.'} or extra characters.
     */
    private static void requireFullyConsumed(BytesRef string, int offset, int end) {
        if (offset != end + 1) {
            throw invalid(string);
        }
    }

    /** Value of the decimal digit at {@code offset}, rejecting any byte outside {@code '0'..'9'}. */
    private static int digit(BytesRef string, int offset) {
        byte b = string.bytes[offset];
        if (b < '0' || '9' < b) {
            throw invalid(string);
        }
        return b - '0';
    }

    /** Value of the octal digit at {@code offset}, rejecting any byte outside {@code '0'..'7'}. */
    private static int octalDigit(BytesRef string, int offset) {
        byte b = string.bytes[offset];
        if (b < '0' || '7' < b) {
            throw invalid(string);
        }
        return b - '0';
    }

    private static IllegalArgumentException invalid(BytesRef string) {
        return new IllegalArgumentException("'" + string.utf8ToString() + "' is not an IP string literal.");
    }

    /** A string is handled as ipv6 when it contains a {@code ':'} anywhere. */
    private static boolean isV6(BytesRef string, int end) {
        for (int i = string.offset; i < end; i++) {
            if (string.bytes[i] == ':') {
                return true;
            }
        }
        return false;
    }
}

View file

@ -7,10 +7,8 @@
package org.elasticsearch.xpack.esql.expression.function.scalar.convert;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.compute.ann.ConvertEvaluator;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
import org.elasticsearch.xpack.esql.core.tree.Source;
@ -26,15 +24,15 @@ import java.util.Map;
import static org.elasticsearch.xpack.esql.core.type.DataType.IP;
import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD;
import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT;
import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.stringToIP;
import static org.elasticsearch.xpack.esql.expression.function.scalar.convert.ParseIp.FROM_KEYWORD_LEADING_ZEROS_REJECTED;
public class ToIP extends AbstractConvertFunction {
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "ToIP", ToIP::new);
private static final Map<DataType, BuildFactory> EVALUATORS = Map.ofEntries(
Map.entry(IP, (source, field) -> field),
Map.entry(KEYWORD, ToIPFromStringEvaluator.Factory::new),
Map.entry(TEXT, ToIPFromStringEvaluator.Factory::new)
Map.entry(KEYWORD, FROM_KEYWORD_LEADING_ZEROS_REJECTED),
Map.entry(TEXT, FROM_KEYWORD_LEADING_ZEROS_REJECTED)
);
@FunctionInfo(
@ -90,9 +88,4 @@ public class ToIP extends AbstractConvertFunction {
protected NodeInfo<? extends Expression> info() {
return NodeInfo.create(this, ToIP::new, field());
}
@ConvertEvaluator(extraName = "FromString", warnExceptions = { IllegalArgumentException.class })
static BytesRef fromKeyword(BytesRef asString) {
return stringToIP(asString);
}
}

View file

@ -0,0 +1,206 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.esql.expression.function.scalar.convert;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import org.apache.lucene.document.InetAddressPoint;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.breaker.NoopCircuitBreaker;
import org.elasticsearch.common.network.InetAddresses;
import org.elasticsearch.common.network.NetworkAddress;
import org.elasticsearch.compute.operator.BreakingBytesRefBuilder;
import org.elasticsearch.test.ESTestCase;
import java.net.InetAddress;
import java.util.List;
import java.util.function.BiFunction;
import java.util.function.Supplier;
import static org.hamcrest.Matchers.equalTo;
/**
 * Tests the three {@link ParseIp} parsers against {@link InetAddresses#forString(String)}.
 * For the decimal and octal variants the input is first rewritten into the plain
 * dotted-decimal form the variant is expected to produce.
 */
public class ParseIpTests extends ESTestCase {
    @ParametersFactory(argumentFormatting = "%s")
    public static Iterable<Object[]> parameters() {
        // Flags are, in order: valid when leading zeros are rejected, read as decimal, read as octal.
        List<TestCase> strs = List.of(
            new TestCase("192.168.1.1", true, true, true),
            new TestCase("192.168.0.1", true, true, true),
            new TestCase("255.255.255.255", true, true, true),
            new TestCase("1.1.1.1", true, true, true),
            new TestCase("0.0.0.0", true, true, true),
            new TestCase("192.168.01.1", false, true, true),
            // 0255 is 255 in decimal and 173 in octal, both fit in an octet
            new TestCase("192.168.0255.1", false, true, true),
            // Fewer than four octets is never valid
            new TestCase("1", false, false, false),
            new TestCase("0", false, false, false),
            new TestCase("255.1", false, false, false),
            new TestCase("255.0", false, false, false),
            new TestCase("255.255.1", false, false, false),
            new TestCase("255.255.0", false, false, false),
            new TestCase(new Supplier<>() {
                @Override
                public String get() {
                    return NetworkAddress.format(randomIp(true));
                }

                @Override
                public String toString() {
                    return "v4";
                }
            }, true, true, true),
            new TestCase(new Supplier<>() {
                @Override
                public String get() {
                    return NetworkAddress.format(randomIp(false));
                }

                @Override
                public String toString() {
                    return "v6";
                }
            }, true, true, true)
        );
        return strs.stream().map(s -> new Object[] { s }).toList();
    }

    /**
     * One input and whether each parser should accept it. The input is a
     * {@link Supplier} so random addresses are generated when the test is
     * constructed rather than when the parameters are listed.
     */
    private record TestCase(
        Supplier<String> str,
        boolean validLeadingZerosRejected,
        boolean validLeadingZerosAreDecimal,
        boolean validLeadingZerosAreOctal
    ) {
        TestCase(String str, boolean validLeadingZerosRejected, boolean validLeadingZerosAreDecimal, boolean validLeadingZerosAreOctal) {
            this(new Supplier<>() {
                @Override
                public String get() {
                    return str;
                }

                @Override
                public String toString() {
                    return str;
                }
            }, validLeadingZerosRejected, validLeadingZerosAreDecimal, validLeadingZerosAreOctal);
        }
    }

    private final TestCase testCase;
    private final String str;

    public ParseIpTests(TestCase testCase) {
        this.testCase = testCase;
        this.str = testCase.str.get();
    }

    public void testLeadingZerosRejecting() {
        if (testCase.validLeadingZerosRejected) {
            InetAddress inetAddress = InetAddresses.forString(str);
            BytesRef expected = new BytesRef(InetAddressPoint.encode(inetAddress));
            success(ParseIp::leadingZerosRejected, expected);
        } else {
            failure(ParseIp::leadingZerosRejected);
        }
    }

    public void testLeadingZerosAreDecimal() {
        if (testCase.validLeadingZerosAreDecimal) {
            InetAddress inetAddress = InetAddresses.forString(leadingZerosAreDecimalToIp(str));
            BytesRef expected = new BytesRef(InetAddressPoint.encode(inetAddress));
            success(ParseIp::leadingZerosAreDecimal, expected);
        } else {
            failure(ParseIp::leadingZerosAreDecimal);
        }
    }

    public void testLeadingZerosAreOctal() {
        if (testCase.validLeadingZerosAreOctal) {
            InetAddress inetAddress = InetAddresses.forString(leadingZerosAreOctalToIp(str));
            BytesRef expected = new BytesRef(InetAddressPoint.encode(inetAddress));
            success(ParseIp::leadingZerosAreOctal, expected);
        } else {
            failure(ParseIp::leadingZerosAreOctal);
        }
    }

    /** Asserts that {@code fn} parses {@link #str} into {@code expected}. */
    private void success(BiFunction<BytesRef, BreakingBytesRefBuilder, BytesRef> fn, BytesRef expected) {
        try (BreakingBytesRefBuilder scratch = ParseIp.buildScratch(new NoopCircuitBreaker("request"))) {
            assertThat(fn.apply(new BytesRef(str), scratch), equalTo(expected));
        }
    }

    /** Asserts that {@code fn} rejects {@link #str} with the standard error message. */
    private void failure(BiFunction<BytesRef, BreakingBytesRefBuilder, BytesRef> fn) {
        try (BreakingBytesRefBuilder scratch = ParseIp.buildScratch(new NoopCircuitBreaker("request"))) {
            Exception thrown = expectThrows(IllegalArgumentException.class, () -> fn.apply(new BytesRef(str), scratch));
            assertThat(thrown.getMessage(), equalTo("'" + str + "' is not an IP string literal."));
        }
    }

    /**
     * Rewrites a v4 address by stripping the leading zeros from every octet,
     * keeping a single {@code 0} for octets made only of zeros. Strings
     * containing a {@code ':'} are returned unchanged.
     */
    public static String leadingZerosAreDecimalToIp(String ip) {
        if (ip.contains(":")) {
            // v6 ip, don't change it.
            return ip;
        }
        StringBuilder b = new StringBuilder();
        boolean lastWasBreak = true;
        boolean lastWasZero = false;
        for (int i = 0; i < ip.length(); i++) {
            char c = ip.charAt(i);
            if (lastWasBreak && c == '0') {
                // Drop the zero for now; it is restored below if the octet turns out to be all zeros.
                lastWasZero = true;
                continue;
            }
            if (c == '.') {
                if (lastWasZero) {
                    b.append('0');
                }
                lastWasBreak = true;
            } else {
                lastWasBreak = false;
            }
            lastWasZero = false;
            b.append(c);
        }
        if (lastWasZero) {
            b.append('0');
        }
        return b.toString();
    }

    /**
     * Rewrites a v4 address so every octet is printed in plain decimal, reading
     * octets that start with {@code 0} as octal and all others as decimal.
     * Strings containing a {@code ':'} are returned unchanged.
     */
    public static String leadingZerosAreOctalToIp(String ip) {
        if (ip.contains(":")) {
            // v6 ip, don't change it.
            return ip;
        }
        StringBuilder b = new StringBuilder();
        boolean lastWasBreak = true;
        boolean octalMode = false;
        int current = 0;
        for (int i = 0; i < ip.length(); i++) {
            char c = ip.charAt(i);
            if (lastWasBreak && c == '0') {
                octalMode = true;
                continue;
            }
            if (c == '.') {
                lastWasBreak = true;
                b.append(current).append('.');
                current = 0;
                // The radix is decided per octet, so one octal octet must not leak into the next.
                octalMode = false;
                continue;
            }
            lastWasBreak = false;
            if (octalMode) {
                current = current * 8 + (c - '0');
            } else {
                current = current * 10 + (c - '0');
            }
        }
        b.append(current);
        return b.toString();
    }
}

View file

@ -34,7 +34,7 @@ public class ToIPTests extends AbstractScalarFunctionTestCase {
@ParametersFactory
public static Iterable<Object[]> parameters() {
String read = "Attribute[channel=0]";
String stringEvaluator = "ToIPFromStringEvaluator[asString=" + read + "]";
String stringEvaluator = "ParseIpLeadingZerosRejectedEvaluator[string=" + read + "]";
List<TestCaseSupplier> suppliers = new ArrayList<>();
// convert from IP to IP