Merge branch 'main' into main-update-9-10-24

Simon Cooper 2024-10-09 17:08:05 +01:00
commit 09f91cdaec
385 changed files with 20490 additions and 6852 deletions

View file

@ -2,6 +2,7 @@ steps:
- command: .buildkite/scripts/dra-workflow.sh
env:
USE_DRA_CREDENTIALS: "true"
USE_PROD_DOCKER_CREDENTIALS: "true"
agents:
provider: gcp
image: family/elasticsearch-ubuntu-2204
@ -18,4 +19,5 @@ steps:
branch: "${BUILDKITE_BRANCH}"
env:
DRA_WORKFLOW: staging
USE_PROD_DOCKER_CREDENTIALS: "true"
if: build.env('DRA_WORKFLOW') == 'staging'

View file

@ -27,7 +27,8 @@ steps:
image: family/elasticsearch-{{matrix.image}}
diskSizeGb: 350
machineType: n1-standard-8
env: {}
env:
USE_PROD_DOCKER_CREDENTIALS: "true"
- group: packaging-tests-upgrade
steps: $BWC_STEPS
- group: packaging-tests-windows

View file

@ -28,7 +28,8 @@ steps:
image: family/elasticsearch-{{matrix.image}}
diskSizeGb: 350
machineType: n1-standard-8
env: {}
env:
USE_PROD_DOCKER_CREDENTIALS: "true"
- group: packaging-tests-upgrade
steps:
- label: "{{matrix.image}} / 8.0.1 / packaging-tests-upgrade"

View file

@ -24,4 +24,5 @@ steps:
diskSizeGb: 350
machineType: custom-16-32768
env:
USE_PROD_DOCKER_CREDENTIALS: "true"
PACKAGING_TASK: "{{matrix.PACKAGING_TASK}}"

View file

@ -384,6 +384,15 @@ tasks.named("jar") {
exclude("classpath.index")
}
spotless {
java {
// IDEs can sometimes run annotation processors that leave files in
// here, causing Spotless to complain. Even though this path ought not
// to exist, exclude it anyway in order to avoid spurious failures.
toggleOffOn()
}
}
def resolveMainWrapperVersion() {
new URL("https://raw.githubusercontent.com/elastic/elasticsearch/main/build-tools-internal/src/main/resources/minimumGradleVersion").text.trim()
}

View file

@ -29,11 +29,13 @@ public enum DockerBase {
CLOUD_ESS(null, "-cloud-ess", "apt-get"),
// Chainguard based wolfi image with latest jdk
WOLFI(
"docker.elastic.co/wolfi/chainguard-base:latest@sha256:c16d3ad6cebf387e8dd2ad769f54320c4819fbbaa21e729fad087c7ae223b4d0",
// This is usually updated via renovatebot
// spotless:off
WOLFI("docker.elastic.co/wolfi/chainguard-base:latest@sha256:c16d3ad6cebf387e8dd2ad769f54320c4819fbbaa21e729fad087c7ae223b4d0",
"-wolfi",
"apk"
),
// spotless:on
// Based on WOLFI above, with more extras. We don't set a base image because
// we programmatically extend from the Wolfi image.

View file

@ -284,6 +284,7 @@
"Cluster and node setting",
"Command line tool",
"CRUD",
"ES|QL",
"Index setting",
"Ingest",
"JVM option",

View file

@ -1,6 +1,6 @@
### Entitlement Agent
This is a java agent that instruments sensitive class library methods with calls into the `entitlement-runtime` module to check for permissions granted under the _entitlements_ system.
This is a java agent that instruments sensitive class library methods with calls into the `entitlement-bridge` module to check for permissions granted under the _entitlements_ system.
The entitlements system provides an alternative to the legacy `SecurityManager` system, which is deprecated for removal.
With this agent, the Elasticsearch server can retain some control over which class library methods can be invoked by which callers.
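As a rough sketch of the effect of instrumentation: `System.exit` is the method this commit actually instruments, and the injected bytecode is equivalent to calling the bridge before the original body runs (the source form below is illustrative only):

```java
// Illustrative source form of the injected bytecode; the agent emits
// equivalent instructions rather than recompiling source.
public static void exit(int status) {
    org.elasticsearch.entitlement.api.EntitlementProvider.checks()
        .checkSystemExit(
            StackWalker.getInstance(StackWalker.Option.RETAIN_CLASS_REFERENCE).getCallerClass(),
            status
        );
    // original method body runs only if no NotEntitledException was thrown
}
```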

View file

@ -7,21 +7,44 @@
* License v3.0 only", or the "Server Side Public License, v 1".
*/
import static java.util.stream.Collectors.joining
apply plugin: 'elasticsearch.build'
apply plugin: 'elasticsearch.embedded-providers'
embeddedProviders {
impl 'entitlement-agent', project(':distribution:tools:entitlement-agent:impl')
}
configurations {
entitlementRuntime
entitlementBridge
}
dependencies {
entitlementRuntime project(":libs:elasticsearch-entitlement-runtime")
implementation project(":libs:elasticsearch-entitlement-runtime")
entitlementBridge project(":distribution:tools:entitlement-bridge")
compileOnly project(":libs:elasticsearch-core")
compileOnly project(":distribution:tools:entitlement-runtime")
testImplementation project(":test:framework")
testImplementation project(":distribution:tools:entitlement-bridge")
testImplementation project(":distribution:tools:entitlement-agent:impl")
}
tasks.named('test').configure {
systemProperty "tests.security.manager", "false"
dependsOn('jar')
jvmArgs "-javaagent:${ tasks.named('jar').flatMap{ it.archiveFile }.get()}"
// Register an argument provider to avoid eager resolution of configurations
jvmArgumentProviders.add(new CommandLineArgumentProvider() {
@Override
Iterable<String> asArguments() {
return ["-javaagent:${tasks.jar.archiveFile.get()}", "-Des.entitlements.bridgeJar=${configurations.entitlementBridge.singleFile}"]
}
})
// The Elasticsearch build plugin automatically adds all compileOnly deps as testImplementation.
// We must not add the bridge this way because it is also on the boot classpath, and that would lead to jar hell.
classpath -= files(configurations.entitlementBridge)
}
tasks.named('jar').configure {

View file

@ -0,0 +1,20 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
apply plugin: 'elasticsearch.build'
dependencies {
compileOnly project(':distribution:tools:entitlement-agent')
implementation 'org.ow2.asm:asm:9.7'
}
tasks.named('forbiddenApisMain').configure {
replaceSignatureFiles 'jdk-signatures'
}

View file

@ -0,0 +1,26 @@
Copyright (c) 2012 France Télécom
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the name of the copyright holders nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -0,0 +1,18 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
import org.elasticsearch.entitlement.instrumentation.InstrumentationService;
import org.elasticsearch.entitlement.instrumentation.impl.InstrumentationServiceImpl;
module org.elasticsearch.entitlement.agent.impl {
requires org.objectweb.asm;
requires org.elasticsearch.entitlement.agent;
provides InstrumentationService with InstrumentationServiceImpl;
}

View file

@ -0,0 +1,41 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.entitlement.instrumentation.impl;
import org.elasticsearch.entitlement.instrumentation.InstrumentationService;
import org.elasticsearch.entitlement.instrumentation.Instrumenter;
import org.elasticsearch.entitlement.instrumentation.MethodKey;
import org.objectweb.asm.Type;
import java.lang.reflect.Method;
import java.lang.reflect.Modifier;
import java.util.Map;
import java.util.stream.Stream;
public class InstrumentationServiceImpl implements InstrumentationService {
@Override
public Instrumenter newInstrumenter(String classNameSuffix, Map<MethodKey, Method> instrumentationMethods) {
return new InstrumenterImpl(classNameSuffix, instrumentationMethods);
}
/**
* @return a {@link MethodKey} suitable for looking up the given {@code targetMethod} in the entitlements trampoline
*/
public MethodKey methodKeyForTarget(Method targetMethod) {
Type actualType = Type.getMethodType(Type.getMethodDescriptor(targetMethod));
return new MethodKey(
Type.getInternalName(targetMethod.getDeclaringClass()),
targetMethod.getName(),
Stream.of(actualType.getArgumentTypes()).map(Type::getInternalName).toList(),
Modifier.isStatic(targetMethod.getModifiers())
);
}
}
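A hypothetical usage sketch (not part of this commit), mirroring how `EntitlementAgent.premain` builds its method map:

```java
// Hypothetical usage: derive the lookup key for System.exit(int).
Method target = System.class.getMethod("exit", int.class);
MethodKey key = new InstrumentationServiceImpl().methodKeyForTarget(target);
// key.className() is the internal name "java/lang/System"; key.isStatic() is true.
```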

View file

@ -0,0 +1,214 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.entitlement.instrumentation.impl;
import org.elasticsearch.entitlement.instrumentation.Instrumenter;
import org.elasticsearch.entitlement.instrumentation.MethodKey;
import org.objectweb.asm.AnnotationVisitor;
import org.objectweb.asm.ClassReader;
import org.objectweb.asm.ClassVisitor;
import org.objectweb.asm.ClassWriter;
import org.objectweb.asm.MethodVisitor;
import org.objectweb.asm.Opcodes;
import org.objectweb.asm.Type;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Method;
import java.util.Map;
import java.util.stream.Stream;
import static org.objectweb.asm.ClassWriter.COMPUTE_FRAMES;
import static org.objectweb.asm.ClassWriter.COMPUTE_MAXS;
import static org.objectweb.asm.Opcodes.ACC_STATIC;
import static org.objectweb.asm.Opcodes.GETSTATIC;
import static org.objectweb.asm.Opcodes.INVOKEINTERFACE;
import static org.objectweb.asm.Opcodes.INVOKESTATIC;
import static org.objectweb.asm.Opcodes.INVOKEVIRTUAL;
public class InstrumenterImpl implements Instrumenter {
/**
* A suffix appended to instrumented class names, to avoid class name collisions during testing when there is no agent to replace classes in-place.
*/
private final String classNameSuffix;
private final Map<MethodKey, Method> instrumentationMethods;
public InstrumenterImpl(String classNameSuffix, Map<MethodKey, Method> instrumentationMethods) {
this.classNameSuffix = classNameSuffix;
this.instrumentationMethods = instrumentationMethods;
}
public ClassFileInfo instrumentClassFile(Class<?> clazz) throws IOException {
ClassFileInfo initial = getClassFileInfo(clazz);
return new ClassFileInfo(initial.fileName(), instrumentClass(Type.getInternalName(clazz), initial.bytecodes()));
}
public static ClassFileInfo getClassFileInfo(Class<?> clazz) throws IOException {
String internalName = Type.getInternalName(clazz);
String fileName = "/" + internalName + ".class";
byte[] originalBytecodes;
try (InputStream classStream = clazz.getResourceAsStream(fileName)) {
if (classStream == null) {
throw new IllegalStateException("Classfile not found in jar: " + fileName);
}
originalBytecodes = classStream.readAllBytes();
}
return new ClassFileInfo(fileName, originalBytecodes);
}
@Override
public byte[] instrumentClass(String className, byte[] classfileBuffer) {
ClassReader reader = new ClassReader(classfileBuffer);
ClassWriter writer = new ClassWriter(reader, COMPUTE_FRAMES | COMPUTE_MAXS);
ClassVisitor visitor = new EntitlementClassVisitor(Opcodes.ASM9, writer, className);
reader.accept(visitor, 0);
return writer.toByteArray();
}
class EntitlementClassVisitor extends ClassVisitor {
final String className;
EntitlementClassVisitor(int api, ClassVisitor classVisitor, String className) {
super(api, classVisitor);
this.className = className;
}
@Override
public void visit(int version, int access, String name, String signature, String superName, String[] interfaces) {
super.visit(version, access, name + classNameSuffix, signature, superName, interfaces);
}
@Override
public MethodVisitor visitMethod(int access, String name, String descriptor, String signature, String[] exceptions) {
var mv = super.visitMethod(access, name, descriptor, signature, exceptions);
boolean isStatic = (access & ACC_STATIC) != 0;
var key = new MethodKey(
className,
name,
Stream.of(Type.getArgumentTypes(descriptor)).map(Type::getInternalName).toList(),
isStatic
);
var instrumentationMethod = instrumentationMethods.get(key);
if (instrumentationMethod != null) {
// LOGGER.debug("Will instrument method {}", key);
return new EntitlementMethodVisitor(Opcodes.ASM9, mv, isStatic, descriptor, instrumentationMethod);
} else {
// LOGGER.trace("Will not instrument method {}", key);
}
return mv;
}
}
static class EntitlementMethodVisitor extends MethodVisitor {
private final boolean instrumentedMethodIsStatic;
private final String instrumentedMethodDescriptor;
private final Method instrumentationMethod;
private boolean hasCallerSensitiveAnnotation = false;
EntitlementMethodVisitor(
int api,
MethodVisitor methodVisitor,
boolean instrumentedMethodIsStatic,
String instrumentedMethodDescriptor,
Method instrumentationMethod
) {
super(api, methodVisitor);
this.instrumentedMethodIsStatic = instrumentedMethodIsStatic;
this.instrumentedMethodDescriptor = instrumentedMethodDescriptor;
this.instrumentationMethod = instrumentationMethod;
}
@Override
public AnnotationVisitor visitAnnotation(String descriptor, boolean visible) {
if (visible && descriptor.endsWith("CallerSensitive;")) {
hasCallerSensitiveAnnotation = true;
}
return super.visitAnnotation(descriptor, visible);
}
@Override
public void visitCode() {
pushEntitlementChecksObject();
pushCallerClass();
forwardIncomingArguments();
invokeInstrumentationMethod();
super.visitCode();
}
private void pushEntitlementChecksObject() {
mv.visitMethodInsn(
INVOKESTATIC,
"org/elasticsearch/entitlement/api/EntitlementProvider",
"checks",
"()Lorg/elasticsearch/entitlement/api/EntitlementChecks;",
false
);
}
private void pushCallerClass() {
if (hasCallerSensitiveAnnotation) {
mv.visitMethodInsn(
INVOKESTATIC,
"jdk/internal/reflect/Reflection",
"getCallerClass",
Type.getMethodDescriptor(Type.getType(Class.class)),
false
);
} else {
mv.visitFieldInsn(
GETSTATIC,
Type.getInternalName(StackWalker.Option.class),
"RETAIN_CLASS_REFERENCE",
Type.getDescriptor(StackWalker.Option.class)
);
mv.visitMethodInsn(
INVOKESTATIC,
Type.getInternalName(StackWalker.class),
"getInstance",
Type.getMethodDescriptor(Type.getType(StackWalker.class), Type.getType(StackWalker.Option.class)),
false
);
mv.visitMethodInsn(
INVOKEVIRTUAL,
Type.getInternalName(StackWalker.class),
"getCallerClass",
Type.getMethodDescriptor(Type.getType(Class.class)),
false
);
}
}
private void forwardIncomingArguments() {
int localVarIndex = 0;
if (instrumentedMethodIsStatic == false) {
mv.visitVarInsn(Opcodes.ALOAD, localVarIndex++);
}
for (Type type : Type.getArgumentTypes(instrumentedMethodDescriptor)) {
mv.visitVarInsn(type.getOpcode(Opcodes.ILOAD), localVarIndex);
localVarIndex += type.getSize();
}
}
private void invokeInstrumentationMethod() {
mv.visitMethodInsn(
INVOKEINTERFACE,
Type.getInternalName(instrumentationMethod.getDeclaringClass()),
instrumentationMethod.getName(),
Type.getMethodDescriptor(instrumentationMethod),
true
);
}
}
// private static final Logger LOGGER = LogManager.getLogger(Instrumenter.class);
public record ClassFileInfo(String fileName, byte[] bytecodes) {}
}
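A hypothetical sketch (exception handling elided) of driving the instrumenter without an agent, along the lines of the method map built in `EntitlementAgent.premain`:

```java
// Hypothetical test usage: map System.exit(int) to the bridge's check method,
// then rewrite System's class file with the check injected.
Method target = System.class.getMethod("exit", int.class);
Method check = Class.forName("org.elasticsearch.entitlement.api.EntitlementChecks")
    .getMethod("checkSystemExit", Class.class, int.class);
MethodKey key = new InstrumentationServiceImpl().methodKeyForTarget(target);
InstrumenterImpl instrumenter = new InstrumenterImpl("_NEW", Map.of(key, check));
// "_NEW" renames the emitted class so it can be loaded alongside the original.
InstrumenterImpl.ClassFileInfo instrumented = instrumenter.instrumentClassFile(System.class);
```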

View file

@ -0,0 +1,10 @@
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the "Elastic License
# 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
# Public License v 1"; you may not use this file except in compliance with, at
# your election, the "Elastic License 2.0", the "GNU Affero General Public
# License v3.0 only", or the "Server Side Public License, v 1".
#
org.elasticsearch.entitlement.instrumentation.impl.InstrumentationServiceImpl

View file

@ -7,7 +7,13 @@
* License v3.0 only", or the "Server Side Public License, v 1".
*/
import org.elasticsearch.entitlement.instrumentation.InstrumentationService;
module org.elasticsearch.entitlement.agent {
requires java.instrument;
requires org.elasticsearch.entitlement.runtime;
requires org.elasticsearch.base; // for @SuppressForbidden
exports org.elasticsearch.entitlement.instrumentation to org.elasticsearch.entitlement.agent.impl;
uses InstrumentationService;
}

View file

@ -9,13 +9,53 @@
package org.elasticsearch.entitlement.agent;
import org.elasticsearch.entitlement.runtime.api.EntitlementChecks;
import org.elasticsearch.core.SuppressForbidden;
import org.elasticsearch.core.internal.provider.ProviderLocator;
import org.elasticsearch.entitlement.instrumentation.InstrumentationService;
import org.elasticsearch.entitlement.instrumentation.MethodKey;
import java.io.IOException;
import java.lang.instrument.Instrumentation;
import java.lang.reflect.Method;
import java.util.Map;
import java.util.Set;
import java.util.jar.JarFile;
public class EntitlementAgent {
public static void premain(String agentArgs, Instrumentation inst) throws Exception {
EntitlementChecks.setAgentBooted();
// Add the bridge library (the one with the entitlement checking interface) to the bootstrap classpath.
// We can't actually reference the classes here for real before this point because they won't resolve.
var bridgeJarName = System.getProperty("es.entitlements.bridgeJar");
if (bridgeJarName == null) {
throw new IllegalArgumentException("System property es.entitlements.bridgeJar is required");
}
addJarToBootstrapClassLoader(inst, bridgeJarName);
Method targetMethod = System.class.getMethod("exit", int.class);
Method instrumentationMethod = Class.forName("org.elasticsearch.entitlement.api.EntitlementChecks")
.getMethod("checkSystemExit", Class.class, int.class);
Map<MethodKey, Method> methodMap = Map.of(INSTRUMENTER_FACTORY.methodKeyForTarget(targetMethod), instrumentationMethod);
inst.addTransformer(new Transformer(INSTRUMENTER_FACTORY.newInstrumenter("", methodMap), Set.of(internalName(System.class))), true);
inst.retransformClasses(System.class);
}
@SuppressForbidden(reason = "The appendToBootstrapClassLoaderSearch method takes a JarFile")
private static void addJarToBootstrapClassLoader(Instrumentation inst, String jarString) throws IOException {
inst.appendToBootstrapClassLoaderSearch(new JarFile(jarString));
}
private static String internalName(Class<?> c) {
return c.getName().replace('.', '/');
}
private static final InstrumentationService INSTRUMENTER_FACTORY = (new ProviderLocator<>(
"entitlement-agent",
InstrumentationService.class,
"org.elasticsearch.entitlement.agent.impl",
Set.of("org.objectweb.nonexistent.asm")
)).get();
// private static final Logger LOGGER = LogManager.getLogger(EntitlementAgent.class);
}

View file

@ -0,0 +1,49 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.entitlement.agent;
import org.elasticsearch.entitlement.instrumentation.Instrumenter;
import java.lang.instrument.ClassFileTransformer;
import java.security.ProtectionDomain;
import java.util.Set;
/**
* A {@link ClassFileTransformer} that applies an {@link Instrumenter} to the appropriate classes.
*/
public class Transformer implements ClassFileTransformer {
private final Instrumenter instrumenter;
private final Set<String> classesToTransform;
public Transformer(Instrumenter instrumenter, Set<String> classesToTransform) {
this.instrumenter = instrumenter;
this.classesToTransform = classesToTransform;
// TODO: Should warn if any MethodKey doesn't match any methods
}
@Override
public byte[] transform(
ClassLoader loader,
String className,
Class<?> classBeingRedefined,
ProtectionDomain protectionDomain,
byte[] classfileBuffer
) {
if (classesToTransform.contains(className)) {
// System.out.println("Transforming " + className);
return instrumenter.instrumentClass(className, classfileBuffer);
} else {
// System.out.println("Not transforming " + className);
return classfileBuffer;
}
}
// private static final Logger LOGGER = LogManager.getLogger(Transformer.class);
}

View file

@ -0,0 +1,25 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.entitlement.instrumentation;
import java.lang.reflect.Method;
import java.util.Map;
/**
* The SPI service entry point for instrumentation.
*/
public interface InstrumentationService {
Instrumenter newInstrumenter(String classNameSuffix, Map<MethodKey, Method> instrumentationMethods);
/**
* @return a {@link MethodKey} suitable for looking up the given {@code targetMethod} in the entitlements trampoline
*/
MethodKey methodKeyForTarget(Method targetMethod);
}

View file

@ -0,0 +1,14 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.entitlement.instrumentation;
public interface Instrumenter {
byte[] instrumentClass(String className, byte[] classfileBuffer);
}

View file

@ -0,0 +1,18 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.entitlement.instrumentation;
import java.util.List;
/**
*
* @param className the "internal name" of the class: includes the package info, but with periods replaced by slashes (for example, {@code java/lang/System} for {@link System})
*/
public record MethodKey(String className, String methodName, List<String> parameterTypes, boolean isStatic) {}

View file

@ -9,21 +9,40 @@
package org.elasticsearch.entitlement.agent;
import org.elasticsearch.entitlement.runtime.api.EntitlementChecks;
import com.carrotsearch.randomizedtesting.annotations.SuppressForbidden;
import org.elasticsearch.entitlement.runtime.api.ElasticsearchEntitlementManager;
import org.elasticsearch.entitlement.runtime.api.NotEntitledException;
import org.elasticsearch.entitlement.runtime.internals.EntitlementInternals;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.ESTestCase.WithoutSecurityManager;
import org.junit.After;
/**
* This is an end-to-end test that runs with the javaagent installed.
* It should exhaustively test every instrumented method to make sure it passes with the entitlement
* and fails without it.
* This is an end-to-end test of the agent and entitlement runtime.
* It runs with the agent installed, and exhaustively tests every instrumented method
* to make sure it works with the entitlement granted and throws without it.
* The only exception is {@link System#exit}, where we can't check that it works without
* terminating the JVM.
* See {@code build.gradle} for how we set the command line arguments for this test.
*/
@WithoutSecurityManager
public class EntitlementAgentTests extends ESTestCase {
public void testAgentBooted() {
assertTrue(EntitlementChecks.isAgentBooted());
public static final ElasticsearchEntitlementManager ENTITLEMENT_MANAGER = ElasticsearchEntitlementManager.get();
@After
public void resetEverything() {
EntitlementInternals.reset();
}
/**
* We can't really check that this one passes because it will just exit the JVM.
*/
@SuppressForbidden("Specifically testing System.exit")
public void testSystemExitNotEntitled() {
ENTITLEMENT_MANAGER.activate();
assertThrows(NotEntitledException.class, () -> System.exit(123));
}
}

View file

@ -0,0 +1,11 @@
### Entitlement Bridge
This is the code called directly from instrumented methods.
It's a minimal code stub that is loaded into the boot classloader by the entitlement agent
so that it is callable from the class library methods instrumented by the agent.
Its job is to forward the entitlement checks to the actual runtime library,
which is loaded normally.
It is not responsible for injecting the bytecode instrumentation (that's the agent)
nor for implementing the permission checks (that's the runtime library).
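Roughly, using the `EntitlementProvider` class added in this commit (`callerClass` and `status` are placeholders), the forwarding amounts to:

```java
// Bridge side (boot classloader): locate the single EntitlementChecks
// implementation via ServiceLoader and delegate the check to it.
EntitlementChecks checks = EntitlementProvider.checks();
checks.checkSystemExit(callerClass, status); // implemented by entitlement-runtime
```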

View file

@ -0,0 +1,18 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
apply plugin: 'elasticsearch.build'
dependencies {
}
tasks.named('forbiddenApisMain').configure {
replaceSignatureFiles 'jdk-signatures'
}

View file

@ -7,8 +7,8 @@
* License v3.0 only", or the "Server Side Public License, v 1".
*/
module org.elasticsearch.entitlement.runtime {
requires org.elasticsearch.base;
module org.elasticsearch.entitlement.bridge {
uses org.elasticsearch.entitlement.api.EntitlementChecks;
exports org.elasticsearch.entitlement.runtime.api to org.elasticsearch.entitlement.agent;
exports org.elasticsearch.entitlement.api;
}

View file

@ -0,0 +1,14 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.entitlement.api;
public interface EntitlementChecks {
void checkSystemExit(Class<?> callerClass, int status);
}

View file

@ -0,0 +1,34 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.entitlement.api;
import java.util.List;
import java.util.ServiceLoader;
public class EntitlementProvider {
private static final EntitlementChecks CHECKS = lookupEntitlementChecksImplementation();
public static EntitlementChecks checks() {
return CHECKS;
}
private static EntitlementChecks lookupEntitlementChecksImplementation() {
List<EntitlementChecks> candidates = ServiceLoader.load(EntitlementChecks.class).stream().map(ServiceLoader.Provider::get).toList();
if (candidates.isEmpty()) {
throw new IllegalStateException("No EntitlementChecks service");
} else if (candidates.size() >= 2) {
throw new IllegalStateException(
"Multiple EntitlementChecks services: " + candidates.stream().map(e -> e.getClass().getSimpleName()).toList()
);
} else {
return candidates.get(0);
}
}
}

View file

@ -5,10 +5,3 @@ This module implements mechanisms to grant and check permissions under the _entitlements_ system.
The entitlements system provides an alternative to the legacy `SecurityManager` system, which is deprecated for removal.
The `entitlement-agent` tool instruments sensitive class library methods with calls to this module, in order to enforce the controls.
This module is responsible for:
- Defining which class library methods are sensitive
- Defining what permissions should be checked for each sensitive method
- Implementing the permission checks
- Offering a "grant" API to grant permissions
It is not responsible for anything to do with bytecode instrumentation; that responsibility lies with `entitlement-agent`.

View file

@ -10,7 +10,9 @@ apply plugin: 'elasticsearch.build'
apply plugin: 'elasticsearch.publish'
dependencies {
compileOnly project(':libs:elasticsearch-core')
compileOnly project(':libs:elasticsearch-core') // For @SuppressForbidden
compileOnly project(':server') // To access the main server module for special permission checks
compileOnly project(':distribution:tools:entitlement-bridge')
testImplementation project(":test:framework")
}

View file

@ -0,0 +1,19 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
module org.elasticsearch.entitlement.runtime {
requires org.elasticsearch.entitlement.bridge;
requires org.elasticsearch.server;
exports org.elasticsearch.entitlement.runtime.api;
provides org.elasticsearch.entitlement.api.EntitlementChecks
with
org.elasticsearch.entitlement.runtime.api.ElasticsearchEntitlementManager;
}

View file

@ -0,0 +1,77 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.entitlement.runtime.api;
import org.elasticsearch.entitlement.api.EntitlementChecks;
import org.elasticsearch.entitlement.api.EntitlementProvider;
import java.util.Optional;
import static org.elasticsearch.entitlement.runtime.internals.EntitlementInternals.isActive;
/**
* Implementation of the {@link EntitlementChecks} interface, providing additional
* API methods for managing the checks.
* The trampoline module loads this object via SPI.
*/
public class ElasticsearchEntitlementManager implements EntitlementChecks {
/**
* @return the same instance of {@link ElasticsearchEntitlementManager} returned by {@link EntitlementProvider}.
*/
public static ElasticsearchEntitlementManager get() {
return (ElasticsearchEntitlementManager) EntitlementProvider.checks();
}
/**
* Causes entitlements to be enforced.
*/
public void activate() {
isActive = true;
}
@Override
public void checkSystemExit(Class<?> callerClass, int status) {
var requestingModule = requestingModule(callerClass);
if (isTriviallyAllowed(requestingModule)) {
// System.out.println(" - Trivially allowed");
return;
}
// Hard-forbidden until we develop the permission granting scheme
throw new NotEntitledException("Missing entitlement for " + requestingModule);
}
private static Module requestingModule(Class<?> callerClass) {
if (callerClass != null) {
Module callerModule = callerClass.getModule();
if (callerModule.getLayer() != ModuleLayer.boot()) {
// fast path
return callerModule;
}
}
int framesToSkip = 1 // getCallingClass (this method)
+ 1 // the checkXxx method
+ 1 // the runtime config method
+ 1 // the instrumented method
;
Optional<Module> module = StackWalker.getInstance(StackWalker.Option.RETAIN_CLASS_REFERENCE)
.walk(
s -> s.skip(framesToSkip)
.map(f -> f.getDeclaringClass().getModule())
.filter(m -> m.getLayer() != ModuleLayer.boot())
.findFirst()
);
return module.orElse(null);
}
private static boolean isTriviallyAllowed(Module requestingModule) {
return isActive == false || (requestingModule == null) || requestingModule == System.class.getModule();
}
}

View file

@ -9,14 +9,12 @@
package org.elasticsearch.entitlement.runtime.api;
public class EntitlementChecks {
static boolean isAgentBooted = false;
public static void setAgentBooted() {
isAgentBooted = true;
public class NotEntitledException extends RuntimeException {
public NotEntitledException(String message) {
super(message);
}
public static boolean isAgentBooted() {
return isAgentBooted;
public NotEntitledException(String message, Throwable cause) {
super(message, cause);
}
}

View file

@ -0,0 +1,24 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.entitlement.runtime.internals;
/**
* Don't export this from the module. Just don't.
*/
public class EntitlementInternals {
/**
* When false, entitlement rules are not enforced; all operations are allowed.
*/
public static volatile boolean isActive = false;
public static void reset() {
isActive = false;
}
}

View file

@ -0,0 +1,10 @@
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the "Elastic License
# 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
# Public License v 1"; you may not use this file except in compliance with, at
# your election, the "Elastic License 2.0", the "GNU Affero General Public
# License v3.0 only", or the "Server Side Public License, v 1".
#
org.elasticsearch.entitlement.runtime.api.ElasticsearchEntitlementManager

View file

@ -0,0 +1,5 @@
pr: 111336
summary: Use the same chunking configurations for models in the Elasticsearch service
area: Machine Learning
type: enhancement
issues: []

View file

@ -0,0 +1,5 @@
pr: 112567
summary: Track shard snapshot progress during node shutdown
area: Snapshot/Restore
type: enhancement
issues: []

View file

@ -0,0 +1,5 @@
pr: 112933
summary: "Allow incubating Panama Vector in simdvec, and add vectorized `ipByteBin`"
area: Search
type: enhancement
issues: []

View file

@ -0,0 +1,5 @@
pr: 113563
summary: Use ELSER By Default For Semantic Text
area: Mapping
type: enhancement
issues: []

View file

@ -0,0 +1,5 @@
pr: 113812
summary: Add Streaming Inference spec
area: Machine Learning
type: enhancement
issues: []

View file

@ -0,0 +1,6 @@
pr: 113897
summary: "Add chunking settings configuration to `CohereService,` `AmazonBedrockService,`\
\ and `AzureOpenAiService`"
area: Machine Learning
type: enhancement
issues: []

View file

@ -1,5 +0,0 @@
pr: 113900
summary: Fix BWC for file-settings based role mappings
area: Authentication
type: bug
issues: []

View file

@ -0,0 +1,13 @@
pr: 113967
summary: "ESQL: Entirely remove META FUNCTIONS"
area: ES|QL
type: breaking
issues: []
breaking:
title: "ESQL: Entirely remove META FUNCTIONS"
area: ES|QL
details: |
Removes an undocumented syntax from ESQL: META FUNCTION. This was never
reliable or really useful. Consult the documentation instead.
impact: "Removes an undocumented syntax from ESQL: META FUNCTION"
notable: false

View file

@ -0,0 +1,5 @@
pr: 114002
summary: Add a `mustache.max_output_size_bytes` setting to limit the length of results from mustache scripts
area: Infra/Scripting
type: enhancement
issues: []

View file

@ -0,0 +1,5 @@
pr: 114080
summary: Stream Cohere Completion
area: Machine Learning
type: enhancement
issues: []

View file

@ -0,0 +1,5 @@
pr: 114128
summary: Adding `index_template_substitutions` to the simulate ingest API
area: Ingest Node
type: enhancement
issues: []

View file

@ -0,0 +1,6 @@
pr: 114157
summary: Add a `terminate` ingest processor
area: Ingest Node
type: feature
issues:
- 110218

View file

@ -0,0 +1,5 @@
pr: 114177
summary: "Make `randomInstantBetween` always return a value in range [`minInstant`, `maxInstant`]"
area: Infra/Metrics
type: bug
issues: []

View file

@ -0,0 +1,17 @@
pr: 114231
summary: Remove cluster state from `/_cluster/reroute` response
area: Allocation
type: breaking
issues:
- 88978
breaking:
title: Remove cluster state from `/_cluster/reroute` response
area: REST API
details: >-
The `POST /_cluster/reroute` API no longer returns the cluster state in its
response. The `?metric` query parameter to this API now has no effect and
its use will be forbidden in a future version.
impact: >-
Cease usage of the `?metric` query parameter when calling the
`POST /_cluster/reroute` API.
notable: false

View file

@ -0,0 +1,5 @@
pr: 114264
summary: "Fix analyzed wildcard query in simple_query_string when disjunctions is empty"
area: Search
type: bug
issues: [114185]

View file

@ -0,0 +1,5 @@
pr: 114337
summary: "Enables cluster state role mapper, to include ECK operator-defined role mappings in role resolution"
area: Authentication
type: bug
issues: []

View file

@ -10,7 +10,7 @@ Changes the allocation of shards in a cluster.
[[cluster-reroute-api-request]]
==== {api-request-title}
`POST /_cluster/reroute?metric=none`
`POST /_cluster/reroute`
[[cluster-reroute-api-prereqs]]
==== {api-prereq-title}
@ -193,7 +193,7 @@ This is a short example of a simple reroute API call:
[source,console]
--------------------------------------------------
POST /_cluster/reroute?metric=none
POST /_cluster/reroute
{
"commands": [
{

View file

@ -95,7 +95,7 @@ Changing allocation id V8QXk-QXSZinZMT-NvEq4w to tjm9Ve6uTBewVFAlfUMWjA
You should run the following command to allocate this shard:
POST /_cluster/reroute?metric=none
POST /_cluster/reroute
{
"commands" : [
{

View file

@ -63,18 +63,22 @@ To connect to Zoom you need to https://developers.zoom.us/docs/internal-apps/s2s
6. Click on the "Create" button to create the app registration.
7. After the registration is complete, you will be redirected to the app's overview page. Take note of the "App Credentials" value, as you'll need it later.
8. Navigate to the "Scopes" section and click on the "Add Scopes" button.
9. The following scopes need to be added to the app.
9. The following granular scopes need to be added to the app.
+
[source,bash]
----
user:read:admin
meeting:read:admin
chat_channel:read:admin
recording:read:admin
chat_message:read:admin
report:read:admin
user:read:list_users:admin
meeting:read:list_meetings:admin
meeting:read:list_past_participants:admin
cloud_recording:read:list_user_recordings:admin
team_chat:read:list_user_channels:admin
team_chat:read:list_user_messages:admin
----
[NOTE]
====
The connector requires a minimum scope of `user:read:list_users:admin` to ingest data into Elasticsearch.
====
+
10. Click on the "Done" button to add the selected scopes to your app.
11. Navigate to the "Activation" section and input the necessary information to activate the app.
@ -220,18 +224,22 @@ To connect to Zoom you need to https://developers.zoom.us/docs/internal-apps/s2s
6. Click on the "Create" button to create the app registration.
7. After the registration is complete, you will be redirected to the app's overview page. Take note of the "App Credentials" value, as you'll need it later.
8. Navigate to the "Scopes" section and click on the "Add Scopes" button.
9. The following scopes need to be added to the app.
9. The following granular scopes need to be added to the app.
+
[source,bash]
----
user:read:admin
meeting:read:admin
chat_channel:read:admin
recording:read:admin
chat_message:read:admin
report:read:admin
user:read:list_users:admin
meeting:read:list_meetings:admin
meeting:read:list_past_participants:admin
cloud_recording:read:list_user_recordings:admin
team_chat:read:list_user_channels:admin
team_chat:read:list_user_messages:admin
----
[NOTE]
====
The connector requires a minimum scope of `user:read:list_users:admin` to ingest data into Elasticsearch.
====
+
10. Click on the "Done" button to add the selected scopes to your app.
11. Navigate to the "Activation" section and input the necessary information to activate the app.

View file

@ -2,4 +2,10 @@
*Description*
Converts a multivalued expression into a single valued column containing the first value. This is most useful when reading from a function that emits multivalued columns in a known order like <<esql-split>>. The order that <<esql-multivalued-fields, multivalued fields>> are read from underlying storage is not guaranteed. It is *frequently* ascending, but don't rely on that. If you need the minimum value use <<esql-mv_min>> instead of `MV_FIRST`. `MV_MIN` has optimizations for sorted values so there isn't a performance benefit to `MV_FIRST`.
Converts a multivalued expression into a single valued column containing the first value. This is most useful when reading from a function that emits multivalued columns in a known order like <<esql-split>>.
The order that <<esql-multivalued-fields, multivalued fields>> are read from
underlying storage is not guaranteed. It is *frequently* ascending, but don't
rely on that. If you need the minimum value use <<esql-mv_min>> instead of
`MV_FIRST`. `MV_MIN` has optimizations for sorted values so there isn't a
performance benefit to `MV_FIRST`.

View file

@ -2,4 +2,10 @@
*Description*
Converts a multivalue expression into a single valued column containing the last value. This is most useful when reading from a function that emits multivalued columns in a known order like <<esql-split>>. The order that <<esql-multivalued-fields, multivalued fields>> are read from underlying storage is not guaranteed. It is *frequently* ascending, but don't rely on that. If you need the maximum value use <<esql-mv_max>> instead of `MV_LAST`. `MV_MAX` has optimizations for sorted values so there isn't a performance benefit to `MV_LAST`.
Converts a multivalue expression into a single valued column containing the last value. This is most useful when reading from a function that emits multivalued columns in a known order like <<esql-split>>.
The order that <<esql-multivalued-fields, multivalued fields>> are read from
underlying storage is not guaranteed. It is *frequently* ascending, but don't
rely on that. If you need the maximum value use <<esql-mv_max>> instead of
`MV_LAST`. `MV_MAX` has optimizations for sorted values so there isn't a
performance benefit to `MV_LAST`.

View file

@ -2,4 +2,8 @@
*Description*
Returns a subset of the multivalued field using the start and end index values.
Returns a subset of the multivalued field using the start and end index values. This is most useful when reading from a function that emits multivalued columns in a known order like <<esql-split>> or <<esql-mv_sort>>.
The order that <<esql-multivalued-fields, multivalued fields>> are read from
underlying storage is not guaranteed. It is *frequently* ascending, but don't
rely on that.

View file

@ -2,7 +2,7 @@
"comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.",
"type" : "eval",
"name" : "mv_first",
"description" : "Converts a multivalued expression into a single valued column containing the\nfirst value. This is most useful when reading from a function that emits\nmultivalued columns in a known order like <<esql-split>>.\n\nThe order that <<esql-multivalued-fields, multivalued fields>> are read from\nunderlying storage is not guaranteed. It is *frequently* ascending, but don't\nrely on that. If you need the minimum value use <<esql-mv_min>> instead of\n`MV_FIRST`. `MV_MIN` has optimizations for sorted values so there isn't a\nperformance benefit to `MV_FIRST`.",
"description" : "Converts a multivalued expression into a single valued column containing the\nfirst value. This is most useful when reading from a function that emits\nmultivalued columns in a known order like <<esql-split>>.",
"signatures" : [
{
"params" : [

View file

@ -2,7 +2,7 @@
"comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.",
"type" : "eval",
"name" : "mv_last",
"description" : "Converts a multivalue expression into a single valued column containing the last\nvalue. This is most useful when reading from a function that emits multivalued\ncolumns in a known order like <<esql-split>>.\n\nThe order that <<esql-multivalued-fields, multivalued fields>> are read from\nunderlying storage is not guaranteed. It is *frequently* ascending, but don't\nrely on that. If you need the maximum value use <<esql-mv_max>> instead of\n`MV_LAST`. `MV_MAX` has optimizations for sorted values so there isn't a\nperformance benefit to `MV_LAST`.",
"description" : "Converts a multivalue expression into a single valued column containing the last\nvalue. This is most useful when reading from a function that emits multivalued\ncolumns in a known order like <<esql-split>>.",
"signatures" : [
{
"params" : [

View file

@ -2,7 +2,7 @@
"comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.",
"type" : "eval",
"name" : "mv_slice",
"description" : "Returns a subset of the multivalued field using the start and end index values.",
"description" : "Returns a subset of the multivalued field using the start and end index values.\nThis is most useful when reading from a function that emits multivalued columns\nin a known order like <<esql-split>> or <<esql-mv_sort>>.",
"signatures" : [
{
"params" : [

View file

@ -7,12 +7,6 @@ Converts a multivalued expression into a single valued column containing the
first value. This is most useful when reading from a function that emits
multivalued columns in a known order like <<esql-split>>.
The order that <<esql-multivalued-fields, multivalued fields>> are read from
underlying storage is not guaranteed. It is *frequently* ascending, but don't
rely on that. If you need the minimum value use <<esql-mv_min>> instead of
`MV_FIRST`. `MV_MIN` has optimizations for sorted values so there isn't a
performance benefit to `MV_FIRST`.
```
ROW a="foo;bar;baz"
| EVAL first_a = MV_FIRST(SPLIT(a, ";"))

View file

@ -7,12 +7,6 @@ Converts a multivalue expression into a single valued column containing the last
value. This is most useful when reading from a function that emits multivalued
columns in a known order like <<esql-split>>.
The order that <<esql-multivalued-fields, multivalued fields>> are read from
underlying storage is not guaranteed. It is *frequently* ascending, but don't
rely on that. If you need the maximum value use <<esql-mv_max>> instead of
`MV_LAST`. `MV_MAX` has optimizations for sorted values so there isn't a
performance benefit to `MV_LAST`.
```
ROW a="foo;bar;baz"
| EVAL last_a = MV_LAST(SPLIT(a, ";"))

View file

@ -4,6 +4,8 @@ This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../READ
### MV_SLICE
Returns a subset of the multivalued field using the start and end index values.
This is most useful when reading from a function that emits multivalued columns
in a known order like <<esql-split>> or <<esql-mv_sort>>.
```
row a = [1, 2, 2, 3]

View file

@ -177,6 +177,37 @@ POST /_query
----
// TESTRESPONSE[s/"took": 28/"took": "$body.took"/]
[discrete]
[[esql-multivalued-nulls]]
==== `null` in a list
`null` values in a list are not preserved at the storage layer:
[source,console,id=esql-multivalued-fields-multivalued-nulls]
----
POST /mv/_doc?refresh
{ "a": [2, null, 1] }
POST /_query
{
"query": "FROM mv | LIMIT 1"
}
----
[source,console-result]
----
{
"took": 28,
"columns": [
{ "name": "a", "type": "long"},
],
"values": [
[[1, 2]],
]
}
----
// TESTRESPONSE[s/"took": 28/"took": "$body.took"/]
[discrete]
[[esql-multivalued-fields-functions]]
==== Functions

View file

@ -56,7 +56,7 @@ documents can't be backed up incrementally.
===== Blocks during a force merge
Calls to this API block until the merge is complete (unless request contains
wait_for_completion=false, which is default true). If the client connection
`wait_for_completion=false`, which is default `true`). If the client connection
is lost before completion then the force merge process will continue in the
background. Any new requests to force merge the same indices will also block
until the ongoing force merge is complete.

View file

@ -85,6 +85,8 @@ include::{docdir}/rest-api/common-parms.asciidoc[tag=master-timeout]
[[put-index-template-api-request-body]]
==== {api-request-body-title}
// tag::request-body[]
`composed_of`::
(Optional, array of strings)
An ordered list of component template names. Component templates are merged in the order
@ -102,7 +104,7 @@ See <<create-index-template,create an index template>>.
+
.Properties of `data_stream`
[%collapsible%open]
====
=====
`allow_custom_routing`::
(Optional, Boolean) If `true`, the data stream supports
<<mapping-routing-field,custom routing>>. Defaults to `false`.
@ -117,7 +119,7 @@ See <<create-index-template,create an index template>>.
+
If `time_series`, each backing index has an `index.mode` index setting of
`time_series`.
====
=====
`index_patterns`::
(Required, array of strings)
@ -146,7 +148,7 @@ Template to be applied. It may optionally include an `aliases`, `mappings`, or
+
.Properties of `template`
[%collapsible%open]
====
=====
`aliases`::
(Optional, object of objects) Aliases to add.
+
@ -161,7 +163,7 @@ include::{es-ref-dir}/indices/create-index.asciidoc[tag=aliases-props]
include::{docdir}/rest-api/common-parms.asciidoc[tag=mappings]
include::{docdir}/rest-api/common-parms.asciidoc[tag=settings]
====
=====
`version`::
(Optional, integer)
@ -174,6 +176,7 @@ Marks this index template as deprecated.
When creating or updating a non-deprecated index template that uses deprecated components,
{es} will emit a deprecation warning.
// end::index-template-api-body[]
// end::request-body[]
[[put-index-template-api-example]]
==== {api-examples-title}

View file

@ -102,6 +102,12 @@ POST /_ingest/_simulate
}
}
}
},
"index_template_substitutions": { <3>
"my-index-template": {
"index_patterns": ["my-index-*"],
"composed_of": ["component_template_1", "component_template_2"]
}
}
}
----
@ -109,6 +115,8 @@ POST /_ingest/_simulate
<1> This replaces the existing `my-pipeline` pipeline with the contents given here for the duration of this request.
<2> This replaces the existing `my-component-template` component template with the contents given here for the duration of this request.
These templates can be used to change the pipeline(s) used, or to modify the mapping that will be used to validate the result.
<3> This replaces the existing `my-index-template` index template with the contents given here for the duration of this request.
These templates can be used to change the pipeline(s) used, or to modify the mapping that will be used to validate the result.
[[simulate-ingest-api-request]]
==== {api-request-title}
@ -225,6 +233,19 @@ include::{es-ref-dir}/indices/put-component-template.asciidoc[tag=template]
====
`index_template_substitutions`::
(Optional, map of strings to objects)
Map of index template names to substitute index template definition objects.
+
.Properties of index template definition objects
[%collapsible%open]
====
include::{es-ref-dir}/indices/put-index-template.asciidoc[tag=request-body]
====
[[simulate-ingest-api-example]]
==== {api-examples-title}

View file

@ -0,0 +1,30 @@
[[terminate-processor]]
=== Terminate processor
++++
<titleabbrev>Terminate</titleabbrev>
++++
Terminates the current ingest pipeline, causing no further processors to be run.
This will normally be executed conditionally, using the `if` option.
If this pipeline is being called from another pipeline, the calling pipeline is *not* terminated.
[[terminate-options]]
.Terminate Options
[options="header"]
|======
| Name | Required | Default | Description
include::common-options.asciidoc[]
|======
[source,js]
--------------------------------------------------
{
"description" : "terminates the current pipeline if the error field is present",
"terminate": {
"if": "ctx.error != null"
}
}
--------------------------------------------------
// NOTCONSOLE

View file

@ -204,7 +204,7 @@ For general content, you have the following options for adding data to {es} indices:
If you're building a website or app, then you can call Elasticsearch APIs using an https://www.elastic.co/guide/en/elasticsearch/client/index.html[{es} client] in the programming language of your choice. If you use the Python client, then check out the `elasticsearch-labs` repo for various https://github.com/elastic/elasticsearch-labs/tree/main/notebooks/search/python-examples[example notebooks].
* {kibana-ref}/connect-to-elasticsearch.html#upload-data-kibana[File upload]: Use the {kib} file uploader to index single files for one-off testing and exploration. The GUI guides you through setting up your index and field mappings.
* https://github.com/elastic/crawler[Web crawler]: Extract and index web page content into {es} documents.
* {enterprise-search-ref}/connectors.html[Connectors]: Sync data from various third-party data sources to create searchable, read-only replicas in {es}.
* <<es-connectors,Connectors>>: Sync data from various third-party data sources to create searchable, read-only replicas in {es}.
[discrete]
[[es-ingestion-overview-timestamped]]

View file

@ -821,8 +821,6 @@ address.
[[lookup-runtime-fields]]
==== Retrieve fields from related indices
experimental[]
The <<search-fields,`fields`>> parameter on the `_search` API can also be used to retrieve fields from
the related indices via runtime fields with a type of `lookup`.

View file

@ -225,6 +225,17 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
(Optional, string)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
=======
`deberta_v2`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
+
.Properties of deberta_v2
[%collapsible%open]
=======
`truncate`::::
(Optional, string)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
=======
`roberta`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
@ -301,6 +312,17 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
(Optional, string)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
=======
`deberta_v2`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
+
.Properties of deberta_v2
[%collapsible%open]
=======
`truncate`::::
(Optional, string)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
=======
`roberta`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
@ -397,6 +419,21 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
(Optional, string)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
=======
`deberta_v2`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
+
.Properties of deberta_v2
[%collapsible%open]
=======
`span`::::
(Optional, integer)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]
`truncate`::::
(Optional, string)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
=======
`roberta`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
@ -517,6 +554,21 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
(Optional, string)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
=======
`deberta_v2`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
+
.Properties of deberta_v2
[%collapsible%open]
=======
`span`::::
(Optional, integer)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]
`truncate`::::
(Optional, string)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
=======
`roberta`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
@ -608,6 +660,17 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
(Optional, string)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
=======
`deberta_v2`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
+
.Properties of deberta_v2
[%collapsible%open]
=======
`truncate`::::
(Optional, string)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
=======
`roberta`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
@ -687,6 +750,21 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
(Optional, integer)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]
`with_special_tokens`::::
(Optional, boolean)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-bert-with-special-tokens]
=======
`deberta_v2`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
+
.Properties of deberta_v2
[%collapsible%open]
=======
`span`::::
(Optional, integer)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]
`with_special_tokens`::::
(Optional, boolean)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-bert-with-special-tokens]
@ -790,6 +868,17 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
(Optional, string)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
=======
`deberta_v2`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
+
.Properties of deberta_v2
[%collapsible%open]
=======
`truncate`::::
(Optional, string)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
=======
`roberta`::::
(Optional, object)
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
@ -1,12 +1,12 @@
[[re-ranking-overview]]
= Re-ranking
Many search systems are built on two-stage retrieval pipelines.
Many search systems are built on multi-stage retrieval pipelines.
The first stage uses cheap, fast algorithms to find a broad set of possible matches.
Earlier stages use cheap, fast algorithms to find a broad set of possible matches.
The second stage uses a more powerful model, often machine learning-based, to reorder the documents.
This second step is called re-ranking.
Later stages use more powerful models, often machine learning-based, to reorder the documents.
This step is called re-ranking.
Because the resource-intensive model is only applied to the smaller set of pre-filtered results, this approach returns more relevant results while still optimizing for search performance and computational costs.
{es} supports various ranking and re-ranking techniques to optimize search relevance and performance.
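As a concrete sketch of this pattern (index, fields, queries, and weights here are illustrative), the query rescorer applies a more expensive query to only the top `window_size` hits from the first stage:
[source,js]
--------------------------------------------------
GET my-index/_search
{
  "query": { "match": { "content": "vector database" } },
  "rescore": {
    "window_size": 50,
    "query": {
      "rescore_query": { "match_phrase": { "content": "vector database", "slop": 2 } },
      "query_weight": 0.7,
      "rescore_query_weight": 1.2
    }
  }
}
--------------------------------------------------
// NOTCONSOLE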
@ -18,7 +18,7 @@ Because the resource-intensive model is only applied to the smaller set of pre-f
[float]
[[re-ranking-first-stage-pipeline]]
=== First stage: initial retrieval
=== Initial retrieval
[float]
[[re-ranking-ranking-overview-bm25]]
@ -45,7 +45,7 @@ Hybrid search techniques combine results from full-text and vector search pipeli
[float]
[[re-ranking-overview-second-stage]]
=== Second stage: Re-ranking
=== Re-ranking
When using the following advanced re-ranking pipelines, first-stage retrieval mechanisms effectively generate a set of candidates.
These candidates are funneled into the re-ranker to perform more computationally expensive re-ranking tasks.
@ -1298,10 +1298,11 @@ tag::wait_for_active_shards[]
`wait_for_active_shards`::
+
--
(Optional, string) The number of shard copies that must be active before
proceeding with the operation. Set to `all` or any positive integer up
to the total number of shards in the index (`number_of_replicas+1`).
Default: 1, the primary shard.
(Optional, string) The number of copies of each shard that must be active
before proceeding with the operation. Set to `all` or any non-negative integer
up to the total number of copies of each shard in the index
(`number_of_replicas+1`). Defaults to `1`, meaning to wait just for each
primary shard to be active.
See <<index-wait-for-active-shards>>.
--
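For example (index name illustrative), the following indexing request would wait until at least two copies of the target shard (the primary plus one replica) are active before proceeding:
[source,js]
--------------------------------------------------
PUT my-index/_doc/1?wait_for_active_shards=2
{
  "message": "hello"
}
--------------------------------------------------
// NOTCONSOLE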
@ -239,8 +239,7 @@ GET /_xpack/usage
"keep" : 0,
"enrich" : 0,
"from" : 0,
"row" : 0,
"meta" : 0
"row" : 0
},
"queries" : {
"rest" : {
@ -89,6 +89,16 @@ PUT semantic-embeddings
It will be used to generate the embeddings based on the input text.
Every time you ingest data into the related `semantic_text` field, this endpoint will be used for creating the vector representation of the text.
[NOTE]
====
If you're using web crawlers or connectors to generate indices, you have to
<<indices-put-mapping,update the index mappings>> for these indices to
include the `semantic_text` field. Once the mapping is updated, you'll need to run
a full web crawl or a full connector sync. This ensures that all existing
documents are reprocessed and updated with the new semantic embeddings,
enabling semantic search on the updated data.
====
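A minimal sketch of such a mapping update (index, field, and {infer} endpoint names are illustrative):
[source,js]
--------------------------------------------------
PUT my-crawled-content/_mapping
{
  "properties": {
    "body": {
      "type": "semantic_text",
      "inference_id": "my-inference-endpoint"
    }
  }
}
--------------------------------------------------
// NOTCONSOLE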
[discrete]
[[semantic-text-load-data]]
@ -118,6 +128,13 @@ Create the embeddings from the text by reindexing the data from the `test-data`
The data in the `content` field will be reindexed into the `content` semantic text field of the destination index.
The reindexed data will be processed by the {infer} endpoint associated with the `content` semantic text field.
[NOTE]
====
This step uses the reindex API to simulate data ingestion. If you are working with data that has already been indexed,
rather than with the `test-data` set, you still need to reindex it to ensure that the data is processed by the {infer}
endpoint and the necessary embeddings are generated.
====
[source,console]
------------------------------------------------------------
POST _reindex?wait_for_completion=false
@ -107,7 +107,7 @@ asynchronously in the background.
[source,console]
----
POST _cluster/reroute?metric=none
POST _cluster/reroute
----
[discrete]
@ -246,7 +246,7 @@ resulting in data loss.
+
[source,console]
----
POST _cluster/reroute?metric=none
POST _cluster/reroute
{
"commands": [
{
@ -15,7 +15,11 @@ module org.elasticsearch.base {
exports org.elasticsearch.core;
exports org.elasticsearch.jdk;
exports org.elasticsearch.core.internal.provider to org.elasticsearch.xcontent, org.elasticsearch.nativeaccess;
exports org.elasticsearch.core.internal.provider
to
org.elasticsearch.xcontent,
org.elasticsearch.nativeaccess,
org.elasticsearch.entitlement.agent;
uses ModuleQualifiedExportsService;
}
@ -53,7 +53,7 @@ public class ScriptServiceBridge extends StableBridgeAPI.Proxy<ScriptService> im
PainlessScriptEngine.NAME,
new PainlessScriptEngine(settings, scriptContexts),
MustacheScriptEngine.NAME,
new MustacheScriptEngine()
new MustacheScriptEngine(settings)
);
return new ScriptService(settings, scriptEngines, ScriptModule.CORE_CONTEXTS, timeProvider);
}
@ -7,6 +7,7 @@
* License v3.0 only", or the "Server Side Public License, v 1".
*/
import org.elasticsearch.gradle.internal.info.BuildParams
import org.elasticsearch.gradle.internal.precommit.CheckForbiddenApisTask
apply plugin: 'elasticsearch.publish'
@ -23,6 +24,20 @@ dependencies {
}
}
// compileMain21Java does not exist within idea (see MrJarPlugin) so we cannot reference directly by name
tasks.matching { it.name == "compileMain21Java" }.configureEach {
options.compilerArgs << '--add-modules=jdk.incubator.vector'
// we remove Werror, since incubating suppression (-Xlint:-incubating)
// is only supported since JDK 22
options.compilerArgs -= '-Werror'
}
tasks.named('test').configure {
if (BuildParams.getRuntimeJavaVersion().majorVersion.toInteger() >= 21) {
jvmArgs '--add-modules=jdk.incubator.vector'
}
}
tasks.withType(CheckForbiddenApisTask).configureEach {
replaceSignatureFiles 'jdk-signatures'
}
@ -10,6 +10,7 @@
module org.elasticsearch.simdvec {
requires org.elasticsearch.nativeaccess;
requires org.apache.lucene.core;
requires org.elasticsearch.logging;
exports org.elasticsearch.simdvec to org.elasticsearch.server;
}
@ -0,0 +1,27 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.simdvec;
import org.elasticsearch.simdvec.internal.vectorization.ESVectorUtilSupport;
import org.elasticsearch.simdvec.internal.vectorization.ESVectorizationProvider;
import static org.elasticsearch.simdvec.internal.vectorization.ESVectorUtilSupport.B_QUERY;
public class ESVectorUtil {
private static final ESVectorUtilSupport IMPL = ESVectorizationProvider.getInstance().getVectorUtilSupport();
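/**
 * Computes an inner product between a bit-sliced query and a binary vector: {@code q} holds
 * {@code B_QUERY} consecutive bit slices of {@code d.length} bytes each, and the result is the
 * sum over all slices {@code i} of {@code popcount(q[i] AND d) << i}.
 */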
public static long ipByteBinByte(byte[] q, byte[] d) {
if (q.length != d.length * B_QUERY) {
throw new IllegalArgumentException("vector dimensions incompatible: " + q.length + " != " + B_QUERY + " x " + d.length);
}
return IMPL.ipByteBinByte(q, d);
}
}
@ -0,0 +1,39 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.simdvec.internal.vectorization;
import org.apache.lucene.util.BitUtil;
final class DefaultESVectorUtilSupport implements ESVectorUtilSupport {
DefaultESVectorUtilSupport() {}
@Override
public long ipByteBinByte(byte[] q, byte[] d) {
return ipByteBinByteImpl(q, d);
}
public static long ipByteBinByteImpl(byte[] q, byte[] d) {
long ret = 0;
int size = d.length;
for (int i = 0; i < B_QUERY; i++) {
int r = 0;
long subRet = 0;
for (final int upperBound = d.length & -Integer.BYTES; r < upperBound; r += Integer.BYTES) {
subRet += Integer.bitCount((int) BitUtil.VH_NATIVE_INT.get(q, i * size + r) & (int) BitUtil.VH_NATIVE_INT.get(d, r));
}
for (; r < d.length; r++) {
subRet += Integer.bitCount((q[i * size + r] & d[r]) & 0xFF);
}
ret += subRet << i;
}
return ret;
}
}
@ -0,0 +1,23 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.simdvec.internal.vectorization;
final class DefaultESVectorizationProvider extends ESVectorizationProvider {
private final ESVectorUtilSupport vectorUtilSupport;
DefaultESVectorizationProvider() {
vectorUtilSupport = new DefaultESVectorUtilSupport();
}
@Override
public ESVectorUtilSupport getVectorUtilSupport() {
return vectorUtilSupport;
}
}
@ -0,0 +1,17 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.simdvec.internal.vectorization;
public interface ESVectorUtilSupport {
short B_QUERY = 4;
long ipByteBinByte(byte[] q, byte[] d);
}
@ -0,0 +1,38 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.simdvec.internal.vectorization;
import java.util.Objects;
public abstract class ESVectorizationProvider {
public static ESVectorizationProvider getInstance() {
return Objects.requireNonNull(
ESVectorizationProvider.Holder.INSTANCE,
"call to getInstance() from subclass of VectorizationProvider"
);
}
ESVectorizationProvider() {}
public abstract ESVectorUtilSupport getVectorUtilSupport();
// visible for tests
static ESVectorizationProvider lookup(boolean testMode) {
return new DefaultESVectorizationProvider();
}
/** This static holder class prevents classloading deadlock. */
private static final class Holder {
private Holder() {}
static final ESVectorizationProvider INSTANCE = lookup(false);
}
}
@ -0,0 +1,87 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.simdvec.internal.vectorization;
import org.apache.lucene.util.Constants;
import org.elasticsearch.logging.LogManager;
import org.elasticsearch.logging.Logger;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
public abstract class ESVectorizationProvider {
protected static final Logger logger = LogManager.getLogger(ESVectorizationProvider.class);
public static ESVectorizationProvider getInstance() {
return Objects.requireNonNull(
ESVectorizationProvider.Holder.INSTANCE,
"call to getInstance() from subclass of VectorizationProvider"
);
}
ESVectorizationProvider() {}
public abstract ESVectorUtilSupport getVectorUtilSupport();
// visible for tests
static ESVectorizationProvider lookup(boolean testMode) {
final int runtimeVersion = Runtime.version().feature();
assert runtimeVersion >= 21;
if (runtimeVersion <= 23) {
// only use vector module with Hotspot VM
if (Constants.IS_HOTSPOT_VM == false) {
logger.warn("Java runtime is not using Hotspot VM; Java vector incubator API can't be enabled.");
return new DefaultESVectorizationProvider();
}
// is the incubator module present and readable (JVM providers may exclude it, or the
// runtime may have been built without it via jlink)
final var vectorMod = lookupVectorModule();
if (vectorMod.isEmpty()) {
logger.warn(
"Java vector incubator module is not readable. "
+ "For optimal vector performance, pass '--add-modules jdk.incubator.vector' to enable Vector API."
);
return new DefaultESVectorizationProvider();
}
vectorMod.ifPresent(ESVectorizationProvider.class.getModule()::addReads);
var impl = new PanamaESVectorizationProvider();
logger.info(
String.format(
Locale.ENGLISH,
"Java vector incubator API enabled; uses preferredBitSize=%d",
PanamaESVectorUtilSupport.VECTOR_BITSIZE
)
);
return impl;
} else {
logger.warn(
"You are running with unsupported Java "
+ runtimeVersion
+ ". To make full use of the Vector API, please update Elasticsearch."
);
}
return new DefaultESVectorizationProvider();
}
private static Optional<Module> lookupVectorModule() {
return Optional.ofNullable(ESVectorizationProvider.class.getModule().getLayer())
.orElse(ModuleLayer.boot())
.findModule("jdk.incubator.vector");
}
/** This static holder class prevents classloading deadlock. */
private static final class Holder {
private Holder() {}
static final ESVectorizationProvider INSTANCE = lookup(false);
}
}
@ -0,0 +1,153 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.simdvec.internal.vectorization;
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.LongVector;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorShape;
import jdk.incubator.vector.VectorSpecies;
import org.apache.lucene.util.Constants;
public final class PanamaESVectorUtilSupport implements ESVectorUtilSupport {
static final int VECTOR_BITSIZE;
/** Whether integer vectors can be trusted to actually be fast. */
static final boolean HAS_FAST_INTEGER_VECTORS;
static {
// default to platform supported bitsize
VECTOR_BITSIZE = VectorShape.preferredShape().vectorBitSize();
// hotspot misses some SSE intrinsics, workaround it
// to be fair, they do document this thing only works well with AVX2/AVX3 and Neon
boolean isAMD64withoutAVX2 = Constants.OS_ARCH.equals("amd64") && VECTOR_BITSIZE < 256;
HAS_FAST_INTEGER_VECTORS = isAMD64withoutAVX2 == false;
}
@Override
public long ipByteBinByte(byte[] q, byte[] d) {
// 128 / 8 == 16
if (d.length >= 16 && HAS_FAST_INTEGER_VECTORS) {
if (VECTOR_BITSIZE >= 256) {
return ipByteBin256(q, d);
} else if (VECTOR_BITSIZE == 128) {
return ipByteBin128(q, d);
}
}
return DefaultESVectorUtilSupport.ipByteBinByteImpl(q, d);
}
private static final VectorSpecies<Byte> BYTE_SPECIES_128 = ByteVector.SPECIES_128;
private static final VectorSpecies<Byte> BYTE_SPECIES_256 = ByteVector.SPECIES_256;
static long ipByteBin256(byte[] q, byte[] d) {
long subRet0 = 0;
long subRet1 = 0;
long subRet2 = 0;
long subRet3 = 0;
int i = 0;
if (d.length >= ByteVector.SPECIES_256.vectorByteSize() * 2) {
int limit = ByteVector.SPECIES_256.loopBound(d.length);
var sum0 = LongVector.zero(LongVector.SPECIES_256);
var sum1 = LongVector.zero(LongVector.SPECIES_256);
var sum2 = LongVector.zero(LongVector.SPECIES_256);
var sum3 = LongVector.zero(LongVector.SPECIES_256);
for (; i < limit; i += ByteVector.SPECIES_256.length()) {
var vq0 = ByteVector.fromArray(BYTE_SPECIES_256, q, i).reinterpretAsLongs();
var vq1 = ByteVector.fromArray(BYTE_SPECIES_256, q, i + d.length).reinterpretAsLongs();
var vq2 = ByteVector.fromArray(BYTE_SPECIES_256, q, i + d.length * 2).reinterpretAsLongs();
var vq3 = ByteVector.fromArray(BYTE_SPECIES_256, q, i + d.length * 3).reinterpretAsLongs();
var vd = ByteVector.fromArray(BYTE_SPECIES_256, d, i).reinterpretAsLongs();
sum0 = sum0.add(vq0.and(vd).lanewise(VectorOperators.BIT_COUNT));
sum1 = sum1.add(vq1.and(vd).lanewise(VectorOperators.BIT_COUNT));
sum2 = sum2.add(vq2.and(vd).lanewise(VectorOperators.BIT_COUNT));
sum3 = sum3.add(vq3.and(vd).lanewise(VectorOperators.BIT_COUNT));
}
subRet0 += sum0.reduceLanes(VectorOperators.ADD);
subRet1 += sum1.reduceLanes(VectorOperators.ADD);
subRet2 += sum2.reduceLanes(VectorOperators.ADD);
subRet3 += sum3.reduceLanes(VectorOperators.ADD);
}
if (d.length - i >= ByteVector.SPECIES_128.vectorByteSize()) {
var sum0 = LongVector.zero(LongVector.SPECIES_128);
var sum1 = LongVector.zero(LongVector.SPECIES_128);
var sum2 = LongVector.zero(LongVector.SPECIES_128);
var sum3 = LongVector.zero(LongVector.SPECIES_128);
int limit = ByteVector.SPECIES_128.loopBound(d.length);
for (; i < limit; i += ByteVector.SPECIES_128.length()) {
var vq0 = ByteVector.fromArray(BYTE_SPECIES_128, q, i).reinterpretAsLongs();
var vq1 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + d.length).reinterpretAsLongs();
var vq2 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + d.length * 2).reinterpretAsLongs();
var vq3 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + d.length * 3).reinterpretAsLongs();
var vd = ByteVector.fromArray(BYTE_SPECIES_128, d, i).reinterpretAsLongs();
sum0 = sum0.add(vq0.and(vd).lanewise(VectorOperators.BIT_COUNT));
sum1 = sum1.add(vq1.and(vd).lanewise(VectorOperators.BIT_COUNT));
sum2 = sum2.add(vq2.and(vd).lanewise(VectorOperators.BIT_COUNT));
sum3 = sum3.add(vq3.and(vd).lanewise(VectorOperators.BIT_COUNT));
}
subRet0 += sum0.reduceLanes(VectorOperators.ADD);
subRet1 += sum1.reduceLanes(VectorOperators.ADD);
subRet2 += sum2.reduceLanes(VectorOperators.ADD);
subRet3 += sum3.reduceLanes(VectorOperators.ADD);
}
// tail as bytes
for (; i < d.length; i++) {
subRet0 += Integer.bitCount((q[i] & d[i]) & 0xFF);
subRet1 += Integer.bitCount((q[i + d.length] & d[i]) & 0xFF);
subRet2 += Integer.bitCount((q[i + 2 * d.length] & d[i]) & 0xFF);
subRet3 += Integer.bitCount((q[i + 3 * d.length] & d[i]) & 0xFF);
}
return subRet0 + (subRet1 << 1) + (subRet2 << 2) + (subRet3 << 3);
}
public static long ipByteBin128(byte[] q, byte[] d) {
long subRet0 = 0;
long subRet1 = 0;
long subRet2 = 0;
long subRet3 = 0;
int i = 0;
var sum0 = IntVector.zero(IntVector.SPECIES_128);
var sum1 = IntVector.zero(IntVector.SPECIES_128);
var sum2 = IntVector.zero(IntVector.SPECIES_128);
var sum3 = IntVector.zero(IntVector.SPECIES_128);
int limit = ByteVector.SPECIES_128.loopBound(d.length);
for (; i < limit; i += ByteVector.SPECIES_128.length()) {
var vd = ByteVector.fromArray(BYTE_SPECIES_128, d, i).reinterpretAsInts();
var vq0 = ByteVector.fromArray(BYTE_SPECIES_128, q, i).reinterpretAsInts();
var vq1 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + d.length).reinterpretAsInts();
var vq2 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + d.length * 2).reinterpretAsInts();
var vq3 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + d.length * 3).reinterpretAsInts();
sum0 = sum0.add(vd.and(vq0).lanewise(VectorOperators.BIT_COUNT));
sum1 = sum1.add(vd.and(vq1).lanewise(VectorOperators.BIT_COUNT));
sum2 = sum2.add(vd.and(vq2).lanewise(VectorOperators.BIT_COUNT));
sum3 = sum3.add(vd.and(vq3).lanewise(VectorOperators.BIT_COUNT));
}
subRet0 += sum0.reduceLanes(VectorOperators.ADD);
subRet1 += sum1.reduceLanes(VectorOperators.ADD);
subRet2 += sum2.reduceLanes(VectorOperators.ADD);
subRet3 += sum3.reduceLanes(VectorOperators.ADD);
// tail as bytes
for (; i < d.length; i++) {
int dValue = d[i];
subRet0 += Integer.bitCount((dValue & q[i]) & 0xFF);
subRet1 += Integer.bitCount((dValue & q[i + d.length]) & 0xFF);
subRet2 += Integer.bitCount((dValue & q[i + 2 * d.length]) & 0xFF);
subRet3 += Integer.bitCount((dValue & q[i + 3 * d.length]) & 0xFF);
}
return subRet0 + (subRet1 << 1) + (subRet2 << 2) + (subRet3 << 3);
}
}
@ -0,0 +1,24 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.simdvec.internal.vectorization;
final class PanamaESVectorizationProvider extends ESVectorizationProvider {
private final ESVectorUtilSupport vectorUtilSupport;
PanamaESVectorizationProvider() {
vectorUtilSupport = new PanamaESVectorUtilSupport();
}
@Override
public ESVectorUtilSupport getVectorUtilSupport() {
return vectorUtilSupport;
}
}
@ -0,0 +1,130 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.simdvec;
import org.elasticsearch.simdvec.internal.vectorization.BaseVectorizationTests;
import org.elasticsearch.simdvec.internal.vectorization.ESVectorizationProvider;
import java.util.Arrays;
import static org.elasticsearch.simdvec.internal.vectorization.ESVectorUtilSupport.B_QUERY;
public class ESVectorUtilTests extends BaseVectorizationTests {
static final ESVectorizationProvider defaultedProvider = BaseVectorizationTests.defaultProvider();
static final ESVectorizationProvider defOrPanamaProvider = BaseVectorizationTests.maybePanamaProvider();
public void testIpByteBinInvariants() {
int iterations = atLeast(10);
for (int i = 0; i < iterations; i++) {
int size = randomIntBetween(1, 10);
var d = new byte[size];
var q = new byte[size * B_QUERY - 1];
expectThrows(IllegalArgumentException.class, () -> ESVectorUtil.ipByteBinByte(q, d));
}
}
public void testBasicIpByteBin() {
testBasicIpByteBinImpl(ESVectorUtil::ipByteBinByte);
testBasicIpByteBinImpl(defaultedProvider.getVectorUtilSupport()::ipByteBinByte);
testBasicIpByteBinImpl(defOrPanamaProvider.getVectorUtilSupport()::ipByteBinByte);
}
interface IpByteBin {
long apply(byte[] q, byte[] d);
}
void testBasicIpByteBinImpl(IpByteBin ipByteBinFunc) {
assertEquals(15L, ipByteBinFunc.apply(new byte[] { 1, 1, 1, 1 }, new byte[] { 1 }));
assertEquals(30L, ipByteBinFunc.apply(new byte[] { 1, 2, 1, 2, 1, 2, 1, 2 }, new byte[] { 1, 2 }));
var d = new byte[] { 1, 2, 3 };
var q = new byte[] { 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3 };
assert scalarIpByteBin(q, d) == 60L; // 4 + 8 + 16 + 32
assertEquals(60L, ipByteBinFunc.apply(q, d));
d = new byte[] { 1, 2, 3, 4 };
q = new byte[] { 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 };
assert scalarIpByteBin(q, d) == 75L; // 5 + 10 + 20 + 40
assertEquals(75L, ipByteBinFunc.apply(q, d));
d = new byte[] { 1, 2, 3, 4, 5 };
q = new byte[] { 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5 };
assert scalarIpByteBin(q, d) == 105L; // 7 + 14 + 28 + 56
assertEquals(105L, ipByteBinFunc.apply(q, d));
d = new byte[] { 1, 2, 3, 4, 5, 6 };
q = new byte[] { 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6 };
assert scalarIpByteBin(q, d) == 135L; // 9 + 18 + 36 + 72
assertEquals(135L, ipByteBinFunc.apply(q, d));
d = new byte[] { 1, 2, 3, 4, 5, 6, 7 };
q = new byte[] { 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7 };
assert scalarIpByteBin(q, d) == 180L; // 12 + 24 + 48 + 96
assertEquals(180L, ipByteBinFunc.apply(q, d));
d = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8 };
q = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8 };
assert scalarIpByteBin(q, d) == 195L; // 13 + 26 + 52 + 104
assertEquals(195L, ipByteBinFunc.apply(q, d));
d = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
q = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
assert scalarIpByteBin(q, d) == 225L; // 15 + 30 + 60 + 120
assertEquals(225L, ipByteBinFunc.apply(q, d));
}
public void testIpByteBin() {
testIpByteBinImpl(ESVectorUtil::ipByteBinByte);
testIpByteBinImpl(defaultedProvider.getVectorUtilSupport()::ipByteBinByte);
testIpByteBinImpl(defOrPanamaProvider.getVectorUtilSupport()::ipByteBinByte);
}
void testIpByteBinImpl(IpByteBin ipByteBinFunc) {
int iterations = atLeast(50);
for (int i = 0; i < iterations; i++) {
int size = random().nextInt(5000);
var d = new byte[size];
var q = new byte[size * B_QUERY];
random().nextBytes(d);
random().nextBytes(q);
assertEquals(scalarIpByteBin(q, d), ipByteBinFunc.apply(q, d));
Arrays.fill(d, Byte.MAX_VALUE);
Arrays.fill(q, Byte.MAX_VALUE);
assertEquals(scalarIpByteBin(q, d), ipByteBinFunc.apply(q, d));
Arrays.fill(d, Byte.MIN_VALUE);
Arrays.fill(q, Byte.MIN_VALUE);
assertEquals(scalarIpByteBin(q, d), ipByteBinFunc.apply(q, d));
}
}
static int scalarIpByteBin(byte[] q, byte[] d) {
int res = 0;
for (int i = 0; i < B_QUERY; i++) {
res += (popcount(q, i * d.length, d, d.length) << i);
}
return res;
}
public static int popcount(byte[] a, int aOffset, byte[] b, int length) {
int res = 0;
for (int j = 0; j < length; j++) {
int value = (a[aOffset + j] & b[j]) & 0xFF;
for (int k = 0; k < Byte.SIZE; k++) {
if ((value & (1 << k)) != 0) {
++res;
}
}
}
return res;
}
}
@ -0,0 +1,29 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.simdvec.internal.vectorization;
import org.elasticsearch.test.ESTestCase;
import org.junit.Before;
public class BaseVectorizationTests extends ESTestCase {
@Before
public void sanity() {
assert Runtime.version().feature() < 21 || ModuleLayer.boot().findModule("jdk.incubator.vector").isPresent();
}
public static ESVectorizationProvider defaultProvider() {
return new DefaultESVectorizationProvider();
}
public static ESVectorizationProvider maybePanamaProvider() {
return ESVectorizationProvider.lookup(true);
}
}
@ -3,6 +3,8 @@ setup:
indices.create:
index: test_date
body:
settings:
number_of_shards: 1
mappings:
properties:
date_field:
@ -72,6 +72,7 @@ public class IngestCommonPlugin extends Plugin implements ActionPlugin, IngestPl
entry(SetProcessor.TYPE, new SetProcessor.Factory(parameters.scriptService)),
entry(SortProcessor.TYPE, new SortProcessor.Factory()),
entry(SplitProcessor.TYPE, new SplitProcessor.Factory()),
entry(TerminateProcessor.TYPE, new TerminateProcessor.Factory()),
entry(TrimProcessor.TYPE, new TrimProcessor.Factory()),
entry(URLDecodeProcessor.TYPE, new URLDecodeProcessor.Factory()),
entry(UppercaseProcessor.TYPE, new UppercaseProcessor.Factory()),
@ -0,0 +1,53 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.ingest.common;
import org.elasticsearch.ingest.AbstractProcessor;
import org.elasticsearch.ingest.IngestDocument;
import org.elasticsearch.ingest.Processor;
import java.util.Map;
/**
* A {@link Processor} which simply prevents subsequent processors in the pipeline from running (without failing, like {@link FailProcessor}
* does). This will normally be run conditionally, using the {@code if} option.
*/
public class TerminateProcessor extends AbstractProcessor {
static final String TYPE = "terminate";
TerminateProcessor(String tag, String description) {
super(tag, description);
}
@Override
public IngestDocument execute(IngestDocument ingestDocument) {
ingestDocument.terminate();
return ingestDocument;
}
@Override
public String getType() {
return TYPE;
}
public static final class Factory implements Processor.Factory {
@Override
public Processor create(
Map<String, Processor.Factory> processorFactories,
String tag,
String description,
Map<String, Object> config
) {
return new TerminateProcessor(tag, description);
}
}
}
@ -0,0 +1,70 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.ingest.common;
import org.elasticsearch.ingest.CompoundProcessor;
import org.elasticsearch.ingest.IngestDocument;
import org.elasticsearch.ingest.Pipeline;
import org.elasticsearch.ingest.TestTemplateService;
import org.elasticsearch.ingest.ValueSource;
import org.elasticsearch.test.ESTestCase;
import java.util.Map;
import static org.elasticsearch.ingest.RandomDocumentPicks.randomIngestDocument;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.nullValue;
public class TerminateProcessorTests extends ESTestCase {
public void testTerminateInPipeline() throws Exception {
Pipeline pipeline = new Pipeline(
"my-pipeline",
null,
null,
null,
new CompoundProcessor(
new SetProcessor(
"before-set",
"Sets before field to true",
new TestTemplateService.MockTemplateScript.Factory("before"),
ValueSource.wrap(true, TestTemplateService.instance()),
null
),
new TerminateProcessor("terminate", "terminates the pipeline"),
new SetProcessor(
"after-set",
"Sets after field to true",
new TestTemplateService.MockTemplateScript.Factory("after"),
ValueSource.wrap(true, TestTemplateService.instance()),
null
)
)
);
IngestDocument input = randomIngestDocument(random(), Map.of("foo", "bar"));
PipelineOutput output = new PipelineOutput();
pipeline.execute(input, output::set);
assertThat(output.exception, nullValue());
// We expect the before-set processor to have run, but not the after-set one:
assertThat(output.document.getSource(), is(Map.of("foo", "bar", "before", true)));
}
private static class PipelineOutput {
IngestDocument document;
Exception exception;
void set(IngestDocument document, Exception exception) {
this.document = document;
this.exception = exception;
}
}
}
@ -0,0 +1,138 @@
---
setup:
- do:
ingest.put_pipeline:
id: "test-pipeline"
body: >
{
"description": "Appends just 'before' to the steps field if the number field is less than 50, or both 'before' and 'after' if not",
"processors": [
{
"append": {
"field": "steps",
"value": "before"
}
},
{
"terminate": {
"if": "ctx.number < 50"
}
},
{
"append": {
"field": "steps",
"value": "after"
}
}
]
}
- do:
ingest.put_pipeline:
id: "test-final-pipeline"
body: >
{
"description": "Appends 'final' to the steps field",
"processors": [
{
"append": {
"field": "steps",
"value": "final"
}
}
]
}
- do:
ingest.put_pipeline:
id: "test-outer-pipeline"
body: >
{
"description": "Runs test-pipeline and then append 'outer' to the steps field",
"processors": [
{
"pipeline": {
"name": "test-pipeline"
}
},
{
"append": {
"field": "steps",
"value": "outer"
}
}
]
}
- do:
indices.create:
index: "test-index-with-default-and-final-pipelines"
body:
settings:
index:
default_pipeline: "test-pipeline"
final_pipeline: "test-final-pipeline"
- do:
indices.create:
index: "test-vanilla-index"
---
teardown:
- do:
indices.delete:
index: "test-index-with-default-and-final-pipelines"
ignore_unavailable: true
- do:
indices.delete:
index: "test-vanilla-index"
ignore_unavailable: true
- do:
ingest.delete_pipeline:
id: "test-pipeline"
ignore: 404
- do:
ingest.delete_pipeline:
id: "test-outer-pipeline"
ignore: 404
---
"Test pipeline including conditional terminate pipeline":
- do:
bulk:
refresh: true
body:
- '{ "index": {"_index": "test-index-with-default-and-final-pipelines" } }'
- '{ "comment": "should terminate", "number": 40, "steps": [] }'
- '{ "index": {"_index": "test-index-with-default-and-final-pipelines" } }'
- '{ "comment": "should continue to end", "number": 60, "steps": [] }'
- do:
search:
rest_total_hits_as_int: true
index: "test-index-with-default-and-final-pipelines"
body:
sort: "number"
- match: { hits.total: 2 }
- match: { hits.hits.0._source.number: 40 }
- match: { hits.hits.1._source.number: 60 }
- match: { hits.hits.0._source.steps: ["before", "final"] }
- match: { hits.hits.1._source.steps: ["before", "after", "final"] }
---
"Test pipeline with terminate invoked from an outer pipeline":
- do:
bulk:
refresh: true
pipeline: "test-outer-pipeline"
body:
- '{ "index": {"_index": "test-vanilla-index" } }'
- '{ "comment": "should terminate inner pipeline but not outer", "number": 40, "steps": [] }'
- do:
search:
rest_total_hits_as_int: true
index: "test-vanilla-index"
body:
sort: "number"
- match: { hits.total: 1 }
- match: { hits.hits.0._source.number: 40 }
- match: { hits.hits.0._source.steps: ["before", "outer"] }
@ -22,12 +22,17 @@ import java.util.Set;
* <p>
* A database has a set of properties that are valid to use with it (see {@link Database#properties()}),
* as well as a list of default properties to use if no properties are specified (see {@link Database#defaultProperties()}).
* <p>
* Some database providers have similar concepts but might have slightly different properties associated with those types.
* This can be accommodated, for example, by having a Foo value and a separate FooV2 value where the 'V' should be read as
* 'variant' or 'variation'. A V-less Database type is inherently the first variant/variation (i.e. V1).
*/
enum Database {
City(
Set.of(
Property.IP,
Property.COUNTRY_IN_EUROPEAN_UNION,
Property.COUNTRY_ISO_CODE,
Property.CONTINENT_CODE,
Property.COUNTRY_NAME,
@ -36,7 +41,9 @@ enum Database {
Property.REGION_NAME,
Property.CITY_NAME,
Property.TIMEZONE,
Property.LOCATION
Property.LOCATION,
Property.POSTAL_CODE,
Property.ACCURACY_RADIUS
),
Set.of(
Property.COUNTRY_ISO_CODE,
@ -49,7 +56,14 @@ enum Database {
)
),
Country(
Set.of(Property.IP, Property.CONTINENT_CODE, Property.CONTINENT_NAME, Property.COUNTRY_NAME, Property.COUNTRY_ISO_CODE),
Set.of(
Property.IP,
Property.CONTINENT_CODE,
Property.CONTINENT_NAME,
Property.COUNTRY_NAME,
Property.COUNTRY_IN_EUROPEAN_UNION,
Property.COUNTRY_ISO_CODE
),
Set.of(Property.CONTINENT_NAME, Property.COUNTRY_NAME, Property.COUNTRY_ISO_CODE)
),
Asn(
@ -80,12 +94,15 @@ enum Database {
Enterprise(
Set.of(
Property.IP,
Property.COUNTRY_CONFIDENCE,
Property.COUNTRY_IN_EUROPEAN_UNION,
Property.COUNTRY_ISO_CODE,
Property.COUNTRY_NAME,
Property.CONTINENT_CODE,
Property.CONTINENT_NAME,
Property.REGION_ISO_CODE,
Property.REGION_NAME,
Property.CITY_CONFIDENCE,
Property.CITY_NAME,
Property.TIMEZONE,
Property.LOCATION,
@ -104,7 +121,10 @@ enum Database {
Property.MOBILE_COUNTRY_CODE,
Property.MOBILE_NETWORK_CODE,
Property.USER_TYPE,
Property.CONNECTION_TYPE
Property.CONNECTION_TYPE,
Property.POSTAL_CODE,
Property.POSTAL_CONFIDENCE,
Property.ACCURACY_RADIUS
),
Set.of(
Property.COUNTRY_ISO_CODE,
@ -137,7 +157,31 @@ enum Database {
Property.MOBILE_COUNTRY_CODE,
Property.MOBILE_NETWORK_CODE
)
);
),
AsnV2(
Set.of(
Property.IP,
Property.ASN,
Property.ORGANIZATION_NAME,
Property.NETWORK,
Property.DOMAIN,
Property.COUNTRY_ISO_CODE,
Property.TYPE
),
Set.of(Property.IP, Property.ASN, Property.ORGANIZATION_NAME, Property.NETWORK)
),
CityV2(
Set.of(
Property.IP,
Property.COUNTRY_ISO_CODE,
Property.REGION_NAME,
Property.CITY_NAME,
Property.TIMEZONE,
Property.LOCATION,
Property.POSTAL_CODE
),
Set.of(Property.COUNTRY_ISO_CODE, Property.REGION_NAME, Property.CITY_NAME, Property.LOCATION)
);
private final Set<Property> properties;
private final Set<Property> defaultProperties;
@ -187,12 +231,15 @@ enum Database {
enum Property {
IP,
COUNTRY_CONFIDENCE,
COUNTRY_IN_EUROPEAN_UNION,
COUNTRY_ISO_CODE,
COUNTRY_NAME,
CONTINENT_CODE,
CONTINENT_NAME,
REGION_ISO_CODE,
REGION_NAME,
CITY_CONFIDENCE,
CITY_NAME,
TIMEZONE,
LOCATION,
@ -211,7 +258,11 @@ enum Database {
MOBILE_COUNTRY_CODE,
MOBILE_NETWORK_CODE,
CONNECTION_TYPE,
USER_TYPE;
USER_TYPE,
TYPE,
POSTAL_CODE,
POSTAL_CONFIDENCE,
ACCURACY_RADIUS;
/**
* Parses a string representation of a property into an actual Property instance. Not all properties that exist are
@ -76,6 +76,7 @@ final class IpDataLookupFactories {
return database;
}
@Nullable
static Function<Set<Database.Property>, IpDataLookup> getMaxmindLookup(final Database database) {
return switch (database) {
case City -> MaxmindIpDataLookups.City::new;
@ -86,6 +87,7 @@ final class IpDataLookupFactories {
case Domain -> MaxmindIpDataLookups.Domain::new;
case Enterprise -> MaxmindIpDataLookups.Enterprise::new;
case Isp -> MaxmindIpDataLookups.Isp::new;
default -> null;
};
}
@ -97,7 +99,6 @@ final class IpDataLookupFactories {
final Function<Set<Database.Property>, IpDataLookup> factoryMethod = getMaxmindLookup(database);
// note: this can't presently be null, but keep this check -- it will be useful in the near future
if (factoryMethod == null) {
throw new IllegalArgumentException("Unsupported database type [" + databaseType + "] for file [" + databaseFile + "]");
}
@ -0,0 +1,338 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.ingest.geoip;
import com.maxmind.db.DatabaseRecord;
import com.maxmind.db.MaxMindDbConstructor;
import com.maxmind.db.MaxMindDbParameter;
import com.maxmind.db.Reader;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.network.InetAddresses;
import org.elasticsearch.common.network.NetworkAddress;
import org.elasticsearch.core.Nullable;
import java.io.IOException;
import java.net.InetAddress;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
/**
* A collection of {@link IpDataLookup} implementations for IPinfo databases
*/
final class IpinfoIpDataLookups {
private IpinfoIpDataLookups() {
// utility class
}
private static final Logger logger = LogManager.getLogger(IpinfoIpDataLookups.class);
/**
* Lax-ly parses a string that (ideally) looks like 'AS123' into a Long like 123L (or null, if such parsing isn't possible).
* @param asn a potentially empty (or null) ASN string that is expected to contain 'AS' and then a parsable long
* @return the parsed asn
*/
static Long parseAsn(final String asn) {
if (asn == null || Strings.hasText(asn) == false) {
return null;
} else {
String stripped = asn.toUpperCase(Locale.ROOT).replaceAll("AS", "").trim();
try {
return Long.parseLong(stripped);
} catch (NumberFormatException e) {
logger.trace("Unable to parse non-compliant ASN string [{}]", asn);
return null;
}
}
}
/**
* Lax-ly parses a string that contains a double into a Double (or null, if such parsing isn't possible).
* @param latlon a potentially empty (or null) string that is expected to contain a parsable double
* @return the parsed double
*/
static Double parseLocationDouble(final String latlon) {
if (latlon == null || Strings.hasText(latlon) == false) {
return null;
} else {
String stripped = latlon.trim();
try {
return Double.parseDouble(stripped);
} catch (NumberFormatException e) {
logger.trace("Unable to parse non-compliant location string [{}]", latlon);
return null;
}
}
}
public record AsnResult(
Long asn,
@Nullable String country, // not present in the free asn database
String domain,
String name,
@Nullable String type // not present in the free asn database
) {
@SuppressWarnings("checkstyle:RedundantModifier")
@MaxMindDbConstructor
public AsnResult(
@MaxMindDbParameter(name = "asn") String asn,
@Nullable @MaxMindDbParameter(name = "country") String country,
@MaxMindDbParameter(name = "domain") String domain,
@MaxMindDbParameter(name = "name") String name,
@Nullable @MaxMindDbParameter(name = "type") String type
) {
this(parseAsn(asn), country, domain, name, type);
}
}
public record CountryResult(
@MaxMindDbParameter(name = "continent") String continent,
@MaxMindDbParameter(name = "continent_name") String continentName,
@MaxMindDbParameter(name = "country") String country,
@MaxMindDbParameter(name = "country_name") String countryName
) {
@MaxMindDbConstructor
public CountryResult {}
}
public record GeolocationResult(
String city,
String country,
Double latitude,
Double longitude,
String postalCode,
String region,
String timezone
) {
@SuppressWarnings("checkstyle:RedundantModifier")
@MaxMindDbConstructor
public GeolocationResult(
@MaxMindDbParameter(name = "city") String city,
@MaxMindDbParameter(name = "country") String country,
@MaxMindDbParameter(name = "latitude") String latitude,
@MaxMindDbParameter(name = "longitude") String longitude,
// @MaxMindDbParameter(name = "network") String network, // for now we're not exposing this
@MaxMindDbParameter(name = "postal_code") String postalCode,
@MaxMindDbParameter(name = "region") String region,
@MaxMindDbParameter(name = "timezone") String timezone
) {
this(city, country, parseLocationDouble(latitude), parseLocationDouble(longitude), postalCode, region, timezone);
}
}
static class Asn extends AbstractBase<AsnResult> {
Asn(Set<Database.Property> properties) {
super(properties, AsnResult.class);
}
@Override
protected Map<String, Object> transform(final Result<AsnResult> result) {
AsnResult response = result.result;
Long asn = response.asn;
String organizationName = response.name;
String network = result.network;
Map<String, Object> data = new HashMap<>();
for (Database.Property property : this.properties) {
switch (property) {
case IP -> data.put("ip", result.ip);
case ASN -> {
if (asn != null) {
data.put("asn", asn);
}
}
case ORGANIZATION_NAME -> {
if (organizationName != null) {
data.put("organization_name", organizationName);
}
}
case NETWORK -> {
if (network != null) {
data.put("network", network);
}
}
case COUNTRY_ISO_CODE -> {
if (response.country != null) {
data.put("country_iso_code", response.country);
}
}
case DOMAIN -> {
if (response.domain != null) {
data.put("domain", response.domain);
}
}
case TYPE -> {
if (response.type != null) {
data.put("type", response.type);
}
}
}
}
return data;
}
}
static class Country extends AbstractBase<CountryResult> {
Country(Set<Database.Property> properties) {
super(properties, CountryResult.class);
}
@Override
protected Map<String, Object> transform(final Result<CountryResult> result) {
CountryResult response = result.result;
Map<String, Object> data = new HashMap<>();
for (Database.Property property : this.properties) {
switch (property) {
case IP -> data.put("ip", result.ip);
case COUNTRY_ISO_CODE -> {
String countryIsoCode = response.country;
if (countryIsoCode != null) {
data.put("country_iso_code", countryIsoCode);
}
}
case COUNTRY_NAME -> {
String countryName = response.countryName;
if (countryName != null) {
data.put("country_name", countryName);
}
}
case CONTINENT_CODE -> {
String continentCode = response.continent;
if (continentCode != null) {
data.put("continent_code", continentCode);
}
}
case CONTINENT_NAME -> {
String continentName = response.continentName;
if (continentName != null) {
data.put("continent_name", continentName);
}
}
}
}
return data;
}
}
static class Geolocation extends AbstractBase<GeolocationResult> {
Geolocation(final Set<Database.Property> properties) {
super(properties, GeolocationResult.class);
}
@Override
protected Map<String, Object> transform(final Result<GeolocationResult> result) {
GeolocationResult response = result.result;
Map<String, Object> data = new HashMap<>();
for (Database.Property property : this.properties) {
switch (property) {
case IP -> data.put("ip", result.ip);
case COUNTRY_ISO_CODE -> {
String countryIsoCode = response.country;
if (countryIsoCode != null) {
data.put("country_iso_code", countryIsoCode);
}
}
case REGION_NAME -> {
String subdivisionName = response.region;
if (subdivisionName != null) {
data.put("region_name", subdivisionName);
}
}
case CITY_NAME -> {
String cityName = response.city;
if (cityName != null) {
data.put("city_name", cityName);
}
}
case TIMEZONE -> {
String locationTimeZone = response.timezone;
if (locationTimeZone != null) {
data.put("timezone", locationTimeZone);
}
}
case POSTAL_CODE -> {
String postalCode = response.postalCode;
if (postalCode != null) {
data.put("postal_code", postalCode);
}
}
case LOCATION -> {
Double latitude = response.latitude;
Double longitude = response.longitude;
if (latitude != null && longitude != null) {
Map<String, Object> locationObject = new HashMap<>();
locationObject.put("lat", latitude);
locationObject.put("lon", longitude);
data.put("location", locationObject);
}
}
}
}
return data;
}
}
/**
* A little record holder -- it wraps the data bound to our record objects by the Reader's getRecord call, together with the
* ip address passed in by the caller and the network from the returned DatabaseRecord.
*/
public record Result<T>(T result, String ip, String network) {}
/**
* The {@link IpinfoIpDataLookups.AbstractBase} is an abstract base implementation of {@link IpDataLookup} that
* provides common functionality for getting a {@link IpinfoIpDataLookups.Result} that wraps a record from a {@link IpDatabase}.
*
* @param <RESPONSE> the record type that will be wrapped and returned
*/
private abstract static class AbstractBase<RESPONSE> implements IpDataLookup {
protected final Set<Database.Property> properties;
protected final Class<RESPONSE> clazz;
AbstractBase(final Set<Database.Property> properties, final Class<RESPONSE> clazz) {
this.properties = Set.copyOf(properties);
this.clazz = clazz;
}
@Override
public Set<Database.Property> getProperties() {
return this.properties;
}
@Override
public final Map<String, Object> getData(final IpDatabase ipDatabase, final String ipAddress) {
final Result<RESPONSE> response = ipDatabase.getResponse(ipAddress, this::lookup);
return (response == null || response.result == null) ? Map.of() : transform(response);
}
@Nullable
private Result<RESPONSE> lookup(final Reader reader, final String ipAddress) throws IOException {
final InetAddress ip = InetAddresses.forString(ipAddress);
final DatabaseRecord<RESPONSE> record = reader.getRecord(ip, clazz);
final RESPONSE data = record.getData();
return (data == null) ? null : new Result<>(data, NetworkAddress.format(ip), record.getNetwork().toString());
}
/**
* Extract the configured properties from the retrieved response
* @param response the non-null response that was retrieved
* @return a mapping of properties for the ip from the response
*/
protected abstract Map<String, Object> transform(Result<RESPONSE> response);
}
}
@ -23,6 +23,7 @@ import com.maxmind.geoip2.model.EnterpriseResponse;
import com.maxmind.geoip2.model.IspResponse;
import com.maxmind.geoip2.record.Continent;
import com.maxmind.geoip2.record.Location;
import com.maxmind.geoip2.record.Postal;
import com.maxmind.geoip2.record.Subdivision;
import org.elasticsearch.common.network.InetAddresses;
@ -139,11 +140,18 @@ final class MaxmindIpDataLookups {
Location location = response.getLocation();
Continent continent = response.getContinent();
Subdivision subdivision = response.getMostSpecificSubdivision();
Postal postal = response.getPostal();
Map<String, Object> data = new HashMap<>();
for (Database.Property property : this.properties) {
switch (property) {
case IP -> data.put("ip", response.getTraits().getIpAddress());
case COUNTRY_IN_EUROPEAN_UNION -> {
if (country.getIsoCode() != null) {
// isInEuropeanUnion is a boolean so it can't be null. But it really only makes sense if we have a country
data.put("country_in_european_union", country.isInEuropeanUnion());
}
}
case COUNTRY_ISO_CODE -> {
String countryIsoCode = country.getIsoCode();
if (countryIsoCode != null) {
@ -206,6 +214,17 @@ final class MaxmindIpDataLookups {
data.put("location", locationObject);
}
}
case ACCURACY_RADIUS -> {
Integer accuracyRadius = location.getAccuracyRadius();
if (accuracyRadius != null) {
data.put("accuracy_radius", accuracyRadius);
}
}
case POSTAL_CODE -> {
if (postal != null && postal.getCode() != null) {
data.put("postal_code", postal.getCode());
}
}
}
}
return data;
@ -254,6 +273,12 @@ final class MaxmindIpDataLookups {
for (Database.Property property : this.properties) {
switch (property) {
case IP -> data.put("ip", response.getTraits().getIpAddress());
case COUNTRY_IN_EUROPEAN_UNION -> {
if (country.getIsoCode() != null) {
// isInEuropeanUnion is a boolean so it can't be null. But it really only makes sense if we have a country
data.put("country_in_european_union", country.isInEuropeanUnion());
}
}
case COUNTRY_ISO_CODE -> {
String countryIsoCode = country.getIsoCode();
if (countryIsoCode != null) {
@ -324,6 +349,7 @@ final class MaxmindIpDataLookups {
Location location = response.getLocation();
Continent continent = response.getContinent();
Subdivision subdivision = response.getMostSpecificSubdivision();
Postal postal = response.getPostal();
Long asn = response.getTraits().getAutonomousSystemNumber();
String organizationName = response.getTraits().getAutonomousSystemOrganization();
@ -351,6 +377,18 @@ final class MaxmindIpDataLookups {
for (Database.Property property : this.properties) {
switch (property) {
case IP -> data.put("ip", response.getTraits().getIpAddress());
case COUNTRY_CONFIDENCE -> {
Integer countryConfidence = country.getConfidence();
if (countryConfidence != null) {
data.put("country_confidence", countryConfidence);
}
}
case COUNTRY_IN_EUROPEAN_UNION -> {
if (country.getIsoCode() != null) {
// isInEuropeanUnion is a boolean so it can't be null. But it really only makes sense if we have a country
data.put("country_in_european_union", country.isInEuropeanUnion());
}
}
case COUNTRY_ISO_CODE -> {
String countryIsoCode = country.getIsoCode();
if (countryIsoCode != null) {
@ -391,6 +429,12 @@ final class MaxmindIpDataLookups {
data.put("region_name", subdivisionName);
}
}
case CITY_CONFIDENCE -> {
Integer cityConfidence = city.getConfidence();
if (cityConfidence != null) {
data.put("city_confidence", cityConfidence);
}
}
case CITY_NAME -> {
String cityName = city.getName();
if (cityName != null) {
@ -413,6 +457,23 @@ final class MaxmindIpDataLookups {
data.put("location", locationObject);
}
}
case ACCURACY_RADIUS -> {
Integer accuracyRadius = location.getAccuracyRadius();
if (accuracyRadius != null) {
data.put("accuracy_radius", accuracyRadius);
}
}
case POSTAL_CODE -> {
if (postal != null && postal.getCode() != null) {
data.put("postal_code", postal.getCode());
}
}
case POSTAL_CONFIDENCE -> {
Integer postalConfidence = postal.getConfidence();
if (postalConfidence != null) {
data.put("postal_confidence", postalConfidence);
}
}
case ASN -> {
if (asn != null) {
data.put("asn", asn);
@ -195,7 +195,7 @@ public class GeoIpProcessorFactoryTests extends ESTestCase {
equalTo(
"[properties] illegal property value ["
+ asnProperty
+ "]. valid values are [IP, COUNTRY_ISO_CODE, COUNTRY_NAME, CONTINENT_CODE, CONTINENT_NAME]"
+ "]. valid values are [IP, COUNTRY_IN_EUROPEAN_UNION, COUNTRY_ISO_CODE, COUNTRY_NAME, CONTINENT_CODE, CONTINENT_NAME]"
)
);
}
@ -273,8 +273,9 @@ public class GeoIpProcessorFactoryTests extends ESTestCase {
assertThat(
e.getMessage(),
equalTo(
"[properties] illegal property value [invalid]. valid values are [IP, COUNTRY_ISO_CODE, "
+ "COUNTRY_NAME, CONTINENT_CODE, CONTINENT_NAME, REGION_ISO_CODE, REGION_NAME, CITY_NAME, TIMEZONE, LOCATION]"
"[properties] illegal property value [invalid]. valid values are [IP, COUNTRY_IN_EUROPEAN_UNION, COUNTRY_ISO_CODE, "
+ "COUNTRY_NAME, CONTINENT_CODE, CONTINENT_NAME, REGION_ISO_CODE, REGION_NAME, CITY_NAME, TIMEZONE, "
+ "LOCATION, POSTAL_CODE, ACCURACY_RADIUS]"
)
);