Merge branch 'main' into main-update-9-10-24
commit 09f91cdaec
385 changed files with 20490 additions and 6852 deletions
|
@ -2,6 +2,7 @@ steps:
|
|||
- command: .buildkite/scripts/dra-workflow.sh
|
||||
env:
|
||||
USE_DRA_CREDENTIALS: "true"
|
||||
USE_PROD_DOCKER_CREDENTIALS: "true"
|
||||
agents:
|
||||
provider: gcp
|
||||
image: family/elasticsearch-ubuntu-2204
|
||||
|
@ -18,4 +19,5 @@ steps:
|
|||
branch: "${BUILDKITE_BRANCH}"
|
||||
env:
|
||||
DRA_WORKFLOW: staging
|
||||
USE_PROD_DOCKER_CREDENTIALS: "true"
|
||||
if: build.env('DRA_WORKFLOW') == 'staging'
|
||||
|
|
|
@ -27,7 +27,8 @@ steps:
|
|||
image: family/elasticsearch-{{matrix.image}}
|
||||
diskSizeGb: 350
|
||||
machineType: n1-standard-8
|
||||
env: {}
|
||||
env:
|
||||
USE_PROD_DOCKER_CREDENTIALS: "true"
|
||||
- group: packaging-tests-upgrade
|
||||
steps: $BWC_STEPS
|
||||
- group: packaging-tests-windows
|
||||
|
|
|
@ -28,7 +28,8 @@ steps:
|
|||
image: family/elasticsearch-{{matrix.image}}
|
||||
diskSizeGb: 350
|
||||
machineType: n1-standard-8
|
||||
env: {}
|
||||
env:
|
||||
USE_PROD_DOCKER_CREDENTIALS: "true"
|
||||
- group: packaging-tests-upgrade
|
||||
steps:
|
||||
- label: "{{matrix.image}} / 8.0.1 / packaging-tests-upgrade"
|
||||
|
|
|
@ -24,4 +24,5 @@ steps:
|
|||
diskSizeGb: 350
|
||||
machineType: custom-16-32768
|
||||
env:
|
||||
USE_PROD_DOCKER_CREDENTIALS: "true"
|
||||
PACKAGING_TASK: "{{matrix.PACKAGING_TASK}}"
|
||||
|
|
|
@ -384,6 +384,15 @@ tasks.named("jar") {
|
|||
exclude("classpath.index")
|
||||
}
|
||||
|
||||
spotless {
|
||||
java {
|
||||
// IDEs can sometimes run annotation processors that leave files in
|
||||
// here, causing Spotless to complain. Even though this path ought not
|
||||
// to exist, exclude it anyway in order to avoid spurious failures.
|
||||
toggleOffOn()
|
||||
}
|
||||
}
|
||||
|
||||
def resolveMainWrapperVersion() {
|
||||
new URL("https://raw.githubusercontent.com/elastic/elasticsearch/main/build-tools-internal/src/main/resources/minimumGradleVersion").text.trim()
|
||||
}
|
||||
|
|
|
@ -29,11 +29,13 @@ public enum DockerBase {
|
|||
CLOUD_ESS(null, "-cloud-ess", "apt-get"),
|
||||
|
||||
// Chainguard based wolfi image with latest jdk
|
||||
WOLFI(
|
||||
"docker.elastic.co/wolfi/chainguard-base:latest@sha256:c16d3ad6cebf387e8dd2ad769f54320c4819fbbaa21e729fad087c7ae223b4d0",
|
||||
// This is usually updated via renovatebot
|
||||
// spotless:off
|
||||
WOLFI("docker.elastic.co/wolfi/chainguard-base:latest@sha256:c16d3ad6cebf387e8dd2ad769f54320c4819fbbaa21e729fad087c7ae223b4d0",
|
||||
"-wolfi",
|
||||
"apk"
|
||||
),
|
||||
// spotless:on
|
||||
|
||||
// Based on WOLFI above, with more extras. We don't set a base image because
|
||||
// we programmatically extend from the Wolfi image.
|
||||
|
|
|
@ -284,6 +284,7 @@
|
|||
"Cluster and node setting",
|
||||
"Command line tool",
|
||||
"CRUD",
|
||||
"ES|QL",
|
||||
"Index setting",
|
||||
"Ingest",
|
||||
"JVM option",
|
||||
|
|
|
@@ -1,6 +1,6 @@
### Entitlement Agent

This is a java agent that instruments sensitive class library methods with calls into the `entitlement-runtime` module to check for permissions granted under the _entitlements_ system.
This is a java agent that instruments sensitive class library methods with calls into the `entitlement-bridge` module to check for permissions granted under the _entitlements_ system.

The entitlements system provides an alternative to the legacy `SecurityManager` system, which is deprecated for removal.
With this agent, the Elasticsearch server can retain some control over which class library methods can be invoked by which callers.
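A rough source-level sketch of what the injected check amounts to for an instrumented method such as `System.exit`; the wrapper class below is hypothetical, while `EntitlementProvider`, `EntitlementChecks`, and `checkSystemExit` are the bridge entry points this agent calls into.

```java
import org.elasticsearch.entitlement.api.EntitlementChecks;
import org.elasticsearch.entitlement.api.EntitlementProvider;

// Hypothetical source-level equivalent of the bytecode prologue the agent injects.
class InjectedCheckSketch {
    static void beforeSystemExit(Class<?> callerClass, int status) {
        // Resolve the single EntitlementChecks implementation and let it veto the call;
        // it throws NotEntitledException when the caller has no entitlement.
        EntitlementChecks checks = EntitlementProvider.checks();
        checks.checkSystemExit(callerClass, status);
    }
}
```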
|
|
|
@ -7,21 +7,44 @@
|
|||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
import static java.util.stream.Collectors.joining
|
||||
|
||||
apply plugin: 'elasticsearch.build'
|
||||
apply plugin: 'elasticsearch.embedded-providers'
|
||||
|
||||
embeddedProviders {
|
||||
impl 'entitlement-agent', project(':distribution:tools:entitlement-agent:impl')
|
||||
}
|
||||
|
||||
configurations {
|
||||
entitlementRuntime
|
||||
entitlementBridge
|
||||
}
|
||||
|
||||
dependencies {
|
||||
entitlementRuntime project(":libs:elasticsearch-entitlement-runtime")
|
||||
implementation project(":libs:elasticsearch-entitlement-runtime")
|
||||
entitlementBridge project(":distribution:tools:entitlement-bridge")
|
||||
compileOnly project(":libs:elasticsearch-core")
|
||||
compileOnly project(":distribution:tools:entitlement-runtime")
|
||||
testImplementation project(":test:framework")
|
||||
testImplementation project(":distribution:tools:entitlement-bridge")
|
||||
testImplementation project(":distribution:tools:entitlement-agent:impl")
|
||||
}
|
||||
|
||||
tasks.named('test').configure {
|
||||
systemProperty "tests.security.manager", "false"
|
||||
dependsOn('jar')
|
||||
jvmArgs "-javaagent:${ tasks.named('jar').flatMap{ it.archiveFile }.get()}"
|
||||
|
||||
// Register an argument provider to avoid eager resolution of configurations
|
||||
jvmArgumentProviders.add(new CommandLineArgumentProvider() {
|
||||
@Override
|
||||
Iterable<String> asArguments() {
|
||||
return ["-javaagent:${tasks.jar.archiveFile.get()}", "-Des.entitlements.bridgeJar=${configurations.entitlementBridge.singleFile}"]
|
||||
}
|
||||
})
|
||||
|
||||
|
||||
// The Elasticsearch build plugin automatically adds all compileOnly deps as testImplementation.
|
||||
// We must not add the bridge this way because it is also on the boot classpath, and that would lead to jar hell.
|
||||
classpath -= files(configurations.entitlementBridge)
|
||||
}
|
||||
|
||||
tasks.named('jar').configure {
|
||||
|
|
20
distribution/tools/entitlement-agent/impl/build.gradle
Normal file
|
@ -0,0 +1,20 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
apply plugin: 'elasticsearch.build'
|
||||
|
||||
dependencies {
|
||||
compileOnly project(':distribution:tools:entitlement-agent')
|
||||
implementation 'org.ow2.asm:asm:9.7'
|
||||
}
|
||||
|
||||
tasks.named('forbiddenApisMain').configure {
|
||||
replaceSignatureFiles 'jdk-signatures'
|
||||
}
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
Copyright (c) 2012 France Télécom
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
3. Neither the name of the copyright holders nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
THE POSSIBILITY OF SUCH DAMAGE.
|
|
@ -0,0 +1 @@
|
|||
|
|
@ -0,0 +1,18 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
import org.elasticsearch.entitlement.instrumentation.InstrumentationService;
|
||||
import org.elasticsearch.entitlement.instrumentation.impl.InstrumentationServiceImpl;
|
||||
|
||||
module org.elasticsearch.entitlement.agent.impl {
|
||||
requires org.objectweb.asm;
|
||||
requires org.elasticsearch.entitlement.agent;
|
||||
|
||||
provides InstrumentationService with InstrumentationServiceImpl;
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
package org.elasticsearch.entitlement.instrumentation.impl;
|
||||
|
||||
import org.elasticsearch.entitlement.instrumentation.InstrumentationService;
|
||||
import org.elasticsearch.entitlement.instrumentation.Instrumenter;
|
||||
import org.elasticsearch.entitlement.instrumentation.MethodKey;
|
||||
import org.objectweb.asm.Type;
|
||||
|
||||
import java.lang.reflect.Method;
|
||||
import java.lang.reflect.Modifier;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
public class InstrumentationServiceImpl implements InstrumentationService {
|
||||
@Override
|
||||
public Instrumenter newInstrumenter(String classNameSuffix, Map<MethodKey, Method> instrumentationMethods) {
|
||||
return new InstrumenterImpl(classNameSuffix, instrumentationMethods);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return a {@link MethodKey} suitable for looking up the given {@code targetMethod} in the entitlements trampoline
|
||||
*/
|
||||
public MethodKey methodKeyForTarget(Method targetMethod) {
|
||||
Type actualType = Type.getMethodType(Type.getMethodDescriptor(targetMethod));
|
||||
return new MethodKey(
|
||||
Type.getInternalName(targetMethod.getDeclaringClass()),
|
||||
targetMethod.getName(),
|
||||
Stream.of(actualType.getArgumentTypes()).map(Type::getInternalName).toList(),
|
||||
Modifier.isStatic(targetMethod.getModifiers())
|
||||
);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,214 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
package org.elasticsearch.entitlement.instrumentation.impl;
|
||||
|
||||
import org.elasticsearch.entitlement.instrumentation.Instrumenter;
|
||||
import org.elasticsearch.entitlement.instrumentation.MethodKey;
|
||||
import org.objectweb.asm.AnnotationVisitor;
|
||||
import org.objectweb.asm.ClassReader;
|
||||
import org.objectweb.asm.ClassVisitor;
|
||||
import org.objectweb.asm.ClassWriter;
|
||||
import org.objectweb.asm.MethodVisitor;
|
||||
import org.objectweb.asm.Opcodes;
|
||||
import org.objectweb.asm.Type;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import static org.objectweb.asm.ClassWriter.COMPUTE_FRAMES;
|
||||
import static org.objectweb.asm.ClassWriter.COMPUTE_MAXS;
|
||||
import static org.objectweb.asm.Opcodes.ACC_STATIC;
|
||||
import static org.objectweb.asm.Opcodes.GETSTATIC;
|
||||
import static org.objectweb.asm.Opcodes.INVOKEINTERFACE;
|
||||
import static org.objectweb.asm.Opcodes.INVOKESTATIC;
|
||||
import static org.objectweb.asm.Opcodes.INVOKEVIRTUAL;
|
||||
|
||||
public class InstrumenterImpl implements Instrumenter {
|
||||
/**
|
||||
* To avoid class name collisions during testing without an agent to replace classes in-place.
|
||||
*/
|
||||
private final String classNameSuffix;
|
||||
private final Map<MethodKey, Method> instrumentationMethods;
|
||||
|
||||
public InstrumenterImpl(String classNameSuffix, Map<MethodKey, Method> instrumentationMethods) {
|
||||
this.classNameSuffix = classNameSuffix;
|
||||
this.instrumentationMethods = instrumentationMethods;
|
||||
}
|
||||
|
||||
public ClassFileInfo instrumentClassFile(Class<?> clazz) throws IOException {
|
||||
ClassFileInfo initial = getClassFileInfo(clazz);
|
||||
return new ClassFileInfo(initial.fileName(), instrumentClass(Type.getInternalName(clazz), initial.bytecodes()));
|
||||
}
|
||||
|
||||
public static ClassFileInfo getClassFileInfo(Class<?> clazz) throws IOException {
|
||||
String internalName = Type.getInternalName(clazz);
|
||||
String fileName = "/" + internalName + ".class";
|
||||
byte[] originalBytecodes;
|
||||
try (InputStream classStream = clazz.getResourceAsStream(fileName)) {
|
||||
if (classStream == null) {
|
||||
throw new IllegalStateException("Classfile not found in jar: " + fileName);
|
||||
}
|
||||
originalBytecodes = classStream.readAllBytes();
|
||||
}
|
||||
return new ClassFileInfo(fileName, originalBytecodes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] instrumentClass(String className, byte[] classfileBuffer) {
|
||||
ClassReader reader = new ClassReader(classfileBuffer);
|
||||
ClassWriter writer = new ClassWriter(reader, COMPUTE_FRAMES | COMPUTE_MAXS);
|
||||
ClassVisitor visitor = new EntitlementClassVisitor(Opcodes.ASM9, writer, className);
|
||||
reader.accept(visitor, 0);
|
||||
return writer.toByteArray();
|
||||
}
|
||||
|
||||
class EntitlementClassVisitor extends ClassVisitor {
|
||||
final String className;
|
||||
|
||||
EntitlementClassVisitor(int api, ClassVisitor classVisitor, String className) {
|
||||
super(api, classVisitor);
|
||||
this.className = className;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(int version, int access, String name, String signature, String superName, String[] interfaces) {
|
||||
super.visit(version, access, name + classNameSuffix, signature, superName, interfaces);
|
||||
}
|
||||
|
||||
@Override
|
||||
public MethodVisitor visitMethod(int access, String name, String descriptor, String signature, String[] exceptions) {
|
||||
var mv = super.visitMethod(access, name, descriptor, signature, exceptions);
|
||||
boolean isStatic = (access & ACC_STATIC) != 0;
|
||||
var key = new MethodKey(
|
||||
className,
|
||||
name,
|
||||
Stream.of(Type.getArgumentTypes(descriptor)).map(Type::getInternalName).toList(),
|
||||
isStatic
|
||||
);
|
||||
var instrumentationMethod = instrumentationMethods.get(key);
|
||||
if (instrumentationMethod != null) {
|
||||
// LOGGER.debug("Will instrument method {}", key);
|
||||
return new EntitlementMethodVisitor(Opcodes.ASM9, mv, isStatic, descriptor, instrumentationMethod);
|
||||
} else {
|
||||
// LOGGER.trace("Will not instrument method {}", key);
|
||||
}
|
||||
return mv;
|
||||
}
|
||||
}
|
||||
|
||||
static class EntitlementMethodVisitor extends MethodVisitor {
|
||||
private final boolean instrumentedMethodIsStatic;
|
||||
private final String instrumentedMethodDescriptor;
|
||||
private final Method instrumentationMethod;
|
||||
private boolean hasCallerSensitiveAnnotation = false;
|
||||
|
||||
EntitlementMethodVisitor(
|
||||
int api,
|
||||
MethodVisitor methodVisitor,
|
||||
boolean instrumentedMethodIsStatic,
|
||||
String instrumentedMethodDescriptor,
|
||||
Method instrumentationMethod
|
||||
) {
|
||||
super(api, methodVisitor);
|
||||
this.instrumentedMethodIsStatic = instrumentedMethodIsStatic;
|
||||
this.instrumentedMethodDescriptor = instrumentedMethodDescriptor;
|
||||
this.instrumentationMethod = instrumentationMethod;
|
||||
}
|
||||
|
||||
@Override
|
||||
public AnnotationVisitor visitAnnotation(String descriptor, boolean visible) {
|
||||
if (visible && descriptor.endsWith("CallerSensitive;")) {
|
||||
hasCallerSensitiveAnnotation = true;
|
||||
}
|
||||
return super.visitAnnotation(descriptor, visible);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visitCode() {
|
||||
pushEntitlementChecksObject();
|
||||
pushCallerClass();
|
||||
forwardIncomingArguments();
|
||||
invokeInstrumentationMethod();
|
||||
super.visitCode();
|
||||
}
|
||||
|
||||
private void pushEntitlementChecksObject() {
|
||||
mv.visitMethodInsn(
|
||||
INVOKESTATIC,
|
||||
"org/elasticsearch/entitlement/api/EntitlementProvider",
|
||||
"checks",
|
||||
"()Lorg/elasticsearch/entitlement/api/EntitlementChecks;",
|
||||
false
|
||||
);
|
||||
}
|
||||
|
||||
private void pushCallerClass() {
|
||||
if (hasCallerSensitiveAnnotation) {
|
||||
mv.visitMethodInsn(
|
||||
INVOKESTATIC,
|
||||
"jdk/internal/reflect/Reflection",
|
||||
"getCallerClass",
|
||||
Type.getMethodDescriptor(Type.getType(Class.class)),
|
||||
false
|
||||
);
|
||||
} else {
|
||||
mv.visitFieldInsn(
|
||||
GETSTATIC,
|
||||
Type.getInternalName(StackWalker.Option.class),
|
||||
"RETAIN_CLASS_REFERENCE",
|
||||
Type.getDescriptor(StackWalker.Option.class)
|
||||
);
|
||||
mv.visitMethodInsn(
|
||||
INVOKESTATIC,
|
||||
Type.getInternalName(StackWalker.class),
|
||||
"getInstance",
|
||||
Type.getMethodDescriptor(Type.getType(StackWalker.class), Type.getType(StackWalker.Option.class)),
|
||||
false
|
||||
);
|
||||
mv.visitMethodInsn(
|
||||
INVOKEVIRTUAL,
|
||||
Type.getInternalName(StackWalker.class),
|
||||
"getCallerClass",
|
||||
Type.getMethodDescriptor(Type.getType(Class.class)),
|
||||
false
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
private void forwardIncomingArguments() {
|
||||
int localVarIndex = 0;
|
||||
if (instrumentedMethodIsStatic == false) {
|
||||
mv.visitVarInsn(Opcodes.ALOAD, localVarIndex++);
|
||||
}
|
||||
for (Type type : Type.getArgumentTypes(instrumentedMethodDescriptor)) {
|
||||
mv.visitVarInsn(type.getOpcode(Opcodes.ILOAD), localVarIndex);
|
||||
localVarIndex += type.getSize();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void invokeInstrumentationMethod() {
|
||||
mv.visitMethodInsn(
|
||||
INVOKEINTERFACE,
|
||||
Type.getInternalName(instrumentationMethod.getDeclaringClass()),
|
||||
instrumentationMethod.getName(),
|
||||
Type.getMethodDescriptor(instrumentationMethod),
|
||||
true
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// private static final Logger LOGGER = LogManager.getLogger(Instrumenter.class);
|
||||
|
||||
public record ClassFileInfo(String fileName, byte[] bytecodes) {}
|
||||
}
|
|
@ -0,0 +1,10 @@
|
|||
#
|
||||
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
# or more contributor license agreements. Licensed under the "Elastic License
|
||||
# 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
# Public License v 1"; you may not use this file except in compliance with, at
|
||||
# your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
# License v3.0 only", or the "Server Side Public License, v 1".
|
||||
#
|
||||
|
||||
org.elasticsearch.entitlement.instrumentation.impl.InstrumentationServiceImpl
|
|
@ -7,7 +7,13 @@
|
|||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
import org.elasticsearch.entitlement.instrumentation.InstrumentationService;
|
||||
|
||||
module org.elasticsearch.entitlement.agent {
|
||||
requires java.instrument;
|
||||
requires org.elasticsearch.entitlement.runtime;
|
||||
requires org.elasticsearch.base; // for @SuppressForbidden
|
||||
|
||||
exports org.elasticsearch.entitlement.instrumentation to org.elasticsearch.entitlement.agent.impl;
|
||||
|
||||
uses InstrumentationService;
|
||||
}
|
||||
|
|
|
@ -9,13 +9,53 @@
|
|||
|
||||
package org.elasticsearch.entitlement.agent;
|
||||
|
||||
import org.elasticsearch.entitlement.runtime.api.EntitlementChecks;
|
||||
import org.elasticsearch.core.SuppressForbidden;
|
||||
import org.elasticsearch.core.internal.provider.ProviderLocator;
|
||||
import org.elasticsearch.entitlement.instrumentation.InstrumentationService;
|
||||
import org.elasticsearch.entitlement.instrumentation.MethodKey;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.instrument.Instrumentation;
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.jar.JarFile;
|
||||
|
||||
public class EntitlementAgent {
|
||||
|
||||
public static void premain(String agentArgs, Instrumentation inst) throws Exception {
|
||||
EntitlementChecks.setAgentBooted();
|
||||
// Add the bridge library (the one with the entitlement checking interface) to the bootstrap classpath.
|
||||
// We can't actually reference the classes here for real before this point because they won't resolve.
|
||||
var bridgeJarName = System.getProperty("es.entitlements.bridgeJar");
|
||||
if (bridgeJarName == null) {
|
||||
throw new IllegalArgumentException("System property es.entitlements.bridgeJar is required");
|
||||
}
|
||||
addJarToBootstrapClassLoader(inst, bridgeJarName);
|
||||
|
||||
Method targetMethod = System.class.getMethod("exit", int.class);
|
||||
Method instrumentationMethod = Class.forName("org.elasticsearch.entitlement.api.EntitlementChecks")
|
||||
.getMethod("checkSystemExit", Class.class, int.class);
|
||||
Map<MethodKey, Method> methodMap = Map.of(INSTRUMENTER_FACTORY.methodKeyForTarget(targetMethod), instrumentationMethod);
|
||||
|
||||
inst.addTransformer(new Transformer(INSTRUMENTER_FACTORY.newInstrumenter("", methodMap), Set.of(internalName(System.class))), true);
|
||||
inst.retransformClasses(System.class);
|
||||
}
|
||||
|
||||
@SuppressForbidden(reason = "The appendToBootstrapClassLoaderSearch method takes a JarFile")
|
||||
private static void addJarToBootstrapClassLoader(Instrumentation inst, String jarString) throws IOException {
|
||||
inst.appendToBootstrapClassLoaderSearch(new JarFile(jarString));
|
||||
}
|
||||
|
||||
private static String internalName(Class<?> c) {
|
||||
return c.getName().replace('.', '/');
|
||||
}
|
||||
|
||||
private static final InstrumentationService INSTRUMENTER_FACTORY = (new ProviderLocator<>(
|
||||
"entitlement-agent",
|
||||
InstrumentationService.class,
|
||||
"org.elasticsearch.entitlement.agent.impl",
|
||||
Set.of("org.objectweb.nonexistent.asm")
|
||||
)).get();
|
||||
|
||||
// private static final Logger LOGGER = LogManager.getLogger(EntitlementAgent.class);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
package org.elasticsearch.entitlement.agent;
|
||||
|
||||
import org.elasticsearch.entitlement.instrumentation.Instrumenter;
|
||||
|
||||
import java.lang.instrument.ClassFileTransformer;
|
||||
import java.security.ProtectionDomain;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* A {@link ClassFileTransformer} that applies an {@link Instrumenter} to the appropriate classes.
|
||||
*/
|
||||
public class Transformer implements ClassFileTransformer {
|
||||
private final Instrumenter instrumenter;
|
||||
private final Set<String> classesToTransform;
|
||||
|
||||
public Transformer(Instrumenter instrumenter, Set<String> classesToTransform) {
|
||||
this.instrumenter = instrumenter;
|
||||
this.classesToTransform = classesToTransform;
|
||||
// TODO: Should warn if any MethodKey doesn't match any methods
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] transform(
|
||||
ClassLoader loader,
|
||||
String className,
|
||||
Class<?> classBeingRedefined,
|
||||
ProtectionDomain protectionDomain,
|
||||
byte[] classfileBuffer
|
||||
) {
|
||||
if (classesToTransform.contains(className)) {
|
||||
// System.out.println("Transforming " + className);
|
||||
return instrumenter.instrumentClass(className, classfileBuffer);
|
||||
} else {
|
||||
// System.out.println("Not transforming " + className);
|
||||
return classfileBuffer;
|
||||
}
|
||||
}
|
||||
|
||||
// private static final Logger LOGGER = LogManager.getLogger(Transformer.class);
|
||||
}
|
|
@ -0,0 +1,25 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
package org.elasticsearch.entitlement.instrumentation;
|
||||
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* The SPI service entry point for instrumentation.
|
||||
*/
|
||||
public interface InstrumentationService {
|
||||
Instrumenter newInstrumenter(String classNameSuffix, Map<MethodKey, Method> instrumentationMethods);
|
||||
|
||||
/**
|
||||
* @return a {@link MethodKey} suitable for looking up the given {@code targetMethod} in the entitlements trampoline
|
||||
*/
|
||||
MethodKey methodKeyForTarget(Method targetMethod);
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
package org.elasticsearch.entitlement.instrumentation;
|
||||
|
||||
public interface Instrumenter {
|
||||
byte[] instrumentClass(String className, byte[] classfileBuffer);
|
||||
}
|
|
@@ -0,0 +1,18 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

package org.elasticsearch.entitlement.instrumentation;

import java.util.List;

/**
 *
 * @param className the "internal name" of the class: includes the package info, but with periods replaced by slashes
 */
public record MethodKey(String className, String methodName, List<String> parameterTypes, boolean isStatic) {}
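To make the "internal name" convention concrete, a minimal illustration (mirroring the `internalName` helper in `EntitlementAgent`); the example class is arbitrary.

```java
// Internal names replace '.' with '/', so java.lang.System becomes "java/lang/System".
String internalName = System.class.getName().replace('.', '/');
```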
|
@ -9,21 +9,40 @@
|
|||
|
||||
package org.elasticsearch.entitlement.agent;
|
||||
|
||||
import org.elasticsearch.entitlement.runtime.api.EntitlementChecks;
|
||||
import com.carrotsearch.randomizedtesting.annotations.SuppressForbidden;
|
||||
|
||||
import org.elasticsearch.entitlement.runtime.api.ElasticsearchEntitlementManager;
|
||||
import org.elasticsearch.entitlement.runtime.api.NotEntitledException;
|
||||
import org.elasticsearch.entitlement.runtime.internals.EntitlementInternals;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
import org.elasticsearch.test.ESTestCase.WithoutSecurityManager;
|
||||
import org.junit.After;
|
||||
|
||||
/**
|
||||
* This is an end-to-end test that runs with the javaagent installed.
|
||||
* It should exhaustively test every instrumented method to make sure it passes with the entitlement
|
||||
* and fails without it.
|
||||
* This is an end-to-end test of the agent and entitlement runtime.
|
||||
* It runs with the agent installed, and exhaustively tests every instrumented method
|
||||
* to make sure it works with the entitlement granted and throws without it.
|
||||
* The only exception is {@link System#exit}, where we can't verify that it works without
|
||||
* terminating the JVM.
|
||||
* See {@code build.gradle} for how we set the command line arguments for this test.
|
||||
*/
|
||||
@WithoutSecurityManager
|
||||
public class EntitlementAgentTests extends ESTestCase {
|
||||
|
||||
public void testAgentBooted() {
|
||||
assertTrue(EntitlementChecks.isAgentBooted());
|
||||
public static final ElasticsearchEntitlementManager ENTITLEMENT_MANAGER = ElasticsearchEntitlementManager.get();
|
||||
|
||||
@After
|
||||
public void resetEverything() {
|
||||
EntitlementInternals.reset();
|
||||
}
|
||||
|
||||
/**
|
||||
* We can't really check that this one passes because it will just exit the JVM.
|
||||
*/
|
||||
@SuppressForbidden("Specifically testing System.exit")
|
||||
public void testSystemExitNotEntitled() {
|
||||
ENTITLEMENT_MANAGER.activate();
|
||||
assertThrows(NotEntitledException.class, () -> System.exit(123));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
11 distribution/tools/entitlement-bridge/README.md Normal file
@@ -0,0 +1,11 @@
### Entitlement Bridge

This is the code called directly from instrumented methods.
It's a minimal code stub that is loaded into the boot classloader by the entitlement agent
so that it is callable from the class library methods instrumented by the agent.
Its job is to forward the entitlement checks to the actual runtime library,
which is loaded normally.

It is not responsible for injecting the bytecode instrumentation (that's the agent)
nor for implementing the permission checks (that's the runtime library).
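A hedged sketch of the forwarding step, paraphrasing the `EntitlementProvider` lookup added in this change; the standalone class name is illustrative only.

```java
import java.util.List;
import java.util.ServiceLoader;

import org.elasticsearch.entitlement.api.EntitlementChecks;

// Sketch: the bridge exposes only the EntitlementChecks interface; the single
// implementation (ElasticsearchEntitlementManager in entitlement-runtime) is
// discovered via ServiceLoader and then performs the actual permission checks.
class BridgeLookupSketch {
    static EntitlementChecks resolveChecks() {
        List<EntitlementChecks> impls = ServiceLoader.load(EntitlementChecks.class)
            .stream()
            .map(ServiceLoader.Provider::get)
            .toList();
        if (impls.size() != 1) {
            throw new IllegalStateException("expected exactly one EntitlementChecks implementation, found " + impls.size());
        }
        return impls.get(0);
    }
}
```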
|
18
distribution/tools/entitlement-bridge/build.gradle
Normal file
|
@ -0,0 +1,18 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
apply plugin: 'elasticsearch.build'
|
||||
|
||||
dependencies {
|
||||
}
|
||||
|
||||
tasks.named('forbiddenApisMain').configure {
|
||||
replaceSignatureFiles 'jdk-signatures'
|
||||
}
|
||||
|
|
@ -7,8 +7,8 @@
|
|||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
module org.elasticsearch.entitlement.runtime {
|
||||
requires org.elasticsearch.base;
|
||||
module org.elasticsearch.entitlement.bridge {
|
||||
uses org.elasticsearch.entitlement.api.EntitlementChecks;
|
||||
|
||||
exports org.elasticsearch.entitlement.runtime.api to org.elasticsearch.entitlement.agent;
|
||||
exports org.elasticsearch.entitlement.api;
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
package org.elasticsearch.entitlement.api;
|
||||
|
||||
public interface EntitlementChecks {
|
||||
void checkSystemExit(Class<?> callerClass, int status);
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
package org.elasticsearch.entitlement.api;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ServiceLoader;
|
||||
|
||||
public class EntitlementProvider {
|
||||
private static final EntitlementChecks CHECKS = lookupEntitlementChecksImplementation();
|
||||
|
||||
public static EntitlementChecks checks() {
|
||||
return CHECKS;
|
||||
}
|
||||
|
||||
private static EntitlementChecks lookupEntitlementChecksImplementation() {
|
||||
List<EntitlementChecks> candidates = ServiceLoader.load(EntitlementChecks.class).stream().map(ServiceLoader.Provider::get).toList();
|
||||
if (candidates.isEmpty()) {
|
||||
throw new IllegalStateException("No EntitlementChecks service");
|
||||
} else if (candidates.size() >= 2) {
|
||||
throw new IllegalStateException(
|
||||
"Multiple EntitlementChecks services: " + candidates.stream().map(e -> e.getClass().getSimpleName()).toList()
|
||||
);
|
||||
} else {
|
||||
return candidates.get(0);
|
||||
}
|
||||
}
|
||||
}
|
|
@@ -5,10 +5,3 @@ This module implements mechanisms to grant and check permissions under the _enti
The entitlements system provides an alternative to the legacy `SecurityManager` system, which is deprecated for removal.
The `entitlement-agent` tool instruments sensitive class library methods with calls to this module, in order to enforce the controls.

This module is responsible for:
- Defining which class library methods are sensitive
- Defining what permissions should be checked for each sensitive method
- Implementing the permission checks
- Offering a "grant" API to grant permissions

It is not responsible for anything to do with bytecode instrumentation; that responsibility lies with `entitlement-agent`.
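A hedged end-to-end sketch of the grant/check flow, modeled on the `EntitlementAgentTests` added in this change; it assumes the entitlement agent is installed on the JVM, and the `main` wrapper is illustrative only.

```java
import org.elasticsearch.entitlement.runtime.api.ElasticsearchEntitlementManager;
import org.elasticsearch.entitlement.runtime.api.NotEntitledException;

class EntitlementFlowSketch {
    public static void main(String[] args) {
        // Checks are trivially allowed until the manager is activated.
        ElasticsearchEntitlementManager manager = ElasticsearchEntitlementManager.get();
        manager.activate();
        try {
            // System.exit is instrumented by the agent to call checkSystemExit first.
            System.exit(123);
        } catch (NotEntitledException e) {
            // No entitlement has been granted, so the call is rejected.
            System.err.println(e.getMessage());
        }
    }
}
```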
|
@ -10,7 +10,9 @@ apply plugin: 'elasticsearch.build'
|
|||
apply plugin: 'elasticsearch.publish'
|
||||
|
||||
dependencies {
|
||||
compileOnly project(':libs:elasticsearch-core')
|
||||
compileOnly project(':libs:elasticsearch-core') // For @SuppressForbidden
|
||||
compileOnly project(':server') // To access the main server module for special permission checks
|
||||
compileOnly project(':distribution:tools:entitlement-bridge')
|
||||
|
||||
testImplementation project(":test:framework")
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
module org.elasticsearch.entitlement.runtime {
|
||||
requires org.elasticsearch.entitlement.bridge;
|
||||
requires org.elasticsearch.server;
|
||||
|
||||
exports org.elasticsearch.entitlement.runtime.api;
|
||||
|
||||
provides org.elasticsearch.entitlement.api.EntitlementChecks
|
||||
with
|
||||
org.elasticsearch.entitlement.runtime.api.ElasticsearchEntitlementManager;
|
||||
}
|
|
@ -0,0 +1,77 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
package org.elasticsearch.entitlement.runtime.api;
|
||||
|
||||
import org.elasticsearch.entitlement.api.EntitlementChecks;
|
||||
import org.elasticsearch.entitlement.api.EntitlementProvider;
|
||||
|
||||
import java.util.Optional;
|
||||
|
||||
import static org.elasticsearch.entitlement.runtime.internals.EntitlementInternals.isActive;
|
||||
|
||||
/**
|
||||
* Implementation of the {@link EntitlementChecks} interface, providing additional
|
||||
* API methods for managing the checks.
|
||||
* The trampoline module loads this object via SPI.
|
||||
*/
|
||||
public class ElasticsearchEntitlementManager implements EntitlementChecks {
|
||||
/**
|
||||
* @return the same instance of {@link ElasticsearchEntitlementManager} returned by {@link EntitlementProvider}.
|
||||
*/
|
||||
public static ElasticsearchEntitlementManager get() {
|
||||
return (ElasticsearchEntitlementManager) EntitlementProvider.checks();
|
||||
}
|
||||
|
||||
/**
|
||||
* Causes entitlements to be enforced.
|
||||
*/
|
||||
public void activate() {
|
||||
isActive = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkSystemExit(Class<?> callerClass, int status) {
|
||||
var requestingModule = requestingModule(callerClass);
|
||||
if (isTriviallyAllowed(requestingModule)) {
|
||||
// System.out.println(" - Trivially allowed");
|
||||
return;
|
||||
}
|
||||
// Hard-forbidden until we develop the permission granting scheme
|
||||
throw new NotEntitledException("Missing entitlement for " + requestingModule);
|
||||
}
|
||||
|
||||
private static Module requestingModule(Class<?> callerClass) {
|
||||
if (callerClass != null) {
|
||||
Module callerModule = callerClass.getModule();
|
||||
if (callerModule.getLayer() != ModuleLayer.boot()) {
|
||||
// fast path
|
||||
return callerModule;
|
||||
}
|
||||
}
|
||||
int framesToSkip = 1 // getCallingClass (this method)
|
||||
+ 1 // the checkXxx method
|
||||
+ 1 // the runtime config method
|
||||
+ 1 // the instrumented method
|
||||
;
|
||||
Optional<Module> module = StackWalker.getInstance(StackWalker.Option.RETAIN_CLASS_REFERENCE)
|
||||
.walk(
|
||||
s -> s.skip(framesToSkip)
|
||||
.map(f -> f.getDeclaringClass().getModule())
|
||||
.filter(m -> m.getLayer() != ModuleLayer.boot())
|
||||
.findFirst()
|
||||
);
|
||||
return module.orElse(null);
|
||||
}
|
||||
|
||||
private static boolean isTriviallyAllowed(Module requestingModule) {
|
||||
return isActive == false || (requestingModule == null) || requestingModule == System.class.getModule();
|
||||
}
|
||||
|
||||
}
|
|
@ -9,14 +9,12 @@
|
|||
|
||||
package org.elasticsearch.entitlement.runtime.api;
|
||||
|
||||
public class EntitlementChecks {
|
||||
static boolean isAgentBooted = false;
|
||||
|
||||
public static void setAgentBooted() {
|
||||
isAgentBooted = true;
|
||||
public class NotEntitledException extends RuntimeException {
|
||||
public NotEntitledException(String message) {
|
||||
super(message);
|
||||
}
|
||||
|
||||
public static boolean isAgentBooted() {
|
||||
return isAgentBooted;
|
||||
public NotEntitledException(String message, Throwable cause) {
|
||||
super(message, cause);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
package org.elasticsearch.entitlement.runtime.internals;
|
||||
|
||||
/**
|
||||
* Don't export this from the module. Just don't.
|
||||
*/
|
||||
public class EntitlementInternals {
|
||||
/**
|
||||
* When false, entitlement rules are not enforced; all operations are allowed.
|
||||
*/
|
||||
public static volatile boolean isActive = false;
|
||||
|
||||
public static void reset() {
|
||||
isActive = false;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,10 @@
|
|||
#
|
||||
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
# or more contributor license agreements. Licensed under the "Elastic License
|
||||
# 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
# Public License v 1"; you may not use this file except in compliance with, at
|
||||
# your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
# License v3.0 only", or the "Server Side Public License, v 1".
|
||||
#
|
||||
|
||||
org.elasticsearch.entitlement.runtime.api.ElasticsearchEntitlementManager
|
5
docs/changelog/111336.yaml
Normal file
|
@ -0,0 +1,5 @@
|
|||
pr: 111336
|
||||
summary: Use the same chunking configurations for models in the Elasticsearch service
|
||||
area: Machine Learning
|
||||
type: enhancement
|
||||
issues: []
|
5
docs/changelog/112567.yaml
Normal file
|
@ -0,0 +1,5 @@
|
|||
pr: 112567
|
||||
summary: Track shard snapshot progress during node shutdown
|
||||
area: Snapshot/Restore
|
||||
type: enhancement
|
||||
issues: []
|
5
docs/changelog/112933.yaml
Normal file
|
@ -0,0 +1,5 @@
|
|||
pr: 112933
|
||||
summary: "Allow incubating Panama Vector in simdvec, and add vectorized `ipByteBin`"
|
||||
area: Search
|
||||
type: enhancement
|
||||
issues: []
|
5
docs/changelog/113563.yaml
Normal file
|
@ -0,0 +1,5 @@
|
|||
pr: 113563
|
||||
summary: Use ELSER By Default For Semantic Text
|
||||
area: Mapping
|
||||
type: enhancement
|
||||
issues: []
|
5
docs/changelog/113812.yaml
Normal file
|
@ -0,0 +1,5 @@
|
|||
pr: 113812
|
||||
summary: Add Streaming Inference spec
|
||||
area: Machine Learning
|
||||
type: enhancement
|
||||
issues: []
|
6
docs/changelog/113897.yaml
Normal file
|
@ -0,0 +1,6 @@
|
|||
pr: 113897
|
||||
summary: "Add chunking settings configuration to `CohereService,` `AmazonBedrockService,`\
|
||||
\ and `AzureOpenAiService`"
|
||||
area: Machine Learning
|
||||
type: enhancement
|
||||
issues: []
|
|
@ -1,5 +0,0 @@
|
|||
pr: 113900
|
||||
summary: Fix BWC for file-settings based role mappings
|
||||
area: Authentication
|
||||
type: bug
|
||||
issues: []
|
13
docs/changelog/113967.yaml
Normal file
|
@ -0,0 +1,13 @@
|
|||
pr: 113967
|
||||
summary: "ESQL: Entirely remove META FUNCTIONS"
|
||||
area: ES|QL
|
||||
type: breaking
|
||||
issues: []
|
||||
breaking:
|
||||
title: "ESQL: Entirely remove META FUNCTIONS"
|
||||
area: ES|QL
|
||||
details: |
|
||||
Removes an undocumented syntax from ESQL: META FUNCTION. This was never
|
||||
reliable or really useful. Consult the documentation instead.
|
||||
impact: "Removes an undocumented syntax from ESQL: META FUNCTION"
|
||||
notable: false
|
5
docs/changelog/114002.yaml
Normal file
|
@ -0,0 +1,5 @@
|
|||
pr: 114002
|
||||
summary: Add a `mustache.max_output_size_bytes` setting to limit the length of results from mustache scripts
|
||||
area: Infra/Scripting
|
||||
type: enhancement
|
||||
issues: []
|
5
docs/changelog/114080.yaml
Normal file
|
@ -0,0 +1,5 @@
|
|||
pr: 114080
|
||||
summary: Stream Cohere Completion
|
||||
area: Machine Learning
|
||||
type: enhancement
|
||||
issues: []
|
5
docs/changelog/114128.yaml
Normal file
|
@ -0,0 +1,5 @@
|
|||
pr: 114128
|
||||
summary: Adding `index_template_substitutions` to the simulate ingest API
|
||||
area: Ingest Node
|
||||
type: enhancement
|
||||
issues: []
|
6
docs/changelog/114157.yaml
Normal file
|
@ -0,0 +1,6 @@
|
|||
pr: 114157
|
||||
summary: Add a `terminate` ingest processor
|
||||
area: Ingest Node
|
||||
type: feature
|
||||
issues:
|
||||
- 110218
|
5
docs/changelog/114177.yaml
Normal file
|
@ -0,0 +1,5 @@
|
|||
pr: 114177
|
||||
summary: "Make `randomInstantBetween` always return value in range [minInstant, `maxInstant]`"
|
||||
area: Infra/Metrics
|
||||
type: bug
|
||||
issues: []
|
17
docs/changelog/114231.yaml
Normal file
|
@ -0,0 +1,17 @@
|
|||
pr: 114231
|
||||
summary: Remove cluster state from `/_cluster/reroute` response
|
||||
area: Allocation
|
||||
type: breaking
|
||||
issues:
|
||||
- 88978
|
||||
breaking:
|
||||
title: Remove cluster state from `/_cluster/reroute` response
|
||||
area: REST API
|
||||
details: >-
|
||||
The `POST /_cluster/reroute` API no longer returns the cluster state in its
|
||||
response. The `?metric` query parameter to this API now has no effect and
|
||||
its use will be forbidden in a future version.
|
||||
impact: >-
|
||||
Cease usage of the `?metric` query parameter when calling the
|
||||
`POST /_cluster/reroute` API.
|
||||
notable: false
|
5
docs/changelog/114264.yaml
Normal file
|
@ -0,0 +1,5 @@
|
|||
pr: 114264
|
||||
summary: "Fix analyzed wildcard query in simple_query_string when disjunctions is empty"
|
||||
area: Search
|
||||
type: bug
|
||||
issues: [114185]
|
5
docs/changelog/114337.yaml
Normal file
|
@ -0,0 +1,5 @@
|
|||
pr: 114337
|
||||
summary: "Enables cluster state role mapper, to include ECK operator-defined role mappings in role resolution"
|
||||
area: Authentication
|
||||
type: bug
|
||||
issues: []
|
|
@@ -10,7 +10,7 @@ Changes the allocation of shards in a cluster.
[[cluster-reroute-api-request]]
==== {api-request-title}

`POST /_cluster/reroute?metric=none`
`POST /_cluster/reroute`

[[cluster-reroute-api-prereqs]]
==== {api-prereq-title}
|
@ -193,7 +193,7 @@ This is a short example of a simple reroute API call:
|
|||
|
||||
[source,console]
|
||||
--------------------------------------------------
|
||||
POST /_cluster/reroute?metric=none
|
||||
POST /_cluster/reroute
|
||||
{
|
||||
"commands": [
|
||||
{
|
||||
|
|
|
@ -95,7 +95,7 @@ Changing allocation id V8QXk-QXSZinZMT-NvEq4w to tjm9Ve6uTBewVFAlfUMWjA
|
|||
|
||||
You should run the following command to allocate this shard:
|
||||
|
||||
POST /_cluster/reroute?metric=none
|
||||
POST /_cluster/reroute
|
||||
{
|
||||
"commands" : [
|
||||
{
|
||||
|
|
|
@ -63,18 +63,22 @@ To connect to Zoom you need to https://developers.zoom.us/docs/internal-apps/s2s
|
|||
6. Click on the "Create" button to create the app registration.
|
||||
7. After the registration is complete, you will be redirected to the app's overview page. Take note of the "App Credentials" value, as you'll need it later.
|
||||
8. Navigate to the "Scopes" section and click on the "Add Scopes" button.
|
||||
9. The following scopes need to be added to the app.
|
||||
9. The following granular scopes need to be added to the app.
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
user:read:admin
|
||||
meeting:read:admin
|
||||
chat_channel:read:admin
|
||||
recording:read:admin
|
||||
chat_message:read:admin
|
||||
report:read:admin
|
||||
user:read:list_users:admin
|
||||
meeting:read:list_meetings:admin
|
||||
meeting:read:list_past_participants:admin
|
||||
cloud_recording:read:list_user_recordings:admin
|
||||
team_chat:read:list_user_channels:admin
|
||||
team_chat:read:list_user_messages:admin
|
||||
----
|
||||
|
||||
[NOTE]
|
||||
====
|
||||
The connector requires a minimum scope of `user:read:list_users:admin` to ingest data into Elasticsearch.
|
||||
====
|
||||
+
|
||||
10. Click on the "Done" button to add the selected scopes to your app.
|
||||
11. Navigate to the "Activation" section and input the necessary information to activate the app.
|
||||
|
||||
|
@ -220,18 +224,22 @@ To connect to Zoom you need to https://developers.zoom.us/docs/internal-apps/s2s
|
|||
6. Click on the "Create" button to create the app registration.
|
||||
7. After the registration is complete, you will be redirected to the app's overview page. Take note of the "App Credentials" value, as you'll need it later.
|
||||
8. Navigate to the "Scopes" section and click on the "Add Scopes" button.
|
||||
9. The following scopes need to be added to the app.
|
||||
9. The following granular scopes need to be added to the app.
|
||||
+
|
||||
[source,bash]
|
||||
----
|
||||
user:read:admin
|
||||
meeting:read:admin
|
||||
chat_channel:read:admin
|
||||
recording:read:admin
|
||||
chat_message:read:admin
|
||||
report:read:admin
|
||||
user:read:list_users:admin
|
||||
meeting:read:list_meetings:admin
|
||||
meeting:read:list_past_participants:admin
|
||||
cloud_recording:read:list_user_recordings:admin
|
||||
team_chat:read:list_user_channels:admin
|
||||
team_chat:read:list_user_messages:admin
|
||||
----
|
||||
|
||||
[NOTE]
|
||||
====
|
||||
The connector requires a minimum scope of `user:read:list_users:admin` to ingest data into Elasticsearch.
|
||||
====
|
||||
+
|
||||
10. Click on the "Done" button to add the selected scopes to your app.
|
||||
11. Navigate to the "Activation" section and input the necessary information to activate the app.
|
||||
|
||||
|
|
|
@ -2,4 +2,10 @@
|
|||
|
||||
*Description*
|
||||
|
||||
Converts a multivalued expression into a single valued column containing the first value. This is most useful when reading from a function that emits multivalued columns in a known order like <<esql-split>>. The order that <<esql-multivalued-fields, multivalued fields>> are read from underlying storage is not guaranteed. It is *frequently* ascending, but don't rely on that. If you need the minimum value use <<esql-mv_min>> instead of `MV_FIRST`. `MV_MIN` has optimizations for sorted values so there isn't a performance benefit to `MV_FIRST`.
|
||||
Converts a multivalued expression into a single valued column containing the first value. This is most useful when reading from a function that emits multivalued columns in a known order like <<esql-split>>.
|
||||
|
||||
The order that <<esql-multivalued-fields, multivalued fields>> are read from
|
||||
underlying storage is not guaranteed. It is *frequently* ascending, but don't
|
||||
rely on that. If you need the minimum value use <<esql-mv_min>> instead of
|
||||
`MV_FIRST`. `MV_MIN` has optimizations for sorted values so there isn't a
|
||||
performance benefit to `MV_FIRST`.
|
||||
|
|
|
@ -2,4 +2,10 @@
|
|||
|
||||
*Description*
|
||||
|
||||
Converts a multivalue expression into a single valued column containing the last value. This is most useful when reading from a function that emits multivalued columns in a known order like <<esql-split>>. The order that <<esql-multivalued-fields, multivalued fields>> are read from underlying storage is not guaranteed. It is *frequently* ascending, but don't rely on that. If you need the maximum value use <<esql-mv_max>> instead of `MV_LAST`. `MV_MAX` has optimizations for sorted values so there isn't a performance benefit to `MV_LAST`.
|
||||
Converts a multivalue expression into a single valued column containing the last value. This is most useful when reading from a function that emits multivalued columns in a known order like <<esql-split>>.
|
||||
|
||||
The order that <<esql-multivalued-fields, multivalued fields>> are read from
|
||||
underlying storage is not guaranteed. It is *frequently* ascending, but don't
|
||||
rely on that. If you need the maximum value use <<esql-mv_max>> instead of
|
||||
`MV_LAST`. `MV_MAX` has optimizations for sorted values so there isn't a
|
||||
performance benefit to `MV_LAST`.
|
||||
|
|
|
@@ -2,4 +2,8 @@

*Description*

Returns a subset of the multivalued field using the start and end index values.
Returns a subset of the multivalued field using the start and end index values. This is most useful when reading from a function that emits multivalued columns in a known order like <<esql-split>> or <<esql-mv_sort>>.

The order that <<esql-multivalued-fields, multivalued fields>> are read from
underlying storage is not guaranteed. It is *frequently* ascending, but don't
rely on that.
|
|
|
@ -2,7 +2,7 @@
|
|||
"comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.",
|
||||
"type" : "eval",
|
||||
"name" : "mv_first",
|
||||
"description" : "Converts a multivalued expression into a single valued column containing the\nfirst value. This is most useful when reading from a function that emits\nmultivalued columns in a known order like <<esql-split>>.\n\nThe order that <<esql-multivalued-fields, multivalued fields>> are read from\nunderlying storage is not guaranteed. It is *frequently* ascending, but don't\nrely on that. If you need the minimum value use <<esql-mv_min>> instead of\n`MV_FIRST`. `MV_MIN` has optimizations for sorted values so there isn't a\nperformance benefit to `MV_FIRST`.",
|
||||
"description" : "Converts a multivalued expression into a single valued column containing the\nfirst value. This is most useful when reading from a function that emits\nmultivalued columns in a known order like <<esql-split>>.",
|
||||
"signatures" : [
|
||||
{
|
||||
"params" : [
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.",
|
||||
"type" : "eval",
|
||||
"name" : "mv_last",
|
||||
"description" : "Converts a multivalue expression into a single valued column containing the last\nvalue. This is most useful when reading from a function that emits multivalued\ncolumns in a known order like <<esql-split>>.\n\nThe order that <<esql-multivalued-fields, multivalued fields>> are read from\nunderlying storage is not guaranteed. It is *frequently* ascending, but don't\nrely on that. If you need the maximum value use <<esql-mv_max>> instead of\n`MV_LAST`. `MV_MAX` has optimizations for sorted values so there isn't a\nperformance benefit to `MV_LAST`.",
|
||||
"description" : "Converts a multivalue expression into a single valued column containing the last\nvalue. This is most useful when reading from a function that emits multivalued\ncolumns in a known order like <<esql-split>>.",
|
||||
"signatures" : [
|
||||
{
|
||||
"params" : [
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
"comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.",
|
||||
"type" : "eval",
|
||||
"name" : "mv_slice",
|
||||
"description" : "Returns a subset of the multivalued field using the start and end index values.",
|
||||
"description" : "Returns a subset of the multivalued field using the start and end index values.\nThis is most useful when reading from a function that emits multivalued columns\nin a known order like <<esql-split>> or <<esql-mv_sort>>.",
|
||||
"signatures" : [
|
||||
{
|
||||
"params" : [
|
||||
|
|
|
@ -7,12 +7,6 @@ Converts a multivalued expression into a single valued column containing the
first value. This is most useful when reading from a function that emits
multivalued columns in a known order like <<esql-split>>.

The order that <<esql-multivalued-fields, multivalued fields>> are read from
underlying storage is not guaranteed. It is *frequently* ascending, but don't
rely on that. If you need the minimum value use <<esql-mv_min>> instead of
`MV_FIRST`. `MV_MIN` has optimizations for sorted values so there isn't a
performance benefit to `MV_FIRST`.

```
ROW a="foo;bar;baz"
| EVAL first_a = MV_FIRST(SPLIT(a, ";"))

@ -7,12 +7,6 @@ Converts a multivalue expression into a single valued column containing the last
value. This is most useful when reading from a function that emits multivalued
columns in a known order like <<esql-split>>.

The order that <<esql-multivalued-fields, multivalued fields>> are read from
underlying storage is not guaranteed. It is *frequently* ascending, but don't
rely on that. If you need the maximum value use <<esql-mv_max>> instead of
`MV_LAST`. `MV_MAX` has optimizations for sorted values so there isn't a
performance benefit to `MV_LAST`.

```
ROW a="foo;bar;baz"
| EVAL last_a = MV_LAST(SPLIT(a, ";"))

@ -4,6 +4,8 @@ This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../READ

### MV_SLICE
Returns a subset of the multivalued field using the start and end index values.
This is most useful when reading from a function that emits multivalued columns
in a known order like <<esql-split>> or <<esql-mv_sort>>.

```
row a = [1, 2, 2, 3]

@ -177,6 +177,37 @@ POST /_query
----
// TESTRESPONSE[s/"took": 28/"took": "$body.took"/]

[discrete]
[[esql-multivalued-nulls]]
==== `null` in a list

`null` values in a list are not preserved at the storage layer:

[source,console,id=esql-multivalued-fields-multivalued-nulls]
----
POST /mv/_doc?refresh
{ "a": [2, null, 1] }

POST /_query
{
  "query": "FROM mv | LIMIT 1"
}
----

[source,console-result]
----
{
  "took": 28,
  "columns": [
    { "name": "a", "type": "long"},
  ],
  "values": [
    [[1, 2]],
  ]
}
----
// TESTRESPONSE[s/"took": 28/"took": "$body.took"/]

[discrete]
[[esql-multivalued-fields-functions]]
==== Functions

@ -56,7 +56,7 @@ documents can't be backed up incrementally.
===== Blocks during a force merge

Calls to this API block until the merge is complete (unless request contains
wait_for_completion=false, which is default true). If the client connection
`wait_for_completion=false`, which is default `true`). If the client connection
is lost before completion then the force merge process will continue in the
background. Any new requests to force merge the same indices will also block
until the ongoing force merge is complete.
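
As a sketch of the non-blocking variant described above (the index name is a placeholder), the request returns immediately instead of holding the connection open:

[source,console]
----
POST /my-index-000001/_forcemerge?wait_for_completion=false
----

The response then contains a task identifier that can be checked with the task management API, for example `GET _tasks/<task_id>`, while the merge continues in the background.
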
@ -85,6 +85,8 @@ include::{docdir}/rest-api/common-parms.asciidoc[tag=master-timeout]
|
|||
[[put-index-template-api-request-body]]
|
||||
==== {api-request-body-title}
|
||||
|
||||
// tag::request-body[]
|
||||
|
||||
`composed_of`::
|
||||
(Optional, array of strings)
|
||||
An ordered list of component template names. Component templates are merged in the order
|
||||
|
@ -102,7 +104,7 @@ See <<create-index-template,create an index template>>.
|
|||
+
|
||||
.Properties of `data_stream`
|
||||
[%collapsible%open]
|
||||
====
|
||||
=====
|
||||
`allow_custom_routing`::
|
||||
(Optional, Boolean) If `true`, the data stream supports
|
||||
<<mapping-routing-field,custom routing>>. Defaults to `false`.
|
||||
|
@ -117,7 +119,7 @@ See <<create-index-template,create an index template>>.
|
|||
+
|
||||
If `time_series`, each backing index has an `index.mode` index setting of
|
||||
`time_series`.
|
||||
====
|
||||
=====
|
||||
|
||||
`index_patterns`::
|
||||
(Required, array of strings)
|
||||
|
@ -146,7 +148,7 @@ Template to be applied. It may optionally include an `aliases`, `mappings`, or
|
|||
+
|
||||
.Properties of `template`
|
||||
[%collapsible%open]
|
||||
====
|
||||
=====
|
||||
`aliases`::
|
||||
(Optional, object of objects) Aliases to add.
|
||||
+
|
||||
|
@ -161,7 +163,7 @@ include::{es-ref-dir}/indices/create-index.asciidoc[tag=aliases-props]
|
|||
include::{docdir}/rest-api/common-parms.asciidoc[tag=mappings]
|
||||
|
||||
include::{docdir}/rest-api/common-parms.asciidoc[tag=settings]
|
||||
====
|
||||
=====
|
||||
|
||||
`version`::
|
||||
(Optional, integer)
|
||||
|
@ -174,6 +176,7 @@ Marks this index template as deprecated.
|
|||
When creating or updating a non-deprecated index template that uses deprecated components,
|
||||
{es} will emit a deprecation warning.
|
||||
// end::index-template-api-body[]
|
||||
// end::request-body[]
|
||||
|
||||
[[put-index-template-api-example]]
|
||||
==== {api-examples-title}
|
||||
|
|
|
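
To tie the request body parameters above together, a minimal create-index-template request might look like the following sketch (the template name, index patterns, and settings are illustrative placeholders only):

[source,console]
----
PUT _index_template/template_1
{
  "index_patterns": ["te*"],
  "priority": 1,
  "template": {
    "settings": {
      "number_of_shards": 1
    }
  }
}
----
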
@ -102,6 +102,12 @@ POST /_ingest/_simulate
|
|||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"index_template_substitutions": { <3>
|
||||
"my-index-template": {
|
||||
"index_patterns": ["my-index-*"],
|
||||
"composed_of": ["component_template_1", "component_template_2"]
|
||||
}
|
||||
}
|
||||
}
|
||||
----
|
||||
|
@ -109,6 +115,8 @@ POST /_ingest/_simulate
|
|||
<1> This replaces the existing `my-pipeline` pipeline with the contents given here for the duration of this request.
|
||||
<2> This replaces the existing `my-component-template` component template with the contents given here for the duration of this request.
|
||||
These templates can be used to change the pipeline(s) used, or to modify the mapping that will be used to validate the result.
|
||||
<3> This replaces the existing `my-index-template` index template with the contents given here for the duration of this request.
|
||||
These templates can be used to change the pipeline(s) used, or to modify the mapping that will be used to validate the result.
|
||||
|
||||
[[simulate-ingest-api-request]]
|
||||
==== {api-request-title}
|
||||
|
@ -225,6 +233,19 @@ include::{es-ref-dir}/indices/put-component-template.asciidoc[tag=template]
|
|||
|
||||
====
|
||||
|
||||
`index_template_substitutions`::
|
||||
(Optional, map of strings to objects)
|
||||
Map of index template names to substitute index template definition objects.
|
||||
+
|
||||
.Properties of index template definition objects
|
||||
[%collapsible%open]
|
||||
|
||||
====
|
||||
|
||||
include::{es-ref-dir}/indices/put-index-template.asciidoc[tag=request-body]
|
||||
|
||||
====
|
||||
|
||||
[[simulate-ingest-api-example]]
|
||||
==== {api-examples-title}
|
||||
|
||||
|
|
30 docs/reference/ingest/processors/terminate.asciidoc Normal file

@ -0,0 +1,30 @@
[[terminate-processor]]
=== Terminate processor

++++
<titleabbrev>Terminate</titleabbrev>
++++

Terminates the current ingest pipeline, causing no further processors to be run.
This will normally be executed conditionally, using the `if` option.

If this pipeline is being called from another pipeline, the calling pipeline is *not* terminated.

[[terminate-options]]
.Terminate Options
[options="header"]
|======
| Name | Required | Default | Description
include::common-options.asciidoc[]
|======

[source,js]
--------------------------------------------------
{
  "description" : "terminates the current pipeline if the error field is present",
  "terminate": {
    "if": "ctx.error != null"
  }
}
--------------------------------------------------
// NOTCONSOLE
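
As a further sketch of how the processor behaves in practice (pipeline structure and field names are arbitrary), the simulate API shows that processors after `terminate` are skipped only for documents matching the condition:

[source,console]
----
POST _ingest/pipeline/_simulate
{
  "pipeline": {
    "processors": [
      { "set": { "field": "before", "value": true } },
      { "terminate": { "if": "ctx.error != null" } },
      { "set": { "field": "after", "value": true } }
    ]
  },
  "docs": [
    { "_source": { "error": "boom" } },
    { "_source": { "message": "all good" } }
  ]
}
----

For the first document only `before` is set; for the second, both `before` and `after` are set.
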
@ -204,7 +204,7 @@ For general content, you have the following options for adding data to {es} indi
|
|||
If you're building a website or app, then you can call Elasticsearch APIs using an https://www.elastic.co/guide/en/elasticsearch/client/index.html[{es} client] in the programming language of your choice. If you use the Python client, then check out the `elasticsearch-labs` repo for various https://github.com/elastic/elasticsearch-labs/tree/main/notebooks/search/python-examples[example notebooks].
|
||||
* {kibana-ref}/connect-to-elasticsearch.html#upload-data-kibana[File upload]: Use the {kib} file uploader to index single files for one-off testing and exploration. The GUI guides you through setting up your index and field mappings.
|
||||
* https://github.com/elastic/crawler[Web crawler]: Extract and index web page content into {es} documents.
|
||||
* {enterprise-search-ref}/connectors.html[Connectors]: Sync data from various third-party data sources to create searchable, read-only replicas in {es}.
|
||||
* <<es-connectors,Connectors>>: Sync data from various third-party data sources to create searchable, read-only replicas in {es}.
|
||||
|
||||
[discrete]
|
||||
[[es-ingestion-overview-timestamped]]
|
||||
|
@ -492,4 +492,4 @@ and restrictions. You can review the following guides to learn how to tune your
|
|||
* <<use-elasticsearch-for-time-series-data,Tune for time series data>>
|
||||
|
||||
Many {es} options come with different performance considerations and trade-offs. The best way to determine the
|
||||
optimal configuration for your use case is through https://www.elastic.co/elasticon/conf/2016/sf/quantitative-cluster-sizing[testing with your own data and queries].
|
||||
optimal configuration for your use case is through https://www.elastic.co/elasticon/conf/2016/sf/quantitative-cluster-sizing[testing with your own data and queries].
|
||||
|
|
|
@ -821,8 +821,6 @@ address.
[[lookup-runtime-fields]]
==== Retrieve fields from related indices

experimental[]

The <<search-fields,`fields`>> parameter on the `_search` API can also be used to retrieve fields from
the related indices via runtime fields with a type of `lookup`.
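
A lookup runtime field of the kind described above might be declared as in the following sketch (index and field names are illustrative only): each hit is joined against a second index and selected fields are fetched from the matching document:

[source,console]
----
POST my-logs/_search
{
  "runtime_mappings": {
    "location": {
      "type": "lookup",
      "target_index": "ip_location",
      "input_field": "host.ip",
      "target_field": "ip",
      "fetch_fields": ["country", "city"]
    }
  },
  "fields": ["message", "location"],
  "_source": false
}
----
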
@ -225,6 +225,17 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
|
|||
(Optional, string)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
|
||||
=======
|
||||
`deberta_v2`::::
|
||||
(Optional, object)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
|
||||
+
|
||||
.Properties of deberta_v2
|
||||
[%collapsible%open]
|
||||
=======
|
||||
`truncate`::::
|
||||
(Optional, string)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
|
||||
=======
|
||||
`roberta`::::
|
||||
(Optional, object)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
|
||||
|
@ -301,6 +312,17 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
|
|||
(Optional, string)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
|
||||
=======
|
||||
`deberta_v2`::::
|
||||
(Optional, object)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
|
||||
+
|
||||
.Properties of deberta_v2
|
||||
[%collapsible%open]
|
||||
=======
|
||||
`truncate`::::
|
||||
(Optional, string)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
|
||||
=======
|
||||
`roberta`::::
|
||||
(Optional, object)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
|
||||
|
@ -397,6 +419,21 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
|
|||
(Optional, string)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
|
||||
=======
|
||||
`deberta_v2`::::
|
||||
(Optional, object)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
|
||||
+
|
||||
.Properties of deberta_v2
|
||||
[%collapsible%open]
|
||||
=======
|
||||
`span`::::
|
||||
(Optional, integer)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]
|
||||
|
||||
`truncate`::::
|
||||
(Optional, string)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
|
||||
=======
|
||||
`roberta`::::
|
||||
(Optional, object)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
|
||||
|
@ -517,6 +554,21 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
|
|||
(Optional, string)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
|
||||
=======
|
||||
`deberta_v2`::::
|
||||
(Optional, object)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
|
||||
+
|
||||
.Properties of deberta_v2
|
||||
[%collapsible%open]
|
||||
=======
|
||||
`span`::::
|
||||
(Optional, integer)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]
|
||||
|
||||
`truncate`::::
|
||||
(Optional, string)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
|
||||
=======
|
||||
`roberta`::::
|
||||
(Optional, object)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
|
||||
|
@ -608,6 +660,17 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
|
|||
(Optional, string)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
|
||||
=======
|
||||
`deberta_v2`::::
|
||||
(Optional, object)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
|
||||
+
|
||||
.Properties of deberta_v2
|
||||
[%collapsible%open]
|
||||
=======
|
||||
`truncate`::::
|
||||
(Optional, string)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
|
||||
=======
|
||||
`roberta`::::
|
||||
(Optional, object)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
|
||||
|
@ -687,6 +750,21 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
|
|||
(Optional, integer)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]
|
||||
|
||||
`with_special_tokens`::::
|
||||
(Optional, boolean)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-bert-with-special-tokens]
|
||||
=======
|
||||
`deberta_v2`::::
|
||||
(Optional, object)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
|
||||
+
|
||||
.Properties of deberta_v2
|
||||
[%collapsible%open]
|
||||
=======
|
||||
`span`::::
|
||||
(Optional, integer)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]
|
||||
|
||||
`with_special_tokens`::::
|
||||
(Optional, boolean)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-bert-with-special-tokens]
|
||||
|
@ -790,6 +868,17 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
|
|||
(Optional, string)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
|
||||
=======
|
||||
`deberta_v2`::::
|
||||
(Optional, object)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
|
||||
+
|
||||
.Properties of deberta_v2
|
||||
[%collapsible%open]
|
||||
=======
|
||||
`truncate`::::
|
||||
(Optional, string)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
|
||||
=======
|
||||
`roberta`::::
|
||||
(Optional, object)
|
||||
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
|
||||
|
|
|
@ -1,12 +1,12 @@
[[re-ranking-overview]]
= Re-ranking

Many search systems are built on two-stage retrieval pipelines.
Many search systems are built on multi-stage retrieval pipelines.

The first stage uses cheap, fast algorithms to find a broad set of possible matches.
Earlier stages use cheap, fast algorithms to find a broad set of possible matches.

The second stage uses a more powerful model, often machine learning-based, to reorder the documents.
This second step is called re-ranking.
Later stages use more powerful models, often machine learning-based, to reorder the documents.
This step is called re-ranking.
Because the resource-intensive model is only applied to the smaller set of pre-filtered results, this approach returns more relevant results while still optimizing for search performance and computational costs.

{es} supports various ranking and re-ranking techniques to optimize search relevance and performance.

@ -18,7 +18,7 @@ Because the resource-intensive model is only applied to the smaller set of pre-f

[float]
[[re-ranking-first-stage-pipeline]]
=== First stage: initial retrieval
=== Initial retrieval

[float]
[[re-ranking-ranking-overview-bm25]]

@ -45,7 +45,7 @@ Hybrid search techniques combine results from full-text and vector search pipeli

[float]
[[re-ranking-overview-second-stage]]
=== Second stage: Re-ranking
=== Re-ranking

When using the following advanced re-ranking pipelines, first-stage retrieval mechanisms effectively generate a set of candidates.
These candidates are funneled into the re-ranker to perform more computationally expensive re-ranking tasks.

@ -67,4 +67,4 @@ Learning To Rank involves training a machine learning model to build a ranking f
LTR is best suited for when you have ample training data and need highly customized relevance tuning.

include::semantic-reranking.asciidoc[]
include::learning-to-rank.asciidoc[]
include::learning-to-rank.asciidoc[]

@ -1298,10 +1298,11 @@ tag::wait_for_active_shards[]
`wait_for_active_shards`::
+
--
(Optional, string) The number of shard copies that must be active before
proceeding with the operation. Set to `all` or any positive integer up
to the total number of shards in the index (`number_of_replicas+1`).
Default: 1, the primary shard.
(Optional, string) The number of copies of each shard that must be active
before proceeding with the operation. Set to `all` or any non-negative integer
up to the total number of copies of each shard in the index
(`number_of_replicas+1`). Defaults to `1`, meaning to wait just for each
primary shard to be active.

See <<index-wait-for-active-shards>>.
--
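
As a sketch of the revised wording in practice (the index name and the value `2` are placeholders), a write request can ask for two active copies of each shard before it proceeds:

[source,console]
----
PUT /my-index-000001/_doc/1?wait_for_active_shards=2
{
  "message": "hello"
}
----
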
@ -239,8 +239,7 @@ GET /_xpack/usage
      "keep" : 0,
      "enrich" : 0,
      "from" : 0,
      "row" : 0,
      "meta" : 0
      "row" : 0
    },
    "queries" : {
      "rest" : {

@ -89,6 +89,16 @@ PUT semantic-embeddings
It will be used to generate the embeddings based on the input text.
Every time you ingest data into the related `semantic_text` field, this endpoint will be used for creating the vector representation of the text.

[NOTE]
====
If you're using web crawlers or connectors to generate indices, you have to
<<indices-put-mapping,update the index mappings>> for these indices to
include the `semantic_text` field. Once the mapping is updated, you'll need to run
a full web crawl or a full connector sync. This ensures that all existing
documents are reprocessed and updated with the new semantic embeddings,
enabling semantic search on the updated data.
====

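A mapping update of that kind might look like the following sketch; the index name, field name, and `my-elser-endpoint` {infer} endpoint are placeholders for your own values:

[source,console]
----
PUT my-crawled-index/_mapping
{
  "properties": {
    "body": {
      "type": "semantic_text",
      "inference_id": "my-elser-endpoint"
    }
  }
}
----
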
[discrete]
[[semantic-text-load-data]]

@ -118,6 +128,13 @@ Create the embeddings from the text by reindexing the data from the `test-data`
The data in the `content` field will be reindexed into the `content` semantic text field of the destination index.
The reindexed data will be processed by the {infer} endpoint associated with the `content` semantic text field.

[NOTE]
====
This step uses the reindex API to simulate data ingestion. If you are working with data that has already been indexed,
rather than using the test-data set, reindexing is required to ensure that the data is processed by the {infer} endpoint
and the necessary embeddings are generated.
====

[source,console]
------------------------------------------------------------
POST _reindex?wait_for_completion=false

@ -2,12 +2,12 @@
|
|||
=== Red or yellow cluster health status
|
||||
|
||||
A red or yellow cluster health status indicates one or more shards are not assigned to
|
||||
a node.
|
||||
a node.
|
||||
|
||||
* **Red health status**: The cluster has some unassigned primary shards, which
|
||||
means that some operations such as searches and indexing may fail.
|
||||
* **Yellow health status**: The cluster has no unassigned primary shards but some
|
||||
unassigned replica shards. This increases your risk of data loss and can degrade
|
||||
means that some operations such as searches and indexing may fail.
|
||||
* **Yellow health status**: The cluster has no unassigned primary shards but some
|
||||
unassigned replica shards. This increases your risk of data loss and can degrade
|
||||
cluster performance.
|
||||
|
||||
When your cluster has a red or yellow health status, it will continue to process
|
||||
|
@ -16,8 +16,8 @@ cleanup activities until the cluster returns to green health status. For instanc
|
|||
some <<index-lifecycle-management,{ilm-init}>> actions require the index on which they
|
||||
operate to have a green health status.
|
||||
|
||||
In many cases, your cluster will recover to green health status automatically.
|
||||
If the cluster doesn't automatically recover, then you must <<fix-red-yellow-cluster-status,manually address>>
|
||||
In many cases, your cluster will recover to green health status automatically.
|
||||
If the cluster doesn't automatically recover, then you must <<fix-red-yellow-cluster-status,manually address>>
|
||||
the remaining problems so management and cleanup activities can proceed.
|
||||
|
||||
[discrete]
|
||||
|
@ -107,7 +107,7 @@ asynchronously in the background.
|
|||
|
||||
[source,console]
|
||||
----
|
||||
POST _cluster/reroute?metric=none
|
||||
POST _cluster/reroute
|
||||
----
|
||||
|
||||
[discrete]
|
||||
|
@ -231,10 +231,10 @@ unassigned. See <<high-jvm-memory-pressure>>.
|
|||
|
||||
If a node containing a primary shard is lost, {es} can typically replace it
|
||||
using a replica on another node. If you can't recover the node and replicas
|
||||
don't exist or are irrecoverable, <<cluster-allocation-explain,Allocation
|
||||
Explain>> will report `no_valid_shard_copy` and you'll need to do one of the following:
|
||||
don't exist or are irrecoverable, <<cluster-allocation-explain,Allocation
|
||||
Explain>> will report `no_valid_shard_copy` and you'll need to do one of the following:
|
||||
|
||||
* restore the missing data from <<snapshot-restore,snapshot>>
|
||||
* restore the missing data from <<snapshot-restore,snapshot>>
|
||||
* index the missing data from its original data source
|
||||
* accept data loss on the index-level by running <<indices-delete-index,Delete Index>>
|
||||
* accept data loss on the shard-level by executing <<cluster-reroute,Cluster Reroute>> allocate_stale_primary or allocate_empty_primary command with `accept_data_loss: true`
|
||||
|
@ -246,7 +246,7 @@ resulting in data loss.
|
|||
+
|
||||
[source,console]
|
||||
----
|
||||
POST _cluster/reroute?metric=none
|
||||
POST _cluster/reroute
|
||||
{
|
||||
"commands": [
|
||||
{
|
||||
|
|
|
@ -15,7 +15,11 @@ module org.elasticsearch.base {
|
|||
|
||||
exports org.elasticsearch.core;
|
||||
exports org.elasticsearch.jdk;
|
||||
exports org.elasticsearch.core.internal.provider to org.elasticsearch.xcontent, org.elasticsearch.nativeaccess;
|
||||
exports org.elasticsearch.core.internal.provider
|
||||
to
|
||||
org.elasticsearch.xcontent,
|
||||
org.elasticsearch.nativeaccess,
|
||||
org.elasticsearch.entitlement.agent;
|
||||
|
||||
uses ModuleQualifiedExportsService;
|
||||
}
|
||||
|
|
|
@ -53,7 +53,7 @@ public class ScriptServiceBridge extends StableBridgeAPI.Proxy<ScriptService> im
|
|||
PainlessScriptEngine.NAME,
|
||||
new PainlessScriptEngine(settings, scriptContexts),
|
||||
MustacheScriptEngine.NAME,
|
||||
new MustacheScriptEngine()
|
||||
new MustacheScriptEngine(settings)
|
||||
);
|
||||
return new ScriptService(settings, scriptEngines, ScriptModule.CORE_CONTEXTS, timeProvider);
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
import org.elasticsearch.gradle.internal.info.BuildParams
|
||||
import org.elasticsearch.gradle.internal.precommit.CheckForbiddenApisTask
|
||||
|
||||
apply plugin: 'elasticsearch.publish'
|
||||
|
@ -23,6 +24,20 @@ dependencies {
|
|||
}
|
||||
}
|
||||
|
||||
// compileMain21Java does not exist within idea (see MrJarPlugin) so we cannot reference directly by name
|
||||
tasks.matching { it.name == "compileMain21Java" }.configureEach {
|
||||
options.compilerArgs << '--add-modules=jdk.incubator.vector'
|
||||
// we remove Werror, since incubating suppression (-Xlint:-incubating)
|
||||
// is only support since JDK 22
|
||||
options.compilerArgs -= '-Werror'
|
||||
}
|
||||
|
||||
tasks.named('test').configure {
|
||||
if (BuildParams.getRuntimeJavaVersion().majorVersion.toInteger() >= 21) {
|
||||
jvmArgs '--add-modules=jdk.incubator.vector'
|
||||
}
|
||||
}
|
||||
|
||||
tasks.withType(CheckForbiddenApisTask).configureEach {
|
||||
replaceSignatureFiles 'jdk-signatures'
|
||||
}
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
module org.elasticsearch.simdvec {
|
||||
requires org.elasticsearch.nativeaccess;
|
||||
requires org.apache.lucene.core;
|
||||
requires org.elasticsearch.logging;
|
||||
|
||||
exports org.elasticsearch.simdvec to org.elasticsearch.server;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,27 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

package org.elasticsearch.simdvec;

import org.elasticsearch.simdvec.internal.vectorization.ESVectorUtilSupport;
import org.elasticsearch.simdvec.internal.vectorization.ESVectorizationProvider;

import static org.elasticsearch.simdvec.internal.vectorization.ESVectorUtilSupport.B_QUERY;

public class ESVectorUtil {

    private static final ESVectorUtilSupport IMPL = ESVectorizationProvider.getInstance().getVectorUtilSupport();

    public static long ipByteBinByte(byte[] q, byte[] d) {
        if (q.length != d.length * B_QUERY) {
            throw new IllegalArgumentException("vector dimensions incompatible: " + q.length + "!= " + B_QUERY + " x " + d.length);
        }
        return IMPL.ipByteBinByte(q, d);
    }
}

@ -0,0 +1,39 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
package org.elasticsearch.simdvec.internal.vectorization;
|
||||
|
||||
import org.apache.lucene.util.BitUtil;
|
||||
|
||||
final class DefaultESVectorUtilSupport implements ESVectorUtilSupport {
|
||||
|
||||
DefaultESVectorUtilSupport() {}
|
||||
|
||||
@Override
|
||||
public long ipByteBinByte(byte[] q, byte[] d) {
|
||||
return ipByteBinByteImpl(q, d);
|
||||
}
|
||||
|
||||
public static long ipByteBinByteImpl(byte[] q, byte[] d) {
|
||||
long ret = 0;
|
||||
int size = d.length;
|
||||
for (int i = 0; i < B_QUERY; i++) {
|
||||
int r = 0;
|
||||
long subRet = 0;
|
||||
for (final int upperBound = d.length & -Integer.BYTES; r < upperBound; r += Integer.BYTES) {
|
||||
subRet += Integer.bitCount((int) BitUtil.VH_NATIVE_INT.get(q, i * size + r) & (int) BitUtil.VH_NATIVE_INT.get(d, r));
|
||||
}
|
||||
for (; r < d.length; r++) {
|
||||
subRet += Integer.bitCount((q[i * size + r] & d[r]) & 0xFF);
|
||||
}
|
||||
ret += subRet << i;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
package org.elasticsearch.simdvec.internal.vectorization;
|
||||
|
||||
final class DefaultESVectorizationProvider extends ESVectorizationProvider {
|
||||
private final ESVectorUtilSupport vectorUtilSupport;
|
||||
|
||||
DefaultESVectorizationProvider() {
|
||||
vectorUtilSupport = new DefaultESVectorUtilSupport();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ESVectorUtilSupport getVectorUtilSupport() {
|
||||
return vectorUtilSupport;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
package org.elasticsearch.simdvec.internal.vectorization;
|
||||
|
||||
public interface ESVectorUtilSupport {
|
||||
|
||||
short B_QUERY = 4;
|
||||
|
||||
long ipByteBinByte(byte[] q, byte[] d);
|
||||
}
|
|
@ -0,0 +1,38 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
package org.elasticsearch.simdvec.internal.vectorization;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
public abstract class ESVectorizationProvider {
|
||||
|
||||
public static ESVectorizationProvider getInstance() {
|
||||
return Objects.requireNonNull(
|
||||
ESVectorizationProvider.Holder.INSTANCE,
|
||||
"call to getInstance() from subclass of VectorizationProvider"
|
||||
);
|
||||
}
|
||||
|
||||
ESVectorizationProvider() {}
|
||||
|
||||
public abstract ESVectorUtilSupport getVectorUtilSupport();
|
||||
|
||||
// visible for tests
|
||||
static ESVectorizationProvider lookup(boolean testMode) {
|
||||
return new DefaultESVectorizationProvider();
|
||||
}
|
||||
|
||||
/** This static holder class prevents classloading deadlock. */
|
||||
private static final class Holder {
|
||||
private Holder() {}
|
||||
|
||||
static final ESVectorizationProvider INSTANCE = lookup(false);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,87 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
package org.elasticsearch.simdvec.internal.vectorization;
|
||||
|
||||
import org.apache.lucene.util.Constants;
|
||||
import org.elasticsearch.logging.LogManager;
|
||||
import org.elasticsearch.logging.Logger;
|
||||
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
|
||||
public abstract class ESVectorizationProvider {
|
||||
|
||||
protected static final Logger logger = LogManager.getLogger(ESVectorizationProvider.class);
|
||||
|
||||
public static ESVectorizationProvider getInstance() {
|
||||
return Objects.requireNonNull(
|
||||
ESVectorizationProvider.Holder.INSTANCE,
|
||||
"call to getInstance() from subclass of VectorizationProvider"
|
||||
);
|
||||
}
|
||||
|
||||
ESVectorizationProvider() {}
|
||||
|
||||
public abstract ESVectorUtilSupport getVectorUtilSupport();
|
||||
|
||||
// visible for tests
|
||||
static ESVectorizationProvider lookup(boolean testMode) {
|
||||
final int runtimeVersion = Runtime.version().feature();
|
||||
assert runtimeVersion >= 21;
|
||||
if (runtimeVersion <= 23) {
|
||||
// only use vector module with Hotspot VM
|
||||
if (Constants.IS_HOTSPOT_VM == false) {
|
||||
logger.warn("Java runtime is not using Hotspot VM; Java vector incubator API can't be enabled.");
|
||||
return new DefaultESVectorizationProvider();
|
||||
}
|
||||
// is the incubator module present and readable (JVM providers may to exclude them or it is
|
||||
// build with jlink)
|
||||
final var vectorMod = lookupVectorModule();
|
||||
if (vectorMod.isEmpty()) {
|
||||
logger.warn(
|
||||
"Java vector incubator module is not readable. "
|
||||
+ "For optimal vector performance, pass '--add-modules jdk.incubator.vector' to enable Vector API."
|
||||
);
|
||||
return new DefaultESVectorizationProvider();
|
||||
}
|
||||
vectorMod.ifPresent(ESVectorizationProvider.class.getModule()::addReads);
|
||||
var impl = new PanamaESVectorizationProvider();
|
||||
logger.info(
|
||||
String.format(
|
||||
Locale.ENGLISH,
|
||||
"Java vector incubator API enabled; uses preferredBitSize=%d",
|
||||
PanamaESVectorUtilSupport.VECTOR_BITSIZE
|
||||
)
|
||||
);
|
||||
return impl;
|
||||
} else {
|
||||
logger.warn(
|
||||
"You are running with unsupported Java "
|
||||
+ runtimeVersion
|
||||
+ ". To make full use of the Vector API, please update Elasticsearch."
|
||||
);
|
||||
}
|
||||
return new DefaultESVectorizationProvider();
|
||||
}
|
||||
|
||||
private static Optional<Module> lookupVectorModule() {
|
||||
return Optional.ofNullable(ESVectorizationProvider.class.getModule().getLayer())
|
||||
.orElse(ModuleLayer.boot())
|
||||
.findModule("jdk.incubator.vector");
|
||||
}
|
||||
|
||||
/** This static holder class prevents classloading deadlock. */
|
||||
private static final class Holder {
|
||||
private Holder() {}
|
||||
|
||||
static final ESVectorizationProvider INSTANCE = lookup(false);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,153 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
package org.elasticsearch.simdvec.internal.vectorization;
|
||||
|
||||
import jdk.incubator.vector.ByteVector;
|
||||
import jdk.incubator.vector.IntVector;
|
||||
import jdk.incubator.vector.LongVector;
|
||||
import jdk.incubator.vector.VectorOperators;
|
||||
import jdk.incubator.vector.VectorShape;
|
||||
import jdk.incubator.vector.VectorSpecies;
|
||||
|
||||
import org.apache.lucene.util.Constants;
|
||||
|
||||
public final class PanamaESVectorUtilSupport implements ESVectorUtilSupport {
|
||||
|
||||
static final int VECTOR_BITSIZE;
|
||||
|
||||
/** Whether integer vectors can be trusted to actually be fast. */
|
||||
static final boolean HAS_FAST_INTEGER_VECTORS;
|
||||
|
||||
static {
|
||||
// default to platform supported bitsize
|
||||
VECTOR_BITSIZE = VectorShape.preferredShape().vectorBitSize();
|
||||
|
||||
// hotspot misses some SSE intrinsics, workaround it
|
||||
// to be fair, they do document this thing only works well with AVX2/AVX3 and Neon
|
||||
boolean isAMD64withoutAVX2 = Constants.OS_ARCH.equals("amd64") && VECTOR_BITSIZE < 256;
|
||||
HAS_FAST_INTEGER_VECTORS = isAMD64withoutAVX2 == false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ipByteBinByte(byte[] q, byte[] d) {
|
||||
// 128 / 8 == 16
|
||||
if (d.length >= 16 && HAS_FAST_INTEGER_VECTORS) {
|
||||
if (VECTOR_BITSIZE >= 256) {
|
||||
return ipByteBin256(q, d);
|
||||
} else if (VECTOR_BITSIZE == 128) {
|
||||
return ipByteBin128(q, d);
|
||||
}
|
||||
}
|
||||
return DefaultESVectorUtilSupport.ipByteBinByteImpl(q, d);
|
||||
}
|
||||
|
||||
private static final VectorSpecies<Byte> BYTE_SPECIES_128 = ByteVector.SPECIES_128;
|
||||
private static final VectorSpecies<Byte> BYTE_SPECIES_256 = ByteVector.SPECIES_256;
|
||||
|
||||
static long ipByteBin256(byte[] q, byte[] d) {
|
||||
long subRet0 = 0;
|
||||
long subRet1 = 0;
|
||||
long subRet2 = 0;
|
||||
long subRet3 = 0;
|
||||
int i = 0;
|
||||
|
||||
if (d.length >= ByteVector.SPECIES_256.vectorByteSize() * 2) {
|
||||
int limit = ByteVector.SPECIES_256.loopBound(d.length);
|
||||
var sum0 = LongVector.zero(LongVector.SPECIES_256);
|
||||
var sum1 = LongVector.zero(LongVector.SPECIES_256);
|
||||
var sum2 = LongVector.zero(LongVector.SPECIES_256);
|
||||
var sum3 = LongVector.zero(LongVector.SPECIES_256);
|
||||
for (; i < limit; i += ByteVector.SPECIES_256.length()) {
|
||||
var vq0 = ByteVector.fromArray(BYTE_SPECIES_256, q, i).reinterpretAsLongs();
|
||||
var vq1 = ByteVector.fromArray(BYTE_SPECIES_256, q, i + d.length).reinterpretAsLongs();
|
||||
var vq2 = ByteVector.fromArray(BYTE_SPECIES_256, q, i + d.length * 2).reinterpretAsLongs();
|
||||
var vq3 = ByteVector.fromArray(BYTE_SPECIES_256, q, i + d.length * 3).reinterpretAsLongs();
|
||||
var vd = ByteVector.fromArray(BYTE_SPECIES_256, d, i).reinterpretAsLongs();
|
||||
sum0 = sum0.add(vq0.and(vd).lanewise(VectorOperators.BIT_COUNT));
|
||||
sum1 = sum1.add(vq1.and(vd).lanewise(VectorOperators.BIT_COUNT));
|
||||
sum2 = sum2.add(vq2.and(vd).lanewise(VectorOperators.BIT_COUNT));
|
||||
sum3 = sum3.add(vq3.and(vd).lanewise(VectorOperators.BIT_COUNT));
|
||||
}
|
||||
subRet0 += sum0.reduceLanes(VectorOperators.ADD);
|
||||
subRet1 += sum1.reduceLanes(VectorOperators.ADD);
|
||||
subRet2 += sum2.reduceLanes(VectorOperators.ADD);
|
||||
subRet3 += sum3.reduceLanes(VectorOperators.ADD);
|
||||
}
|
||||
|
||||
if (d.length - i >= ByteVector.SPECIES_128.vectorByteSize()) {
|
||||
var sum0 = LongVector.zero(LongVector.SPECIES_128);
|
||||
var sum1 = LongVector.zero(LongVector.SPECIES_128);
|
||||
var sum2 = LongVector.zero(LongVector.SPECIES_128);
|
||||
var sum3 = LongVector.zero(LongVector.SPECIES_128);
|
||||
int limit = ByteVector.SPECIES_128.loopBound(d.length);
|
||||
for (; i < limit; i += ByteVector.SPECIES_128.length()) {
|
||||
var vq0 = ByteVector.fromArray(BYTE_SPECIES_128, q, i).reinterpretAsLongs();
|
||||
var vq1 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + d.length).reinterpretAsLongs();
|
||||
var vq2 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + d.length * 2).reinterpretAsLongs();
|
||||
var vq3 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + d.length * 3).reinterpretAsLongs();
|
||||
var vd = ByteVector.fromArray(BYTE_SPECIES_128, d, i).reinterpretAsLongs();
|
||||
sum0 = sum0.add(vq0.and(vd).lanewise(VectorOperators.BIT_COUNT));
|
||||
sum1 = sum1.add(vq1.and(vd).lanewise(VectorOperators.BIT_COUNT));
|
||||
sum2 = sum2.add(vq2.and(vd).lanewise(VectorOperators.BIT_COUNT));
|
||||
sum3 = sum3.add(vq3.and(vd).lanewise(VectorOperators.BIT_COUNT));
|
||||
}
|
||||
subRet0 += sum0.reduceLanes(VectorOperators.ADD);
|
||||
subRet1 += sum1.reduceLanes(VectorOperators.ADD);
|
||||
subRet2 += sum2.reduceLanes(VectorOperators.ADD);
|
||||
subRet3 += sum3.reduceLanes(VectorOperators.ADD);
|
||||
}
|
||||
// tail as bytes
|
||||
for (; i < d.length; i++) {
|
||||
subRet0 += Integer.bitCount((q[i] & d[i]) & 0xFF);
|
||||
subRet1 += Integer.bitCount((q[i + d.length] & d[i]) & 0xFF);
|
||||
subRet2 += Integer.bitCount((q[i + 2 * d.length] & d[i]) & 0xFF);
|
||||
subRet3 += Integer.bitCount((q[i + 3 * d.length] & d[i]) & 0xFF);
|
||||
}
|
||||
return subRet0 + (subRet1 << 1) + (subRet2 << 2) + (subRet3 << 3);
|
||||
}
|
||||
|
||||
public static long ipByteBin128(byte[] q, byte[] d) {
|
||||
long subRet0 = 0;
|
||||
long subRet1 = 0;
|
||||
long subRet2 = 0;
|
||||
long subRet3 = 0;
|
||||
int i = 0;
|
||||
|
||||
var sum0 = IntVector.zero(IntVector.SPECIES_128);
|
||||
var sum1 = IntVector.zero(IntVector.SPECIES_128);
|
||||
var sum2 = IntVector.zero(IntVector.SPECIES_128);
|
||||
var sum3 = IntVector.zero(IntVector.SPECIES_128);
|
||||
int limit = ByteVector.SPECIES_128.loopBound(d.length);
|
||||
for (; i < limit; i += ByteVector.SPECIES_128.length()) {
|
||||
var vd = ByteVector.fromArray(BYTE_SPECIES_128, d, i).reinterpretAsInts();
|
||||
var vq0 = ByteVector.fromArray(BYTE_SPECIES_128, q, i).reinterpretAsInts();
|
||||
var vq1 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + d.length).reinterpretAsInts();
|
||||
var vq2 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + d.length * 2).reinterpretAsInts();
|
||||
var vq3 = ByteVector.fromArray(BYTE_SPECIES_128, q, i + d.length * 3).reinterpretAsInts();
|
||||
sum0 = sum0.add(vd.and(vq0).lanewise(VectorOperators.BIT_COUNT));
|
||||
sum1 = sum1.add(vd.and(vq1).lanewise(VectorOperators.BIT_COUNT));
|
||||
sum2 = sum2.add(vd.and(vq2).lanewise(VectorOperators.BIT_COUNT));
|
||||
sum3 = sum3.add(vd.and(vq3).lanewise(VectorOperators.BIT_COUNT));
|
||||
}
|
||||
subRet0 += sum0.reduceLanes(VectorOperators.ADD);
|
||||
subRet1 += sum1.reduceLanes(VectorOperators.ADD);
|
||||
subRet2 += sum2.reduceLanes(VectorOperators.ADD);
|
||||
subRet3 += sum3.reduceLanes(VectorOperators.ADD);
|
||||
// tail as bytes
|
||||
for (; i < d.length; i++) {
|
||||
int dValue = d[i];
|
||||
subRet0 += Integer.bitCount((dValue & q[i]) & 0xFF);
|
||||
subRet1 += Integer.bitCount((dValue & q[i + d.length]) & 0xFF);
|
||||
subRet2 += Integer.bitCount((dValue & q[i + 2 * d.length]) & 0xFF);
|
||||
subRet3 += Integer.bitCount((dValue & q[i + 3 * d.length]) & 0xFF);
|
||||
}
|
||||
return subRet0 + (subRet1 << 1) + (subRet2 << 2) + (subRet3 << 3);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
package org.elasticsearch.simdvec.internal.vectorization;
|
||||
|
||||
final class PanamaESVectorizationProvider extends ESVectorizationProvider {
|
||||
|
||||
private final ESVectorUtilSupport vectorUtilSupport;
|
||||
|
||||
PanamaESVectorizationProvider() {
|
||||
vectorUtilSupport = new PanamaESVectorUtilSupport();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ESVectorUtilSupport getVectorUtilSupport() {
|
||||
return vectorUtilSupport;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,130 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
package org.elasticsearch.simdvec;
|
||||
|
||||
import org.elasticsearch.simdvec.internal.vectorization.BaseVectorizationTests;
|
||||
import org.elasticsearch.simdvec.internal.vectorization.ESVectorizationProvider;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import static org.elasticsearch.simdvec.internal.vectorization.ESVectorUtilSupport.B_QUERY;
|
||||
|
||||
public class ESVectorUtilTests extends BaseVectorizationTests {
|
||||
|
||||
static final ESVectorizationProvider defaultedProvider = BaseVectorizationTests.defaultProvider();
|
||||
static final ESVectorizationProvider defOrPanamaProvider = BaseVectorizationTests.maybePanamaProvider();
|
||||
|
||||
public void testIpByteBinInvariants() {
|
||||
int iterations = atLeast(10);
|
||||
for (int i = 0; i < iterations; i++) {
|
||||
int size = randomIntBetween(1, 10);
|
||||
var d = new byte[size];
|
||||
var q = new byte[size * B_QUERY - 1];
|
||||
expectThrows(IllegalArgumentException.class, () -> ESVectorUtil.ipByteBinByte(q, d));
|
||||
}
|
||||
}
|
||||
|
||||
public void testBasicIpByteBin() {
|
||||
testBasicIpByteBinImpl(ESVectorUtil::ipByteBinByte);
|
||||
testBasicIpByteBinImpl(defaultedProvider.getVectorUtilSupport()::ipByteBinByte);
|
||||
testBasicIpByteBinImpl(defOrPanamaProvider.getVectorUtilSupport()::ipByteBinByte);
|
||||
}
|
||||
|
||||
interface IpByteBin {
|
||||
long apply(byte[] q, byte[] d);
|
||||
}
|
||||
|
||||
void testBasicIpByteBinImpl(IpByteBin ipByteBinFunc) {
|
||||
assertEquals(15L, ipByteBinFunc.apply(new byte[] { 1, 1, 1, 1 }, new byte[] { 1 }));
|
||||
assertEquals(30L, ipByteBinFunc.apply(new byte[] { 1, 2, 1, 2, 1, 2, 1, 2 }, new byte[] { 1, 2 }));
|
||||
|
||||
var d = new byte[] { 1, 2, 3 };
|
||||
var q = new byte[] { 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3 };
|
||||
assert scalarIpByteBin(q, d) == 60L; // 4 + 8 + 16 + 32
|
||||
assertEquals(60L, ipByteBinFunc.apply(q, d));
|
||||
|
||||
d = new byte[] { 1, 2, 3, 4 };
|
||||
q = new byte[] { 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4 };
|
||||
assert scalarIpByteBin(q, d) == 75L; // 5 + 10 + 20 + 40
|
||||
assertEquals(75L, ipByteBinFunc.apply(q, d));
|
||||
|
||||
d = new byte[] { 1, 2, 3, 4, 5 };
|
||||
q = new byte[] { 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5 };
|
||||
assert scalarIpByteBin(q, d) == 105L; // 7 + 14 + 28 + 56
|
||||
assertEquals(105L, ipByteBinFunc.apply(q, d));
|
||||
|
||||
d = new byte[] { 1, 2, 3, 4, 5, 6 };
|
||||
q = new byte[] { 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6 };
|
||||
assert scalarIpByteBin(q, d) == 135L; // 9 + 18 + 36 + 72
|
||||
assertEquals(135L, ipByteBinFunc.apply(q, d));
|
||||
|
||||
d = new byte[] { 1, 2, 3, 4, 5, 6, 7 };
|
||||
q = new byte[] { 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7 };
|
||||
assert scalarIpByteBin(q, d) == 180L; // 12 + 24 + 48 + 96
|
||||
assertEquals(180L, ipByteBinFunc.apply(q, d));
|
||||
|
||||
d = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8 };
|
||||
q = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8 };
|
||||
assert scalarIpByteBin(q, d) == 195L; // 13 + 26 + 52 + 104
|
||||
assertEquals(195L, ipByteBinFunc.apply(q, d));
|
||||
|
||||
d = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
|
||||
q = new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
|
||||
assert scalarIpByteBin(q, d) == 225L; // 15 + 30 + 60 + 120
|
||||
assertEquals(225L, ipByteBinFunc.apply(q, d));
|
||||
}
|
||||
|
||||
public void testIpByteBin() {
|
||||
testIpByteBinImpl(ESVectorUtil::ipByteBinByte);
|
||||
testIpByteBinImpl(defaultedProvider.getVectorUtilSupport()::ipByteBinByte);
|
||||
testIpByteBinImpl(defOrPanamaProvider.getVectorUtilSupport()::ipByteBinByte);
|
||||
}
|
||||
|
||||
void testIpByteBinImpl(IpByteBin ipByteBinFunc) {
|
||||
int iterations = atLeast(50);
|
||||
for (int i = 0; i < iterations; i++) {
|
||||
int size = random().nextInt(5000);
|
||||
var d = new byte[size];
|
||||
var q = new byte[size * B_QUERY];
|
||||
random().nextBytes(d);
|
||||
random().nextBytes(q);
|
||||
assertEquals(scalarIpByteBin(q, d), ipByteBinFunc.apply(q, d));
|
||||
|
||||
Arrays.fill(d, Byte.MAX_VALUE);
|
||||
Arrays.fill(q, Byte.MAX_VALUE);
|
||||
assertEquals(scalarIpByteBin(q, d), ipByteBinFunc.apply(q, d));
|
||||
|
||||
Arrays.fill(d, Byte.MIN_VALUE);
|
||||
Arrays.fill(q, Byte.MIN_VALUE);
|
||||
assertEquals(scalarIpByteBin(q, d), ipByteBinFunc.apply(q, d));
|
||||
}
|
||||
}
|
||||
|
||||
static int scalarIpByteBin(byte[] q, byte[] d) {
|
||||
int res = 0;
|
||||
for (int i = 0; i < B_QUERY; i++) {
|
||||
res += (popcount(q, i * d.length, d, d.length) << i);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
public static int popcount(byte[] a, int aOffset, byte[] b, int length) {
|
||||
int res = 0;
|
||||
for (int j = 0; j < length; j++) {
|
||||
int value = (a[aOffset + j] & b[j]) & 0xFF;
|
||||
for (int k = 0; k < Byte.SIZE; k++) {
|
||||
if ((value & (1 << k)) != 0) {
|
||||
++res;
|
||||
}
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
package org.elasticsearch.simdvec.internal.vectorization;
|
||||
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
import org.junit.Before;
|
||||
|
||||
public class BaseVectorizationTests extends ESTestCase {
|
||||
|
||||
@Before
|
||||
public void sanity() {
|
||||
assert Runtime.version().feature() < 21 || ModuleLayer.boot().findModule("jdk.incubator.vector").isPresent();
|
||||
}
|
||||
|
||||
public static ESVectorizationProvider defaultProvider() {
|
||||
return new DefaultESVectorizationProvider();
|
||||
}
|
||||
|
||||
public static ESVectorizationProvider maybePanamaProvider() {
|
||||
return ESVectorizationProvider.lookup(true);
|
||||
}
|
||||
}
|
|
@ -3,6 +3,8 @@ setup:
|
|||
indices.create:
|
||||
index: test_date
|
||||
body:
|
||||
settings:
|
||||
number_of_shards: 1
|
||||
mappings:
|
||||
properties:
|
||||
date_field:
|
||||
|
|
|
@ -72,6 +72,7 @@ public class IngestCommonPlugin extends Plugin implements ActionPlugin, IngestPl
|
|||
entry(SetProcessor.TYPE, new SetProcessor.Factory(parameters.scriptService)),
|
||||
entry(SortProcessor.TYPE, new SortProcessor.Factory()),
|
||||
entry(SplitProcessor.TYPE, new SplitProcessor.Factory()),
|
||||
entry(TerminateProcessor.TYPE, new TerminateProcessor.Factory()),
|
||||
entry(TrimProcessor.TYPE, new TrimProcessor.Factory()),
|
||||
entry(URLDecodeProcessor.TYPE, new URLDecodeProcessor.Factory()),
|
||||
entry(UppercaseProcessor.TYPE, new UppercaseProcessor.Factory()),
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
/*
|
||||
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
|
||||
* or more contributor license agreements. Licensed under the "Elastic License
|
||||
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
|
||||
* Public License v 1"; you may not use this file except in compliance with, at
|
||||
* your election, the "Elastic License 2.0", the "GNU Affero General Public
|
||||
* License v3.0 only", or the "Server Side Public License, v 1".
|
||||
*/
|
||||
|
||||
package org.elasticsearch.ingest.common;
|
||||
|
||||
import org.elasticsearch.ingest.AbstractProcessor;
|
||||
import org.elasticsearch.ingest.IngestDocument;
|
||||
import org.elasticsearch.ingest.Processor;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* A {@link Processor} which simply prevents subsequent processors in the pipeline from running (without failing, like {@link FailProcessor}
|
||||
* does). This will normally be run conditionally, using the {@code if} option.
|
||||
*/
|
||||
public class TerminateProcessor extends AbstractProcessor {
|
||||
|
||||
static final String TYPE = "terminate";
|
||||
|
||||
TerminateProcessor(String tag, String description) {
|
||||
super(tag, description);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IngestDocument execute(IngestDocument ingestDocument) {
|
||||
ingestDocument.terminate();
|
||||
return ingestDocument;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getType() {
|
||||
return TYPE;
|
||||
}
|
||||
|
||||
public static final class Factory implements Processor.Factory {
|
||||
|
||||
@Override
|
||||
public Processor create(
|
||||
Map<String, Processor.Factory> processorFactories,
|
||||
String tag,
|
||||
String description,
|
||||
Map<String, Object> config
|
||||
) {
|
||||
return new TerminateProcessor(tag, description);
|
||||
}
|
||||
}
|
||||
}
|
|
@@ -0,0 +1,70 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

package org.elasticsearch.ingest.common;

import org.elasticsearch.ingest.CompoundProcessor;
import org.elasticsearch.ingest.IngestDocument;
import org.elasticsearch.ingest.Pipeline;
import org.elasticsearch.ingest.TestTemplateService;
import org.elasticsearch.ingest.ValueSource;
import org.elasticsearch.test.ESTestCase;

import java.util.Map;

import static org.elasticsearch.ingest.RandomDocumentPicks.randomIngestDocument;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.nullValue;

public class TerminateProcessorTests extends ESTestCase {

    public void testTerminateInPipeline() throws Exception {
        Pipeline pipeline = new Pipeline(
            "my-pipeline",
            null,
            null,
            null,
            new CompoundProcessor(
                new SetProcessor(
                    "before-set",
                    "Sets before field to true",
                    new TestTemplateService.MockTemplateScript.Factory("before"),
                    ValueSource.wrap(true, TestTemplateService.instance()),
                    null
                ),
                new TerminateProcessor("terminate", "terminates the pipeline"),
                new SetProcessor(
                    "after-set",
                    "Sets after field to true",
                    new TestTemplateService.MockTemplateScript.Factory("after"),
                    ValueSource.wrap(true, TestTemplateService.instance()),
                    null
                )
            )
        );
        IngestDocument input = randomIngestDocument(random(), Map.of("foo", "bar"));
        PipelineOutput output = new PipelineOutput();

        pipeline.execute(input, output::set);

        assertThat(output.exception, nullValue());
        // We expect the before-set processor to have run, but not the after-set one:
        assertThat(output.document.getSource(), is(Map.of("foo", "bar", "before", true)));
    }

    private static class PipelineOutput {
        IngestDocument document;
        Exception exception;

        void set(IngestDocument document, Exception exception) {
            this.document = document;
            this.exception = exception;
        }
    }
}
@@ -0,0 +1,138 @@
---
setup:
  - do:
      ingest.put_pipeline:
        id: "test-pipeline"
        body: >
          {
            "description": "Appends just 'before' to the steps field if the number field is less than 50, or both 'before' and 'after' if not",
            "processors": [
              {
                "append": {
                  "field": "steps",
                  "value": "before"
                }
              },
              {
                "terminate": {
                  "if": "ctx.number < 50"
                }
              },
              {
                "append": {
                  "field": "steps",
                  "value": "after"
                }
              }
            ]
          }
  - do:
      ingest.put_pipeline:
        id: "test-final-pipeline"
        body: >
          {
            "description": "Appends 'final' to the steps field",
            "processors": [
              {
                "append": {
                  "field": "steps",
                  "value": "final"
                }
              }
            ]
          }
  - do:
      ingest.put_pipeline:
        id: "test-outer-pipeline"
        body: >
          {
            "description": "Runs test-pipeline and then append 'outer' to the steps field",
            "processors": [
              {
                "pipeline": {
                  "name": "test-pipeline"
                }
              },
              {
                "append": {
                  "field": "steps",
                  "value": "outer"
                }
              }
            ]
          }
  - do:
      indices.create:
        index: "test-index-with-default-and-final-pipelines"
        body:
          settings:
            index:
              default_pipeline: "test-pipeline"
              final_pipeline: "test-final-pipeline"
  - do:
      indices.create:
        index: "test-vanilla-index"

---
teardown:
  - do:
      indices.delete:
        index: "test-index-with-default-and-final-pipelines"
        ignore_unavailable: true
  - do:
      indices.delete:
        index: "test-vanilla-index"
        ignore_unavailable: true
  - do:
      ingest.delete_pipeline:
        id: "test-pipeline"
        ignore: 404
  - do:
      ingest.delete_pipeline:
        id: "test-outer-pipeline"
        ignore: 404

---
"Test pipeline including conditional terminate pipeline":

  - do:
      bulk:
        refresh: true
        body:
          - '{ "index": {"_index": "test-index-with-default-and-final-pipelines" } }'
          - '{ "comment": "should terminate", "number": 40, "steps": [] }'
          - '{ "index": {"_index": "test-index-with-default-and-final-pipelines" } }'
          - '{ "comment": "should continue to end", "number": 60, "steps": [] }'

  - do:
      search:
        rest_total_hits_as_int: true
        index: "test-index-with-default-and-final-pipelines"
        body:
          sort: "number"
  - match: { hits.total: 2 }
  - match: { hits.hits.0._source.number: 40 }
  - match: { hits.hits.1._source.number: 60 }
  - match: { hits.hits.0._source.steps: ["before", "final"] }
  - match: { hits.hits.1._source.steps: ["before", "after", "final"] }

---
"Test pipeline with terminate invoked from an outer pipeline":

  - do:
      bulk:
        refresh: true
        pipeline: "test-outer-pipeline"
        body:
          - '{ "index": {"_index": "test-vanilla-index" } }'
          - '{ "comment": "should terminate inner pipeline but not outer", "number": 40, "steps": [] }'

  - do:
      search:
        rest_total_hits_as_int: true
        index: "test-vanilla-index"
        body:
          sort: "number"
  - match: { hits.total: 1 }
  - match: { hits.hits.0._source.number: 40 }
  - match: { hits.hits.0._source.steps: ["before", "outer"] }
@@ -22,12 +22,17 @@ import java.util.Set;
 * <p>
 * A database has a set of properties that are valid to use with it (see {@link Database#properties()}),
 * as well as a list of default properties to use if no properties are specified (see {@link Database#defaultProperties()}).
 * <p>
 * Some database providers have similar concepts but might have slightly different properties associated with those types.
 * This can be accommodated, for example, by having a Foo value and a separate FooV2 value where the 'V' should be read as
 * 'variant' or 'variation'. A V-less Database type is inherently the first variant/variation (i.e. V1).
 */
enum Database {

    City(
        Set.of(
            Property.IP,
            Property.COUNTRY_IN_EUROPEAN_UNION,
            Property.COUNTRY_ISO_CODE,
            Property.CONTINENT_CODE,
            Property.COUNTRY_NAME,

@@ -36,7 +41,9 @@ enum Database {
            Property.REGION_NAME,
            Property.CITY_NAME,
            Property.TIMEZONE,
            Property.LOCATION
            Property.LOCATION,
            Property.POSTAL_CODE,
            Property.ACCURACY_RADIUS
        ),
        Set.of(
            Property.COUNTRY_ISO_CODE,

@@ -49,7 +56,14 @@ enum Database {
        )
    ),
    Country(
        Set.of(Property.IP, Property.CONTINENT_CODE, Property.CONTINENT_NAME, Property.COUNTRY_NAME, Property.COUNTRY_ISO_CODE),
        Set.of(
            Property.IP,
            Property.CONTINENT_CODE,
            Property.CONTINENT_NAME,
            Property.COUNTRY_NAME,
            Property.COUNTRY_IN_EUROPEAN_UNION,
            Property.COUNTRY_ISO_CODE
        ),
        Set.of(Property.CONTINENT_NAME, Property.COUNTRY_NAME, Property.COUNTRY_ISO_CODE)
    ),
    Asn(

@@ -80,12 +94,15 @@ enum Database {
    Enterprise(
        Set.of(
            Property.IP,
            Property.COUNTRY_CONFIDENCE,
            Property.COUNTRY_IN_EUROPEAN_UNION,
            Property.COUNTRY_ISO_CODE,
            Property.COUNTRY_NAME,
            Property.CONTINENT_CODE,
            Property.CONTINENT_NAME,
            Property.REGION_ISO_CODE,
            Property.REGION_NAME,
            Property.CITY_CONFIDENCE,
            Property.CITY_NAME,
            Property.TIMEZONE,
            Property.LOCATION,

@@ -104,7 +121,10 @@ enum Database {
            Property.MOBILE_COUNTRY_CODE,
            Property.MOBILE_NETWORK_CODE,
            Property.USER_TYPE,
            Property.CONNECTION_TYPE
            Property.CONNECTION_TYPE,
            Property.POSTAL_CODE,
            Property.POSTAL_CONFIDENCE,
            Property.ACCURACY_RADIUS
        ),
        Set.of(
            Property.COUNTRY_ISO_CODE,

@@ -137,7 +157,31 @@ enum Database {
            Property.MOBILE_COUNTRY_CODE,
            Property.MOBILE_NETWORK_CODE
        )
    );
    ),
    AsnV2(
        Set.of(
            Property.IP,
            Property.ASN,
            Property.ORGANIZATION_NAME,
            Property.NETWORK,
            Property.DOMAIN,
            Property.COUNTRY_ISO_CODE,
            Property.TYPE
        ),
        Set.of(Property.IP, Property.ASN, Property.ORGANIZATION_NAME, Property.NETWORK)
    ),
    CityV2(
        Set.of(
            Property.IP,
            Property.COUNTRY_ISO_CODE,
            Property.REGION_NAME,
            Property.CITY_NAME,
            Property.TIMEZONE,
            Property.LOCATION,
            Property.POSTAL_CODE
        ),
        Set.of(Property.COUNTRY_ISO_CODE, Property.REGION_NAME, Property.CITY_NAME, Property.LOCATION)
    ),;

    private final Set<Property> properties;
    private final Set<Property> defaultProperties;

@@ -187,12 +231,15 @@ enum Database {
    enum Property {

        IP,
        COUNTRY_CONFIDENCE,
        COUNTRY_IN_EUROPEAN_UNION,
        COUNTRY_ISO_CODE,
        COUNTRY_NAME,
        CONTINENT_CODE,
        CONTINENT_NAME,
        REGION_ISO_CODE,
        REGION_NAME,
        CITY_CONFIDENCE,
        CITY_NAME,
        TIMEZONE,
        LOCATION,

@@ -211,7 +258,11 @@ enum Database {
        MOBILE_COUNTRY_CODE,
        MOBILE_NETWORK_CODE,
        CONNECTION_TYPE,
        USER_TYPE;
        USER_TYPE,
        TYPE,
        POSTAL_CODE,
        POSTAL_CONFIDENCE,
        ACCURACY_RADIUS;

        /**
         * Parses a string representation of a property into an actual Property instance. Not all properties that exist are
@@ -76,6 +76,7 @@ final class IpDataLookupFactories {
        return database;
    }

    @Nullable
    static Function<Set<Database.Property>, IpDataLookup> getMaxmindLookup(final Database database) {
        return switch (database) {
            case City -> MaxmindIpDataLookups.City::new;

@@ -86,6 +87,7 @@ final class IpDataLookupFactories {
            case Domain -> MaxmindIpDataLookups.Domain::new;
            case Enterprise -> MaxmindIpDataLookups.Enterprise::new;
            case Isp -> MaxmindIpDataLookups.Isp::new;
            default -> null;
        };
    }

@@ -97,7 +99,6 @@ final class IpDataLookupFactories {

        final Function<Set<Database.Property>, IpDataLookup> factoryMethod = getMaxmindLookup(database);

        // note: this can't presently be null, but keep this check -- it will be useful in the near future
        if (factoryMethod == null) {
            throw new IllegalArgumentException("Unsupported database type [" + databaseType + "] for file [" + databaseFile + "]");
        }
@@ -0,0 +1,338 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

package org.elasticsearch.ingest.geoip;

import com.maxmind.db.DatabaseRecord;
import com.maxmind.db.MaxMindDbConstructor;
import com.maxmind.db.MaxMindDbParameter;
import com.maxmind.db.Reader;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.network.InetAddresses;
import org.elasticsearch.common.network.NetworkAddress;
import org.elasticsearch.core.Nullable;

import java.io.IOException;
import java.net.InetAddress;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

/**
 * A collection of {@link IpDataLookup} implementations for IPinfo databases
 */
final class IpinfoIpDataLookups {

    private IpinfoIpDataLookups() {
        // utility class
    }

    private static final Logger logger = LogManager.getLogger(IpinfoIpDataLookups.class);

    /**
     * Lax-ly parses a string that (ideally) looks like 'AS123' into a Long like 123L (or null, if such parsing isn't possible).
     * @param asn a potentially empty (or null) ASN string that is expected to contain 'AS' and then a parsable long
     * @return the parsed asn
     */
    static Long parseAsn(final String asn) {
        if (asn == null || Strings.hasText(asn) == false) {
            return null;
        } else {
            String stripped = asn.toUpperCase(Locale.ROOT).replaceAll("AS", "").trim();
            try {
                return Long.parseLong(stripped);
            } catch (NumberFormatException e) {
                logger.trace("Unable to parse non-compliant ASN string [{}]", asn);
                return null;
            }
        }
    }
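To make the lax parsing concrete, a few spot checks that follow directly from the uppercase/strip/trim/parseLong sequence above (a sketch, not test code from this change):

    assert parseAsn("AS12345") == 12345L;  // canonical 'AS' prefix
    assert parseAsn("as200") == 200L;      // case-insensitive; every 'AS' occurrence is stripped
    assert parseAsn("   ") == null;        // blank input short-circuits to null
    assert parseAsn("ASfoo") == null;      // unparsable remainder yields null (and a trace log)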

    /**
     * Lax-ly parses a string that contains a double into a Double (or null, if such parsing isn't possible).
     * @param latlon a potentially empty (or null) string that is expected to contain a parsable double
     * @return the parsed double
     */
    static Double parseLocationDouble(final String latlon) {
        if (latlon == null || Strings.hasText(latlon) == false) {
            return null;
        } else {
            String stripped = latlon.trim();
            try {
                return Double.parseDouble(stripped);
            } catch (NumberFormatException e) {
                logger.trace("Unable to parse non-compliant location string [{}]", latlon);
                return null;
            }
        }
    }

    public record AsnResult(
        Long asn,
        @Nullable String country, // not present in the free asn database
        String domain,
        String name,
        @Nullable String type // not present in the free asn database
    ) {
        @SuppressWarnings("checkstyle:RedundantModifier")
        @MaxMindDbConstructor
        public AsnResult(
            @MaxMindDbParameter(name = "asn") String asn,
            @Nullable @MaxMindDbParameter(name = "country") String country,
            @MaxMindDbParameter(name = "domain") String domain,
            @MaxMindDbParameter(name = "name") String name,
            @Nullable @MaxMindDbParameter(name = "type") String type
        ) {
            this(parseAsn(asn), country, domain, name, type);
        }
    }

    public record CountryResult(
        @MaxMindDbParameter(name = "continent") String continent,
        @MaxMindDbParameter(name = "continent_name") String continentName,
        @MaxMindDbParameter(name = "country") String country,
        @MaxMindDbParameter(name = "country_name") String countryName
    ) {
        @MaxMindDbConstructor
        public CountryResult {}
    }

    public record GeolocationResult(
        String city,
        String country,
        Double latitude,
        Double longitude,
        String postalCode,
        String region,
        String timezone
    ) {
        @SuppressWarnings("checkstyle:RedundantModifier")
        @MaxMindDbConstructor
        public GeolocationResult(
            @MaxMindDbParameter(name = "city") String city,
            @MaxMindDbParameter(name = "country") String country,
            @MaxMindDbParameter(name = "latitude") String latitude,
            @MaxMindDbParameter(name = "longitude") String longitude,
            // @MaxMindDbParameter(name = "network") String network, // for now we're not exposing this
            @MaxMindDbParameter(name = "postal_code") String postalCode,
            @MaxMindDbParameter(name = "region") String region,
            @MaxMindDbParameter(name = "timezone") String timezone
        ) {
            this(city, country, parseLocationDouble(latitude), parseLocationDouble(longitude), postalCode, region, timezone);
        }
    }

    static class Asn extends AbstractBase<AsnResult> {
        Asn(Set<Database.Property> properties) {
            super(properties, AsnResult.class);
        }

        @Override
        protected Map<String, Object> transform(final Result<AsnResult> result) {
            AsnResult response = result.result;
            Long asn = response.asn;
            String organizationName = response.name;
            String network = result.network;

            Map<String, Object> data = new HashMap<>();
            for (Database.Property property : this.properties) {
                switch (property) {
                    case IP -> data.put("ip", result.ip);
                    case ASN -> {
                        if (asn != null) {
                            data.put("asn", asn);
                        }
                    }
                    case ORGANIZATION_NAME -> {
                        if (organizationName != null) {
                            data.put("organization_name", organizationName);
                        }
                    }
                    case NETWORK -> {
                        if (network != null) {
                            data.put("network", network);
                        }
                    }
                    case COUNTRY_ISO_CODE -> {
                        if (response.country != null) {
                            data.put("country_iso_code", response.country);
                        }
                    }
                    case DOMAIN -> {
                        if (response.domain != null) {
                            data.put("domain", response.domain);
                        }
                    }
                    case TYPE -> {
                        if (response.type != null) {
                            data.put("type", response.type);
                        }
                    }
                }
            }
            return data;
        }
    }

    static class Country extends AbstractBase<CountryResult> {
        Country(Set<Database.Property> properties) {
            super(properties, CountryResult.class);
        }

        @Override
        protected Map<String, Object> transform(final Result<CountryResult> result) {
            CountryResult response = result.result;

            Map<String, Object> data = new HashMap<>();
            for (Database.Property property : this.properties) {
                switch (property) {
                    case IP -> data.put("ip", result.ip);
                    case COUNTRY_ISO_CODE -> {
                        String countryIsoCode = response.country;
                        if (countryIsoCode != null) {
                            data.put("country_iso_code", countryIsoCode);
                        }
                    }
                    case COUNTRY_NAME -> {
                        String countryName = response.countryName;
                        if (countryName != null) {
                            data.put("country_name", countryName);
                        }
                    }
                    case CONTINENT_CODE -> {
                        String continentCode = response.continent;
                        if (continentCode != null) {
                            data.put("continent_code", continentCode);
                        }
                    }
                    case CONTINENT_NAME -> {
                        String continentName = response.continentName;
                        if (continentName != null) {
                            data.put("continent_name", continentName);
                        }
                    }
                }
            }
            return data;
        }
    }

    static class Geolocation extends AbstractBase<GeolocationResult> {
        Geolocation(final Set<Database.Property> properties) {
            super(properties, GeolocationResult.class);
        }

        @Override
        protected Map<String, Object> transform(final Result<GeolocationResult> result) {
            GeolocationResult response = result.result;

            Map<String, Object> data = new HashMap<>();
            for (Database.Property property : this.properties) {
                switch (property) {
                    case IP -> data.put("ip", result.ip);
                    case COUNTRY_ISO_CODE -> {
                        String countryIsoCode = response.country;
                        if (countryIsoCode != null) {
                            data.put("country_iso_code", countryIsoCode);
                        }
                    }
                    case REGION_NAME -> {
                        String subdivisionName = response.region;
                        if (subdivisionName != null) {
                            data.put("region_name", subdivisionName);
                        }
                    }
                    case CITY_NAME -> {
                        String cityName = response.city;
                        if (cityName != null) {
                            data.put("city_name", cityName);
                        }
                    }
                    case TIMEZONE -> {
                        String locationTimeZone = response.timezone;
                        if (locationTimeZone != null) {
                            data.put("timezone", locationTimeZone);
                        }
                    }
                    case POSTAL_CODE -> {
                        String postalCode = response.postalCode;
                        if (postalCode != null) {
                            data.put("postal_code", postalCode);
                        }
                    }
                    case LOCATION -> {
                        Double latitude = response.latitude;
                        Double longitude = response.longitude;
                        if (latitude != null && longitude != null) {
                            Map<String, Object> locationObject = new HashMap<>();
                            locationObject.put("lat", latitude);
                            locationObject.put("lon", longitude);
                            data.put("location", locationObject);
                        }
                    }
                }
            }
            return data;
        }
    }

    /**
     * Just a little record holder -- there's the data that we receive via the binding to our record objects from the Reader via the
     * getRecord call, but then we also need to capture the passed-in ip address that came from the caller as well as the network for
     * the returned DatabaseRecord from the Reader.
     */
    public record Result<T>(T result, String ip, String network) {}

    /**
     * The {@link IpinfoIpDataLookups.AbstractBase} is an abstract base implementation of {@link IpDataLookup} that
     * provides common functionality for getting a {@link IpinfoIpDataLookups.Result} that wraps a record from a {@link IpDatabase}.
     *
     * @param <RESPONSE> the record type that will be wrapped and returned
     */
    private abstract static class AbstractBase<RESPONSE> implements IpDataLookup {

        protected final Set<Database.Property> properties;
        protected final Class<RESPONSE> clazz;

        AbstractBase(final Set<Database.Property> properties, final Class<RESPONSE> clazz) {
            this.properties = Set.copyOf(properties);
            this.clazz = clazz;
        }

        @Override
        public Set<Database.Property> getProperties() {
            return this.properties;
        }

        @Override
        public final Map<String, Object> getData(final IpDatabase ipDatabase, final String ipAddress) {
            final Result<RESPONSE> response = ipDatabase.getResponse(ipAddress, this::lookup);
            return (response == null || response.result == null) ? Map.of() : transform(response);
        }

        @Nullable
        private Result<RESPONSE> lookup(final Reader reader, final String ipAddress) throws IOException {
            final InetAddress ip = InetAddresses.forString(ipAddress);
            final DatabaseRecord<RESPONSE> record = reader.getRecord(ip, clazz);
            final RESPONSE data = record.getData();
            return (data == null) ? null : new Result<>(data, NetworkAddress.format(ip), record.getNetwork().toString());
        }

        /**
         * Extract the configured properties from the retrieved response
         * @param response the non-null response that was retrieved
         * @return a mapping of properties for the ip from the response
         */
        protected abstract Map<String, Object> transform(Result<RESPONSE> response);
    }
}
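For orientation, lookup(...) in AbstractBase is doing essentially the following with the MaxMind db Reader. This standalone sketch is not part of the change: the database path and address are placeholders, and java.io.File would additionally need to be imported.

    try (Reader reader = new Reader(new File("/tmp/ipinfo_asn_sample.mmdb"))) {
        InetAddress ip = InetAddresses.forString("12.34.56.78");
        DatabaseRecord<AsnResult> record = reader.getRecord(ip, AsnResult.class);
        if (record.getData() != null) {
            // the @MaxMindDbConstructor binding on AsnResult has already run, so asn() is a parsed Long
            Long asn = record.getData().asn();
            String network = record.getNetwork().toString();
        }
    }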
@@ -23,6 +23,7 @@ import com.maxmind.geoip2.model.EnterpriseResponse;
import com.maxmind.geoip2.model.IspResponse;
import com.maxmind.geoip2.record.Continent;
import com.maxmind.geoip2.record.Location;
import com.maxmind.geoip2.record.Postal;
import com.maxmind.geoip2.record.Subdivision;

import org.elasticsearch.common.network.InetAddresses;

@@ -139,11 +140,18 @@ final class MaxmindIpDataLookups {
            Location location = response.getLocation();
            Continent continent = response.getContinent();
            Subdivision subdivision = response.getMostSpecificSubdivision();
            Postal postal = response.getPostal();

            Map<String, Object> data = new HashMap<>();
            for (Database.Property property : this.properties) {
                switch (property) {
                    case IP -> data.put("ip", response.getTraits().getIpAddress());
                    case COUNTRY_IN_EUROPEAN_UNION -> {
                        if (country.getIsoCode() != null) {
                            // isInEuropeanUnion is a boolean so it can't be null. But it really only makes sense if we have a country
                            data.put("country_in_european_union", country.isInEuropeanUnion());
                        }
                    }
                    case COUNTRY_ISO_CODE -> {
                        String countryIsoCode = country.getIsoCode();
                        if (countryIsoCode != null) {

@@ -206,6 +214,17 @@ final class MaxmindIpDataLookups {
                            data.put("location", locationObject);
                        }
                    }
                    case ACCURACY_RADIUS -> {
                        Integer accuracyRadius = location.getAccuracyRadius();
                        if (accuracyRadius != null) {
                            data.put("accuracy_radius", accuracyRadius);
                        }
                    }
                    case POSTAL_CODE -> {
                        if (postal != null && postal.getCode() != null) {
                            data.put("postal_code", postal.getCode());
                        }
                    }
                }
            }
            return data;

@@ -254,6 +273,12 @@ final class MaxmindIpDataLookups {
            for (Database.Property property : this.properties) {
                switch (property) {
                    case IP -> data.put("ip", response.getTraits().getIpAddress());
                    case COUNTRY_IN_EUROPEAN_UNION -> {
                        if (country.getIsoCode() != null) {
                            // isInEuropeanUnion is a boolean so it can't be null. But it really only makes sense if we have a country
                            data.put("country_in_european_union", country.isInEuropeanUnion());
                        }
                    }
                    case COUNTRY_ISO_CODE -> {
                        String countryIsoCode = country.getIsoCode();
                        if (countryIsoCode != null) {

@@ -324,6 +349,7 @@ final class MaxmindIpDataLookups {
            Location location = response.getLocation();
            Continent continent = response.getContinent();
            Subdivision subdivision = response.getMostSpecificSubdivision();
            Postal postal = response.getPostal();

            Long asn = response.getTraits().getAutonomousSystemNumber();
            String organizationName = response.getTraits().getAutonomousSystemOrganization();

@@ -351,6 +377,18 @@ final class MaxmindIpDataLookups {
            for (Database.Property property : this.properties) {
                switch (property) {
                    case IP -> data.put("ip", response.getTraits().getIpAddress());
                    case COUNTRY_CONFIDENCE -> {
                        Integer countryConfidence = country.getConfidence();
                        if (countryConfidence != null) {
                            data.put("country_confidence", countryConfidence);
                        }
                    }
                    case COUNTRY_IN_EUROPEAN_UNION -> {
                        if (country.getIsoCode() != null) {
                            // isInEuropeanUnion is a boolean so it can't be null. But it really only makes sense if we have a country
                            data.put("country_in_european_union", country.isInEuropeanUnion());
                        }
                    }
                    case COUNTRY_ISO_CODE -> {
                        String countryIsoCode = country.getIsoCode();
                        if (countryIsoCode != null) {

@@ -391,6 +429,12 @@ final class MaxmindIpDataLookups {
                            data.put("region_name", subdivisionName);
                        }
                    }
                    case CITY_CONFIDENCE -> {
                        Integer cityConfidence = city.getConfidence();
                        if (cityConfidence != null) {
                            data.put("city_confidence", cityConfidence);
                        }
                    }
                    case CITY_NAME -> {
                        String cityName = city.getName();
                        if (cityName != null) {

@@ -413,6 +457,23 @@ final class MaxmindIpDataLookups {
                            data.put("location", locationObject);
                        }
                    }
                    case ACCURACY_RADIUS -> {
                        Integer accuracyRadius = location.getAccuracyRadius();
                        if (accuracyRadius != null) {
                            data.put("accuracy_radius", accuracyRadius);
                        }
                    }
                    case POSTAL_CODE -> {
                        if (postal != null && postal.getCode() != null) {
                            data.put("postal_code", postal.getCode());
                        }
                    }
                    case POSTAL_CONFIDENCE -> {
                        Integer postalConfidence = postal.getConfidence();
                        if (postalConfidence != null) {
                            data.put("postal_confidence", postalConfidence);
                        }
                    }
                    case ASN -> {
                        if (asn != null) {
                            data.put("asn", asn);
@@ -195,7 +195,7 @@ public class GeoIpProcessorFactoryTests extends ESTestCase {
            equalTo(
                "[properties] illegal property value ["
                    + asnProperty
                    + "]. valid values are [IP, COUNTRY_ISO_CODE, COUNTRY_NAME, CONTINENT_CODE, CONTINENT_NAME]"
                    + "]. valid values are [IP, COUNTRY_IN_EUROPEAN_UNION, COUNTRY_ISO_CODE, COUNTRY_NAME, CONTINENT_CODE, CONTINENT_NAME]"
            )
        );
    }

@@ -273,8 +273,9 @@ public class GeoIpProcessorFactoryTests extends ESTestCase {
        assertThat(
            e.getMessage(),
            equalTo(
                "[properties] illegal property value [invalid]. valid values are [IP, COUNTRY_ISO_CODE, "
                    + "COUNTRY_NAME, CONTINENT_CODE, CONTINENT_NAME, REGION_ISO_CODE, REGION_NAME, CITY_NAME, TIMEZONE, LOCATION]"
                "[properties] illegal property value [invalid]. valid values are [IP, COUNTRY_IN_EUROPEAN_UNION, COUNTRY_ISO_CODE, "
                    + "COUNTRY_NAME, CONTINENT_CODE, CONTINENT_NAME, REGION_ISO_CODE, REGION_NAME, CITY_NAME, TIMEZONE, "
                    + "LOCATION, POSTAL_CODE, ACCURACY_RADIUS]"
            )
        );
Some files were not shown because too many files have changed in this diff.