mirror of
https://github.com/elastic/elasticsearch.git
synced 2025-04-25 15:47:23 -04:00
* [DOCS] Remove snapshot references from stable plugin example * Fix indentation * More indentation issues
213 lines
No EOL
6.5 KiB
Text
213 lines
No EOL
6.5 KiB
Text
[[example-text-analysis-plugin]]
|
|
==== Example text analysis plugin
|
|
|
|
This example shows how to create a simple "Hello world" text analysis plugin
|
|
using the stable plugin API. The plugin provides a custom Lucene token filter
|
|
that strips all tokens except for "hello" and "world".
|
|
|
|
Elastic provides a Gradle plugin, `elasticsearch.stable-esplugin`, that makes it
|
|
easier to develop and package stable plugins. The steps in this guide assume you
|
|
use this plugin. However, you don't need Gradle to create plugins.
|
|
|
|
. Create a new directory for your project.
|
|
. In this example, the source code is organized under the `main` and
|
|
`test` directories. In your project's home directory, create `src/`, `src/main/`,
|
|
and `src/test/` directories.
|
|
. Create the following `build.gradle` build script in your project's home
|
|
directory:
|
|
+
|
|
[source,gradle]
|
|
----
|
|
ext.pluginApiVersion = '8.7.0'
|
|
ext.luceneVersion = '9.5.0'
|
|
|
|
buildscript {
|
|
ext.pluginApiVersion = '8.7.0'
|
|
repositories {
|
|
mavenCentral()
|
|
}
|
|
dependencies {
|
|
classpath "org.elasticsearch.gradle:build-tools:${pluginApiVersion}"
|
|
}
|
|
}
|
|
|
|
apply plugin: 'elasticsearch.stable-esplugin'
|
|
apply plugin: 'elasticsearch.yaml-rest-test'
|
|
|
|
esplugin {
|
|
name 'my-plugin'
|
|
description 'My analysis plugin'
|
|
}
|
|
|
|
group 'org.example'
|
|
version '1.0-SNAPSHOT'
|
|
|
|
repositories {
|
|
mavenLocal()
|
|
mavenCentral()
|
|
}
|
|
|
|
dependencies {
|
|
|
|
//TODO transitive dependency off and plugin-api dependency?
|
|
compileOnly "org.elasticsearch.plugin:elasticsearch-plugin-api:${pluginApiVersion}"
|
|
compileOnly "org.elasticsearch.plugin:elasticsearch-plugin-analysis-api:${pluginApiVersion}"
|
|
compileOnly "org.apache.lucene:lucene-analysis-common:${luceneVersion}"
|
|
|
|
//TODO for testing this also have to be declared
|
|
testImplementation "org.elasticsearch.plugin:elasticsearch-plugin-api:${pluginApiVersion}"
|
|
testImplementation "org.elasticsearch.plugin:elasticsearch-plugin-analysis-api:${pluginApiVersion}"
|
|
testImplementation "org.apache.lucene:lucene-analysis-common:${luceneVersion}"
|
|
|
|
testImplementation ('junit:junit:4.13.2'){
|
|
exclude group: 'org.hamcrest'
|
|
}
|
|
testImplementation 'org.mockito:mockito-core:4.4.0'
|
|
testImplementation 'org.hamcrest:hamcrest:2.2'
|
|
|
|
}
|
|
----
|
|
. In `src/main/java/org/example/`, create `HelloWorldTokenFilter.java`. This
|
|
file provides the code for a token filter that strips all tokens except for
|
|
"hello" and "world":
|
|
+
|
|
[source,java]
|
|
----
|
|
package org.example;
|
|
|
|
import org.apache.lucene.analysis.FilteringTokenFilter;
|
|
import org.apache.lucene.analysis.TokenStream;
|
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|
|
|
import java.util.Arrays;
|
|
|
|
public class HelloWorldTokenFilter extends FilteringTokenFilter {
|
|
private final CharTermAttribute term = addAttribute(CharTermAttribute.class);
|
|
|
|
public HelloWorldTokenFilter(TokenStream input) {
|
|
super(input);
|
|
}
|
|
|
|
@Override
|
|
public boolean accept() {
|
|
if (term.length() != 5) return false;
|
|
return Arrays.equals(term.buffer(), 0, 4, "hello".toCharArray(), 0, 4)
|
|
|| Arrays.equals(term.buffer(), 0, 4, "world".toCharArray(), 0, 4);
|
|
}
|
|
}
|
|
----
|
|
. This filter can be provided to Elasticsearch using the following
|
|
`HelloWorldTokenFilterFactory.java` factory class. The `@NamedComponent`
|
|
annotation is used to give the filter the `hello_world` name. This is the name
|
|
you can use to refer to the filter, once the plugin has been deployed.
|
|
+
|
|
[source,java]
|
|
----
|
|
package org.example;
|
|
|
|
import org.apache.lucene.analysis.TokenStream;
|
|
import org.elasticsearch.plugin.analysis.TokenFilterFactory;
|
|
import org.elasticsearch.plugin.NamedComponent;
|
|
|
|
@NamedComponent(value = "hello_world")
|
|
public class HelloWorldTokenFilterFactory implements TokenFilterFactory {
|
|
|
|
@Override
|
|
public TokenStream create(TokenStream tokenStream) {
|
|
return new HelloWorldTokenFilter(tokenStream);
|
|
}
|
|
|
|
}
|
|
----
|
|
. Unit tests may go under the `src/test` directory. You will have to add
|
|
dependencies for your preferred testing framework.
|
|
|
|
. Run:
|
|
+
|
|
[source,sh]
|
|
----
|
|
gradle bundlePlugin
|
|
----
|
|
This builds the JAR file, generates the metadata files, and bundles them into a
|
|
plugin ZIP file. The resulting ZIP file will be written to the
|
|
`build/distributions` directory.
|
|
. <<plugin-management,Install the plugin>>.
|
|
. You can use the `_analyze` API to verify that the `hello_world` token filter
|
|
works as expected:
|
|
+
|
|
[source,console]
|
|
----
|
|
GET /_analyze
|
|
{
|
|
"text": "hello to everyone except the world",
|
|
"tokenizer": "standard",
|
|
"filter": ["hello_world"]
|
|
}
|
|
----
|
|
// TEST[skip:would require this plugin to be installed]
|
|
|
|
[discrete]
|
|
=== YAML REST tests
|
|
|
|
If you are using the `elasticsearch.stable-esplugin` plugin for Gradle, you can
|
|
use {es}'s YAML REST test framework. This framework allows you to load your
|
|
plugin in a running test cluster and issue real REST API queries against it. The
|
|
full syntax for this framework is beyond the scope of this tutorial, but there
|
|
are many examples in the Elasticsearch repository. Refer to the
|
|
{es-repo}tree/main/plugins/examples/stable-analysis[example analysis plugin] in
|
|
the {es} GitHub repository for an example.
|
|
|
|
. Create a `yamlRestTest` directory in the `src` directory.
|
|
. Under the `yamlRestTest` directory, create a `java` folder for Java sources
|
|
and a `resources` folder.
|
|
. In `src/yamlRestTest/java/org/example/`, create
|
|
`HelloWorldPluginClientYamlTestSuiteIT.java`. This class extends
|
|
`ESClientYamlSuiteTestCase`.
|
|
+
|
|
[source,java]
|
|
----
|
|
import com.carrotsearch.randomizedtesting.annotations.Name;
|
|
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
|
|
import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate;
|
|
import org.elasticsearch.test.rest.yaml.ESClientYamlSuiteTestCase;
|
|
|
|
public class HelloWorldPluginClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase {
|
|
|
|
public HelloWorldPluginClientYamlTestSuiteIT(
|
|
@Name("yaml") ClientYamlTestCandidate testCandidate
|
|
) {
|
|
super(testCandidate);
|
|
}
|
|
|
|
@ParametersFactory
|
|
public static Iterable<Object[]> parameters() throws Exception {
|
|
return ESClientYamlSuiteTestCase.createParameters();
|
|
}
|
|
}
|
|
----
|
|
. In `src/yamlRestTest/resources/rest-api-spec/test/plugin`, create the
|
|
`10_token_filter.yml` YAML file:
|
|
+
|
|
[source,yaml]
|
|
----
|
|
## Sample rest test
|
|
---
|
|
"Hello world plugin test - removes all tokens except hello and world":
|
|
- do:
|
|
indices.analyze:
|
|
body:
|
|
text: hello to everyone except the world
|
|
tokenizer: standard
|
|
filter:
|
|
- type: "hello_world"
|
|
- length: { tokens: 2 }
|
|
- match: { tokens.0.token: "hello" }
|
|
- match: { tokens.1.token: "world" }
|
|
|
|
----
|
|
. Run the test with:
|
|
+
|
|
[source,sh]
|
|
----
|
|
gradle yamlRestTest
|
|
---- |