Simple version of patterned_text with a single doc value for arguments (#129292)
Initial version of the patterned_text mapper. It behaves similarly to match_only_text. This version uses one SortedSetDocValues field for the template and another for the arguments. It splits the message on delimiters, then classifies a token as an argument if it contains a digit. All arguments are concatenated and stored as a single doc value. A single inverted index is used, without positions. Phrase queries are still possible via SourceConfirmedTextQuery, but they are not fast.
This commit is contained in:
parent 2df9dd42fb
commit 9aaba25d58
18 changed files with 2045 additions and 4 deletions
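
To make the splitting concrete, here is a rough sketch of what the PatternedTextValueProcessor introduced below produces for a made-up log line (the message and the expected values are illustrative, not taken from the PR's tests):

    // Illustrative only: tokens containing a digit become arguments, and the template keeps a
    // "%W" placeholder for each one; the message is split on whitespace and square brackets.
    String message = "connection 42 closed by [10.0.0.1] after 250 ms";
    PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(message);
    // parts.template() -> "connection %W closed by [%W] after %W ms"   (stored as the template doc value)
    // parts.args()     -> ["42", "10.0.0.1", "250"]                    (stored concatenated as "42 10.0.0.1 250")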

@@ -14,4 +14,6 @@ module org.elasticsearch.mapper.extras {
     requires org.apache.lucene.core;
     requires org.apache.lucene.memory;
     requires org.apache.lucene.queries;
+
+    exports org.elasticsearch.index.mapper.extras;
 }

@@ -173,7 +173,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
             super(name, true, false, false, tsi, meta);
             this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer);
             this.textFieldType = new TextFieldType(name, isSyntheticSource);
-            this.originalName = isSyntheticSource ? name() + "._original" : null;
+            this.originalName = isSyntheticSource ? name + "._original" : null;
         }

         public MatchOnlyTextFieldType(String name) {

@@ -124,7 +124,8 @@ public class HighlightPhase implements FetchSubPhase {
             if (fieldNameContainsWildcards) {
                 if (fieldType.typeName().equals(TextFieldMapper.CONTENT_TYPE) == false
                     && fieldType.typeName().equals(KeywordFieldMapper.CONTENT_TYPE) == false
-                    && fieldType.typeName().equals("match_only_text") == false) {
+                    && fieldType.typeName().equals("match_only_text") == false
+                    && fieldType.typeName().equals("patterned_text") == false) {
                     continue;
                 }
                 if (highlighter.canHighlight(fieldType) == false) {

@@ -24,12 +24,13 @@ base {
 
 restResources {
   restApi {
-    include 'bulk', 'search', '_common', 'indices', 'index', 'cluster', 'data_stream', 'ingest', 'cat', 'capabilities', 'esql.query'
+    include 'bulk', 'search', '_common', 'indices', 'index', 'cluster', 'data_stream', 'ingest', 'cat', 'capabilities', 'esql.query', 'field_caps'
   }
 }
 
 dependencies {
   compileOnly project(path: xpackModule('core'))
+  implementation project(':modules:mapper-extras')
   testImplementation project(':modules:data-streams')
   testImplementation(testArtifact(project(xpackModule('core'))))
   javaRestTestImplementation(testArtifact(project(xpackModule('spatial'))))

@@ -12,21 +12,27 @@ import org.elasticsearch.common.settings.Setting;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.IndexSettingProvider;
 import org.elasticsearch.index.IndexVersion;
+import org.elasticsearch.index.mapper.Mapper;
 import org.elasticsearch.license.LicenseService;
 import org.elasticsearch.license.XPackLicenseState;
 import org.elasticsearch.plugins.ActionPlugin;
+import org.elasticsearch.plugins.MapperPlugin;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.xpack.core.XPackPlugin;
 import org.elasticsearch.xpack.core.action.XPackInfoFeatureAction;
 import org.elasticsearch.xpack.core.action.XPackUsageFeatureAction;
+import org.elasticsearch.xpack.logsdb.patternedtext.PatternedTextFieldMapper;
+import org.elasticsearch.xpack.logsdb.patternedtext.PatternedTextFieldType;
 
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
+import java.util.Map;
 
+import static java.util.Collections.singletonMap;
 import static org.elasticsearch.xpack.logsdb.LogsdbLicenseService.FALLBACK_SETTING;
 
-public class LogsDBPlugin extends Plugin implements ActionPlugin {
+public class LogsDBPlugin extends Plugin implements ActionPlugin, MapperPlugin {
 
     private final Settings settings;
     private final LogsdbLicenseService licenseService;
@@ -98,6 +104,15 @@ public class LogsDBPlugin extends Plugin implements ActionPlugin {
         return actions;
     }
 
+    @Override
+    public Map<String, Mapper.TypeParser> getMappers() {
+        if (PatternedTextFieldMapper.PATTERNED_TEXT_MAPPER.isEnabled()) {
+            return singletonMap(PatternedTextFieldType.CONTENT_TYPE, PatternedTextFieldMapper.PARSER);
+        } else {
+            return Map.of();
+        }
+    }
+
     protected XPackLicenseState getLicenseState() {
         return XPackPlugin.getSharedLicenseState();
     }

@@ -0,0 +1,88 @@ PatternedTextDocValues.java (new file)
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

package org.elasticsearch.xpack.logsdb.patternedtext;

import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;

import java.io.IOException;

public class PatternedTextDocValues extends BinaryDocValues {
    private final SortedSetDocValues templateDocValues;
    private final SortedSetDocValues argsDocValues;

    PatternedTextDocValues(SortedSetDocValues templateDocValues, SortedSetDocValues argsDocValues) {
        this.templateDocValues = templateDocValues;
        this.argsDocValues = argsDocValues;
    }

    static PatternedTextDocValues from(LeafReader leafReader, String templateFieldName, String argsFieldName) throws IOException {
        SortedSetDocValues templateDocValues = DocValues.getSortedSet(leafReader, templateFieldName);
        if (templateDocValues.getValueCount() == 0) {
            return null;
        }

        SortedSetDocValues argsDocValues = DocValues.getSortedSet(leafReader, argsFieldName);
        return new PatternedTextDocValues(templateDocValues, argsDocValues);
    }

    private String getNextStringValue() throws IOException {
        assert templateDocValues.docValueCount() == 1;
        String template = templateDocValues.lookupOrd(templateDocValues.nextOrd()).utf8ToString();
        int argsCount = PatternedTextValueProcessor.countArgs(template);
        if (argsCount > 0) {
            assert argsDocValues.docValueCount() == 1;
            var mergedArgs = argsDocValues.lookupOrd(argsDocValues.nextOrd());
            var args = PatternedTextValueProcessor.decodeRemainingArgs(mergedArgs.utf8ToString());
            return PatternedTextValueProcessor.merge(new PatternedTextValueProcessor.Parts(template, args));
        } else {
            return template;
        }
    }

    @Override
    public BytesRef binaryValue() throws IOException {
        return new BytesRef(getNextStringValue());
    }

    @Override
    public boolean advanceExact(int i) throws IOException {
        argsDocValues.advanceExact(i);
        // If template has a value, then message has a value. We don't have to check args here, since there may not be args for the doc
        return templateDocValues.advanceExact(i);
    }

    @Override
    public int docID() {
        return templateDocValues.docID();
    }

    @Override
    public int nextDoc() throws IOException {
        int templateNext = templateDocValues.nextDoc();
        var argsAdvance = argsDocValues.advance(templateNext);
        assert argsAdvance >= templateNext;
        return templateNext;
    }

    @Override
    public int advance(int i) throws IOException {
        int templateAdvance = templateDocValues.advance(i);
        var argsAdvance = argsDocValues.advance(templateAdvance);
        assert argsAdvance >= templateAdvance;
        return templateAdvance;
    }

    @Override
    public long cost() {
        return templateDocValues.cost() + argsDocValues.cost();
    }
}

@@ -0,0 +1,176 @@ PatternedTextFieldMapper.java (new file)
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

package org.elasticsearch.xpack.logsdb.patternedtext;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.util.FeatureFlag;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.mapper.CompositeSyntheticFieldLoader;
import org.elasticsearch.index.mapper.DocumentParserContext;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MapperBuilderContext;
import org.elasticsearch.index.mapper.TextParams;
import org.elasticsearch.index.mapper.TextSearchInfo;

import java.io.IOException;
import java.util.Map;

/**
 * A {@link FieldMapper} for the {@code patterned_text} field type. It splits each message into a
 * template and arguments, which are stored as doc values alongside a positions-free inverted index.
 */
public class PatternedTextFieldMapper extends FieldMapper {

    public static final FeatureFlag PATTERNED_TEXT_MAPPER = new FeatureFlag("patterned_text");

    public static class Defaults {
        public static final FieldType FIELD_TYPE;

        static {
            final FieldType ft = new FieldType();
            ft.setTokenized(true);
            ft.setStored(false);
            ft.setStoreTermVectors(false);
            ft.setOmitNorms(true);
            ft.setIndexOptions(IndexOptions.DOCS);
            FIELD_TYPE = freezeAndDeduplicateFieldType(ft);
        }
    }

    public static class Builder extends FieldMapper.Builder {

        private final IndexVersion indexCreatedVersion;

        private final Parameter<Map<String, String>> meta = Parameter.metaParam();

        private final TextParams.Analyzers analyzers;

        public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers) {
            super(name);
            this.indexCreatedVersion = indexCreatedVersion;
            this.analyzers = new TextParams.Analyzers(
                indexAnalyzers,
                m -> ((PatternedTextFieldMapper) m).indexAnalyzer,
                m -> ((PatternedTextFieldMapper) m).positionIncrementGap,
                indexCreatedVersion
            );
        }

        @Override
        protected Parameter<?>[] getParameters() {
            return new Parameter<?>[] { meta };
        }

        private PatternedTextFieldType buildFieldType(MapperBuilderContext context) {
            NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer();
            NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer();
            NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer();
            TextSearchInfo tsi = new TextSearchInfo(Defaults.FIELD_TYPE, null, searchAnalyzer, searchQuoteAnalyzer);
            return new PatternedTextFieldType(
                context.buildFullName(leafName()),
                tsi,
                indexAnalyzer,
                context.isSourceSynthetic(),
                meta.getValue()
            );
        }

        @Override
        public PatternedTextFieldMapper build(MapperBuilderContext context) {
            return new PatternedTextFieldMapper(leafName(), buildFieldType(context), builderParams(this, context), this);
        }
    }

    public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers()));

    private final IndexVersion indexCreatedVersion;
    private final IndexAnalyzers indexAnalyzers;
    private final NamedAnalyzer indexAnalyzer;
    private final int positionIncrementGap;
    private final FieldType fieldType;

    private PatternedTextFieldMapper(
        String simpleName,
        PatternedTextFieldType mappedFieldPatternedTextFieldType,
        BuilderParams builderParams,
        Builder builder
    ) {
        super(simpleName, mappedFieldPatternedTextFieldType, builderParams);
        assert mappedFieldPatternedTextFieldType.getTextSearchInfo().isTokenized();
        assert mappedFieldPatternedTextFieldType.hasDocValues() == false;
        this.fieldType = Defaults.FIELD_TYPE;
        this.indexCreatedVersion = builder.indexCreatedVersion;
        this.indexAnalyzers = builder.analyzers.indexAnalyzers;
        this.indexAnalyzer = builder.analyzers.getIndexAnalyzer();
        this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue();
    }

    @Override
    public Map<String, NamedAnalyzer> indexAnalyzers() {
        return Map.of(mappedFieldType.name(), indexAnalyzer);
    }

    @Override
    public FieldMapper.Builder getMergeBuilder() {
        return new Builder(leafName(), indexCreatedVersion, indexAnalyzers).init(this);
    }

    @Override
    protected void parseCreateField(DocumentParserContext context) throws IOException {
        final String value = context.parser().textOrNull();
        if (value == null) {
            return;
        }

        var existingValue = context.doc().getField(fieldType().name());
        if (existingValue != null) {
            throw new IllegalArgumentException("Multiple values are not allowed for field [" + fieldType().name() + "].");
        }

        // Parse template and args.
        PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(value);

        // Add index on original value
        context.doc().add(new Field(fieldType().name(), value, fieldType));

        // Add template doc_values
        context.doc().add(new SortedSetDocValuesField(fieldType().templateFieldName(), new BytesRef(parts.template())));

        // Add args doc_values
        if (parts.args().isEmpty() == false) {
            String remainingArgs = PatternedTextValueProcessor.encodeRemainingArgs(parts);
            context.doc().add(new SortedSetDocValuesField(fieldType().argsFieldName(), new BytesRef(remainingArgs)));
        }
    }

    @Override
    protected String contentType() {
        return PatternedTextFieldType.CONTENT_TYPE;
    }

    @Override
    public PatternedTextFieldType fieldType() {
        return (PatternedTextFieldType) super.fieldType();
    }

    @Override
    protected SyntheticSourceSupport syntheticSourceSupport() {
        return new SyntheticSourceSupport.Native(
            () -> new CompositeSyntheticFieldLoader(
                leafName(),
                fullPath(),
                new PatternedTextSyntheticFieldLoaderLayer(fieldType().name(), fieldType().templateFieldName(), fieldType().argsFieldName())
            )
        );
    }
}

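Not part of the commit: a short sketch of the Lucene fields that parseCreateField above ends up adding, assuming a patterned_text field called "message" and the made-up value shown below. The ".template" and ".args" suffixes come from PatternedTextFieldType in the next file.

    // Sketch only. One indexed field for the raw message (DOCS, no positions, norms omitted),
    // one SortedSetDocValuesField for the template, and one for the concatenated arguments
    // (the args field is only added when at least one token was classified as an argument).
    Document luceneDoc = new Document();
    luceneDoc.add(new Field("message", "connection 42 closed", PatternedTextFieldMapper.Defaults.FIELD_TYPE));
    luceneDoc.add(new SortedSetDocValuesField("message.template", new BytesRef("connection %W closed")));
    luceneDoc.add(new SortedSetDocValuesField("message.args", new BytesRef("42")));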
@@ -0,0 +1,270 @@ PatternedTextFieldType.java (new file)
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

package org.elasticsearch.xpack.logsdb.patternedtext;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FieldExistsQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOFunction;
import org.elasticsearch.common.CheckedIntFunction;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.fielddata.FieldDataContext;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
import org.elasticsearch.index.mapper.BlockDocValuesReader;
import org.elasticsearch.index.mapper.BlockLoader;
import org.elasticsearch.index.mapper.SourceValueFetcher;
import org.elasticsearch.index.mapper.StringFieldType;
import org.elasticsearch.index.mapper.TextFieldMapper;
import org.elasticsearch.index.mapper.TextSearchInfo;
import org.elasticsearch.index.mapper.ValueFetcher;
import org.elasticsearch.index.mapper.extras.SourceConfirmedTextQuery;
import org.elasticsearch.index.mapper.extras.SourceIntervalsSource;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.script.field.KeywordDocValuesField;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.lookup.SourceProvider;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;

public class PatternedTextFieldType extends StringFieldType {

    private static final String TEMPLATE_SUFFIX = ".template";
    private static final String ARGS_SUFFIX = ".args";

    public static final String CONTENT_TYPE = "patterned_text";

    private final Analyzer indexAnalyzer;
    private final TextFieldMapper.TextFieldType textFieldType;

    PatternedTextFieldType(String name, TextSearchInfo tsi, Analyzer indexAnalyzer, boolean isSyntheticSource, Map<String, String> meta) {
        // Though this type is based on doc_values, hasDocValues is set to false as the patterned_text type is not aggregatable.
        // This does not stop its child .template type from being aggregatable.
        super(name, true, false, false, tsi, meta);
        this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer);
        this.textFieldType = new TextFieldMapper.TextFieldType(name, isSyntheticSource);
    }

    PatternedTextFieldType(String name) {
        this(
            name,
            new TextSearchInfo(PatternedTextFieldMapper.Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
            Lucene.STANDARD_ANALYZER,
            false,
            Collections.emptyMap()
        );
    }

    @Override
    public String typeName() {
        return CONTENT_TYPE;
    }

    @Override
    public String familyTypeName() {
        return TextFieldMapper.CONTENT_TYPE;
    }

    @Override
    public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
        return SourceValueFetcher.toString(name(), context, format);
    }

    private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> getValueFetcherProvider(
        SearchExecutionContext searchExecutionContext
    ) {
        return context -> {
            ValueFetcher valueFetcher = valueFetcher(searchExecutionContext, null);
            SourceProvider sourceProvider = searchExecutionContext.lookup();
            valueFetcher.setNextReader(context);
            return docID -> {
                try {
                    return valueFetcher.fetchValues(sourceProvider.getSource(context, docID), docID, new ArrayList<>());
                } catch (IOException e) {
                    throw new UncheckedIOException(e);
                }
            };
        };
    }

    private Query sourceConfirmedQuery(Query query, SearchExecutionContext context) {
        // Disable scoring
        return new ConstantScoreQuery(new SourceConfirmedTextQuery(query, getValueFetcherProvider(context), indexAnalyzer));
    }

    private IntervalsSource toIntervalsSource(IntervalsSource source, Query approximation, SearchExecutionContext searchExecutionContext) {
        return new SourceIntervalsSource(source, approximation, getValueFetcherProvider(searchExecutionContext), indexAnalyzer);
    }

    @Override
    public Query termQuery(Object query, SearchExecutionContext context) {
        // Disable scoring
        return new ConstantScoreQuery(super.termQuery(query, context));
    }

    @Override
    public Query fuzzyQuery(
        Object value,
        Fuzziness fuzziness,
        int prefixLength,
        int maxExpansions,
        boolean transpositions,
        SearchExecutionContext context,
        MultiTermQuery.RewriteMethod rewriteMethod
    ) {
        // Disable scoring
        return new ConstantScoreQuery(
            super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, context, rewriteMethod)
        );
    }

    @Override
    public Query existsQuery(SearchExecutionContext context) {
        return new FieldExistsQuery(templateFieldName());
    }

    @Override
    public IntervalsSource termIntervals(BytesRef term, SearchExecutionContext context) {
        return toIntervalsSource(Intervals.term(term), new TermQuery(new Term(name(), term)), context);
    }

    @Override
    public IntervalsSource prefixIntervals(BytesRef term, SearchExecutionContext context) {
        return toIntervalsSource(
            Intervals.prefix(term, IndexSearcher.getMaxClauseCount()),
            new PrefixQuery(new Term(name(), term)),
            context
        );
    }

    @Override
    public IntervalsSource fuzzyIntervals(
        String term,
        int maxDistance,
        int prefixLength,
        boolean transpositions,
        SearchExecutionContext context
    ) {
        FuzzyQuery fuzzyQuery = new FuzzyQuery(
            new Term(name(), term),
            maxDistance,
            prefixLength,
            IndexSearcher.getMaxClauseCount(),
            transpositions,
            MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE
        );
        IntervalsSource fuzzyIntervals = Intervals.multiterm(fuzzyQuery.getAutomata(), IndexSearcher.getMaxClauseCount(), term);
        return toIntervalsSource(fuzzyIntervals, fuzzyQuery, context);
    }

    @Override
    public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContext context) {
        return toIntervalsSource(
            Intervals.wildcard(pattern, IndexSearcher.getMaxClauseCount()),
            new MatchAllDocsQuery(), // wildcard queries can be expensive, what should the approximation be?
            context
        );
    }

    @Override
    public IntervalsSource regexpIntervals(BytesRef pattern, SearchExecutionContext context) {
        return toIntervalsSource(
            Intervals.regexp(pattern, IndexSearcher.getMaxClauseCount()),
            new MatchAllDocsQuery(), // regexp queries can be expensive, what should the approximation be?
            context
        );
    }

    @Override
    public IntervalsSource rangeIntervals(
        BytesRef lowerTerm,
        BytesRef upperTerm,
        boolean includeLower,
        boolean includeUpper,
        SearchExecutionContext context
    ) {
        return toIntervalsSource(
            Intervals.range(lowerTerm, upperTerm, includeLower, includeUpper, IndexSearcher.getMaxClauseCount()),
            new MatchAllDocsQuery(), // range queries can be expensive, what should the approximation be?
            context
        );
    }

    @Override
    public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, SearchExecutionContext queryShardContext)
        throws IOException {
        final Query textQuery = textFieldType.phraseQuery(stream, slop, enablePosIncrements, queryShardContext);
        return sourceConfirmedQuery(textQuery, queryShardContext);
    }

    @Override
    public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, SearchExecutionContext queryShardContext)
        throws IOException {
        final Query textQuery = textFieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, queryShardContext);
        return sourceConfirmedQuery(textQuery, queryShardContext);
    }

    @Override
    public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, SearchExecutionContext queryShardContext)
        throws IOException {
        final Query textQuery = textFieldType.phrasePrefixQuery(stream, slop, maxExpansions, queryShardContext);
        return sourceConfirmedQuery(textQuery, queryShardContext);
    }

    @Override
    public BlockLoader blockLoader(BlockLoaderContext blContext) {
        return new BlockDocValuesReader.BytesRefsFromBinaryBlockLoader(name());
    }

    @Override
    public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext) {
        if (fieldDataContext.fielddataOperation() != FielddataOperation.SCRIPT) {
            throw new IllegalArgumentException(CONTENT_TYPE + " fields do not support sorting and aggregations");
        }
        if (textFieldType.isSyntheticSource()) {
            return new PatternedTextIndexFieldData.Builder(this);
        }
        return new SourceValueFetcherSortedBinaryIndexFieldData.Builder(
            name(),
            CoreValuesSourceType.KEYWORD,
            SourceValueFetcher.toString(fieldDataContext.sourcePathsLookup().apply(name())),
            fieldDataContext.lookupSupplier().get(),
            KeywordDocValuesField::new
        );
    }

    String templateFieldName() {
        return name() + TEMPLATE_SUFFIX;
    }

    String argsFieldName() {
        return name() + ARGS_SUFFIX;
    }
}

@@ -0,0 +1,134 @@ PatternedTextIndexFieldData.java (new file)
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

package org.elasticsearch.xpack.logsdb.patternedtext;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
import org.elasticsearch.index.fielddata.LeafFieldData;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
import org.elasticsearch.indices.breaker.CircuitBreakerService;
import org.elasticsearch.script.field.DocValuesScriptFieldFactory;
import org.elasticsearch.script.field.KeywordDocValuesField;
import org.elasticsearch.script.field.ToScriptFieldFactory;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.MultiValueMode;
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
import org.elasticsearch.search.sort.BucketedSort;
import org.elasticsearch.search.sort.SortOrder;

import java.io.IOException;
import java.io.UncheckedIOException;

public class PatternedTextIndexFieldData implements IndexFieldData<LeafFieldData> {

    private final PatternedTextFieldType fieldType;

    static class Builder implements IndexFieldData.Builder {

        final PatternedTextFieldType fieldType;

        Builder(PatternedTextFieldType fieldType) {
            this.fieldType = fieldType;
        }

        public PatternedTextIndexFieldData build(IndexFieldDataCache cache, CircuitBreakerService breakerService) {
            return new PatternedTextIndexFieldData(fieldType);
        }
    }

    PatternedTextIndexFieldData(PatternedTextFieldType fieldType) {
        this.fieldType = fieldType;
    }

    @Override
    public String getFieldName() {
        return fieldType.name();
    }

    @Override
    public ValuesSourceType getValuesSourceType() {
        return null;
    }

    @Override
    public LeafFieldData load(LeafReaderContext context) {
        try {
            return loadDirect(context);
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }

    @Override
    public LeafFieldData loadDirect(LeafReaderContext context) throws IOException {
        LeafReader leafReader = context.reader();
        PatternedTextDocValues docValues = PatternedTextDocValues.from(
            leafReader,
            fieldType.templateFieldName(),
            fieldType.argsFieldName()
        );
        return new LeafFieldData() {

            final ToScriptFieldFactory<SortedBinaryDocValues> factory = KeywordDocValuesField::new;

            @Override
            public DocValuesScriptFieldFactory getScriptFieldFactory(String name) {
                return factory.getScriptFieldFactory(getBytesValues(), name);
            }

            @Override
            public SortedBinaryDocValues getBytesValues() {
                return new SortedBinaryDocValues() {
                    @Override
                    public boolean advanceExact(int doc) throws IOException {
                        return docValues.advanceExact(doc);
                    }

                    @Override
                    public int docValueCount() {
                        return 1;
                    }

                    @Override
                    public BytesRef nextValue() throws IOException {
                        return docValues.binaryValue();
                    }
                };
            }

            @Override
            public long ramBytesUsed() {
                return 1L;
            }
        };
    }

    @Override
    public SortField sortField(Object missingValue, MultiValueMode sortMode, XFieldComparatorSource.Nested nested, boolean reverse) {
        throw new IllegalArgumentException("not supported for source patterned text field type");
    }

    @Override
    public BucketedSort newBucketedSort(
        BigArrays bigArrays,
        Object missingValue,
        MultiValueMode sortMode,
        XFieldComparatorSource.Nested nested,
        SortOrder sortOrder,
        DocValueFormat format,
        int bucketSize,
        BucketedSort.ExtraData extra
    ) {
        throw new IllegalArgumentException("only supported on numeric fields");
    }
}

@@ -0,0 +1,86 @@ PatternedTextSyntheticFieldLoaderLayer.java (new file)
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

package org.elasticsearch.xpack.logsdb.patternedtext;

import org.apache.lucene.index.LeafReader;
import org.apache.lucene.search.DocIdSetIterator;
import org.elasticsearch.index.mapper.CompositeSyntheticFieldLoader;
import org.elasticsearch.xcontent.XContentBuilder;

import java.io.IOException;

class PatternedTextSyntheticFieldLoaderLayer implements CompositeSyntheticFieldLoader.DocValuesLayer {

    private final String name;
    private final String templateFieldName;
    private final String argsFieldName;
    private PatternedTextSyntheticFieldLoader loader;

    PatternedTextSyntheticFieldLoaderLayer(String name, String templateFieldName, String argsFieldName) {
        this.name = name;
        this.templateFieldName = templateFieldName;
        this.argsFieldName = argsFieldName;
    }

    @Override
    public long valueCount() {
        return loader != null && loader.hasValue() ? 1 : 0;
    }

    @Override
    public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException {
        var docValues = PatternedTextDocValues.from(leafReader, templateFieldName, argsFieldName);
        if (docValues == null) {
            return null;
        }
        loader = new PatternedTextSyntheticFieldLoader(docValues);
        return loader;
    }

    @Override
    public boolean hasValue() {
        return loader != null && loader.hasValue();
    }

    @Override
    public void write(XContentBuilder b) throws IOException {
        if (loader != null) {
            loader.write(b);
        }
    }

    @Override
    public String fieldName() {
        return name;
    }

    private static class PatternedTextSyntheticFieldLoader implements DocValuesLoader {
        private final PatternedTextDocValues docValues;
        private boolean hasValue = false;

        PatternedTextSyntheticFieldLoader(PatternedTextDocValues docValues) {
            this.docValues = docValues;
        }

        public boolean hasValue() {
            assert docValues.docID() != DocIdSetIterator.NO_MORE_DOCS;
            return hasValue;
        }

        @Override
        public boolean advanceToDoc(int docId) throws IOException {
            return hasValue = docValues.advanceExact(docId);
        }

        public void write(XContentBuilder b) throws IOException {
            if (hasValue) {
                b.value(docValues.binaryValue().utf8ToString());
            }
        }
    }
}

@@ -0,0 +1,105 @@ PatternedTextValueProcessor.java (new file)
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

package org.elasticsearch.xpack.logsdb.patternedtext;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class PatternedTextValueProcessor {
    private static final String TEXT_ARG_PLACEHOLDER = "%W";
    private static final String DELIMITER = "[\\s\\[\\]]";
    private static final String SPACE = " ";

    record Parts(String template, List<String> args) {}

    static Parts split(String text) {
        StringBuilder template = new StringBuilder();
        List<String> args = new ArrayList<>();
        String[] tokens = text.split(DELIMITER);
        int textIndex = 0;
        for (String token : tokens) {
            if (token.isEmpty()) {
                if (textIndex < text.length() - 1) {
                    template.append(text.charAt(textIndex++));
                }
                continue;
            }
            if (isArg(token)) {
                args.add(token);
                template.append(TEXT_ARG_PLACEHOLDER);
            } else {
                template.append(token);
            }
            textIndex += token.length();
            if (textIndex < text.length()) {
                template.append(text.charAt(textIndex++));
            }
        }
        while (textIndex < text.length()) {
            template.append(text.charAt(textIndex++));
        }
        return new Parts(template.toString(), args);
    }

    private static boolean isArg(String text) {
        for (int i = 0; i < text.length(); i++) {
            if (Character.isDigit(text.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    static String merge(Parts parts) {
        StringBuilder builder = new StringBuilder();
        String[] templateParts = parts.template.split(DELIMITER);
        int i = 0;
        int templateIndex = 0;
        for (String part : templateParts) {
            if (part.equals(TEXT_ARG_PLACEHOLDER)) {
                builder.append(parts.args.get(i++));
                templateIndex += TEXT_ARG_PLACEHOLDER.length();
            } else if (part.isEmpty() == false) {
                builder.append(part);
                templateIndex += part.length();
            }
            if (templateIndex < parts.template.length()) {
                builder.append(parts.template.charAt(templateIndex++));
            }
        }
        assert i == parts.args.size() : "expected " + i + " but got " + parts.args.size();
        assert builder.toString().contains(TEXT_ARG_PLACEHOLDER) == false : builder.toString();
        while (templateIndex < parts.template.length()) {
            builder.append(parts.template.charAt(templateIndex++));
        }
        return builder.toString();
    }

    static String encodeRemainingArgs(Parts parts) {
        return String.join(SPACE, parts.args);
    }

    static List<String> decodeRemainingArgs(String mergedArgs) {
        return Arrays.asList(mergedArgs.split(SPACE));
    }

    static int countArgs(String template) {
        int count = 0;
        for (int i = 0; i < template.length() - 1; i++) {
            if (template.charAt(i) == '%') {
                char next = template.charAt(i + 1);
                if (next == 'W') {
                    count++;
                    i++;
                }
            }
        }
        return count;
    }
}

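A quick way to see the contract between split and merge (a sketch in the style of the tests below, not part of the commit; it uses package-private methods, so it would have to live in the same package):

    // Round trip: merge(split(message)) reproduces the original message, and the number of
    // "%W" placeholders in the template matches the number of extracted arguments.
    String message = "[2024-01-01] user 7 logged in from 10.0.0.1";
    PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(message);
    assert PatternedTextValueProcessor.merge(parts).equals(message);
    assert PatternedTextValueProcessor.countArgs(parts.template()) == parts.args().size();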
@@ -0,0 +1,174 @@ PatternTextDocValuesTests.java (new file)
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

package org.elasticsearch.xpack.logsdb.patternedtext;

import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.test.ESTestCase;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;

import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;

public class PatternTextDocValuesTests extends ESTestCase {

    private static PatternedTextDocValues makeDocValueSparseArgs() {
        var template = new SimpleSortedSetDocValues("%W dog", "cat", "%W mouse %W", "hat %W");
        var args = new SimpleSortedSetDocValues("1", null, "2 3", "4");
        return new PatternedTextDocValues(template, args);
    }

    private static PatternedTextDocValues makeDocValuesDenseArgs() {
        var template = new SimpleSortedSetDocValues("%W moose", "%W goose %W", "%W mouse %W", "%W house");
        var args = new SimpleSortedSetDocValues("1", "4 5", "2 3", "7");
        return new PatternedTextDocValues(template, args);
    }

    private static PatternedTextDocValues makeDocValueMissingValues() {
        var template = new SimpleSortedSetDocValues("%W cheddar", "cat", null, "%W cheese");
        var args = new SimpleSortedSetDocValues("1", null, null, "4");
        return new PatternedTextDocValues(template, args);
    }

    public void testNextDoc() throws IOException {
        var docValues = randomBoolean() ? makeDocValueSparseArgs() : makeDocValuesDenseArgs();
        assertEquals(-1, docValues.docID());
        assertEquals(0, docValues.nextDoc());
        assertEquals(1, docValues.nextDoc());
        assertEquals(2, docValues.nextDoc());
        assertEquals(3, docValues.nextDoc());
        assertEquals(NO_MORE_DOCS, docValues.nextDoc());
    }

    public void testNextDocMissing() throws IOException {
        var docValues = makeDocValueMissingValues();
        assertEquals(-1, docValues.docID());
        assertEquals(0, docValues.nextDoc());
        assertEquals(1, docValues.nextDoc());
        assertEquals(3, docValues.nextDoc());
        assertEquals(NO_MORE_DOCS, docValues.nextDoc());
    }

    public void testAdvance1() throws IOException {
        var docValues = randomBoolean() ? makeDocValueSparseArgs() : makeDocValuesDenseArgs();
        assertEquals(-1, docValues.docID());
        assertEquals(0, docValues.nextDoc());
        assertEquals(1, docValues.advance(1));
        assertEquals(2, docValues.advance(2));
        assertEquals(3, docValues.advance(3));
        assertEquals(NO_MORE_DOCS, docValues.advance(4));
    }

    public void testAdvanceFarther() throws IOException {
        var docValues = randomBoolean() ? makeDocValueSparseArgs() : makeDocValuesDenseArgs();
        assertEquals(2, docValues.advance(2));
        // a repeated advance stays on the same doc
        assertEquals(2, docValues.advance(2));
    }

    public void testAdvanceSkipsValuesIfMissing() throws IOException {
        var docValues = makeDocValueMissingValues();
        assertEquals(3, docValues.advance(2));
    }

    public void testAdvanceExactMissing() throws IOException {
        var docValues = makeDocValueMissingValues();
        assertTrue(docValues.advanceExact(1));
        assertFalse(docValues.advanceExact(2));
        assertEquals(3, docValues.docID());
    }

    public void testValueAll() throws IOException {
        var docValues = makeDocValuesDenseArgs();
        assertEquals(0, docValues.nextDoc());
        assertEquals("1 moose", docValues.binaryValue().utf8ToString());
        assertEquals(1, docValues.nextDoc());
        assertEquals("4 goose 5", docValues.binaryValue().utf8ToString());
        assertEquals(2, docValues.nextDoc());
        assertEquals("2 mouse 3", docValues.binaryValue().utf8ToString());
        assertEquals(3, docValues.nextDoc());
        assertEquals("7 house", docValues.binaryValue().utf8ToString());
    }

    public void testValueMissing() throws IOException {
        var docValues = makeDocValueMissingValues();
        assertEquals(0, docValues.nextDoc());
        assertEquals("1 cheddar", docValues.binaryValue().utf8ToString());
        assertEquals(1, docValues.nextDoc());
        assertEquals("cat", docValues.binaryValue().utf8ToString());
        assertEquals(3, docValues.nextDoc());
        assertEquals("4 cheese", docValues.binaryValue().utf8ToString());
    }

    static class SimpleSortedSetDocValues extends SortedSetDocValues {

        private final List<String> ordToValues;
        private final List<Integer> docToOrds;
        private int currDoc = -1;

        // Single value for each docId, null if no value for a docId
        SimpleSortedSetDocValues(String... docIdToValue) {
            ordToValues = Arrays.stream(docIdToValue).filter(Objects::nonNull).collect(Collectors.toSet()).stream().sorted().toList();
            docToOrds = Arrays.stream(docIdToValue).map(v -> v == null ? null : ordToValues.indexOf(v)).toList();
        }

        @Override
        public long nextOrd() {
            return docToOrds.get(currDoc);
        }

        @Override
        public int docValueCount() {
            return 1;
        }

        @Override
        public BytesRef lookupOrd(long ord) {
            return new BytesRef(ordToValues.get((int) ord));
        }

        @Override
        public long getValueCount() {
            return ordToValues.size();
        }

        @Override
        public boolean advanceExact(int target) {
            return advance(target) == target;
        }

        @Override
        public int docID() {
            return currDoc >= docToOrds.size() ? NO_MORE_DOCS : currDoc;
        }

        @Override
        public int nextDoc() throws IOException {
            return advance(currDoc + 1);
        }

        @Override
        public int advance(int target) {
            for (currDoc = target; currDoc < docToOrds.size(); currDoc++) {
                if (docToOrds.get(currDoc) != null) {
                    return currDoc;
                }
            }
            return NO_MORE_DOCS;
        }

        @Override
        public long cost() {
            return 1;
        }
    }
}

@@ -0,0 +1,284 @@ PatternedTextFieldMapperTests.java (new file)
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

package org.elasticsearch.xpack.logsdb.patternedtext;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.search.FieldExistsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.analysis.CannedTokenStream;
import org.apache.lucene.tests.analysis.Token;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.mapper.DateFieldMapper;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.LuceneDocument;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.MapperTestCase;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xpack.logsdb.LogsDBPlugin;
import org.junit.AssumptionViolatedException;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.UUID;

import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.startsWith;

public class PatternedTextFieldMapperTests extends MapperTestCase {

    @Override
    protected Collection<Plugin> getPlugins() {
        return List.of(new LogsDBPlugin(Settings.EMPTY));
    }

    @Override
    protected Object getSampleValueForDocument() {
        return "value";
    }

    @Override
    protected void assertExistsQuery(MappedFieldType fieldType, Query query, LuceneDocument fields) {
        assertThat(query, instanceOf(FieldExistsQuery.class));
        FieldExistsQuery fieldExistsQuery = (FieldExistsQuery) query;
        assertThat(fieldExistsQuery.getField(), startsWith("field"));
        assertNoFieldNamesField(fields);
    }

    public void testExistsStandardSource() throws IOException {
        assertExistsQuery(createMapperService(fieldMapping(b -> b.field("type", "patterned_text"))));
    }

    public void testExistsSyntheticSource() throws IOException {
        assertExistsQuery(createSytheticSourceMapperService(fieldMapping(b -> b.field("type", "patterned_text"))));
    }

    public void testPhraseQueryStandardSource() throws IOException {
        assertPhraseQuery(createMapperService(fieldMapping(b -> b.field("type", "patterned_text"))));
    }

    public void testPhraseQuerySyntheticSource() throws IOException {
        assertPhraseQuery(createSytheticSourceMapperService(fieldMapping(b -> b.field("type", "patterned_text"))));
    }

    private void assertPhraseQuery(MapperService mapperService) throws IOException {
        try (Directory directory = newDirectory()) {
            RandomIndexWriter iw = new RandomIndexWriter(random(), directory);
            LuceneDocument doc = mapperService.documentMapper().parse(source(b -> b.field("field", "the quick brown fox 1"))).rootDoc();
            iw.addDocument(doc);
            iw.close();
            try (DirectoryReader reader = DirectoryReader.open(directory)) {
                SearchExecutionContext context = createSearchExecutionContext(mapperService, newSearcher(reader));
                MatchPhraseQueryBuilder queryBuilder = new MatchPhraseQueryBuilder("field", "brown fox 1");
                TopDocs docs = context.searcher().search(queryBuilder.toQuery(context), 1);
                assertThat(docs.totalHits.value(), equalTo(1L));
                assertThat(docs.totalHits.relation(), equalTo(TotalHits.Relation.EQUAL_TO));
                assertThat(docs.scoreDocs[0].doc, equalTo(0));
            }
        }
    }

    @Override
    protected void registerParameters(ParameterChecker checker) throws IOException {
        checker.registerUpdateCheck(
            b -> { b.field("meta", Collections.singletonMap("format", "mysql.access")); },
            m -> assertEquals(Collections.singletonMap("format", "mysql.access"), m.fieldType().meta())
        );
    }

    @Override
    protected void minimalMapping(XContentBuilder b) throws IOException {
        b.field("type", "patterned_text");
    }

    @Override
    protected void minimalStoreMapping(XContentBuilder b) throws IOException {
        // 'store' is always true
        minimalMapping(b);
    }

    public void testDefaults() throws IOException {
        DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping));
        assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), mapper.mappingSource().toString());

        ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234")));
        List<IndexableField> fields = doc.rootDoc().getFields("field");
        assertEquals(1, fields.size());
        assertEquals("1234", fields.get(0).stringValue());
        IndexableFieldType fieldType = fields.get(0).fieldType();
        assertThat(fieldType.omitNorms(), equalTo(true));
        assertTrue(fieldType.tokenized());
        assertFalse(fieldType.stored());
        assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS));
        assertThat(fieldType.storeTermVectors(), equalTo(false));
        assertThat(fieldType.storeTermVectorOffsets(), equalTo(false));
        assertThat(fieldType.storeTermVectorPositions(), equalTo(false));
        assertThat(fieldType.storeTermVectorPayloads(), equalTo(false));
        assertEquals(DocValuesType.NONE, fieldType.docValuesType());
    }

    public void testNullConfigValuesFail() throws MapperParsingException {
        Exception e = expectThrows(
            MapperParsingException.class,
            () -> createDocumentMapper(fieldMapping(b -> b.field("type", "patterned_text").field("meta", (String) null)))
        );
        assertThat(e.getMessage(), containsString("[meta] on mapper [field] of type [patterned_text] must not have a [null] value"));
    }

    public void testSimpleMerge() throws IOException {
        XContentBuilder startingMapping = fieldMapping(b -> b.field("type", "patterned_text"));
        MapperService mapperService = createMapperService(startingMapping);
        assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(PatternedTextFieldMapper.class));

        merge(mapperService, startingMapping);
        assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(PatternedTextFieldMapper.class));

        XContentBuilder newField = mapping(b -> {
            b.startObject("field").field("type", "patterned_text").startObject("meta").field("key", "value").endObject().endObject();
            b.startObject("other_field").field("type", "keyword").endObject();
        });
        merge(mapperService, newField);
        assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(PatternedTextFieldMapper.class));
        assertThat(mapperService.documentMapper().mappers().getMapper("other_field"), instanceOf(KeywordFieldMapper.class));
    }

    public void testDisabledSource() throws IOException {
        XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("_doc");
        {
            mapping.startObject("properties");
            {
                mapping.startObject("foo");
                {
                    mapping.field("type", "patterned_text");
                }
                mapping.endObject();
            }
            mapping.endObject();

            mapping.startObject("_source");
            {
                mapping.field("enabled", false);
            }
            mapping.endObject();
        }
        mapping.endObject().endObject();
|
||||||
|
|
||||||
|
MapperService mapperService = createMapperService(mapping);
|
||||||
|
MappedFieldType ft = mapperService.fieldType("foo");
|
||||||
|
SearchExecutionContext context = createSearchExecutionContext(mapperService);
|
||||||
|
TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 4, 7));
|
||||||
|
|
||||||
|
// Allowed even if source is disabled.
|
||||||
|
ft.phraseQuery(ts, 0, true, context);
|
||||||
|
ft.termQuery("a", context);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Object generateRandomInputValue(MappedFieldType ft) {
|
||||||
|
assumeFalse("We don't have a way to assert things here", true);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void randomFetchTestFieldConfig(XContentBuilder b) throws IOException {
|
||||||
|
assumeFalse("We don't have a way to assert things here", true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected boolean supportsIgnoreMalformed() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) {
|
||||||
|
assertFalse("patterned_text doesn't support ignoreMalformed", ignoreMalformed);
|
||||||
|
return new PatternedTextSyntheticSourceSupport();
|
||||||
|
}
|
||||||
|
|
||||||
|
static class PatternedTextSyntheticSourceSupport implements SyntheticSourceSupport {
|
||||||
|
@Override
|
||||||
|
public SyntheticSourceExample example(int maxValues) {
|
||||||
|
Tuple<String, String> v = generateValue();
|
||||||
|
return new SyntheticSourceExample(v.v1(), v.v2(), this::mapping);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Tuple<String, String> generateValue() {
|
||||||
|
StringBuilder builder = new StringBuilder();
|
||||||
|
if (randomBoolean()) {
|
||||||
|
builder.append(randomAlphaOfLength(5));
|
||||||
|
} else {
|
||||||
|
String timestamp = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.formatMillis(System.currentTimeMillis());
|
||||||
|
builder.append(timestamp);
|
||||||
|
}
|
||||||
|
for (int i = 0; i < randomIntBetween(0, 9); i++) {
|
||||||
|
builder.append(" ");
|
||||||
|
int rand = randomIntBetween(0, 4);
|
||||||
|
switch (rand) {
|
||||||
|
case 0 -> builder.append(randomAlphaOfLength(5));
|
||||||
|
case 1 -> builder.append(randomAlphanumericOfLength(5));
|
||||||
|
case 2 -> builder.append(UUID.randomUUID());
|
||||||
|
case 3 -> builder.append(randomIp(true));
|
||||||
|
case 4 -> builder.append(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.formatMillis(randomMillisUpToYear9999()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
String value = builder.toString();
|
||||||
|
return Tuple.tuple(value, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void mapping(XContentBuilder b) throws IOException {
|
||||||
|
b.field("type", "patterned_text");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<SyntheticSourceInvalidExample> invalidExample() throws IOException {
|
||||||
|
return List.of();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testDocValues() throws IOException {
|
||||||
|
MapperService mapper = createMapperService(fieldMapping(b -> b.field("type", "patterned_text")));
|
||||||
|
assertScriptDocValues(mapper, "foo", equalTo(List.of("foo")));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testDocValuesSynthetic() throws IOException {
|
||||||
|
MapperService mapper = createSytheticSourceMapperService(fieldMapping(b -> b.field("type", "patterned_text")));
|
||||||
|
assertScriptDocValues(mapper, "foo", equalTo(List.of("foo")));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void testSyntheticSourceKeepArrays() {
|
||||||
|
// This mapper does not allow arrays
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected IngestScriptSupport ingestScriptSupport() {
|
||||||
|
throw new AssumptionViolatedException("not supported");
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,194 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

package org.elasticsearch.xpack.logsdb.patternedtext;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.tests.analysis.CannedTokenStream;
import org.apache.lucene.tests.analysis.Token;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.common.lucene.search.AutomatonQueries;
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.mapper.FieldTypeTestCase;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.extras.SourceIntervalsSource;
import org.hamcrest.Matchers;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class PatternedTextFieldTypeTests extends FieldTypeTestCase {

    public void testTermQuery() {
        MappedFieldType ft = new PatternedTextFieldType("field");
        assertEquals(new ConstantScoreQuery(new TermQuery(new Term("field", "foo"))), ft.termQuery("foo", null));
        assertEquals(AutomatonQueries.caseInsensitiveTermQuery(new Term("field", "fOo")), ft.termQueryCaseInsensitive("fOo", null));
    }

    public void testTermsQuery() {
        MappedFieldType ft = new PatternedTextFieldType("field");
        List<BytesRef> terms = new ArrayList<>();
        terms.add(new BytesRef("foo"));
        terms.add(new BytesRef("123"));
        assertEquals(new TermInSetQuery("field", terms), ft.termsQuery(Arrays.asList("foo", "123"), null));
    }

    public void testRangeQuery() {
        MappedFieldType ft = new PatternedTextFieldType("field");
        assertEquals(
            new TermRangeQuery("field", BytesRefs.toBytesRef("foo"), BytesRefs.toBytesRef("bar"), true, false),
            ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_CONTEXT)
        );

        ElasticsearchException ee = expectThrows(
            ElasticsearchException.class,
            () -> ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_CONTEXT_DISALLOW_EXPENSIVE)
        );
        assertEquals(
            "[range] queries on [text] or [keyword] fields cannot be executed when " + "'search.allow_expensive_queries' is set to false.",
            ee.getMessage()
        );
    }

    public void testRegexpQuery() {
        MappedFieldType ft = new PatternedTextFieldType("field");
        assertEquals(new RegexpQuery(new Term("field", "foo.*")), ft.regexpQuery("foo.*", 0, 0, 10, null, MOCK_CONTEXT));

        ElasticsearchException ee = expectThrows(
            ElasticsearchException.class,
            () -> ft.regexpQuery("foo.*", randomInt(10), 0, randomInt(10) + 1, null, MOCK_CONTEXT_DISALLOW_EXPENSIVE)
        );
        assertEquals("[regexp] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", ee.getMessage());
    }

    public void testFuzzyQuery() {
        MappedFieldType ft = new PatternedTextFieldType("field");
        assertEquals(
            new ConstantScoreQuery(new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true)),
            ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_CONTEXT)
        );

        ElasticsearchException ee = expectThrows(
            ElasticsearchException.class,
            () -> ft.fuzzyQuery(
                "foo",
                Fuzziness.AUTO,
                randomInt(10) + 1,
                randomInt(10) + 1,
                randomBoolean(),
                MOCK_CONTEXT_DISALLOW_EXPENSIVE
            )
        );
        assertEquals("[fuzzy] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", ee.getMessage());
    }

    private Query unwrapPositionalQuery(Query query) {
        query = ((ConstantScoreQuery) query).getQuery();
        return query;
    }

    public void testPhraseQuery() throws IOException {
        MappedFieldType ft = new PatternedTextFieldType("field");
        TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("1", 4, 7));
        Query query = ft.phraseQuery(ts, 0, true, MOCK_CONTEXT);
        Query delegate = unwrapPositionalQuery(query);
        assertEquals(new PhraseQuery("field", "a", "1").toString(), delegate.toString());
    }

    public void testMultiPhraseQuery() throws IOException {
        MappedFieldType ft = new PatternedTextFieldType("field");
        TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("2", 0, 0, 3), new Token("c", 4, 7));
        Query query = ft.multiPhraseQuery(ts, 0, true, MOCK_CONTEXT);
        Query delegate = unwrapPositionalQuery(query);
        Query expected = new MultiPhraseQuery.Builder().add(new Term[] { new Term("field", "a"), new Term("field", "2") })
            .add(new Term("field", "c"))
            .build();
        assertEquals(expected.toString(), delegate.toString());
    }

    public void testPhrasePrefixQuery() throws IOException {
        MappedFieldType ft = new PatternedTextFieldType("field");
        TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 0, 0, 3), new Token("c", 4, 7));
        Query query = ft.phrasePrefixQuery(ts, 0, 10, MOCK_CONTEXT);
        Query delegate = unwrapPositionalQuery(query);
        MultiPhrasePrefixQuery expected = new MultiPhrasePrefixQuery("field");
        expected.add(new Term[] { new Term("field", "a"), new Term("field", "b") });
        expected.add(new Term("field", "c"));
        assertEquals(expected.toString(), delegate.toString());
    }

    public void testTermIntervals() {
        MappedFieldType ft = new PatternedTextFieldType("field");
        IntervalsSource termIntervals = ft.termIntervals(new BytesRef("foo"), MOCK_CONTEXT);
        assertThat(termIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
        assertEquals(Intervals.term(new BytesRef("foo")), ((SourceIntervalsSource) termIntervals).getIntervalsSource());
    }

    public void testPrefixIntervals() {
        MappedFieldType ft = new PatternedTextFieldType("field");
        IntervalsSource prefixIntervals = ft.prefixIntervals(new BytesRef("foo"), MOCK_CONTEXT);
        assertThat(prefixIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
        assertEquals(
            Intervals.prefix(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()),
            ((SourceIntervalsSource) prefixIntervals).getIntervalsSource()
        );
    }

    public void testWildcardIntervals() {
        MappedFieldType ft = new PatternedTextFieldType("field");
        IntervalsSource wildcardIntervals = ft.wildcardIntervals(new BytesRef("foo"), MOCK_CONTEXT);
        assertThat(wildcardIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
        assertEquals(
            Intervals.wildcard(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()),
            ((SourceIntervalsSource) wildcardIntervals).getIntervalsSource()
        );
    }

    public void testRegexpIntervals() {
        MappedFieldType ft = new PatternedTextFieldType("field");
        IntervalsSource regexpIntervals = ft.regexpIntervals(new BytesRef("foo"), MOCK_CONTEXT);
        assertThat(regexpIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
        assertEquals(
            Intervals.regexp(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()),
            ((SourceIntervalsSource) regexpIntervals).getIntervalsSource()
        );
    }

    public void testFuzzyIntervals() {
        MappedFieldType ft = new PatternedTextFieldType("field");
        IntervalsSource fuzzyIntervals = ft.fuzzyIntervals("foo", 1, 2, true, MOCK_CONTEXT);
        assertThat(fuzzyIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
    }

    public void testRangeIntervals() {
        MappedFieldType ft = new PatternedTextFieldType("field");
        IntervalsSource rangeIntervals = ft.rangeIntervals(new BytesRef("foo"), new BytesRef("foo1"), true, true, MOCK_CONTEXT);
        assertThat(rangeIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
        assertEquals(
            Intervals.range(new BytesRef("foo"), new BytesRef("foo1"), true, true, IndexSearcher.getMaxClauseCount()),
            ((SourceIntervalsSource) rangeIntervals).getIntervalsSource()
        );
    }
}
@ -0,0 +1,101 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

package org.elasticsearch.xpack.logsdb.patternedtext;

import org.elasticsearch.test.ESTestCase;
import org.hamcrest.Matchers;

public class PatternedTextValueProcessorTests extends ESTestCase {

    public void testEmpty() {
        String text = "";
        PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text);
        assertEquals(text, parts.template());
        assertTrue(parts.args().isEmpty());
        assertEquals(text, PatternedTextValueProcessor.merge(parts));
    }

    public void testWhitespace() {
        String text = " ";
        PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text);
        assertEquals(text, parts.template());
        assertTrue(parts.args().isEmpty());
        assertEquals(text, PatternedTextValueProcessor.merge(parts));
    }

    public void testWithoutTimestamp() {
        String text = " some text with arg1 and 2arg2 and 333 ";
        PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text);
        assertEquals(" some text with %W and %W and %W ", parts.template());
        assertThat(parts.args(), Matchers.contains("arg1", "2arg2", "333"));
        assertEquals(text, PatternedTextValueProcessor.merge(parts));
    }

    public void testWithTimestamp() {
        String text = " 2021-04-13T13:51:38.000Z some text with arg1 and arg2 and arg3";
        PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text);
        assertEquals(" %W some text with %W and %W and %W", parts.template());
        assertThat(parts.args(), Matchers.contains("2021-04-13T13:51:38.000Z", "arg1", "arg2", "arg3"));
        assertEquals(text, PatternedTextValueProcessor.merge(parts));
    }

    public void testWithDateSpaceTime() {
        String text = " 2021-04-13 13:51:38 some text with arg1 and arg2 and arg3";
        PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text);
        assertEquals(" %W %W some text with %W and %W and %W", parts.template());
        assertThat(parts.args(), Matchers.contains("2021-04-13", "13:51:38", "arg1", "arg2", "arg3"));
        assertEquals(text, PatternedTextValueProcessor.merge(parts));
    }

    public void testMalformedDate() {
        String text = "2020/09/06 10:11:38 Using namespace: kubernetes-dashboard' | HTTP status: 400, message: [1:395]";
        PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text);
        assertEquals("%W %W Using namespace: kubernetes-dashboard' | HTTP status: %W message: [%W]", parts.template());
        assertThat(parts.args(), Matchers.contains("2020/09/06", "10:11:38", "400,", "1:395"));
        assertEquals(text, PatternedTextValueProcessor.merge(parts));
    }

    public void testUUID() {
        String text = "[2020-08-18T00:58:56.751+00:00][15][2354][action_controller][INFO]: [18be2355-6306-4a00-9db9-f0696aa1a225] "
            + "some text with arg1 and arg2";
        PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text);
        assertEquals("[%W][%W][%W][action_controller][INFO]: [%W] some text with %W and %W", parts.template());
        assertThat(
            parts.args(),
            Matchers.contains("2020-08-18T00:58:56.751+00:00", "15", "2354", "18be2355-6306-4a00-9db9-f0696aa1a225", "arg1", "arg2")
        );
        assertEquals(text, PatternedTextValueProcessor.merge(parts));
    }

    public void testIP() {
        String text = "[2020-08-18T00:58:56.751+00:00][15][2354][action_controller][INFO]: from 94.168.152.150 and arg1";
        PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text);
        assertEquals("[%W][%W][%W][action_controller][INFO]: from %W and %W", parts.template());
        assertThat(parts.args(), Matchers.contains("2020-08-18T00:58:56.751+00:00", "15", "2354", "94.168.152.150", "arg1"));
        assertEquals(text, PatternedTextValueProcessor.merge(parts));
    }

    public void testSecondDate() {
        String text = "[2020-08-18T00:58:56.751+00:00][15][2354][action_controller][INFO]: at 2020-08-18 00:58:56 +0000 and arg1";
        PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text);
        assertEquals("[%W][%W][%W][action_controller][INFO]: at %W %W %W and %W", parts.template());
        assertThat(
            parts.args(),
            Matchers.contains("2020-08-18T00:58:56.751+00:00", "15", "2354", "2020-08-18", "00:58:56", "+0000", "arg1")
        );
        assertEquals(text, PatternedTextValueProcessor.merge(parts));
    }

    public void testWithTimestamp1() {
        String text = "[2020-08-18T00:58:56] Found 123 errors for service [cheddar1]";
        PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text);
        assertEquals("[%W] Found %W errors for service [%W]", parts.template());
        assertThat(parts.args(), Matchers.contains("2020-08-18T00:58:56", "123", "cheddar1"));
        assertEquals(text, PatternedTextValueProcessor.merge(parts));
    }
}
@ -27,6 +27,7 @@ public class LogsdbTestSuiteIT extends ESClientYamlSuiteTestCase {
    @ClassRule
    public static final ElasticsearchCluster cluster = ElasticsearchCluster.local()
        .module("logsdb")
        .distribution(DistributionType.DEFAULT)
        .user(USER, PASS, "superuser", false)
        .setting("xpack.security.autoconfiguration.enabled", "false")
@ -0,0 +1,333 @@
setup:

  - do:
      indices.create:
        index: test
        body:
          mappings:
            properties:
              foo:
                type: patterned_text

  - do:
      index:
        index: test
        id: "1"
        body: {}

  - do:
      index:
        index: test
        id: "2"
        body: { "foo": "Found 5 errors for service [cheddar1]" }

  - do:
      index:
        index: test
        id: "3"
        body: { "foo": "[2020-08-18T00:58:56] Found 123 errors for service [cheddar1]" }

  - do:
      index:
        index: test
        id: "4"
        body: { "foo": "Found some errors for cheddar data service" }

  - do:
      indices.refresh: {}

---
Field caps:

  - do:
      field_caps:
        index: test
        fields: [ foo ]

  - match: { fields.foo.text.searchable: true }
  - match: { fields.foo.text.aggregatable: false }

---
Exist query:

  - do:
      search:
        index: test
        body:
          query:
            exists:
              field: foo

  - match: { "hits.total.value": 3 }
  - match: { "hits.hits.0._score": 1.0 }

---
Match query:

  - do:
      search:
        index: test
        body:
          query:
            match:
              foo: 5

  - match: { "hits.total.value": 1 }
  - match: { "hits.hits.0._score": 1.0 }

---
Match Phrase query:

  - do:
      search:
        index: test
        body:
          query:
            match_phrase:
              foo: "5 errors"

  - match: { "hits.total.value": 1 }
  - match: { "hits.hits.0._score": 1.0 }

---
Match Phrase Prefix query:

  - do:
      search:
        index: test
        body:
          query:
            match_phrase_prefix:
              foo: "5 err"

  - match: { "hits.total.value": 1 }
  - match: { "hits.hits.0._score": 1.0 }

---
Query String query with phrase:

  - do:
      search:
        index: test
        body:
          query:
            query_string:
              query: '"5 errors"'
              default_field: "foo"

  - match: { "hits.total.value": 1 }
  - match: { "hits.hits.0._score": 1.0 }

---
Regexp query:

  - do:
      search:
        index: test
        body:
          query:
            regexp:
              foo: "ser.*ce"

  - match: { "hits.total.value": 3 }
  - match: { "hits.hits.0._score": 1.0 }

---
Wildcard query:

  - do:
      search:
        index: test
        body:
          query:
            wildcard:
              foo: "ser*ce"

  - match: { "hits.total.value": 3 }
  - match: { "hits.hits.0._score": 1.0 }

---
Prefix query:

  - do:
      search:
        index: test
        body:
          query:
            prefix:
              foo: "ser"

  - match: { "hits.total.value": 3 }
  - match: { "hits.hits.0._score": 1.0 }

---
Fuzzy query:

  - do:
      search:
        index: test
        body:
          query:
            fuzzy:
              foo: "errars"

  - match: { "hits.total.value": 3 }
  - match: { "hits.hits.0._score": 1.0 }

---
Span query:

  - do:
      catch: bad_request
      search:
        index: test
        body:
          query:
            span_term:
              foo: errors

---
Term intervals query:

  - do:
      search:
        index: test
        body:
          query:
            intervals:
              foo:
                match:
                  query: "for service"
                  max_gaps: 1

  - match: { "hits.total.value": 2 }

---
Prefix intervals query:

  - do:
      search:
        index: test
        body:
          query:
            intervals:
              foo:
                prefix:
                  prefix: "ser"

  - match: { "hits.total.value": 3 }

---
Wildcard intervals query:

  - do:
      search:
        index: test
        body:
          query:
            intervals:
              foo:
                wildcard:
                  pattern: "*edda*"

  - match: { "hits.total.value": 3 }

---
Fuzzy intervals query:

  - do:
      search:
        index: test
        body:
          query:
            intervals:
              foo:
                fuzzy:
                  term: "servace"

  - match: { "hits.total.value": 3 }

---
Wildcard highlighting:

  - do:
      search:
        index: test
        body:
          query:
            match:
              foo: "5"
          highlight:
            fields:
              "*": {}

  - match: { hits.total.value: 1 }
  - match: { hits.hits.0._source.foo: "Found 5 errors for service [cheddar1]" }
  - match: { hits.hits.0.highlight.foo.0: "Found <em>5</em> errors for service [cheddar1]" }

---
tsdb:

  - do:
      indices.create:
        index: tsdb_test
        body:
          settings:
            index:
              mode: time_series
              routing_path: [ dimension ]
              time_series:
                start_time: 2000-01-01T00:00:00Z
                end_time: 2099-12-31T23:59:59Z
          mappings:
            properties:
              dimension:
                type: keyword
                time_series_dimension: true
              foo:
                type: patterned_text

  - do:
      index:
        index: tsdb_test
        refresh: true
        body:
          "@timestamp": "2000-01-01T00:00:00Z"
          dimension: "a"
          foo: "Apache Lucene powers Elasticsearch"

  - do:
      search:
        index: tsdb_test

  - match: { "hits.total.value": 1 }
  - match:
      hits.hits.0._source:
        "@timestamp" : "2000-01-01T00:00:00.000Z"
        "dimension" : "a"
        foo: "Apache Lucene powers Elasticsearch"

---
Multiple values:

  - do:
      indices.create:
        index: test1
        body:
          mappings:
            properties:
              foo:
                type: patterned_text

  - do:
      catch: bad_request
      index:
        index: test1
        id: "1"
        body: {
          "foo": [
            "Found 5 errors for service [cheddar1]",
            "[2020-08-18T00:58:56] Found 123 errors for service [cheddar1]"
          ]
        }
@ -0,0 +1,76 @@
simple:

  - do:
      indices.create:
        index: test
        body:
          settings:
            index:
              mapping.source.mode: synthetic
          mappings:
            properties:
              id:
                type: integer
              message:
                type: patterned_text

  - do:
      bulk:
        index: test
        refresh: true
        body:
          - '{ "create": { } }'
          - '{ "id": 1, "message": "some log message with no arg" }'
          - '{ "create": { } }'
          - '{ "id": 2, "message": "another log message with arg 1234 and arg 5678 and a mixed one ABCD9" }'
          - '{ "create": { } }'
          - '{ "id": 3, "message": "some log message with no arg" }'
          - '{ "create": { } }'
          - '{ "id": 4, "message": "another log message with arg 1234 and arg 8765 and a mixed one ABCD1" }'

  - do:
      search:
        index: test
        sort: id

  - match: { hits.hits.0._source.message: "some log message with no arg" }
  - match: { hits.hits.1._source.message: "another log message with arg 1234 and arg 5678 and a mixed one ABCD9" }
  - match: { hits.hits.2._source.message: "some log message with no arg" }
  - match: { hits.hits.3._source.message: "another log message with arg 1234 and arg 8765 and a mixed one ABCD1" }

---
synthetic_source with copy_to:

  - do:
      indices.create:
        index: synthetic_source_test
        body:
          settings:
            index:
              mapping.source.mode: synthetic
          mappings:
            properties:
              foo:
                type: patterned_text
                copy_to: copy
              copy:
                type: keyword

  - do:
      index:
        index: synthetic_source_test
        id: "1"
        refresh: true
        body:
          foo: "another log message with arg 1234 and arg 5678 and a mixed one ABCD9"

  - do:
      search:
        index: synthetic_source_test
        body:
          fields: ["copy"]

  - match: { "hits.total.value": 1 }
  - match:
      hits.hits.0._source.foo: "another log message with arg 1234 and arg 5678 and a mixed one ABCD9"
  - match:
      hits.hits.0.fields.copy.0: "another log message with arg 1234 and arg 5678 and a mixed one ABCD9"