Simple version of patterned_text with a single doc value for arguments (#129292)

Initial version of the patterned_text mapper. It behaves similarly to match_only_text. This version uses a single SortedSetDocValues field for the template and another for the arguments. It splits each message on delimiters, then classifies a token as an argument if it contains a digit. All arguments are concatenated and stored as a single doc value. A single inverted index is used, without positions. Phrase queries are still possible through SourceConfirmedTextQuery, but they are not fast.
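
As a quick illustration of the splitting rule, here is a minimal sketch of the split/merge round trip implemented by PatternedTextValueProcessor in this commit (the example message is hypothetical; the output shapes follow the unit tests at the bottom of the diff):

    // Tokens containing a digit become "%W" placeholders in the template;
    // all other tokens stay in the template verbatim.
    PatternedTextValueProcessor.Parts parts =
        PatternedTextValueProcessor.split("2021-04-13T13:51:38.000Z dropped 42 events");
    // parts.template() -> "%W dropped %W events"
    // parts.args()     -> ["2021-04-13T13:51:38.000Z", "42"]
    // merge() re-inserts each arg at its placeholder, recovering the input.
    assert PatternedTextValueProcessor.merge(parts)
        .equals("2021-04-13T13:51:38.000Z dropped 42 events");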
Parker Timmins 2025-06-25 21:31:32 -05:00 committed by GitHub
parent 2df9dd42fb
commit 9aaba25d58
18 changed files with 2045 additions and 4 deletions

module-info.java (module org.elasticsearch.mapper.extras)

@@ -14,4 +14,6 @@ module org.elasticsearch.mapper.extras {
     requires org.apache.lucene.core;
     requires org.apache.lucene.memory;
     requires org.apache.lucene.queries;
+
+    exports org.elasticsearch.index.mapper.extras;
 }

MatchOnlyTextFieldMapper.java

@@ -173,7 +173,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper {
         super(name, true, false, false, tsi, meta);
         this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer);
         this.textFieldType = new TextFieldType(name, isSyntheticSource);
-        this.originalName = isSyntheticSource ? name() + "._original" : null;
+        this.originalName = isSyntheticSource ? name + "._original" : null;
     }

     public MatchOnlyTextFieldType(String name) {

HighlightPhase.java

@@ -124,7 +124,8 @@ public class HighlightPhase implements FetchSubPhase {
         if (fieldNameContainsWildcards) {
             if (fieldType.typeName().equals(TextFieldMapper.CONTENT_TYPE) == false
                 && fieldType.typeName().equals(KeywordFieldMapper.CONTENT_TYPE) == false
-                && fieldType.typeName().equals("match_only_text") == false) {
+                && fieldType.typeName().equals("match_only_text") == false
+                && fieldType.typeName().equals("patterned_text") == false) {
                 continue;
             }
             if (highlighter.canHighlight(fieldType) == false) {

build.gradle (logsdb plugin)

@@ -24,12 +24,13 @@ base {
 restResources {
   restApi {
-    include 'bulk', 'search', '_common', 'indices', 'index', 'cluster', 'data_stream', 'ingest', 'cat', 'capabilities', 'esql.query'
+    include 'bulk', 'search', '_common', 'indices', 'index', 'cluster', 'data_stream', 'ingest', 'cat', 'capabilities', 'esql.query', 'field_caps'
   }
 }

 dependencies {
   compileOnly project(path: xpackModule('core'))
+  implementation project(':modules:mapper-extras')
   testImplementation project(':modules:data-streams')
   testImplementation(testArtifact(project(xpackModule('core'))))
   javaRestTestImplementation(testArtifact(project(xpackModule('spatial'))))

LogsDBPlugin.java

@@ -12,21 +12,27 @@ import org.elasticsearch.common.settings.Setting;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.IndexSettingProvider;
 import org.elasticsearch.index.IndexVersion;
+import org.elasticsearch.index.mapper.Mapper;
 import org.elasticsearch.license.LicenseService;
 import org.elasticsearch.license.XPackLicenseState;
 import org.elasticsearch.plugins.ActionPlugin;
+import org.elasticsearch.plugins.MapperPlugin;
 import org.elasticsearch.plugins.Plugin;
 import org.elasticsearch.xpack.core.XPackPlugin;
 import org.elasticsearch.xpack.core.action.XPackInfoFeatureAction;
 import org.elasticsearch.xpack.core.action.XPackUsageFeatureAction;
+import org.elasticsearch.xpack.logsdb.patternedtext.PatternedTextFieldMapper;
+import org.elasticsearch.xpack.logsdb.patternedtext.PatternedTextFieldType;

 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
+import java.util.Map;

+import static java.util.Collections.singletonMap;
 import static org.elasticsearch.xpack.logsdb.LogsdbLicenseService.FALLBACK_SETTING;

-public class LogsDBPlugin extends Plugin implements ActionPlugin {
+public class LogsDBPlugin extends Plugin implements ActionPlugin, MapperPlugin {

     private final Settings settings;
     private final LogsdbLicenseService licenseService;
@@ -98,6 +104,15 @@ public class LogsDBPlugin extends Plugin implements ActionPlugin {
         return actions;
     }

+    @Override
+    public Map<String, Mapper.TypeParser> getMappers() {
+        if (PatternedTextFieldMapper.PATTERNED_TEXT_MAPPER.isEnabled()) {
+            return singletonMap(PatternedTextFieldType.CONTENT_TYPE, PatternedTextFieldMapper.PARSER);
+        } else {
+            return Map.of();
+        }
+    }
+
     protected XPackLicenseState getLicenseState() {
         return XPackPlugin.getSharedLicenseState();
     }
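
Note that getMappers() registers the type only when the "patterned_text" feature flag is enabled. Assuming the usual conventions of the FeatureFlag utility (not shown in this diff), the flag is on by default in snapshot builds and can otherwise be enabled with a JVM option:

    -Des.patterned_text_feature_flag_enabled=true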

PatternedTextDocValues.java

@@ -0,0 +1,88 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.logsdb.patternedtext;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
public class PatternedTextDocValues extends BinaryDocValues {
private final SortedSetDocValues templateDocValues;
private final SortedSetDocValues argsDocValues;
PatternedTextDocValues(SortedSetDocValues templateDocValues, SortedSetDocValues argsDocValues) {
this.templateDocValues = templateDocValues;
this.argsDocValues = argsDocValues;
}
static PatternedTextDocValues from(LeafReader leafReader, String templateFieldName, String argsFieldName) throws IOException {
SortedSetDocValues templateDocValues = DocValues.getSortedSet(leafReader, templateFieldName);
if (templateDocValues.getValueCount() == 0) {
return null;
}
SortedSetDocValues argsDocValues = DocValues.getSortedSet(leafReader, argsFieldName);
return new PatternedTextDocValues(templateDocValues, argsDocValues);
}
private String getNextStringValue() throws IOException {
assert templateDocValues.docValueCount() == 1;
String template = templateDocValues.lookupOrd(templateDocValues.nextOrd()).utf8ToString();
int argsCount = PatternedTextValueProcessor.countArgs(template);
if (argsCount > 0) {
assert argsDocValues.docValueCount() == 1;
var mergedArgs = argsDocValues.lookupOrd(argsDocValues.nextOrd());
var args = PatternedTextValueProcessor.decodeRemainingArgs(mergedArgs.utf8ToString());
return PatternedTextValueProcessor.merge(new PatternedTextValueProcessor.Parts(template, args));
} else {
return template;
}
}
@Override
public BytesRef binaryValue() throws IOException {
return new BytesRef(getNextStringValue());
}
@Override
public boolean advanceExact(int i) throws IOException {
argsDocValues.advanceExact(i);
// If the template has a value, the message has a value. The args return value is ignored, since a doc may legitimately have no args.
return templateDocValues.advanceExact(i);
}
@Override
public int docID() {
return templateDocValues.docID();
}
@Override
public int nextDoc() throws IOException {
int templateNext = templateDocValues.nextDoc();
var argsAdvance = argsDocValues.advance(templateNext);
assert argsAdvance >= templateNext;
return templateNext;
}
@Override
public int advance(int i) throws IOException {
int templateAdvance = templateDocValues.advance(i);
var argsAdvance = argsDocValues.advance(templateAdvance);
assert argsAdvance >= templateAdvance;
return templateAdvance;
}
@Override
public long cost() {
return templateDocValues.cost() + argsDocValues.cost();
}
}
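
In short, binaryValue() rebuilds the original message per document from the two sorted-set fields. A hypothetical example, consistent with the test fixtures further down:

    // template doc value: "%W mouse %W"    packed args doc value: "2 3"
    // countArgs(template) == 2, so the args are decoded and merged back in:
    // binaryValue() -> "2 mouse 3"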

PatternedTextFieldMapper.java

@@ -0,0 +1,176 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.logsdb.patternedtext;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.util.FeatureFlag;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.mapper.CompositeSyntheticFieldLoader;
import org.elasticsearch.index.mapper.DocumentParserContext;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MapperBuilderContext;
import org.elasticsearch.index.mapper.TextParams;
import org.elasticsearch.index.mapper.TextSearchInfo;
import java.io.IOException;
import java.util.Map;
/**
 * A {@link FieldMapper} for log messages that splits each value into a template
 * and a list of arguments, stored as separate doc values. The inverted index
 * stores no positions, like {@code match_only_text}.
 */
public class PatternedTextFieldMapper extends FieldMapper {
public static final FeatureFlag PATTERNED_TEXT_MAPPER = new FeatureFlag("patterned_text");
public static class Defaults {
public static final FieldType FIELD_TYPE;
static {
final FieldType ft = new FieldType();
ft.setTokenized(true);
ft.setStored(false);
ft.setStoreTermVectors(false);
ft.setOmitNorms(true);
ft.setIndexOptions(IndexOptions.DOCS);
FIELD_TYPE = freezeAndDeduplicateFieldType(ft);
}
}
public static class Builder extends FieldMapper.Builder {
private final IndexVersion indexCreatedVersion;
private final Parameter<Map<String, String>> meta = Parameter.metaParam();
private final TextParams.Analyzers analyzers;
public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers) {
super(name);
this.indexCreatedVersion = indexCreatedVersion;
this.analyzers = new TextParams.Analyzers(
indexAnalyzers,
m -> ((PatternedTextFieldMapper) m).indexAnalyzer,
m -> ((PatternedTextFieldMapper) m).positionIncrementGap,
indexCreatedVersion
);
}
@Override
protected Parameter<?>[] getParameters() {
return new Parameter<?>[] { meta };
}
private PatternedTextFieldType buildFieldType(MapperBuilderContext context) {
NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer();
NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer();
NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer();
TextSearchInfo tsi = new TextSearchInfo(Defaults.FIELD_TYPE, null, searchAnalyzer, searchQuoteAnalyzer);
return new PatternedTextFieldType(
context.buildFullName(leafName()),
tsi,
indexAnalyzer,
context.isSourceSynthetic(),
meta.getValue()
);
}
@Override
public PatternedTextFieldMapper build(MapperBuilderContext context) {
return new PatternedTextFieldMapper(leafName(), buildFieldType(context), builderParams(this, context), this);
}
}
public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers()));
private final IndexVersion indexCreatedVersion;
private final IndexAnalyzers indexAnalyzers;
private final NamedAnalyzer indexAnalyzer;
private final int positionIncrementGap;
private final FieldType fieldType;
private PatternedTextFieldMapper(
String simpleName,
PatternedTextFieldType mappedFieldPatternedTextFieldType,
BuilderParams builderParams,
Builder builder
) {
super(simpleName, mappedFieldPatternedTextFieldType, builderParams);
assert mappedFieldPatternedTextFieldType.getTextSearchInfo().isTokenized();
assert mappedFieldPatternedTextFieldType.hasDocValues() == false;
this.fieldType = Defaults.FIELD_TYPE;
this.indexCreatedVersion = builder.indexCreatedVersion;
this.indexAnalyzers = builder.analyzers.indexAnalyzers;
this.indexAnalyzer = builder.analyzers.getIndexAnalyzer();
this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue();
}
@Override
public Map<String, NamedAnalyzer> indexAnalyzers() {
return Map.of(mappedFieldType.name(), indexAnalyzer);
}
@Override
public FieldMapper.Builder getMergeBuilder() {
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers).init(this);
}
@Override
protected void parseCreateField(DocumentParserContext context) throws IOException {
final String value = context.parser().textOrNull();
if (value == null) {
return;
}
var existingValue = context.doc().getField(fieldType().name());
if (existingValue != null) {
throw new IllegalArgumentException("Multiple values are not allowed for field [" + fieldType().name() + "].");
}
// Parse template and args.
PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(value);
// Add index on original value
context.doc().add(new Field(fieldType().name(), value, fieldType));
// Add template doc_values
context.doc().add(new SortedSetDocValuesField(fieldType().templateFieldName(), new BytesRef(parts.template())));
// Add args doc_values
if (parts.args().isEmpty() == false) {
String remainingArgs = PatternedTextValueProcessor.encodeRemainingArgs(parts);
context.doc().add(new SortedSetDocValuesField(fieldType().argsFieldName(), new BytesRef(remainingArgs)));
}
}
@Override
protected String contentType() {
return PatternedTextFieldType.CONTENT_TYPE;
}
@Override
public PatternedTextFieldType fieldType() {
return (PatternedTextFieldType) super.fieldType();
}
@Override
protected SyntheticSourceSupport syntheticSourceSupport() {
return new SyntheticSourceSupport.Native(
() -> new CompositeSyntheticFieldLoader(
leafName(),
fullPath(),
new PatternedTextSyntheticFieldLoaderLayer(fieldType().name(), fieldType().templateFieldName(), fieldType().argsFieldName())
)
);
}
}
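
Per document, parseCreateField() therefore emits up to three Lucene fields. A sketch of what indexing one message produces, assuming a mapper named "message" (hypothetical name and value):

    // Inverted index on the full value, IndexOptions.DOCS only, no positions:
    new Field("message", "Disk 7 full on node-3", Defaults.FIELD_TYPE);
    // Template doc value, digit-bearing tokens replaced by %W:
    new SortedSetDocValuesField("message.template", new BytesRef("Disk %W full on %W"));
    // All args packed into one space-joined doc value (omitted when a message has no args):
    new SortedSetDocValuesField("message.args", new BytesRef("7 node-3"));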

PatternedTextFieldType.java

@@ -0,0 +1,270 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.logsdb.patternedtext;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FieldExistsQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOFunction;
import org.elasticsearch.common.CheckedIntFunction;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.fielddata.FieldDataContext;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData;
import org.elasticsearch.index.mapper.BlockDocValuesReader;
import org.elasticsearch.index.mapper.BlockLoader;
import org.elasticsearch.index.mapper.SourceValueFetcher;
import org.elasticsearch.index.mapper.StringFieldType;
import org.elasticsearch.index.mapper.TextFieldMapper;
import org.elasticsearch.index.mapper.TextSearchInfo;
import org.elasticsearch.index.mapper.ValueFetcher;
import org.elasticsearch.index.mapper.extras.SourceConfirmedTextQuery;
import org.elasticsearch.index.mapper.extras.SourceIntervalsSource;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.script.field.KeywordDocValuesField;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.search.lookup.SourceProvider;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;
public class PatternedTextFieldType extends StringFieldType {
private static final String TEMPLATE_SUFFIX = ".template";
private static final String ARGS_SUFFIX = ".args";
public static final String CONTENT_TYPE = "patterned_text";
private final Analyzer indexAnalyzer;
private final TextFieldMapper.TextFieldType textFieldType;
PatternedTextFieldType(String name, TextSearchInfo tsi, Analyzer indexAnalyzer, boolean isSyntheticSource, Map<String, String> meta) {
// Though this type is based on doc_values, hasDocValues is set to false as the patterned_text type is not aggregatable.
// This does not stop its child .template type from being aggregatable.
super(name, true, false, false, tsi, meta);
this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer);
this.textFieldType = new TextFieldMapper.TextFieldType(name, isSyntheticSource);
}
PatternedTextFieldType(String name) {
this(
name,
new TextSearchInfo(PatternedTextFieldMapper.Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
Lucene.STANDARD_ANALYZER,
false,
Collections.emptyMap()
);
}
@Override
public String typeName() {
return CONTENT_TYPE;
}
@Override
public String familyTypeName() {
return TextFieldMapper.CONTENT_TYPE;
}
@Override
public ValueFetcher valueFetcher(SearchExecutionContext context, String format) {
return SourceValueFetcher.toString(name(), context, format);
}
private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOException>> getValueFetcherProvider(
SearchExecutionContext searchExecutionContext
) {
return context -> {
ValueFetcher valueFetcher = valueFetcher(searchExecutionContext, null);
SourceProvider sourceProvider = searchExecutionContext.lookup();
valueFetcher.setNextReader(context);
return docID -> {
try {
return valueFetcher.fetchValues(sourceProvider.getSource(context, docID), docID, new ArrayList<>());
} catch (IOException e) {
throw new UncheckedIOException(e);
}
};
};
}
private Query sourceConfirmedQuery(Query query, SearchExecutionContext context) {
// Disable scoring
return new ConstantScoreQuery(new SourceConfirmedTextQuery(query, getValueFetcherProvider(context), indexAnalyzer));
}
private IntervalsSource toIntervalsSource(IntervalsSource source, Query approximation, SearchExecutionContext searchExecutionContext) {
return new SourceIntervalsSource(source, approximation, getValueFetcherProvider(searchExecutionContext), indexAnalyzer);
}
@Override
public Query termQuery(Object query, SearchExecutionContext context) {
// Disable scoring
return new ConstantScoreQuery(super.termQuery(query, context));
}
@Override
public Query fuzzyQuery(
Object value,
Fuzziness fuzziness,
int prefixLength,
int maxExpansions,
boolean transpositions,
SearchExecutionContext context,
MultiTermQuery.RewriteMethod rewriteMethod
) {
// Disable scoring
return new ConstantScoreQuery(
super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, context, rewriteMethod)
);
}
@Override
public Query existsQuery(SearchExecutionContext context) {
return new FieldExistsQuery(templateFieldName());
}
@Override
public IntervalsSource termIntervals(BytesRef term, SearchExecutionContext context) {
return toIntervalsSource(Intervals.term(term), new TermQuery(new Term(name(), term)), context);
}
@Override
public IntervalsSource prefixIntervals(BytesRef term, SearchExecutionContext context) {
return toIntervalsSource(
Intervals.prefix(term, IndexSearcher.getMaxClauseCount()),
new PrefixQuery(new Term(name(), term)),
context
);
}
@Override
public IntervalsSource fuzzyIntervals(
String term,
int maxDistance,
int prefixLength,
boolean transpositions,
SearchExecutionContext context
) {
FuzzyQuery fuzzyQuery = new FuzzyQuery(
new Term(name(), term),
maxDistance,
prefixLength,
IndexSearcher.getMaxClauseCount(),
transpositions,
MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE
);
IntervalsSource fuzzyIntervals = Intervals.multiterm(fuzzyQuery.getAutomata(), IndexSearcher.getMaxClauseCount(), term);
return toIntervalsSource(fuzzyIntervals, fuzzyQuery, context);
}
@Override
public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContext context) {
return toIntervalsSource(
Intervals.wildcard(pattern, IndexSearcher.getMaxClauseCount()),
new MatchAllDocsQuery(), // wildcard queries can be expensive, what should the approximation be?
context
);
}
@Override
public IntervalsSource regexpIntervals(BytesRef pattern, SearchExecutionContext context) {
return toIntervalsSource(
Intervals.regexp(pattern, IndexSearcher.getMaxClauseCount()),
new MatchAllDocsQuery(), // regexp queries can be expensive, what should the approximation be?
context
);
}
@Override
public IntervalsSource rangeIntervals(
BytesRef lowerTerm,
BytesRef upperTerm,
boolean includeLower,
boolean includeUpper,
SearchExecutionContext context
) {
return toIntervalsSource(
Intervals.range(lowerTerm, upperTerm, includeLower, includeUpper, IndexSearcher.getMaxClauseCount()),
new MatchAllDocsQuery(), // range queries can be expensive, what should the approximation be?
context
);
}
@Override
public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, SearchExecutionContext queryShardContext)
throws IOException {
final Query textQuery = textFieldType.phraseQuery(stream, slop, enablePosIncrements, queryShardContext);
return sourceConfirmedQuery(textQuery, queryShardContext);
}
@Override
public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, SearchExecutionContext queryShardContext)
throws IOException {
final Query textQuery = textFieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, queryShardContext);
return sourceConfirmedQuery(textQuery, queryShardContext);
}
@Override
public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, SearchExecutionContext queryShardContext)
throws IOException {
final Query textQuery = textFieldType.phrasePrefixQuery(stream, slop, maxExpansions, queryShardContext);
return sourceConfirmedQuery(textQuery, queryShardContext);
}
@Override
public BlockLoader blockLoader(BlockLoaderContext blContext) {
return new BlockDocValuesReader.BytesRefsFromBinaryBlockLoader(name());
}
@Override
public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext) {
if (fieldDataContext.fielddataOperation() != FielddataOperation.SCRIPT) {
throw new IllegalArgumentException(CONTENT_TYPE + " fields do not support sorting and aggregations");
}
if (textFieldType.isSyntheticSource()) {
return new PatternedTextIndexFieldData.Builder(this);
}
return new SourceValueFetcherSortedBinaryIndexFieldData.Builder(
name(),
CoreValuesSourceType.KEYWORD,
SourceValueFetcher.toString(fieldDataContext.sourcePathsLookup().apply(name())),
fieldDataContext.lookupSupplier().get(),
KeywordDocValuesField::new
);
}
String templateFieldName() {
return name() + TEMPLATE_SUFFIX;
}
String argsFieldName() {
return name() + ARGS_SUFFIX;
}
}
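
Because the index stores no positions, positional queries run in two phases: an approximate query against the inverted index, then per-hit verification against the value fetched back for each candidate document. A sketch, assuming tokenStream and searchExecutionContext are test fixtures:

    PatternedTextFieldType ft = new PatternedTextFieldType("message");
    Query query = ft.phraseQuery(tokenStream, 0, true, searchExecutionContext);
    // query is a ConstantScoreQuery wrapping a SourceConfirmedTextQuery, so a
    // phrase only matches if re-analyzing the fetched value confirms it.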

PatternedTextIndexFieldData.java

@@ -0,0 +1,134 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.logsdb.patternedtext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
import org.elasticsearch.index.fielddata.LeafFieldData;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
import org.elasticsearch.indices.breaker.CircuitBreakerService;
import org.elasticsearch.script.field.DocValuesScriptFieldFactory;
import org.elasticsearch.script.field.KeywordDocValuesField;
import org.elasticsearch.script.field.ToScriptFieldFactory;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.MultiValueMode;
import org.elasticsearch.search.aggregations.support.ValuesSourceType;
import org.elasticsearch.search.sort.BucketedSort;
import org.elasticsearch.search.sort.SortOrder;
import java.io.IOException;
import java.io.UncheckedIOException;
public class PatternedTextIndexFieldData implements IndexFieldData<LeafFieldData> {
private final PatternedTextFieldType fieldType;
static class Builder implements IndexFieldData.Builder {
final PatternedTextFieldType fieldType;
Builder(PatternedTextFieldType fieldType) {
this.fieldType = fieldType;
}
public PatternedTextIndexFieldData build(IndexFieldDataCache cache, CircuitBreakerService breakerService) {
return new PatternedTextIndexFieldData(fieldType);
}
}
PatternedTextIndexFieldData(PatternedTextFieldType fieldType) {
this.fieldType = fieldType;
}
@Override
public String getFieldName() {
return fieldType.name();
}
@Override
public ValuesSourceType getValuesSourceType() {
return null;
}
@Override
public LeafFieldData load(LeafReaderContext context) {
try {
return loadDirect(context);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
@Override
public LeafFieldData loadDirect(LeafReaderContext context) throws IOException {
LeafReader leafReader = context.reader();
PatternedTextDocValues docValues = PatternedTextDocValues.from(
leafReader,
fieldType.templateFieldName(),
fieldType.argsFieldName()
);
return new LeafFieldData() {
final ToScriptFieldFactory<SortedBinaryDocValues> factory = KeywordDocValuesField::new;
@Override
public DocValuesScriptFieldFactory getScriptFieldFactory(String name) {
return factory.getScriptFieldFactory(getBytesValues(), name);
}
@Override
public SortedBinaryDocValues getBytesValues() {
return new SortedBinaryDocValues() {
@Override
public boolean advanceExact(int doc) throws IOException {
return docValues.advanceExact(doc);
}
@Override
public int docValueCount() {
return 1;
}
@Override
public BytesRef nextValue() throws IOException {
return docValues.binaryValue();
}
};
}
@Override
public long ramBytesUsed() {
return 1L;
}
};
}
@Override
public SortField sortField(Object missingValue, MultiValueMode sortMode, XFieldComparatorSource.Nested nested, boolean reverse) {
throw new IllegalArgumentException("not supported for source patterned text field type");
}
@Override
public BucketedSort newBucketedSort(
BigArrays bigArrays,
Object missingValue,
MultiValueMode sortMode,
XFieldComparatorSource.Nested nested,
SortOrder sortOrder,
DocValueFormat format,
int bucketSize,
BucketedSort.ExtraData extra
) {
throw new IllegalArgumentException("only supported on numeric fields");
}
}

PatternedTextSyntheticFieldLoaderLayer.java

@@ -0,0 +1,86 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.logsdb.patternedtext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.search.DocIdSetIterator;
import org.elasticsearch.index.mapper.CompositeSyntheticFieldLoader;
import org.elasticsearch.xcontent.XContentBuilder;
import java.io.IOException;
class PatternedTextSyntheticFieldLoaderLayer implements CompositeSyntheticFieldLoader.DocValuesLayer {
private final String name;
private final String templateFieldName;
private final String argsFieldName;
private PatternedTextSyntheticFieldLoader loader;
PatternedTextSyntheticFieldLoaderLayer(String name, String templateFieldName, String argsFieldName) {
this.name = name;
this.templateFieldName = templateFieldName;
this.argsFieldName = argsFieldName;
}
@Override
public long valueCount() {
return loader != null && loader.hasValue() ? 1 : 0;
}
@Override
public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException {
var docValues = PatternedTextDocValues.from(leafReader, templateFieldName, argsFieldName);
if (docValues == null) {
return null;
}
loader = new PatternedTextSyntheticFieldLoader(docValues);
return loader;
}
@Override
public boolean hasValue() {
return loader != null && loader.hasValue();
}
@Override
public void write(XContentBuilder b) throws IOException {
if (loader != null) {
loader.write(b);
}
}
@Override
public String fieldName() {
return name;
}
private static class PatternedTextSyntheticFieldLoader implements DocValuesLoader {
private final PatternedTextDocValues docValues;
private boolean hasValue = false;
PatternedTextSyntheticFieldLoader(PatternedTextDocValues docValues) {
this.docValues = docValues;
}
public boolean hasValue() {
assert docValues.docID() != DocIdSetIterator.NO_MORE_DOCS;
return hasValue;
}
@Override
public boolean advanceToDoc(int docId) throws IOException {
return hasValue = docValues.advanceExact(docId);
}
public void write(XContentBuilder b) throws IOException {
if (hasValue) {
b.value(docValues.binaryValue().utf8ToString());
}
}
}
}
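
This layer reads back only the template and args doc values; since merge(split(value)) reproduces the original message exactly (the synthetic-source support in the mapper tests below expects the input and output values to match), synthetic _source returns the message byte-for-byte without keeping a stored copy of the original.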

PatternedTextValueProcessor.java

@@ -0,0 +1,105 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.logsdb.patternedtext;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class PatternedTextValueProcessor {
private static final String TEXT_ARG_PLACEHOLDER = "%W";
private static final String DELIMITER = "[\\s\\[\\]]";
private static final String SPACE = " ";
record Parts(String template, List<String> args) {}
static Parts split(String text) {
StringBuilder template = new StringBuilder();
List<String> args = new ArrayList<>();
String[] tokens = text.split(DELIMITER);
int textIndex = 0;
for (String token : tokens) {
if (token.isEmpty()) {
if (textIndex < text.length() - 1) {
template.append(text.charAt(textIndex++));
}
continue;
}
if (isArg(token)) {
args.add(token);
template.append(TEXT_ARG_PLACEHOLDER);
} else {
template.append(token);
}
textIndex += token.length();
if (textIndex < text.length()) {
template.append(text.charAt(textIndex++));
}
}
while (textIndex < text.length()) {
template.append(text.charAt(textIndex++));
}
return new Parts(template.toString(), args);
}
private static boolean isArg(String text) {
for (int i = 0; i < text.length(); i++) {
if (Character.isDigit(text.charAt(i))) {
return true;
}
}
return false;
}
static String merge(Parts parts) {
StringBuilder builder = new StringBuilder();
String[] templateParts = parts.template.split(DELIMITER);
int i = 0;
int templateIndex = 0;
for (String part : templateParts) {
if (part.equals(TEXT_ARG_PLACEHOLDER)) {
builder.append(parts.args.get(i++));
templateIndex += TEXT_ARG_PLACEHOLDER.length();
} else if (part.isEmpty() == false) {
builder.append(part);
templateIndex += part.length();
}
if (templateIndex < parts.template.length()) {
builder.append(parts.template.charAt(templateIndex++));
}
}
assert i == parts.args.size() : "expected " + parts.args.size() + " args but used " + i;
assert builder.toString().contains(TEXT_ARG_PLACEHOLDER) == false : builder.toString();
while (templateIndex < parts.template.length()) {
builder.append(parts.template.charAt(templateIndex++));
}
return builder.toString();
}
static String encodeRemainingArgs(Parts parts) {
return String.join(SPACE, parts.args);
}
static List<String> decodeRemainingArgs(String mergedArgs) {
return Arrays.asList(mergedArgs.split(SPACE));
}
static int countArgs(String template) {
int count = 0;
for (int i = 0; i < template.length() - 1; i++) {
if (template.charAt(i) == '%') {
char next = template.charAt(i + 1);
if (next == 'W') {
count++;
i++;
}
}
}
return count;
}
}
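
Packing every argument into one space-joined doc value is lossless because split() tokenizes on whitespace and brackets, so no individual arg can itself contain a space. A short round trip (hypothetical input):

    PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split("[pid 1234] retry 5");
    // parts.template() -> "[pid %W] retry %W"
    String packed = PatternedTextValueProcessor.encodeRemainingArgs(parts);      // "1234 5"
    List<String> args = PatternedTextValueProcessor.decodeRemainingArgs(packed); // ["1234", "5"]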

PatternTextDocValuesTests.java

@@ -0,0 +1,174 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.logsdb.patternedtext;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.test.ESTestCase;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
public class PatternTextDocValuesTests extends ESTestCase {
private static PatternedTextDocValues makeDocValueSparseArgs() {
var template = new SimpleSortedSetDocValues("%W dog", "cat", "%W mouse %W", "hat %W");
var args = new SimpleSortedSetDocValues("1", null, "2 3", "4");
return new PatternedTextDocValues(template, args);
}
private static PatternedTextDocValues makeDocValuesDenseArgs() {
var template = new SimpleSortedSetDocValues("%W moose", "%W goose %W", "%W mouse %W", "%W house");
var args = new SimpleSortedSetDocValues("1", "4 5", "2 3", "7");
return new PatternedTextDocValues(template, args);
}
private static PatternedTextDocValues makeDocValueMissingValues() {
var template = new SimpleSortedSetDocValues("%W cheddar", "cat", null, "%W cheese");
var args = new SimpleSortedSetDocValues("1", null, null, "4");
return new PatternedTextDocValues(template, args);
}
public void testNextDoc() throws IOException {
var docValues = randomBoolean() ? makeDocValueSparseArgs() : makeDocValuesDenseArgs();
assertEquals(-1, docValues.docID());
assertEquals(0, docValues.nextDoc());
assertEquals(1, docValues.nextDoc());
assertEquals(2, docValues.nextDoc());
assertEquals(3, docValues.nextDoc());
assertEquals(NO_MORE_DOCS, docValues.nextDoc());
}
public void testNextDocMissing() throws IOException {
var docValues = makeDocValueMissingValues();
assertEquals(-1, docValues.docID());
assertEquals(0, docValues.nextDoc());
assertEquals(1, docValues.nextDoc());
assertEquals(3, docValues.nextDoc());
assertEquals(NO_MORE_DOCS, docValues.nextDoc());
}
public void testAdvance1() throws IOException {
var docValues = randomBoolean() ? makeDocValueSparseArgs() : makeDocValuesDenseArgs();
assertEquals(-1, docValues.docID());
assertEquals(0, docValues.nextDoc());
assertEquals(1, docValues.advance(1));
assertEquals(2, docValues.advance(2));
assertEquals(3, docValues.advance(3));
assertEquals(NO_MORE_DOCS, docValues.advance(4));
}
public void testAdvanceFarther() throws IOException {
var docValues = randomBoolean() ? makeDocValueSparseArgs() : makeDocValuesDenseArgs();
assertEquals(2, docValues.advance(2));
// a repeated advance to the same target stays on that doc
assertEquals(2, docValues.advance(2));
}
public void testAdvanceSkipsValuesIfMissing() throws IOException {
var docValues = makeDocValueMissingValues();
assertEquals(3, docValues.advance(2));
}
public void testAdvanceExactMissing() throws IOException {
var docValues = makeDocValueMissingValues();
assertTrue(docValues.advanceExact(1));
assertFalse(docValues.advanceExact(2));
assertEquals(3, docValues.docID());
}
public void testValueAll() throws IOException {
var docValues = makeDocValuesDenseArgs();
assertEquals(0, docValues.nextDoc());
assertEquals("1 moose", docValues.binaryValue().utf8ToString());
assertEquals(1, docValues.nextDoc());
assertEquals("4 goose 5", docValues.binaryValue().utf8ToString());
assertEquals(2, docValues.nextDoc());
assertEquals("2 mouse 3", docValues.binaryValue().utf8ToString());
assertEquals(3, docValues.nextDoc());
assertEquals("7 house", docValues.binaryValue().utf8ToString());
}
public void testValueMissing() throws IOException {
var docValues = makeDocValueMissingValues();
assertEquals(0, docValues.nextDoc());
assertEquals("1 cheddar", docValues.binaryValue().utf8ToString());
assertEquals(1, docValues.nextDoc());
assertEquals("cat", docValues.binaryValue().utf8ToString());
assertEquals(3, docValues.nextDoc());
assertEquals("4 cheese", docValues.binaryValue().utf8ToString());
}
static class SimpleSortedSetDocValues extends SortedSetDocValues {
private final List<String> ordToValues;
private final List<Integer> docToOrds;
private int currDoc = -1;
// Single value for each docId, null if no value for a docId
SimpleSortedSetDocValues(String... docIdToValue) {
ordToValues = Arrays.stream(docIdToValue).filter(Objects::nonNull).collect(Collectors.toSet()).stream().sorted().toList();
docToOrds = Arrays.stream(docIdToValue).map(v -> v == null ? null : ordToValues.indexOf(v)).toList();
}
@Override
public long nextOrd() {
return docToOrds.get(currDoc);
}
@Override
public int docValueCount() {
return 1;
}
@Override
public BytesRef lookupOrd(long ord) {
return new BytesRef(ordToValues.get((int) ord));
}
@Override
public long getValueCount() {
return ordToValues.size();
}
@Override
public boolean advanceExact(int target) {
return advance(target) == target;
}
@Override
public int docID() {
return currDoc >= docToOrds.size() ? NO_MORE_DOCS : currDoc;
}
@Override
public int nextDoc() throws IOException {
return advance(currDoc + 1);
}
@Override
public int advance(int target) {
for (currDoc = target; currDoc < docToOrds.size(); currDoc++) {
if (docToOrds.get(currDoc) != null) {
return currDoc;
}
}
return NO_MORE_DOCS;
}
@Override
public long cost() {
return 1;
}
}
}

PatternedTextFieldMapperTests.java

@@ -0,0 +1,284 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.logsdb.patternedtext;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.search.FieldExistsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.analysis.CannedTokenStream;
import org.apache.lucene.tests.analysis.Token;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.mapper.DateFieldMapper;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.LuceneDocument;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.MapperTestCase;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xpack.logsdb.LogsDBPlugin;
import org.junit.AssumptionViolatedException;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.UUID;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.startsWith;
public class PatternedTextFieldMapperTests extends MapperTestCase {
@Override
protected Collection<Plugin> getPlugins() {
return List.of(new LogsDBPlugin(Settings.EMPTY));
}
@Override
protected Object getSampleValueForDocument() {
return "value";
}
@Override
protected void assertExistsQuery(MappedFieldType fieldType, Query query, LuceneDocument fields) {
assertThat(query, instanceOf(FieldExistsQuery.class));
FieldExistsQuery fieldExistsQuery = (FieldExistsQuery) query;
assertThat(fieldExistsQuery.getField(), startsWith("field"));
assertNoFieldNamesField(fields);
}
public void testExistsStandardSource() throws IOException {
assertExistsQuery(createMapperService(fieldMapping(b -> b.field("type", "patterned_text"))));
}
public void testExistsSyntheticSource() throws IOException {
assertExistsQuery(createSytheticSourceMapperService(fieldMapping(b -> b.field("type", "patterned_text"))));
}
public void testPhraseQueryStandardSource() throws IOException {
assertPhraseQuery(createMapperService(fieldMapping(b -> b.field("type", "patterned_text"))));
}
public void testPhraseQuerySyntheticSource() throws IOException {
assertPhraseQuery(createSytheticSourceMapperService(fieldMapping(b -> b.field("type", "patterned_text"))));
}
private void assertPhraseQuery(MapperService mapperService) throws IOException {
try (Directory directory = newDirectory()) {
RandomIndexWriter iw = new RandomIndexWriter(random(), directory);
LuceneDocument doc = mapperService.documentMapper().parse(source(b -> b.field("field", "the quick brown fox 1"))).rootDoc();
iw.addDocument(doc);
iw.close();
try (DirectoryReader reader = DirectoryReader.open(directory)) {
SearchExecutionContext context = createSearchExecutionContext(mapperService, newSearcher(reader));
MatchPhraseQueryBuilder queryBuilder = new MatchPhraseQueryBuilder("field", "brown fox 1");
TopDocs docs = context.searcher().search(queryBuilder.toQuery(context), 1);
assertThat(docs.totalHits.value(), equalTo(1L));
assertThat(docs.totalHits.relation(), equalTo(TotalHits.Relation.EQUAL_TO));
assertThat(docs.scoreDocs[0].doc, equalTo(0));
}
}
}
@Override
protected void registerParameters(ParameterChecker checker) throws IOException {
checker.registerUpdateCheck(
b -> { b.field("meta", Collections.singletonMap("format", "mysql.access")); },
m -> assertEquals(Collections.singletonMap("format", "mysql.access"), m.fieldType().meta())
);
}
@Override
protected void minimalMapping(XContentBuilder b) throws IOException {
b.field("type", "patterned_text");
}
@Override
protected void minimalStoreMapping(XContentBuilder b) throws IOException {
// 'store' is always true
minimalMapping(b);
}
public void testDefaults() throws IOException {
DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping));
assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), mapper.mappingSource().toString());
ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234")));
List<IndexableField> fields = doc.rootDoc().getFields("field");
assertEquals(1, fields.size());
assertEquals("1234", fields.get(0).stringValue());
IndexableFieldType fieldType = fields.get(0).fieldType();
assertThat(fieldType.omitNorms(), equalTo(true));
assertTrue(fieldType.tokenized());
assertFalse(fieldType.stored());
assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS));
assertThat(fieldType.storeTermVectors(), equalTo(false));
assertThat(fieldType.storeTermVectorOffsets(), equalTo(false));
assertThat(fieldType.storeTermVectorPositions(), equalTo(false));
assertThat(fieldType.storeTermVectorPayloads(), equalTo(false));
assertEquals(DocValuesType.NONE, fieldType.docValuesType());
}
public void testNullConfigValuesFail() throws MapperParsingException {
Exception e = expectThrows(
MapperParsingException.class,
() -> createDocumentMapper(fieldMapping(b -> b.field("type", "patterned_text").field("meta", (String) null)))
);
assertThat(e.getMessage(), containsString("[meta] on mapper [field] of type [patterned_text] must not have a [null] value"));
}
public void testSimpleMerge() throws IOException {
XContentBuilder startingMapping = fieldMapping(b -> b.field("type", "patterned_text"));
MapperService mapperService = createMapperService(startingMapping);
assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(PatternedTextFieldMapper.class));
merge(mapperService, startingMapping);
assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(PatternedTextFieldMapper.class));
XContentBuilder newField = mapping(b -> {
b.startObject("field").field("type", "patterned_text").startObject("meta").field("key", "value").endObject().endObject();
b.startObject("other_field").field("type", "keyword").endObject();
});
merge(mapperService, newField);
assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(PatternedTextFieldMapper.class));
assertThat(mapperService.documentMapper().mappers().getMapper("other_field"), instanceOf(KeywordFieldMapper.class));
}
public void testDisabledSource() throws IOException {
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("_doc");
{
mapping.startObject("properties");
{
mapping.startObject("foo");
{
mapping.field("type", "patterned_text");
}
mapping.endObject();
}
mapping.endObject();
mapping.startObject("_source");
{
mapping.field("enabled", false);
}
mapping.endObject();
}
mapping.endObject().endObject();
MapperService mapperService = createMapperService(mapping);
MappedFieldType ft = mapperService.fieldType("foo");
SearchExecutionContext context = createSearchExecutionContext(mapperService);
TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 4, 7));
// Allowed even if source is disabled.
ft.phraseQuery(ts, 0, true, context);
ft.termQuery("a", context);
}
@Override
protected Object generateRandomInputValue(MappedFieldType ft) {
assumeFalse("We don't have a way to assert things here", true);
return null;
}
@Override
protected void randomFetchTestFieldConfig(XContentBuilder b) throws IOException {
assumeFalse("We don't have a way to assert things here", true);
}
@Override
protected boolean supportsIgnoreMalformed() {
return false;
}
@Override
protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) {
assertFalse("patterned_text doesn't support ignoreMalformed", ignoreMalformed);
return new PatternedTextSyntheticSourceSupport();
}
static class PatternedTextSyntheticSourceSupport implements SyntheticSourceSupport {
@Override
public SyntheticSourceExample example(int maxValues) {
Tuple<String, String> v = generateValue();
return new SyntheticSourceExample(v.v1(), v.v2(), this::mapping);
}
private Tuple<String, String> generateValue() {
StringBuilder builder = new StringBuilder();
if (randomBoolean()) {
builder.append(randomAlphaOfLength(5));
} else {
String timestamp = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.formatMillis(System.currentTimeMillis());
builder.append(timestamp);
}
for (int i = 0; i < randomIntBetween(0, 9); i++) {
builder.append(" ");
int rand = randomIntBetween(0, 4);
switch (rand) {
case 0 -> builder.append(randomAlphaOfLength(5));
case 1 -> builder.append(randomAlphanumericOfLength(5));
case 2 -> builder.append(UUID.randomUUID());
case 3 -> builder.append(randomIp(true));
case 4 -> builder.append(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.formatMillis(randomMillisUpToYear9999()));
}
}
String value = builder.toString();
return Tuple.tuple(value, value);
}
private void mapping(XContentBuilder b) throws IOException {
b.field("type", "patterned_text");
}
@Override
public List<SyntheticSourceInvalidExample> invalidExample() throws IOException {
return List.of();
}
}
public void testDocValues() throws IOException {
MapperService mapper = createMapperService(fieldMapping(b -> b.field("type", "patterned_text")));
assertScriptDocValues(mapper, "foo", equalTo(List.of("foo")));
}
public void testDocValuesSynthetic() throws IOException {
MapperService mapper = createSytheticSourceMapperService(fieldMapping(b -> b.field("type", "patterned_text")));
assertScriptDocValues(mapper, "foo", equalTo(List.of("foo")));
}
@Override
public void testSyntheticSourceKeepArrays() {
// This mapper does not allow arrays
}
@Override
protected IngestScriptSupport ingestScriptSupport() {
throw new AssumptionViolatedException("not supported");
}
}

PatternedTextFieldTypeTests.java

@@ -0,0 +1,194 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.logsdb.patternedtext;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.tests.analysis.CannedTokenStream;
import org.apache.lucene.tests.analysis.Token;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.common.lucene.search.AutomatonQueries;
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.mapper.FieldTypeTestCase;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.extras.SourceIntervalsSource;
import org.hamcrest.Matchers;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class PatternedTextFieldTypeTests extends FieldTypeTestCase {
public void testTermQuery() {
MappedFieldType ft = new PatternedTextFieldType("field");
assertEquals(new ConstantScoreQuery(new TermQuery(new Term("field", "foo"))), ft.termQuery("foo", null));
assertEquals(AutomatonQueries.caseInsensitiveTermQuery(new Term("field", "fOo")), ft.termQueryCaseInsensitive("fOo", null));
}
public void testTermsQuery() {
MappedFieldType ft = new PatternedTextFieldType("field");
List<BytesRef> terms = new ArrayList<>();
terms.add(new BytesRef("foo"));
terms.add(new BytesRef("123"));
assertEquals(new TermInSetQuery("field", terms), ft.termsQuery(Arrays.asList("foo", "123"), null));
}
public void testRangeQuery() {
MappedFieldType ft = new PatternedTextFieldType("field");
assertEquals(
new TermRangeQuery("field", BytesRefs.toBytesRef("foo"), BytesRefs.toBytesRef("bar"), true, false),
ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_CONTEXT)
);
ElasticsearchException ee = expectThrows(
ElasticsearchException.class,
() -> ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_CONTEXT_DISALLOW_EXPENSIVE)
);
assertEquals(
"[range] queries on [text] or [keyword] fields cannot be executed when " + "'search.allow_expensive_queries' is set to false.",
ee.getMessage()
);
}
public void testRegexpQuery() {
MappedFieldType ft = new PatternedTextFieldType("field");
assertEquals(new RegexpQuery(new Term("field", "foo.*")), ft.regexpQuery("foo.*", 0, 0, 10, null, MOCK_CONTEXT));
ElasticsearchException ee = expectThrows(
ElasticsearchException.class,
() -> ft.regexpQuery("foo.*", randomInt(10), 0, randomInt(10) + 1, null, MOCK_CONTEXT_DISALLOW_EXPENSIVE)
);
assertEquals("[regexp] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", ee.getMessage());
}
public void testFuzzyQuery() {
MappedFieldType ft = new PatternedTextFieldType("field");
assertEquals(
new ConstantScoreQuery(new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true)),
ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_CONTEXT)
);
ElasticsearchException ee = expectThrows(
ElasticsearchException.class,
() -> ft.fuzzyQuery(
"foo",
Fuzziness.AUTO,
randomInt(10) + 1,
randomInt(10) + 1,
randomBoolean(),
MOCK_CONTEXT_DISALLOW_EXPENSIVE
)
);
assertEquals("[fuzzy] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", ee.getMessage());
}
private Query unwrapPositionalQuery(Query query) {
query = ((ConstantScoreQuery) query).getQuery();
return query;
}
public void testPhraseQuery() throws IOException {
MappedFieldType ft = new PatternedTextFieldType("field");
TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("1", 4, 7));
Query query = ft.phraseQuery(ts, 0, true, MOCK_CONTEXT);
Query delegate = unwrapPositionalQuery(query);
assertEquals(new PhraseQuery("field", "a", "1").toString(), delegate.toString());
}
public void testMultiPhraseQuery() throws IOException {
MappedFieldType ft = new PatternedTextFieldType("field");
TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("2", 0, 0, 3), new Token("c", 4, 7));
Query query = ft.multiPhraseQuery(ts, 0, true, MOCK_CONTEXT);
Query delegate = unwrapPositionalQuery(query);
Query expected = new MultiPhraseQuery.Builder().add(new Term[] { new Term("field", "a"), new Term("field", "2") })
.add(new Term("field", "c"))
.build();
assertEquals(expected.toString(), delegate.toString());
}
public void testPhrasePrefixQuery() throws IOException {
MappedFieldType ft = new PatternedTextFieldType("field");
TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 0, 0, 3), new Token("c", 4, 7));
Query query = ft.phrasePrefixQuery(ts, 0, 10, MOCK_CONTEXT);
Query delegate = unwrapPositionalQuery(query);
MultiPhrasePrefixQuery expected = new MultiPhrasePrefixQuery("field");
expected.add(new Term[] { new Term("field", "a"), new Term("field", "b") });
expected.add(new Term("field", "c"));
assertEquals(expected.toString(), delegate.toString());
}
public void testTermIntervals() {
MappedFieldType ft = new PatternedTextFieldType("field");
IntervalsSource termIntervals = ft.termIntervals(new BytesRef("foo"), MOCK_CONTEXT);
assertThat(termIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
assertEquals(Intervals.term(new BytesRef("foo")), ((SourceIntervalsSource) termIntervals).getIntervalsSource());
}
public void testPrefixIntervals() {
MappedFieldType ft = new PatternedTextFieldType("field");
IntervalsSource prefixIntervals = ft.prefixIntervals(new BytesRef("foo"), MOCK_CONTEXT);
assertThat(prefixIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
assertEquals(
Intervals.prefix(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()),
((SourceIntervalsSource) prefixIntervals).getIntervalsSource()
);
}
public void testWildcardIntervals() {
MappedFieldType ft = new PatternedTextFieldType("field");
IntervalsSource wildcardIntervals = ft.wildcardIntervals(new BytesRef("foo"), MOCK_CONTEXT);
assertThat(wildcardIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
assertEquals(
Intervals.wildcard(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()),
((SourceIntervalsSource) wildcardIntervals).getIntervalsSource()
);
}
public void testRegexpIntervals() {
MappedFieldType ft = new PatternedTextFieldType("field");
IntervalsSource regexpIntervals = ft.regexpIntervals(new BytesRef("foo"), MOCK_CONTEXT);
assertThat(regexpIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
assertEquals(
Intervals.regexp(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()),
((SourceIntervalsSource) regexpIntervals).getIntervalsSource()
);
}
public void testFuzzyIntervals() {
MappedFieldType ft = new PatternedTextFieldType("field");
IntervalsSource fuzzyIntervals = ft.fuzzyIntervals("foo", 1, 2, true, MOCK_CONTEXT);
assertThat(fuzzyIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
}
public void testRangeIntervals() {
MappedFieldType ft = new PatternedTextFieldType("field");
IntervalsSource rangeIntervals = ft.rangeIntervals(new BytesRef("foo"), new BytesRef("foo1"), true, true, MOCK_CONTEXT);
assertThat(rangeIntervals, Matchers.instanceOf(SourceIntervalsSource.class));
assertEquals(
Intervals.range(new BytesRef("foo"), new BytesRef("foo1"), true, true, IndexSearcher.getMaxClauseCount()),
((SourceIntervalsSource) rangeIntervals).getIntervalsSource()
);
}
}
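These positional-query tests all strip the same ConstantScoreQuery wrapper before comparing against a plain Lucene query. A minimal, self-contained sketch of the shape they assume follows; treating the wrapped query as an opaque ConstantScoreQuery payload is an assumption here, since the production query it wraps is not shown in this diff:

import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;

class UnwrapSketch {
    // Mirrors unwrapPositionalQuery above: peel off the constant-score wrapper
    // and return the positional query the tests assert on.
    static Query unwrap(Query query) {
        return ((ConstantScoreQuery) query).getQuery();
    }

    public static void main(String[] args) {
        Query delegate = unwrap(new ConstantScoreQuery(new PhraseQuery("field", "a", "1")));
        System.out.println(delegate); // prints: field:"a 1"
    }
}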


@ -0,0 +1,101 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.logsdb.patternedtext;
import org.elasticsearch.test.ESTestCase;
import org.hamcrest.Matchers;
public class PatternedTextValueProcessorTests extends ESTestCase {
public void testEmpty() {
String text = "";
PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text);
assertEquals(text, parts.template());
assertTrue(parts.args().isEmpty());
assertEquals(text, PatternedTextValueProcessor.merge(parts));
}
public void testWhitespace() {
String text = " ";
PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text);
assertEquals(text, parts.template());
assertTrue(parts.args().isEmpty());
assertEquals(text, PatternedTextValueProcessor.merge(parts));
}
public void testWithoutTimestamp() {
String text = " some text with arg1 and 2arg2 and 333 ";
PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text);
assertEquals(" some text with %W and %W and %W ", parts.template());
assertThat(parts.args(), Matchers.contains("arg1", "2arg2", "333"));
assertEquals(text, PatternedTextValueProcessor.merge(parts));
}
public void testWithTimestamp() {
String text = " 2021-04-13T13:51:38.000Z some text with arg1 and arg2 and arg3";
PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text);
assertEquals(" %W some text with %W and %W and %W", parts.template());
assertThat(parts.args(), Matchers.contains("2021-04-13T13:51:38.000Z", "arg1", "arg2", "arg3"));
assertEquals(text, PatternedTextValueProcessor.merge(parts));
}
public void testWithDateSpaceTime() {
String text = " 2021-04-13 13:51:38 some text with arg1 and arg2 and arg3";
PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text);
assertEquals(" %W %W some text with %W and %W and %W", parts.template());
assertThat(parts.args(), Matchers.contains("2021-04-13", "13:51:38", "arg1", "arg2", "arg3"));
assertEquals(text, PatternedTextValueProcessor.merge(parts));
}
public void testMalformedDate() {
String text = "2020/09/06 10:11:38 Using namespace: kubernetes-dashboard' | HTTP status: 400, message: [1:395]";
PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text);
assertEquals("%W %W Using namespace: kubernetes-dashboard' | HTTP status: %W message: [%W]", parts.template());
assertThat(parts.args(), Matchers.contains("2020/09/06", "10:11:38", "400,", "1:395"));
assertEquals(text, PatternedTextValueProcessor.merge(parts));
}
public void testUUID() {
String text = "[2020-08-18T00:58:56.751+00:00][15][2354][action_controller][INFO]: [18be2355-6306-4a00-9db9-f0696aa1a225] "
+ "some text with arg1 and arg2";
PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text);
assertEquals("[%W][%W][%W][action_controller][INFO]: [%W] some text with %W and %W", parts.template());
assertThat(
parts.args(),
Matchers.contains("2020-08-18T00:58:56.751+00:00", "15", "2354", "18be2355-6306-4a00-9db9-f0696aa1a225", "arg1", "arg2")
);
assertEquals(text, PatternedTextValueProcessor.merge(parts));
}
public void testIP() {
String text = "[2020-08-18T00:58:56.751+00:00][15][2354][action_controller][INFO]: from 94.168.152.150 and arg1";
PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text);
assertEquals("[%W][%W][%W][action_controller][INFO]: from %W and %W", parts.template());
assertThat(parts.args(), Matchers.contains("2020-08-18T00:58:56.751+00:00", "15", "2354", "94.168.152.150", "arg1"));
assertEquals(text, PatternedTextValueProcessor.merge(parts));
}
public void testSecondDate() {
String text = "[2020-08-18T00:58:56.751+00:00][15][2354][action_controller][INFO]: at 2020-08-18 00:58:56 +0000 and arg1";
PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text);
assertEquals("[%W][%W][%W][action_controller][INFO]: at %W %W %W and %W", parts.template());
assertThat(
parts.args(),
Matchers.contains("2020-08-18T00:58:56.751+00:00", "15", "2354", "2020-08-18", "00:58:56", "+0000", "arg1")
);
assertEquals(text, PatternedTextValueProcessor.merge(parts));
}
public void testWithTimestamp1() {
String text = "[2020-08-18T00:58:56] Found 123 errors for service [cheddar1]";
PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text);
assertEquals("[%W] Found %W errors for service [%W]", parts.template());
assertThat(parts.args(), Matchers.contains("2020-08-18T00:58:56", "123", "cheddar1"));
assertEquals(text, PatternedTextValueProcessor.merge(parts));
}
}
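Read together, these cases pin down the split/merge contract: delimiter-separated tokens containing a digit become %W placeholders in the template, arguments are kept in encounter order, and merge() restores the original text exactly. A hedged sketch of how a caller might turn that contract into the two stored values; the space-joined args string is an illustrative assumption, not the mapper's actual encoding:

String text = "[2020-08-18T00:58:56] Found 123 errors for service [cheddar1]";
PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text);
String templateValue = parts.template();           // "[%W] Found %W errors for service [%W]"
String argsValue = String.join(" ", parts.args()); // "2020-08-18T00:58:56 123 cheddar1"
// Round trip: merge(parts) must reproduce the input byte for byte.
assert PatternedTextValueProcessor.merge(parts).equals(text);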


@ -27,6 +27,7 @@ public class LogsdbTestSuiteIT extends ESClientYamlSuiteTestCase {
    @ClassRule
    public static final ElasticsearchCluster cluster = ElasticsearchCluster.local()
        .module("logsdb")
        .distribution(DistributionType.DEFAULT)
        .user(USER, PASS, "superuser", false)
        .setting("xpack.security.autoconfiguration.enabled", "false")


@ -0,0 +1,333 @@
setup:
- do:
indices.create:
index: test
body:
mappings:
properties:
foo:
type: patterned_text
- do:
index:
index: test
id: "1"
body: {}
- do:
index:
index: test
id: "2"
body: { "foo": "Found 5 errors for service [cheddar1]" }
- do:
index:
index: test
id: "3"
body: { "foo": "[2020-08-18T00:58:56] Found 123 errors for service [cheddar1]" }
- do:
index:
index: test
id: "4"
body: { "foo": "Found some errors for cheddar data service" }
- do:
indices.refresh: {}
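# Fixture recap: doc 1 has no "foo" at all, while docs 2-4 each hold one log
# line containing "errors"/"service", so the exists, prefix, regexp, wildcard
# and fuzzy cases below expect 3 hits and the phrase cases expect 1.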
---
Field caps:
- do:
field_caps:
index: test
fields: [ foo ]
- match: { fields.foo.text.searchable: true }
- match: { fields.foo.text.aggregatable: false }
---
Exists query:
- do:
search:
index: test
body:
query:
exists:
field: foo
- match: { "hits.total.value": 3 }
- match: { "hits.hits.0._score": 1.0 }
---
Match query:
- do:
search:
index: test
body:
query:
match:
foo: 5
- match: { "hits.total.value": 1 }
- match: { "hits.hits.0._score": 1.0 }
---
Match Phrase query:
- do:
search:
index: test
body:
query:
match_phrase:
foo: "5 errors"
- match: { "hits.total.value": 1 }
- match: { "hits.hits.0._score": 1.0 }
---
Match Phrase Prefix query:
- do:
search:
index: test
body:
query:
match_phrase_prefix:
foo: "5 err"
- match: { "hits.total.value": 1 }
- match: { "hits.hits.0._score": 1.0 }
---
Query String query with phrase:
- do:
search:
index: test
body:
query:
query_string:
query: '"5 errors"'
default_field: "foo"
- match: { "hits.total.value": 1 }
- match: { "hits.hits.0._score": 1.0 }
---
Regexp query:
- do:
search:
index: test
body:
query:
regexp:
foo: "ser.*ce"
- match: { "hits.total.value": 3 }
- match: { "hits.hits.0._score": 1.0 }
---
Wildcard query:
- do:
search:
index: test
body:
query:
wildcard:
foo: "ser*ce"
- match: { "hits.total.value": 3 }
- match: { "hits.hits.0._score": 1.0 }
---
Prefix query:
- do:
search:
index: test
body:
query:
prefix:
foo: "ser"
- match: { "hits.total.value": 3 }
- match: { "hits.hits.0._score": 1.0 }
---
Fuzzy query:
- do:
search:
index: test
body:
query:
fuzzy:
foo: "errars"
- match: { "hits.total.value": 3 }
- match: { "hits.hits.0._score": 1.0 }
---
Span query:
- do:
catch: bad_request
search:
index: test
body:
query:
span_term:
foo: errors
---
Term intervals query:
- do:
search:
index: test
body:
query:
intervals:
foo:
match:
query: "for service"
max_gaps: 1
- match: { "hits.total.value": 2 }
---
Prefix intervals query:
- do:
search:
index: test
body:
query:
intervals:
foo:
prefix:
prefix: "ser"
- match: { "hits.total.value": 3 }
---
Wildcard intervals query:
- do:
search:
index: test
body:
query:
intervals:
foo:
wildcard:
pattern: "*edda*"
- match: { "hits.total.value": 3 }
---
Fuzzy intervals query:
- do:
search:
index: test
body:
query:
intervals:
foo:
fuzzy:
term: "servace"
- match: { "hits.total.value": 3 }
---
Wildcard highlighting:
- do:
search:
index: test
body:
query:
match:
foo: "5"
highlight:
fields:
"*": {}
- match: { hits.total.value: 1 }
- match: { hits.hits.0._source.foo: "Found 5 errors for service [cheddar1]" }
- match: { hits.hits.0.highlight.foo.0: "Found <em>5</em> errors for service [cheddar1]" }
---
tsdb:
- do:
indices.create:
index: tsdb_test
body:
settings:
index:
mode: time_series
routing_path: [ dimension ]
time_series:
start_time: 2000-01-01T00:00:00Z
end_time: 2099-12-31T23:59:59Z
mappings:
properties:
dimension:
type: keyword
time_series_dimension: true
foo:
type: patterned_text
- do:
index:
index: tsdb_test
refresh: true
body:
"@timestamp": "2000-01-01T00:00:00Z"
dimension: "a"
foo: "Apache Lucene powers Elasticsearch"
- do:
search:
index: tsdb_test
- match: { "hits.total.value": 1 }
- match:
hits.hits.0._source:
"@timestamp" : "2000-01-01T00:00:00.000Z"
"dimension" : "a"
foo: "Apache Lucene powers Elasticsearch"
---
Multiple values:
- do:
indices.create:
index: test1
body:
mappings:
properties:
foo:
type: patterned_text
- do:
catch: bad_request
index:
index: test1
id: "1"
body: {
"foo": [
"Found 5 errors for service [cheddar1]",
"[2020-08-18T00:58:56] Found 123 errors for service [cheddar1]"
]
}
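# patterned_text is single-valued: supplying an array of messages for one
# document is rejected with a bad_request, as asserted above.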


@ -0,0 +1,76 @@
simple:
- do:
indices.create:
index: test
body:
settings:
index:
mapping.source.mode: synthetic
mappings:
properties:
id:
type: integer
message:
type: patterned_text
- do:
bulk:
index: test
refresh: true
body:
- '{ "create": { } }'
- '{ "id": 1, "message": "some log message with no arg" }'
- '{ "create": { } }'
- '{ "id": 2, "message": "another log message with arg 1234 and arg 5678 and a mixed one ABCD9" }'
- '{ "create": { } }'
- '{ "id": 3, "message": "some log message with no arg" }'
- '{ "create": { } }'
- '{ "id": 4, "message": "another log message with arg 1234 and arg 8765 and a mixed one ABCD1" }'
- do:
search:
index: test
sort: id
- match: { hits.hits.0._source.message: "some log message with no arg" }
- match: { hits.hits.1._source.message: "another log message with arg 1234 and arg 5678 and a mixed one ABCD9" }
- match: { hits.hits.2._source.message: "some log message with no arg" }
- match: { hits.hits.3._source.message: "another log message with arg 1234 and arg 8765 and a mixed one ABCD1" }
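# Synthetic source reproduces each message exactly, including the two documents
# that share a template but differ only in their arguments (ids 2 and 4).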
---
synthetic_source with copy_to:
- do:
indices.create:
index: synthetic_source_test
body:
settings:
index:
mapping.source.mode: synthetic
mappings:
properties:
foo:
type: patterned_text
copy_to: copy
copy:
type: keyword
- do:
index:
index: synthetic_source_test
id: "1"
refresh: true
body:
foo: "another log message with arg 1234 and arg 5678 and a mixed one ABCD9"
- do:
search:
index: synthetic_source_test
body:
fields: ["copy"]
- match: { "hits.total.value": 1 }
- match:
hits.hits.0._source.foo: "another log message with arg 1234 and arg 5678 and a mixed one ABCD9"
- match:
hits.hits.0.fields.copy.0: "another log message with arg 1234 and arg 5678 and a mixed one ABCD9"