From 9aaba25d58efa75d54ff87becb1e14130cbf4c87 Mon Sep 17 00:00:00 2001 From: Parker Timmins Date: Wed, 25 Jun 2025 21:31:32 -0500 Subject: [PATCH] Simple version of patterned_text with a single doc value for arguments (#129292) Initial version of patterned_text mapper. Behaves similarly to match_only_text. This version uses a single SortedSetDocValues for a template and another for arguments. It splits the message by delimiters, then classifies a token as an argument if it contains a digit. All arguments are concatenated and inserted as a single doc value. A single inverted index is used, without positions. Phrase queries are still possible, using the SourceConfirmedTextQuery, but are not fast. --- .../src/main/java/module-info.java | 2 + .../extras/MatchOnlyTextFieldMapper.java | 2 +- .../subphase/highlight/HighlightPhase.java | 3 +- x-pack/plugin/logsdb/build.gradle | 3 +- .../xpack/logsdb/LogsDBPlugin.java | 17 +- .../patternedtext/PatternedTextDocValues.java | 88 +++++ .../PatternedTextFieldMapper.java | 176 +++++++++ .../patternedtext/PatternedTextFieldType.java | 270 ++++++++++++++ .../PatternedTextIndexFieldData.java | 134 +++++++ ...atternedTextSyntheticFieldLoaderLayer.java | 86 +++++ .../PatternedTextValueProcessor.java | 105 ++++++ .../PatternTextDocValuesTests.java | 174 +++++++++ .../PatternedTextFieldMapperTests.java | 284 +++++++++++++++ .../PatternedTextFieldTypeTests.java | 194 ++++++++++ .../PatternedTextValueProcessorTests.java | 101 ++++++ .../xpack/logsdb/LogsdbTestSuiteIT.java | 1 + .../test/patternedtext/10_basic.yml | 333 ++++++++++++++++++ .../patternedtext/20_synthetic_source.yml | 76 ++++ 18 files changed, 2045 insertions(+), 4 deletions(-) create mode 100644 x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextDocValues.java create mode 100644 x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldMapper.java create mode 100644 
x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldType.java create mode 100644 x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextIndexFieldData.java create mode 100644 x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextSyntheticFieldLoaderLayer.java create mode 100644 x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextValueProcessor.java create mode 100644 x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternTextDocValuesTests.java create mode 100644 x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldMapperTests.java create mode 100644 x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldTypeTests.java create mode 100644 x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextValueProcessorTests.java create mode 100644 x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/patternedtext/10_basic.yml create mode 100644 x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/patternedtext/20_synthetic_source.yml diff --git a/modules/mapper-extras/src/main/java/module-info.java b/modules/mapper-extras/src/main/java/module-info.java index f89224813379..8bdda994e3e5 100644 --- a/modules/mapper-extras/src/main/java/module-info.java +++ b/modules/mapper-extras/src/main/java/module-info.java @@ -14,4 +14,6 @@ module org.elasticsearch.mapper.extras { requires org.apache.lucene.core; requires org.apache.lucene.memory; requires org.apache.lucene.queries; + + exports org.elasticsearch.index.mapper.extras; } diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java 
b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index 3333b004df40..61abda3aeb16 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -173,7 +173,7 @@ public class MatchOnlyTextFieldMapper extends FieldMapper { super(name, true, false, false, tsi, meta); this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer); this.textFieldType = new TextFieldType(name, isSyntheticSource); - this.originalName = isSyntheticSource ? name() + "._original" : null; + this.originalName = isSyntheticSource ? name + "._original" : null; } public MatchOnlyTextFieldType(String name) { diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java index 54c265deb948..cf9e8fbf7ded 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightPhase.java @@ -124,7 +124,8 @@ public class HighlightPhase implements FetchSubPhase { if (fieldNameContainsWildcards) { if (fieldType.typeName().equals(TextFieldMapper.CONTENT_TYPE) == false && fieldType.typeName().equals(KeywordFieldMapper.CONTENT_TYPE) == false - && fieldType.typeName().equals("match_only_text") == false) { + && fieldType.typeName().equals("match_only_text") == false + && fieldType.typeName().equals("patterned_text") == false) { continue; } if (highlighter.canHighlight(fieldType) == false) { diff --git a/x-pack/plugin/logsdb/build.gradle b/x-pack/plugin/logsdb/build.gradle index 4496d5843afc..aebb860f9d5c 100644 --- a/x-pack/plugin/logsdb/build.gradle +++ b/x-pack/plugin/logsdb/build.gradle @@ -24,12 +24,13 @@ base { restResources { restApi { - 
include 'bulk', 'search', '_common', 'indices', 'index', 'cluster', 'data_stream', 'ingest', 'cat', 'capabilities', 'esql.query' + include 'bulk', 'search', '_common', 'indices', 'index', 'cluster', 'data_stream', 'ingest', 'cat', 'capabilities', 'esql.query', 'field_caps' } } dependencies { compileOnly project(path: xpackModule('core')) + implementation project(':modules:mapper-extras') testImplementation project(':modules:data-streams') testImplementation(testArtifact(project(xpackModule('core')))) javaRestTestImplementation(testArtifact(project(xpackModule('spatial')))) diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/LogsDBPlugin.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/LogsDBPlugin.java index 695406fb9bb3..70236c8e085c 100644 --- a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/LogsDBPlugin.java +++ b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/LogsDBPlugin.java @@ -12,21 +12,27 @@ import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexSettingProvider; import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.license.LicenseService; import org.elasticsearch.license.XPackLicenseState; import org.elasticsearch.plugins.ActionPlugin; +import org.elasticsearch.plugins.MapperPlugin; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.xpack.core.XPackPlugin; import org.elasticsearch.xpack.core.action.XPackInfoFeatureAction; import org.elasticsearch.xpack.core.action.XPackUsageFeatureAction; +import org.elasticsearch.xpack.logsdb.patternedtext.PatternedTextFieldMapper; +import org.elasticsearch.xpack.logsdb.patternedtext.PatternedTextFieldType; import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.Map; +import static java.util.Collections.singletonMap; import static 
org.elasticsearch.xpack.logsdb.LogsdbLicenseService.FALLBACK_SETTING; -public class LogsDBPlugin extends Plugin implements ActionPlugin { +public class LogsDBPlugin extends Plugin implements ActionPlugin, MapperPlugin { private final Settings settings; private final LogsdbLicenseService licenseService; @@ -98,6 +104,15 @@ public class LogsDBPlugin extends Plugin implements ActionPlugin { return actions; } + @Override + public Map getMappers() { + if (PatternedTextFieldMapper.PATTERNED_TEXT_MAPPER.isEnabled()) { + return singletonMap(PatternedTextFieldType.CONTENT_TYPE, PatternedTextFieldMapper.PARSER); + } else { + return Map.of(); + } + } + protected XPackLicenseState getLicenseState() { return XPackPlugin.getSharedLicenseState(); } diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextDocValues.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextDocValues.java new file mode 100644 index 000000000000..b7dfdc95683e --- /dev/null +++ b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextDocValues.java @@ -0,0 +1,88 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.logsdb.patternedtext; + +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.util.BytesRef; + +import java.io.IOException; + +public class PatternedTextDocValues extends BinaryDocValues { + private final SortedSetDocValues templateDocValues; + private final SortedSetDocValues argsDocValues; + + PatternedTextDocValues(SortedSetDocValues templateDocValues, SortedSetDocValues argsDocValues) { + this.templateDocValues = templateDocValues; + this.argsDocValues = argsDocValues; + } + + static PatternedTextDocValues from(LeafReader leafReader, String templateFieldName, String argsFieldName) throws IOException { + SortedSetDocValues templateDocValues = DocValues.getSortedSet(leafReader, templateFieldName); + if (templateDocValues.getValueCount() == 0) { + return null; + } + + SortedSetDocValues argsDocValues = DocValues.getSortedSet(leafReader, argsFieldName); + return new PatternedTextDocValues(templateDocValues, argsDocValues); + } + + private String getNextStringValue() throws IOException { + assert templateDocValues.docValueCount() == 1; + String template = templateDocValues.lookupOrd(templateDocValues.nextOrd()).utf8ToString(); + int argsCount = PatternedTextValueProcessor.countArgs(template); + if (argsCount > 0) { + assert argsDocValues.docValueCount() == 1; + var mergedArgs = argsDocValues.lookupOrd(argsDocValues.nextOrd()); + var args = PatternedTextValueProcessor.decodeRemainingArgs(mergedArgs.utf8ToString()); + return PatternedTextValueProcessor.merge(new PatternedTextValueProcessor.Parts(template, args)); + } else { + return template; + } + } + + @Override + public BytesRef binaryValue() throws IOException { + return new BytesRef(getNextStringValue()); + } + + @Override + public boolean advanceExact(int i) throws IOException { + argsDocValues.advanceExact(i); + // 
If template has a value, then message has a value. We don't have to check args here, since there may not be args for the doc + return templateDocValues.advanceExact(i); + } + + @Override + public int docID() { + return templateDocValues.docID(); + } + + @Override + public int nextDoc() throws IOException { + int templateNext = templateDocValues.nextDoc(); + var argsAdvance = argsDocValues.advance(templateNext); + assert argsAdvance >= templateNext; + return templateNext; + } + + @Override + public int advance(int i) throws IOException { + int templateAdvance = templateDocValues.advance(i); + var argsAdvance = argsDocValues.advance(templateAdvance); + assert argsAdvance >= templateAdvance; + return templateAdvance; + } + + @Override + public long cost() { + return templateDocValues.cost() + argsDocValues.cost(); + } +} diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldMapper.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldMapper.java new file mode 100644 index 000000000000..55f5616f4ac7 --- /dev/null +++ b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldMapper.java @@ -0,0 +1,176 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.logsdb.patternedtext; + +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.util.FeatureFlag; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.analysis.IndexAnalyzers; +import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.mapper.CompositeSyntheticFieldLoader; +import org.elasticsearch.index.mapper.DocumentParserContext; +import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.mapper.MapperBuilderContext; +import org.elasticsearch.index.mapper.TextParams; +import org.elasticsearch.index.mapper.TextSearchInfo; + +import java.io.IOException; +import java.util.Map; + +/** + * A {@link FieldMapper} that splits each message into a template and arguments, indexing the original value and storing the template and concatenated arguments as doc values. + */ +public class PatternedTextFieldMapper extends FieldMapper { + + public static final FeatureFlag PATTERNED_TEXT_MAPPER = new FeatureFlag("patterned_text"); + + public static class Defaults { + public static final FieldType FIELD_TYPE; + + static { + final FieldType ft = new FieldType(); + ft.setTokenized(true); + ft.setStored(false); + ft.setStoreTermVectors(false); + ft.setOmitNorms(true); + ft.setIndexOptions(IndexOptions.DOCS); + FIELD_TYPE = freezeAndDeduplicateFieldType(ft); + } + } + + public static class Builder extends FieldMapper.Builder { + + private final IndexVersion indexCreatedVersion; + + private final Parameter> meta = Parameter.metaParam(); + + private final TextParams.Analyzers analyzers; + + public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers) { + super(name); + this.indexCreatedVersion = indexCreatedVersion; + this.analyzers = new TextParams.Analyzers( + indexAnalyzers, + m -> ((PatternedTextFieldMapper) m).indexAnalyzer, + m -> 
((PatternedTextFieldMapper) m).positionIncrementGap, + indexCreatedVersion + ); + } + + @Override + protected Parameter[] getParameters() { + return new Parameter[] { meta }; + } + + private PatternedTextFieldType buildFieldType(MapperBuilderContext context) { + NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer(); + NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer(); + NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer(); + TextSearchInfo tsi = new TextSearchInfo(Defaults.FIELD_TYPE, null, searchAnalyzer, searchQuoteAnalyzer); + return new PatternedTextFieldType( + context.buildFullName(leafName()), + tsi, + indexAnalyzer, + context.isSourceSynthetic(), + meta.getValue() + ); + } + + @Override + public PatternedTextFieldMapper build(MapperBuilderContext context) { + return new PatternedTextFieldMapper(leafName(), buildFieldType(context), builderParams(this, context), this); + } + } + + public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers())); + + private final IndexVersion indexCreatedVersion; + private final IndexAnalyzers indexAnalyzers; + private final NamedAnalyzer indexAnalyzer; + private final int positionIncrementGap; + private final FieldType fieldType; + + private PatternedTextFieldMapper( + String simpleName, + PatternedTextFieldType mappedFieldPatternedTextFieldType, + BuilderParams builderParams, + Builder builder + ) { + super(simpleName, mappedFieldPatternedTextFieldType, builderParams); + assert mappedFieldPatternedTextFieldType.getTextSearchInfo().isTokenized(); + assert mappedFieldPatternedTextFieldType.hasDocValues() == false; + this.fieldType = Defaults.FIELD_TYPE; + this.indexCreatedVersion = builder.indexCreatedVersion; + this.indexAnalyzers = builder.analyzers.indexAnalyzers; + this.indexAnalyzer = builder.analyzers.getIndexAnalyzer(); + this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue(); + } + + @Override + 
public Map indexAnalyzers() { + return Map.of(mappedFieldType.name(), indexAnalyzer); + } + + @Override + public FieldMapper.Builder getMergeBuilder() { + return new Builder(leafName(), indexCreatedVersion, indexAnalyzers).init(this); + } + + @Override + protected void parseCreateField(DocumentParserContext context) throws IOException { + final String value = context.parser().textOrNull(); + if (value == null) { + return; + } + + var existingValue = context.doc().getField(fieldType().name()); + if (existingValue != null) { + throw new IllegalArgumentException("Multiple values are not allowed for field [" + fieldType().name() + "]."); + } + + // Parse template and args. + PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(value); + + // Add index on original value + context.doc().add(new Field(fieldType().name(), value, fieldType)); + + // Add template doc_values + context.doc().add(new SortedSetDocValuesField(fieldType().templateFieldName(), new BytesRef(parts.template()))); + + // Add args doc_values + if (parts.args().isEmpty() == false) { + String remainingArgs = PatternedTextValueProcessor.encodeRemainingArgs(parts); + context.doc().add(new SortedSetDocValuesField(fieldType().argsFieldName(), new BytesRef(remainingArgs))); + } + } + + @Override + protected String contentType() { + return PatternedTextFieldType.CONTENT_TYPE; + } + + @Override + public PatternedTextFieldType fieldType() { + return (PatternedTextFieldType) super.fieldType(); + } + + @Override + protected SyntheticSourceSupport syntheticSourceSupport() { + return new SyntheticSourceSupport.Native( + () -> new CompositeSyntheticFieldLoader( + leafName(), + fullPath(), + new PatternedTextSyntheticFieldLoaderLayer(fieldType().name(), fieldType().templateFieldName(), fieldType().argsFieldName()) + ) + ); + } +} diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldType.java 
b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldType.java new file mode 100644 index 000000000000..4c712d10e0aa --- /dev/null +++ b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldType.java @@ -0,0 +1,270 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.logsdb.patternedtext; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.queries.intervals.Intervals; +import org.apache.lucene.queries.intervals.IntervalsSource; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.FieldExistsQuery; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOFunction; +import org.elasticsearch.common.CheckedIntFunction; +import org.elasticsearch.common.lucene.Lucene; +import org.elasticsearch.common.unit.Fuzziness; +import org.elasticsearch.index.fielddata.FieldDataContext; +import org.elasticsearch.index.fielddata.IndexFieldData; +import org.elasticsearch.index.fielddata.SourceValueFetcherSortedBinaryIndexFieldData; +import org.elasticsearch.index.mapper.BlockDocValuesReader; +import org.elasticsearch.index.mapper.BlockLoader; +import org.elasticsearch.index.mapper.SourceValueFetcher; +import 
org.elasticsearch.index.mapper.StringFieldType; +import org.elasticsearch.index.mapper.TextFieldMapper; +import org.elasticsearch.index.mapper.TextSearchInfo; +import org.elasticsearch.index.mapper.ValueFetcher; +import org.elasticsearch.index.mapper.extras.SourceConfirmedTextQuery; +import org.elasticsearch.index.mapper.extras.SourceIntervalsSource; +import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.script.field.KeywordDocValuesField; +import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; +import org.elasticsearch.search.lookup.SourceProvider; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +public class PatternedTextFieldType extends StringFieldType { + + private static final String TEMPLATE_SUFFIX = ".template"; + private static final String ARGS_SUFFIX = ".args"; + + public static final String CONTENT_TYPE = "patterned_text"; + + private final Analyzer indexAnalyzer; + private final TextFieldMapper.TextFieldType textFieldType; + + PatternedTextFieldType(String name, TextSearchInfo tsi, Analyzer indexAnalyzer, boolean isSyntheticSource, Map meta) { + // Though this type is based on doc_values, hasDocValues is set to false as the patterned_text type is not aggregatable. + // This does not stop its child .template type from being aggregatable. 
+ super(name, true, false, false, tsi, meta); + this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer); + this.textFieldType = new TextFieldMapper.TextFieldType(name, isSyntheticSource); + } + + PatternedTextFieldType(String name) { + this( + name, + new TextSearchInfo(PatternedTextFieldMapper.Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER), + Lucene.STANDARD_ANALYZER, + false, + Collections.emptyMap() + ); + } + + @Override + public String typeName() { + return CONTENT_TYPE; + } + + @Override + public String familyTypeName() { + return TextFieldMapper.CONTENT_TYPE; + } + + @Override + public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { + return SourceValueFetcher.toString(name(), context, format); + } + + private IOFunction, IOException>> getValueFetcherProvider( + SearchExecutionContext searchExecutionContext + ) { + return context -> { + ValueFetcher valueFetcher = valueFetcher(searchExecutionContext, null); + SourceProvider sourceProvider = searchExecutionContext.lookup(); + valueFetcher.setNextReader(context); + return docID -> { + try { + return valueFetcher.fetchValues(sourceProvider.getSource(context, docID), docID, new ArrayList<>()); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }; + }; + } + + private Query sourceConfirmedQuery(Query query, SearchExecutionContext context) { + // Disable scoring + return new ConstantScoreQuery(new SourceConfirmedTextQuery(query, getValueFetcherProvider(context), indexAnalyzer)); + } + + private IntervalsSource toIntervalsSource(IntervalsSource source, Query approximation, SearchExecutionContext searchExecutionContext) { + return new SourceIntervalsSource(source, approximation, getValueFetcherProvider(searchExecutionContext), indexAnalyzer); + } + + @Override + public Query termQuery(Object query, SearchExecutionContext context) { + // Disable scoring + return new ConstantScoreQuery(super.termQuery(query, context)); + } + + @Override 
+ public Query fuzzyQuery( + Object value, + Fuzziness fuzziness, + int prefixLength, + int maxExpansions, + boolean transpositions, + SearchExecutionContext context, + MultiTermQuery.RewriteMethod rewriteMethod + ) { + // Disable scoring + return new ConstantScoreQuery( + super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, context, rewriteMethod) + ); + } + + @Override + public Query existsQuery(SearchExecutionContext context) { + return new FieldExistsQuery(templateFieldName()); + } + + @Override + public IntervalsSource termIntervals(BytesRef term, SearchExecutionContext context) { + return toIntervalsSource(Intervals.term(term), new TermQuery(new Term(name(), term)), context); + } + + @Override + public IntervalsSource prefixIntervals(BytesRef term, SearchExecutionContext context) { + return toIntervalsSource( + Intervals.prefix(term, IndexSearcher.getMaxClauseCount()), + new PrefixQuery(new Term(name(), term)), + context + ); + } + + @Override + public IntervalsSource fuzzyIntervals( + String term, + int maxDistance, + int prefixLength, + boolean transpositions, + SearchExecutionContext context + ) { + FuzzyQuery fuzzyQuery = new FuzzyQuery( + new Term(name(), term), + maxDistance, + prefixLength, + IndexSearcher.getMaxClauseCount(), + transpositions, + MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE + ); + IntervalsSource fuzzyIntervals = Intervals.multiterm(fuzzyQuery.getAutomata(), IndexSearcher.getMaxClauseCount(), term); + return toIntervalsSource(fuzzyIntervals, fuzzyQuery, context); + } + + @Override + public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContext context) { + return toIntervalsSource( + Intervals.wildcard(pattern, IndexSearcher.getMaxClauseCount()), + new MatchAllDocsQuery(), // wildcard queries can be expensive, what should the approximation be? 
+ context + ); + } + + @Override + public IntervalsSource regexpIntervals(BytesRef pattern, SearchExecutionContext context) { + return toIntervalsSource( + Intervals.regexp(pattern, IndexSearcher.getMaxClauseCount()), + new MatchAllDocsQuery(), // regexp queries can be expensive, what should the approximation be? + context + ); + } + + @Override + public IntervalsSource rangeIntervals( + BytesRef lowerTerm, + BytesRef upperTerm, + boolean includeLower, + boolean includeUpper, + SearchExecutionContext context + ) { + return toIntervalsSource( + Intervals.range(lowerTerm, upperTerm, includeLower, includeUpper, IndexSearcher.getMaxClauseCount()), + new MatchAllDocsQuery(), // range queries can be expensive, what should the approximation be? + context + ); + } + + @Override + public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, SearchExecutionContext queryShardContext) + throws IOException { + final Query textQuery = textFieldType.phraseQuery(stream, slop, enablePosIncrements, queryShardContext); + return sourceConfirmedQuery(textQuery, queryShardContext); + } + + @Override + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, SearchExecutionContext queryShardContext) + throws IOException { + final Query textQuery = textFieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, queryShardContext); + return sourceConfirmedQuery(textQuery, queryShardContext); + } + + @Override + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, SearchExecutionContext queryShardContext) + throws IOException { + final Query textQuery = textFieldType.phrasePrefixQuery(stream, slop, maxExpansions, queryShardContext); + return sourceConfirmedQuery(textQuery, queryShardContext); + } + + @Override + public BlockLoader blockLoader(BlockLoaderContext blContext) { + return new BlockDocValuesReader.BytesRefsFromBinaryBlockLoader(name()); + } + + @Override + public IndexFieldData.Builder 
fielddataBuilder(FieldDataContext fieldDataContext) { + if (fieldDataContext.fielddataOperation() != FielddataOperation.SCRIPT) { + throw new IllegalArgumentException(CONTENT_TYPE + " fields do not support sorting and aggregations"); + } + if (textFieldType.isSyntheticSource()) { + return new PatternedTextIndexFieldData.Builder(this); + } + return new SourceValueFetcherSortedBinaryIndexFieldData.Builder( + name(), + CoreValuesSourceType.KEYWORD, + SourceValueFetcher.toString(fieldDataContext.sourcePathsLookup().apply(name())), + fieldDataContext.lookupSupplier().get(), + KeywordDocValuesField::new + ); + + } + + String templateFieldName() { + return name() + TEMPLATE_SUFFIX; + } + + String argsFieldName() { + return name() + ARGS_SUFFIX; + } + +} diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextIndexFieldData.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextIndexFieldData.java new file mode 100644 index 000000000000..8e532a9dd5a3 --- /dev/null +++ b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextIndexFieldData.java @@ -0,0 +1,134 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.logsdb.patternedtext; + +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.SortField; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.index.fielddata.IndexFieldData; +import org.elasticsearch.index.fielddata.IndexFieldDataCache; +import org.elasticsearch.index.fielddata.LeafFieldData; +import org.elasticsearch.index.fielddata.SortedBinaryDocValues; +import org.elasticsearch.indices.breaker.CircuitBreakerService; +import org.elasticsearch.script.field.DocValuesScriptFieldFactory; +import org.elasticsearch.script.field.KeywordDocValuesField; +import org.elasticsearch.script.field.ToScriptFieldFactory; +import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.MultiValueMode; +import org.elasticsearch.search.aggregations.support.ValuesSourceType; +import org.elasticsearch.search.sort.BucketedSort; +import org.elasticsearch.search.sort.SortOrder; + +import java.io.IOException; +import java.io.UncheckedIOException; + +public class PatternedTextIndexFieldData implements IndexFieldData { + + private final PatternedTextFieldType fieldType; + + static class Builder implements IndexFieldData.Builder { + + final PatternedTextFieldType fieldType; + + Builder(PatternedTextFieldType fieldType) { + this.fieldType = fieldType; + } + + public PatternedTextIndexFieldData build(IndexFieldDataCache cache, CircuitBreakerService breakerService) { + return new PatternedTextIndexFieldData(fieldType); + } + } + + PatternedTextIndexFieldData(PatternedTextFieldType fieldType) { + this.fieldType = fieldType; + } + + @Override + public String getFieldName() { + return fieldType.name(); + } + + @Override + public ValuesSourceType getValuesSourceType() { + return null; + } + + @Override + public LeafFieldData load(LeafReaderContext context) { + try { + return loadDirect(context); + } catch 
(IOException e) { + throw new UncheckedIOException(e); + } + } + + @Override + public LeafFieldData loadDirect(LeafReaderContext context) throws IOException { + LeafReader leafReader = context.reader(); + PatternedTextDocValues docValues = PatternedTextDocValues.from( + leafReader, + fieldType.templateFieldName(), + fieldType.argsFieldName() + ); + return new LeafFieldData() { + + final ToScriptFieldFactory factory = KeywordDocValuesField::new; + + @Override + public DocValuesScriptFieldFactory getScriptFieldFactory(String name) { + return factory.getScriptFieldFactory(getBytesValues(), name); + } + + @Override + public SortedBinaryDocValues getBytesValues() { + return new SortedBinaryDocValues() { + @Override + public boolean advanceExact(int doc) throws IOException { + return docValues.advanceExact(doc); + } + + @Override + public int docValueCount() { + return 1; + } + + @Override + public BytesRef nextValue() throws IOException { + return docValues.binaryValue(); + } + }; + } + + @Override + public long ramBytesUsed() { + return 1L; + } + }; + } + + @Override + public SortField sortField(Object missingValue, MultiValueMode sortMode, XFieldComparatorSource.Nested nested, boolean reverse) { + throw new IllegalArgumentException("not supported for source patterned text field type"); + } + + @Override + public BucketedSort newBucketedSort( + BigArrays bigArrays, + Object missingValue, + MultiValueMode sortMode, + XFieldComparatorSource.Nested nested, + SortOrder sortOrder, + DocValueFormat format, + int bucketSize, + BucketedSort.ExtraData extra + ) { + throw new IllegalArgumentException("only supported on numeric fields"); + } +} diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextSyntheticFieldLoaderLayer.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextSyntheticFieldLoaderLayer.java new file mode 100644 index 000000000000..f05fa31671cd --- /dev/null +++ 
b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextSyntheticFieldLoaderLayer.java @@ -0,0 +1,86 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.logsdb.patternedtext; + +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.search.DocIdSetIterator; +import org.elasticsearch.index.mapper.CompositeSyntheticFieldLoader; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; + +class PatternedTextSyntheticFieldLoaderLayer implements CompositeSyntheticFieldLoader.DocValuesLayer { + + private final String name; + private final String templateFieldName; + private final String argsFieldName; + private PatternedTextSyntheticFieldLoader loader; + + PatternedTextSyntheticFieldLoaderLayer(String name, String templateFieldName, String argsFieldName) { + this.name = name; + this.templateFieldName = templateFieldName; + this.argsFieldName = argsFieldName; + } + + @Override + public long valueCount() { + return loader != null && loader.hasValue() ? 
1 : 0; + } + + @Override + public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException { + var docValues = PatternedTextDocValues.from(leafReader, templateFieldName, argsFieldName); + if (docValues == null) { + return null; + } + loader = new PatternedTextSyntheticFieldLoader(docValues); + return loader; + } + + @Override + public boolean hasValue() { + return loader != null && loader.hasValue(); + } + + @Override + public void write(XContentBuilder b) throws IOException { + if (loader != null) { + loader.write(b); + } + } + + @Override + public String fieldName() { + return name; + } + + private static class PatternedTextSyntheticFieldLoader implements DocValuesLoader { + private final PatternedTextDocValues docValues; + private boolean hasValue = false; + + PatternedTextSyntheticFieldLoader(PatternedTextDocValues docValues) { + this.docValues = docValues; + } + + public boolean hasValue() { + assert docValues.docID() != DocIdSetIterator.NO_MORE_DOCS; + return hasValue; + } + + @Override + public boolean advanceToDoc(int docId) throws IOException { + return hasValue = docValues.advanceExact(docId); + } + + public void write(XContentBuilder b) throws IOException { + if (hasValue) { + b.value(docValues.binaryValue().utf8ToString()); + } + } + } +} diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextValueProcessor.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextValueProcessor.java new file mode 100644 index 000000000000..c4551777c319 --- /dev/null +++ b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextValueProcessor.java @@ -0,0 +1,105 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.logsdb.patternedtext; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class PatternedTextValueProcessor { + private static final String TEXT_ARG_PLACEHOLDER = "%W"; + private static final String DELIMITER = "[\\s\\[\\]]"; + private static final String SPACE = " "; + + record Parts(String template, List args) {} + + static Parts split(String text) { + StringBuilder template = new StringBuilder(); + List args = new ArrayList<>(); + String[] tokens = text.split(DELIMITER); + int textIndex = 0; + for (String token : tokens) { + if (token.isEmpty()) { + if (textIndex < text.length() - 1) { + template.append(text.charAt(textIndex++)); + } + continue; + } + if (isArg(token)) { + args.add(token); + template.append(TEXT_ARG_PLACEHOLDER); + } else { + template.append(token); + } + textIndex += token.length(); + if (textIndex < text.length()) { + template.append(text.charAt(textIndex++)); + } + } + while (textIndex < text.length()) { + template.append(text.charAt(textIndex++)); + } + return new Parts(template.toString(), args); + } + + private static boolean isArg(String text) { + for (int i = 0; i < text.length(); i++) { + if (Character.isDigit(text.charAt(i))) { + return true; + } + } + return false; + } + + static String merge(Parts parts) { + StringBuilder builder = new StringBuilder(); + String[] templateParts = parts.template.split(DELIMITER); + int i = 0; + int templateIndex = 0; + for (String part : templateParts) { + if (part.equals(TEXT_ARG_PLACEHOLDER)) { + builder.append(parts.args.get(i++)); + templateIndex += TEXT_ARG_PLACEHOLDER.length(); + } else if (part.isEmpty() == false) { + builder.append(part); + templateIndex += part.length(); + } + if (templateIndex < parts.template.length()) { + builder.append(parts.template.charAt(templateIndex++)); + } + } + 
assert i == parts.args.size() : "expected " + i + " but got " + parts.args.size(); + assert builder.toString().contains(TEXT_ARG_PLACEHOLDER) == false : builder.toString(); + while (templateIndex < parts.template.length()) { + builder.append(parts.template.charAt(templateIndex++)); + } + return builder.toString(); + } + + static String encodeRemainingArgs(Parts parts) { + return String.join(SPACE, parts.args); + } + + static List decodeRemainingArgs(String mergedArgs) { + return Arrays.asList(mergedArgs.split(SPACE)); + } + + static int countArgs(String template) { + int count = 0; + for (int i = 0; i < template.length() - 1; i++) { + if (template.charAt(i) == '%') { + char next = template.charAt(i + 1); + if (next == 'W') { + count++; + i++; + } + } + } + return count; + } +} diff --git a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternTextDocValuesTests.java b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternTextDocValuesTests.java new file mode 100644 index 000000000000..85eeac12abfb --- /dev/null +++ b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternTextDocValuesTests.java @@ -0,0 +1,174 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.logsdb.patternedtext; + +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.test.ESTestCase; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; + +import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; + +public class PatternTextDocValuesTests extends ESTestCase { + + private static PatternedTextDocValues makeDocValueSparseArgs() { + var template = new SimpleSortedSetDocValues("%W dog", "cat", "%W mouse %W", "hat %W"); + var args = new SimpleSortedSetDocValues("1", null, "2 3", "4"); + return new PatternedTextDocValues(template, args); + } + + private static PatternedTextDocValues makeDocValuesDenseArgs() { + var template = new SimpleSortedSetDocValues("%W moose", "%W goose %W", "%W mouse %W", "%W house"); + var args = new SimpleSortedSetDocValues("1", "4 5", "2 3", "7"); + return new PatternedTextDocValues(template, args); + } + + private static PatternedTextDocValues makeDocValueMissingValues() { + var template = new SimpleSortedSetDocValues("%W cheddar", "cat", null, "%W cheese"); + var args = new SimpleSortedSetDocValues("1", null, null, "4"); + return new PatternedTextDocValues(template, args); + } + + public void testNextDoc() throws IOException { + var docValues = randomBoolean() ? 
makeDocValueSparseArgs() : makeDocValuesDenseArgs(); + assertEquals(-1, docValues.docID()); + assertEquals(0, docValues.nextDoc()); + assertEquals(1, docValues.nextDoc()); + assertEquals(2, docValues.nextDoc()); + assertEquals(3, docValues.nextDoc()); + assertEquals(NO_MORE_DOCS, docValues.nextDoc()); + } + + public void testNextDocMissing() throws IOException { + var docValues = makeDocValueMissingValues(); + assertEquals(-1, docValues.docID()); + assertEquals(0, docValues.nextDoc()); + assertEquals(1, docValues.nextDoc()); + assertEquals(3, docValues.nextDoc()); + assertEquals(NO_MORE_DOCS, docValues.nextDoc()); + } + + public void testAdvance1() throws IOException { + var docValues = randomBoolean() ? makeDocValueSparseArgs() : makeDocValuesDenseArgs(); + assertEquals(-1, docValues.docID()); + assertEquals(0, docValues.nextDoc()); + assertEquals(1, docValues.advance(1)); + assertEquals(2, docValues.advance(2)); + assertEquals(3, docValues.advance(3)); + assertEquals(NO_MORE_DOCS, docValues.advance(4)); + } + + public void testAdvanceFarther() throws IOException { + var docValues = randomBoolean() ? 
makeDocValueSparseArgs() : makeDocValuesDenseArgs(); + assertEquals(2, docValues.advance(2)); + // a repeated advance to the same target stays on the same doc + assertEquals(2, docValues.advance(2)); + } + + public void testAdvanceSkipsValuesIfMissing() throws IOException { + var docValues = makeDocValueMissingValues(); + assertEquals(3, docValues.advance(2)); + } + + public void testAdvanceExactMissing() throws IOException { + var docValues = makeDocValueMissingValues(); + assertTrue(docValues.advanceExact(1)); + assertFalse(docValues.advanceExact(2)); + assertEquals(3, docValues.docID()); + } + + public void testValueAll() throws IOException { + var docValues = makeDocValuesDenseArgs(); + assertEquals(0, docValues.nextDoc()); + assertEquals("1 moose", docValues.binaryValue().utf8ToString()); + assertEquals(1, docValues.nextDoc()); + assertEquals("4 goose 5", docValues.binaryValue().utf8ToString()); + assertEquals(2, docValues.nextDoc()); + assertEquals("2 mouse 3", docValues.binaryValue().utf8ToString()); + assertEquals(3, docValues.nextDoc()); + assertEquals("7 house", docValues.binaryValue().utf8ToString()); + } + + public void testValueMissing() throws IOException { + var docValues = makeDocValueMissingValues(); + assertEquals(0, docValues.nextDoc()); + assertEquals("1 cheddar", docValues.binaryValue().utf8ToString()); + assertEquals(1, docValues.nextDoc()); + assertEquals("cat", docValues.binaryValue().utf8ToString()); + assertEquals(3, docValues.nextDoc()); + assertEquals("4 cheese", docValues.binaryValue().utf8ToString()); + } + + static class SimpleSortedSetDocValues extends SortedSetDocValues { + + private final List<String> ordToValues; + private final List<Integer> docToOrds; + private int currDoc = -1; + + // Single value for each docId, null if no value for a docId + SimpleSortedSetDocValues(String... docIdToValue) { + ordToValues = Arrays.stream(docIdToValue).filter(Objects::nonNull).collect(Collectors.toSet()).stream().sorted().toList(); + docToOrds = Arrays.stream(docIdToValue).map(v -> v == null ?
null : ordToValues.indexOf(v)).toList(); + } + + @Override + public long nextOrd() { + return docToOrds.get(currDoc); + } + + @Override + public int docValueCount() { + return 1; + } + + @Override + public BytesRef lookupOrd(long ord) { + return new BytesRef(ordToValues.get((int) ord)); + } + + @Override + public long getValueCount() { + return ordToValues.size(); + } + + @Override + public boolean advanceExact(int target) { + return advance(target) == target; + } + + @Override + public int docID() { + return currDoc >= docToOrds.size() ? NO_MORE_DOCS : currDoc; + } + + @Override + public int nextDoc() throws IOException { + return advance(currDoc + 1); + } + + @Override + public int advance(int target) { + for (currDoc = target; currDoc < docToOrds.size(); currDoc++) { + if (docToOrds.get(currDoc) != null) { + return currDoc; + } + } + return NO_MORE_DOCS; + } + + @Override + public long cost() { + return 1; + } + } +} diff --git a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldMapperTests.java b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldMapperTests.java new file mode 100644 index 000000000000..2a707eafa285 --- /dev/null +++ b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldMapperTests.java @@ -0,0 +1,284 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.logsdb.patternedtext; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.IndexableFieldType; +import org.apache.lucene.search.FieldExistsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TotalHits; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.analysis.CannedTokenStream; +import org.apache.lucene.tests.analysis.Token; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.Tuple; +import org.elasticsearch.index.mapper.DateFieldMapper; +import org.elasticsearch.index.mapper.DocumentMapper; +import org.elasticsearch.index.mapper.KeywordFieldMapper; +import org.elasticsearch.index.mapper.LuceneDocument; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.MapperParsingException; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.MapperTestCase; +import org.elasticsearch.index.mapper.ParsedDocument; +import org.elasticsearch.index.query.MatchPhraseQueryBuilder; +import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xpack.logsdb.LogsDBPlugin; +import org.junit.AssumptionViolatedException; + +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.UUID; + +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import 
static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.startsWith; + +public class PatternedTextFieldMapperTests extends MapperTestCase { + + @Override + protected Collection getPlugins() { + return List.of(new LogsDBPlugin(Settings.EMPTY)); + } + + @Override + protected Object getSampleValueForDocument() { + return "value"; + } + + @Override + protected void assertExistsQuery(MappedFieldType fieldType, Query query, LuceneDocument fields) { + assertThat(query, instanceOf(FieldExistsQuery.class)); + FieldExistsQuery fieldExistsQuery = (FieldExistsQuery) query; + assertThat(fieldExistsQuery.getField(), startsWith("field")); + assertNoFieldNamesField(fields); + } + + public void testExistsStandardSource() throws IOException { + assertExistsQuery(createMapperService(fieldMapping(b -> b.field("type", "patterned_text")))); + } + + public void testExistsSyntheticSource() throws IOException { + assertExistsQuery(createSytheticSourceMapperService(fieldMapping(b -> b.field("type", "patterned_text")))); + } + + public void testPhraseQueryStandardSource() throws IOException { + assertPhraseQuery(createMapperService(fieldMapping(b -> b.field("type", "patterned_text")))); + } + + public void testPhraseQuerySyntheticSource() throws IOException { + assertPhraseQuery(createSytheticSourceMapperService(fieldMapping(b -> b.field("type", "patterned_text")))); + } + + private void assertPhraseQuery(MapperService mapperService) throws IOException { + try (Directory directory = newDirectory()) { + RandomIndexWriter iw = new RandomIndexWriter(random(), directory); + LuceneDocument doc = mapperService.documentMapper().parse(source(b -> b.field("field", "the quick brown fox 1"))).rootDoc(); + iw.addDocument(doc); + iw.close(); + try (DirectoryReader reader = DirectoryReader.open(directory)) { + SearchExecutionContext context = createSearchExecutionContext(mapperService, newSearcher(reader)); + MatchPhraseQueryBuilder queryBuilder = new MatchPhraseQueryBuilder("field", 
"brown fox 1"); + TopDocs docs = context.searcher().search(queryBuilder.toQuery(context), 1); + assertThat(docs.totalHits.value(), equalTo(1L)); + assertThat(docs.totalHits.relation(), equalTo(TotalHits.Relation.EQUAL_TO)); + assertThat(docs.scoreDocs[0].doc, equalTo(0)); + } + } + } + + @Override + protected void registerParameters(ParameterChecker checker) throws IOException { + checker.registerUpdateCheck( + b -> { b.field("meta", Collections.singletonMap("format", "mysql.access")); }, + m -> assertEquals(Collections.singletonMap("format", "mysql.access"), m.fieldType().meta()) + ); + } + + @Override + protected void minimalMapping(XContentBuilder b) throws IOException { + b.field("type", "patterned_text"); + } + + @Override + protected void minimalStoreMapping(XContentBuilder b) throws IOException { + // 'store' is always true + minimalMapping(b); + } + + public void testDefaults() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + assertEquals(Strings.toString(fieldMapping(this::minimalMapping)), mapper.mappingSource().toString()); + + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); + List fields = doc.rootDoc().getFields("field"); + assertEquals(1, fields.size()); + assertEquals("1234", fields.get(0).stringValue()); + IndexableFieldType fieldType = fields.get(0).fieldType(); + assertThat(fieldType.omitNorms(), equalTo(true)); + assertTrue(fieldType.tokenized()); + assertFalse(fieldType.stored()); + assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS)); + assertThat(fieldType.storeTermVectors(), equalTo(false)); + assertThat(fieldType.storeTermVectorOffsets(), equalTo(false)); + assertThat(fieldType.storeTermVectorPositions(), equalTo(false)); + assertThat(fieldType.storeTermVectorPayloads(), equalTo(false)); + assertEquals(DocValuesType.NONE, fieldType.docValuesType()); + } + + public void testNullConfigValuesFail() throws MapperParsingException { + Exception e = 
expectThrows( + MapperParsingException.class, + () -> createDocumentMapper(fieldMapping(b -> b.field("type", "patterned_text").field("meta", (String) null))) + ); + assertThat(e.getMessage(), containsString("[meta] on mapper [field] of type [patterned_text] must not have a [null] value")); + } + + public void testSimpleMerge() throws IOException { + XContentBuilder startingMapping = fieldMapping(b -> b.field("type", "patterned_text")); + MapperService mapperService = createMapperService(startingMapping); + assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(PatternedTextFieldMapper.class)); + + merge(mapperService, startingMapping); + assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(PatternedTextFieldMapper.class)); + + XContentBuilder newField = mapping(b -> { + b.startObject("field").field("type", "patterned_text").startObject("meta").field("key", "value").endObject().endObject(); + b.startObject("other_field").field("type", "keyword").endObject(); + }); + merge(mapperService, newField); + assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(PatternedTextFieldMapper.class)); + assertThat(mapperService.documentMapper().mappers().getMapper("other_field"), instanceOf(KeywordFieldMapper.class)); + } + + public void testDisabledSource() throws IOException { + XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("_doc"); + { + mapping.startObject("properties"); + { + mapping.startObject("foo"); + { + mapping.field("type", "patterned_text"); + } + mapping.endObject(); + } + mapping.endObject(); + + mapping.startObject("_source"); + { + mapping.field("enabled", false); + } + mapping.endObject(); + } + mapping.endObject().endObject(); + + MapperService mapperService = createMapperService(mapping); + MappedFieldType ft = mapperService.fieldType("foo"); + SearchExecutionContext context = createSearchExecutionContext(mapperService); + TokenStream ts = 
new CannedTokenStream(new Token("a", 0, 3), new Token("b", 4, 7)); + + // Allowed even if source is disabled. + ft.phraseQuery(ts, 0, true, context); + ft.termQuery("a", context); + } + + @Override + protected Object generateRandomInputValue(MappedFieldType ft) { + assumeFalse("We don't have a way to assert things here", true); + return null; + } + + @Override + protected void randomFetchTestFieldConfig(XContentBuilder b) throws IOException { + assumeFalse("We don't have a way to assert things here", true); + } + + @Override + protected boolean supportsIgnoreMalformed() { + return false; + } + + @Override + protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) { + assertFalse("patterned_text doesn't support ignoreMalformed", ignoreMalformed); + return new PatternedTextSyntheticSourceSupport(); + } + + static class PatternedTextSyntheticSourceSupport implements SyntheticSourceSupport { + @Override + public SyntheticSourceExample example(int maxValues) { + Tuple v = generateValue(); + return new SyntheticSourceExample(v.v1(), v.v2(), this::mapping); + } + + private Tuple generateValue() { + StringBuilder builder = new StringBuilder(); + if (randomBoolean()) { + builder.append(randomAlphaOfLength(5)); + } else { + String timestamp = DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.formatMillis(System.currentTimeMillis()); + builder.append(timestamp); + } + for (int i = 0; i < randomIntBetween(0, 9); i++) { + builder.append(" "); + int rand = randomIntBetween(0, 4); + switch (rand) { + case 0 -> builder.append(randomAlphaOfLength(5)); + case 1 -> builder.append(randomAlphanumericOfLength(5)); + case 2 -> builder.append(UUID.randomUUID()); + case 3 -> builder.append(randomIp(true)); + case 4 -> builder.append(DateFieldMapper.DEFAULT_DATE_TIME_FORMATTER.formatMillis(randomMillisUpToYear9999())); + } + } + String value = builder.toString(); + return Tuple.tuple(value, value); + } + + private void mapping(XContentBuilder b) throws IOException { + 
b.field("type", "patterned_text"); + } + + @Override + public List invalidExample() throws IOException { + return List.of(); + } + } + + public void testDocValues() throws IOException { + MapperService mapper = createMapperService(fieldMapping(b -> b.field("type", "patterned_text"))); + assertScriptDocValues(mapper, "foo", equalTo(List.of("foo"))); + } + + public void testDocValuesSynthetic() throws IOException { + MapperService mapper = createSytheticSourceMapperService(fieldMapping(b -> b.field("type", "patterned_text"))); + assertScriptDocValues(mapper, "foo", equalTo(List.of("foo"))); + } + + @Override + public void testSyntheticSourceKeepArrays() { + // This mapper does not allow arrays + } + + @Override + protected IngestScriptSupport ingestScriptSupport() { + throw new AssumptionViolatedException("not supported"); + } +} diff --git a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldTypeTests.java b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldTypeTests.java new file mode 100644 index 000000000000..2e07c4c0d839 --- /dev/null +++ b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextFieldTypeTests.java @@ -0,0 +1,194 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.logsdb.patternedtext; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.index.Term; +import org.apache.lucene.queries.intervals.Intervals; +import org.apache.lucene.queries.intervals.IntervalsSource; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.RegexpQuery; +import org.apache.lucene.search.TermInSetQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.tests.analysis.CannedTokenStream; +import org.apache.lucene.tests.analysis.Token; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.common.lucene.BytesRefs; +import org.elasticsearch.common.lucene.search.AutomatonQueries; +import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; +import org.elasticsearch.common.unit.Fuzziness; +import org.elasticsearch.index.mapper.FieldTypeTestCase; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.extras.SourceIntervalsSource; +import org.hamcrest.Matchers; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class PatternedTextFieldTypeTests extends FieldTypeTestCase { + + public void testTermQuery() { + MappedFieldType ft = new PatternedTextFieldType("field"); + assertEquals(new ConstantScoreQuery(new TermQuery(new Term("field", "foo"))), ft.termQuery("foo", null)); + assertEquals(AutomatonQueries.caseInsensitiveTermQuery(new Term("field", "fOo")), ft.termQueryCaseInsensitive("fOo", null)); + } + + public void testTermsQuery() { + MappedFieldType ft = new PatternedTextFieldType("field"); + 
List terms = new ArrayList<>(); + terms.add(new BytesRef("foo")); + terms.add(new BytesRef("123")); + assertEquals(new TermInSetQuery("field", terms), ft.termsQuery(Arrays.asList("foo", "123"), null)); + } + + public void testRangeQuery() { + MappedFieldType ft = new PatternedTextFieldType("field"); + assertEquals( + new TermRangeQuery("field", BytesRefs.toBytesRef("foo"), BytesRefs.toBytesRef("bar"), true, false), + ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_CONTEXT) + ); + + ElasticsearchException ee = expectThrows( + ElasticsearchException.class, + () -> ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_CONTEXT_DISALLOW_EXPENSIVE) + ); + assertEquals( + "[range] queries on [text] or [keyword] fields cannot be executed when " + "'search.allow_expensive_queries' is set to false.", + ee.getMessage() + ); + } + + public void testRegexpQuery() { + MappedFieldType ft = new PatternedTextFieldType("field"); + assertEquals(new RegexpQuery(new Term("field", "foo.*")), ft.regexpQuery("foo.*", 0, 0, 10, null, MOCK_CONTEXT)); + + ElasticsearchException ee = expectThrows( + ElasticsearchException.class, + () -> ft.regexpQuery("foo.*", randomInt(10), 0, randomInt(10) + 1, null, MOCK_CONTEXT_DISALLOW_EXPENSIVE) + ); + assertEquals("[regexp] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", ee.getMessage()); + } + + public void testFuzzyQuery() { + MappedFieldType ft = new PatternedTextFieldType("field"); + assertEquals( + new ConstantScoreQuery(new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true)), + ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_CONTEXT) + ); + + ElasticsearchException ee = expectThrows( + ElasticsearchException.class, + () -> ft.fuzzyQuery( + "foo", + Fuzziness.AUTO, + randomInt(10) + 1, + randomInt(10) + 1, + randomBoolean(), + MOCK_CONTEXT_DISALLOW_EXPENSIVE + ) + ); + assertEquals("[fuzzy] queries cannot be executed when 'search.allow_expensive_queries' is set to 
false.", ee.getMessage()); + } + + private Query unwrapPositionalQuery(Query query) { + query = ((ConstantScoreQuery) query).getQuery(); + return query; + } + + public void testPhraseQuery() throws IOException { + MappedFieldType ft = new PatternedTextFieldType("field"); + TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("1", 4, 7)); + Query query = ft.phraseQuery(ts, 0, true, MOCK_CONTEXT); + Query delegate = unwrapPositionalQuery(query); + assertEquals(new PhraseQuery("field", "a", "1").toString(), delegate.toString()); + } + + public void testMultiPhraseQuery() throws IOException { + MappedFieldType ft = new PatternedTextFieldType("field"); + TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("2", 0, 0, 3), new Token("c", 4, 7)); + Query query = ft.multiPhraseQuery(ts, 0, true, MOCK_CONTEXT); + Query delegate = unwrapPositionalQuery(query); + Query expected = new MultiPhraseQuery.Builder().add(new Term[] { new Term("field", "a"), new Term("field", "2") }) + .add(new Term("field", "c")) + .build(); + assertEquals(expected.toString(), delegate.toString()); + } + + public void testPhrasePrefixQuery() throws IOException { + MappedFieldType ft = new PatternedTextFieldType("field"); + TokenStream ts = new CannedTokenStream(new Token("a", 0, 3), new Token("b", 0, 0, 3), new Token("c", 4, 7)); + Query query = ft.phrasePrefixQuery(ts, 0, 10, MOCK_CONTEXT); + Query delegate = unwrapPositionalQuery(query); + MultiPhrasePrefixQuery expected = new MultiPhrasePrefixQuery("field"); + expected.add(new Term[] { new Term("field", "a"), new Term("field", "b") }); + expected.add(new Term("field", "c")); + assertEquals(expected.toString(), delegate.toString()); + } + + public void testTermIntervals() { + MappedFieldType ft = new PatternedTextFieldType("field"); + IntervalsSource termIntervals = ft.termIntervals(new BytesRef("foo"), MOCK_CONTEXT); + assertThat(termIntervals, Matchers.instanceOf(SourceIntervalsSource.class)); + 
assertEquals(Intervals.term(new BytesRef("foo")), ((SourceIntervalsSource) termIntervals).getIntervalsSource()); + } + + public void testPrefixIntervals() { + MappedFieldType ft = new PatternedTextFieldType("field"); + IntervalsSource prefixIntervals = ft.prefixIntervals(new BytesRef("foo"), MOCK_CONTEXT); + assertThat(prefixIntervals, Matchers.instanceOf(SourceIntervalsSource.class)); + assertEquals( + Intervals.prefix(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()), + ((SourceIntervalsSource) prefixIntervals).getIntervalsSource() + ); + } + + public void testWildcardIntervals() { + MappedFieldType ft = new PatternedTextFieldType("field"); + IntervalsSource wildcardIntervals = ft.wildcardIntervals(new BytesRef("foo"), MOCK_CONTEXT); + assertThat(wildcardIntervals, Matchers.instanceOf(SourceIntervalsSource.class)); + assertEquals( + Intervals.wildcard(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()), + ((SourceIntervalsSource) wildcardIntervals).getIntervalsSource() + ); + } + + public void testRegexpIntervals() { + MappedFieldType ft = new PatternedTextFieldType("field"); + IntervalsSource regexpIntervals = ft.regexpIntervals(new BytesRef("foo"), MOCK_CONTEXT); + assertThat(regexpIntervals, Matchers.instanceOf(SourceIntervalsSource.class)); + assertEquals( + Intervals.regexp(new BytesRef("foo"), IndexSearcher.getMaxClauseCount()), + ((SourceIntervalsSource) regexpIntervals).getIntervalsSource() + ); + } + + public void testFuzzyIntervals() { + MappedFieldType ft = new PatternedTextFieldType("field"); + IntervalsSource fuzzyIntervals = ft.fuzzyIntervals("foo", 1, 2, true, MOCK_CONTEXT); + assertThat(fuzzyIntervals, Matchers.instanceOf(SourceIntervalsSource.class)); + } + + public void testRangeIntervals() { + MappedFieldType ft = new PatternedTextFieldType("field"); + IntervalsSource rangeIntervals = ft.rangeIntervals(new BytesRef("foo"), new BytesRef("foo1"), true, true, MOCK_CONTEXT); + assertThat(rangeIntervals, 
Matchers.instanceOf(SourceIntervalsSource.class)); + assertEquals( + Intervals.range(new BytesRef("foo"), new BytesRef("foo1"), true, true, IndexSearcher.getMaxClauseCount()), + ((SourceIntervalsSource) rangeIntervals).getIntervalsSource() + ); + } +} diff --git a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextValueProcessorTests.java b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextValueProcessorTests.java new file mode 100644 index 000000000000..58266b3dae1e --- /dev/null +++ b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/patternedtext/PatternedTextValueProcessorTests.java @@ -0,0 +1,101 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.logsdb.patternedtext; + +import org.elasticsearch.test.ESTestCase; +import org.hamcrest.Matchers; + +public class PatternedTextValueProcessorTests extends ESTestCase { + + public void testEmpty() { + String text = ""; + PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text); + assertEquals(text, parts.template()); + assertTrue(parts.args().isEmpty()); + assertEquals(text, PatternedTextValueProcessor.merge(parts)); + } + + public void testWhitespace() { + String text = " "; + PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text); + assertEquals(text, parts.template()); + assertTrue(parts.args().isEmpty()); + assertEquals(text, PatternedTextValueProcessor.merge(parts)); + } + + public void testWithoutTimestamp() { + String text = " some text with arg1 and 2arg2 and 333 "; + PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text); + assertEquals(" some text with %W and %W and %W ", parts.template()); 
+ assertThat(parts.args(), Matchers.contains("arg1", "2arg2", "333")); + assertEquals(text, PatternedTextValueProcessor.merge(parts)); + } + + public void testWithTimestamp() { + String text = " 2021-04-13T13:51:38.000Z some text with arg1 and arg2 and arg3"; + PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text); + assertEquals(" %W some text with %W and %W and %W", parts.template()); + assertThat(parts.args(), Matchers.contains("2021-04-13T13:51:38.000Z", "arg1", "arg2", "arg3")); + assertEquals(text, PatternedTextValueProcessor.merge(parts)); + } + + public void testWithDateSpaceTime() { + String text = " 2021-04-13 13:51:38 some text with arg1 and arg2 and arg3"; + PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text); + assertEquals(" %W %W some text with %W and %W and %W", parts.template()); + assertThat(parts.args(), Matchers.contains("2021-04-13", "13:51:38", "arg1", "arg2", "arg3")); + assertEquals(text, PatternedTextValueProcessor.merge(parts)); + } + + public void testMalformedDate() { + String text = "2020/09/06 10:11:38 Using namespace: kubernetes-dashboard' | HTTP status: 400, message: [1:395]"; + PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text); + assertEquals("%W %W Using namespace: kubernetes-dashboard' | HTTP status: %W message: [%W]", parts.template()); + assertThat(parts.args(), Matchers.contains("2020/09/06", "10:11:38", "400,", "1:395")); + assertEquals(text, PatternedTextValueProcessor.merge(parts)); + } + + public void testUUID() { + String text = "[2020-08-18T00:58:56.751+00:00][15][2354][action_controller][INFO]: [18be2355-6306-4a00-9db9-f0696aa1a225] " + + "some text with arg1 and arg2"; + PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text); + assertEquals("[%W][%W][%W][action_controller][INFO]: [%W] some text with %W and %W", parts.template()); + assertThat( + parts.args(), + 
Matchers.contains("2020-08-18T00:58:56.751+00:00", "15", "2354", "18be2355-6306-4a00-9db9-f0696aa1a225", "arg1", "arg2") + ); + assertEquals(text, PatternedTextValueProcessor.merge(parts)); + } + + public void testIP() { + String text = "[2020-08-18T00:58:56.751+00:00][15][2354][action_controller][INFO]: from 94.168.152.150 and arg1"; + PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text); + assertEquals("[%W][%W][%W][action_controller][INFO]: from %W and %W", parts.template()); + assertThat(parts.args(), Matchers.contains("2020-08-18T00:58:56.751+00:00", "15", "2354", "94.168.152.150", "arg1")); + assertEquals(text, PatternedTextValueProcessor.merge(parts)); + } + + public void testSecondDate() { + String text = "[2020-08-18T00:58:56.751+00:00][15][2354][action_controller][INFO]: at 2020-08-18 00:58:56 +0000 and arg1"; + PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text); + assertEquals("[%W][%W][%W][action_controller][INFO]: at %W %W %W and %W", parts.template()); + assertThat( + parts.args(), + Matchers.contains("2020-08-18T00:58:56.751+00:00", "15", "2354", "2020-08-18", "00:58:56", "+0000", "arg1") + ); + assertEquals(text, PatternedTextValueProcessor.merge(parts)); + } + + public void testWithTimestamp1() { + String text = "[2020-08-18T00:58:56] Found 123 errors for service [cheddar1]"; + PatternedTextValueProcessor.Parts parts = PatternedTextValueProcessor.split(text); + assertEquals("[%W] Found %W errors for service [%W]", parts.template()); + assertThat(parts.args(), Matchers.contains("2020-08-18T00:58:56", "123", "cheddar1")); + assertEquals(text, PatternedTextValueProcessor.merge(parts)); + } +} diff --git a/x-pack/plugin/logsdb/src/yamlRestTest/java/org/elasticsearch/xpack/logsdb/LogsdbTestSuiteIT.java b/x-pack/plugin/logsdb/src/yamlRestTest/java/org/elasticsearch/xpack/logsdb/LogsdbTestSuiteIT.java index 4d7935f7d6bb..acb146d2af54 100644 --- 
a/x-pack/plugin/logsdb/src/yamlRestTest/java/org/elasticsearch/xpack/logsdb/LogsdbTestSuiteIT.java +++ b/x-pack/plugin/logsdb/src/yamlRestTest/java/org/elasticsearch/xpack/logsdb/LogsdbTestSuiteIT.java @@ -27,6 +27,7 @@ public class LogsdbTestSuiteIT extends ESClientYamlSuiteTestCase { @ClassRule public static final ElasticsearchCluster cluster = ElasticsearchCluster.local() + .module("logsdb") .distribution(DistributionType.DEFAULT) .user(USER, PASS, "superuser", false) .setting("xpack.security.autoconfiguration.enabled", "false") diff --git a/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/patternedtext/10_basic.yml b/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/patternedtext/10_basic.yml new file mode 100644 index 000000000000..e25a2d2e76a7 --- /dev/null +++ b/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/patternedtext/10_basic.yml @@ -0,0 +1,333 @@ +setup: + + - do: + indices.create: + index: test + body: + mappings: + properties: + foo: + type: patterned_text + + - do: + index: + index: test + id: "1" + body: {} + + - do: + index: + index: test + id: "2" + body: { "foo": "Found 5 errors for service [cheddar1]" } + + - do: + index: + index: test + id: "3" + body: { "foo": "[2020-08-18T00:58:56] Found 123 errors for service [cheddar1]" } + + - do: + index: + index: test + id: "4" + body: { "foo": "Found some errors for cheddar data service" } + + - do: + indices.refresh: {} + +--- +Field caps: + + - do: + field_caps: + index: test + fields: [ foo ] + + - match: { fields.foo.text.searchable: true } + - match: { fields.foo.text.aggregatable: false } + +--- +Exist query: + + - do: + search: + index: test + body: + query: + exists: + field: foo + + - match: { "hits.total.value": 3 } + - match: { "hits.hits.0._score": 1.0 } + +--- +Match query: + + - do: + search: + index: test + body: + query: + match: + foo: 5 + + - match: { "hits.total.value": 1 } + - match: { "hits.hits.0._score": 1.0 } + +--- +Match 
Phrase query: + + - do: + search: + index: test + body: + query: + match_phrase: + foo: "5 errors" + + - match: { "hits.total.value": 1 } + - match: { "hits.hits.0._score": 1.0 } + +--- +Match Phrase Prefix query: + + - do: + search: + index: test + body: + query: + match_phrase_prefix: + foo: "5 err" + + - match: { "hits.total.value": 1 } + - match: { "hits.hits.0._score": 1.0 } + + +--- +Query String query with phrase: + + - do: + search: + index: test + body: + query: + query_string: + query: '"5 errors"' + default_field: "foo" + + - match: { "hits.total.value": 1 } + - match: { "hits.hits.0._score": 1.0 } + + +--- +Regexp query: + + - do: + search: + index: test + body: + query: + regexp: + foo: "ser.*ce" + + - match: { "hits.total.value": 3 } + - match: { "hits.hits.0._score": 1.0 } + +--- +Wildcard query: + + - do: + search: + index: test + body: + query: + wildcard: + foo: "ser*ce" + + - match: { "hits.total.value": 3 } + - match: { "hits.hits.0._score": 1.0 } + +--- +Prefix query: + + - do: + search: + index: test + body: + query: + prefix: + foo: "ser" + + - match: { "hits.total.value": 3 } + - match: { "hits.hits.0._score": 1.0 } + +--- +Fuzzy query: + + - do: + search: + index: test + body: + query: + fuzzy: + foo: "errars" + + - match: { "hits.total.value": 3 } + - match: { "hits.hits.0._score": 1.0 } + +--- +Span query: + + - do: + catch: bad_request + search: + index: test + body: + query: + span_term: + foo: errors + +--- +Term intervals query: + + - do: + search: + index: test + body: + query: + intervals: + foo: + match: + query: "for service" + max_gaps: 1 + + - match: { "hits.total.value": 2 } + +--- +Prefix intervals query: + + - do: + search: + index: test + body: + query: + intervals: + foo: + prefix: + prefix: "ser" + + - match: { "hits.total.value": 3 } + +--- +Wildcard intervals query: + + - do: + search: + index: test + body: + query: + intervals: + foo: + wildcard: + pattern: "*edda*" + + - match: { "hits.total.value": 3 } + +--- +Fuzzy 
intervals query: + + - do: + search: + index: test + body: + query: + intervals: + foo: + fuzzy: + term: "servace" + + - match: { "hits.total.value": 3 } + +--- +Wildcard highlighting: + + - do: + search: + index: test + body: + query: + match: + foo: "5" + highlight: + fields: + "*": {} + + - match: { hits.total.value: 1 } + - match: { hits.hits.0._source.foo: "Found 5 errors for service [cheddar1]" } + - match: { hits.hits.0.highlight.foo.0: "Found 5 errors for service [cheddar1]" } + +--- +tsdb: + + - do: + indices.create: + index: tsdb_test + body: + settings: + index: + mode: time_series + routing_path: [ dimension ] + time_series: + start_time: 2000-01-01T00:00:00Z + end_time: 2099-12-31T23:59:59Z + mappings: + properties: + dimension: + type: keyword + time_series_dimension: true + foo: + type: patterned_text + + - do: + index: + index: tsdb_test + refresh: true + body: + "@timestamp": "2000-01-01T00:00:00Z" + dimension: "a" + foo: "Apache Lucene powers Elasticsearch" + + - do: + search: + index: tsdb_test + - match: { "hits.total.value": 1 } + - match: + hits.hits.0._source: + "@timestamp" : "2000-01-01T00:00:00.000Z" + "dimension" : "a" + foo: "Apache Lucene powers Elasticsearch" + +--- +Multiple values: + - do: + indices.create: + index: test1 + body: + mappings: + properties: + foo: + type: patterned_text + - do: + catch: bad_request + index: + index: test1 + id: "1" + body: { + "foo": [ + "Found 5 errors for service [cheddar1]", + "[2020-08-18T00:58:56] Found 123 errors for service [cheddar1]" + ] + } + + diff --git a/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/patternedtext/20_synthetic_source.yml b/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/patternedtext/20_synthetic_source.yml new file mode 100644 index 000000000000..a21ee18ac642 --- /dev/null +++ b/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/patternedtext/20_synthetic_source.yml @@ -0,0 +1,76 @@ +simple: + - do: + indices.create: 
+ index: test + body: + settings: + index: + mapping.source.mode: synthetic + mappings: + properties: + id: + type: integer + message: + type: patterned_text + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "id": 1, "message": "some log message with no arg" }' + - '{ "create": { } }' + - '{ "id": 2, "message": "another log message with arg 1234 and arg 5678 and a mixed one ABCD9" }' + - '{ "create": { } }' + - '{ "id": 3, "message": "some log message with no arg" }' + - '{ "create": { } }' + - '{ "id": 4, "message": "another log message with arg 1234 and arg 8765 and a mixed one ABCD1" }' + + - do: + search: + index: test + sort: id + + - match: { hits.hits.0._source.message: "some log message with no arg" } + - match: { hits.hits.1._source.message: "another log message with arg 1234 and arg 5678 and a mixed one ABCD9" } + - match: { hits.hits.2._source.message: "some log message with no arg" } + - match: { hits.hits.3._source.message: "another log message with arg 1234 and arg 8765 and a mixed one ABCD1" } + +--- +synthetic_source with copy_to: + + - do: + indices.create: + index: synthetic_source_test + body: + settings: + index: + mapping.source.mode: synthetic + mappings: + properties: + foo: + type: patterned_text + copy_to: copy + copy: + type: keyword + + - do: + index: + index: synthetic_source_test + id: "1" + refresh: true + body: + foo: "another log message with arg 1234 and arg 5678 and a mixed one ABCD9" + + - do: + search: + index: synthetic_source_test + body: + fields: ["copy"] + + - match: { "hits.total.value": 1 } + - match: + hits.hits.0._source.foo: "another log message with arg 1234 and arg 5678 and a mixed one ABCD9" + - match: + hits.hits.0.fields.copy.0: "another log message with arg 1234 and arg 5678 and a mixed one ABCD9"