diff --git a/build-tools-internal/src/integTest/groovy/org/elasticsearch/gradle/internal/doc/DocsTestPluginFuncTest.groovy b/build-tools-internal/src/integTest/groovy/org/elasticsearch/gradle/internal/doc/DocsTestPluginFuncTest.groovy
index 4c542d371c32..934ff5233ec1 100644
--- a/build-tools-internal/src/integTest/groovy/org/elasticsearch/gradle/internal/doc/DocsTestPluginFuncTest.groovy
+++ b/build-tools-internal/src/integTest/groovy/org/elasticsearch/gradle/internal/doc/DocsTestPluginFuncTest.groovy
@@ -45,7 +45,7 @@ mapper-annotated-text.asciidoc[51:69](console)// TEST[setup:seats]
""")
}
- def "can console candidates"() {
+ def "can list console candidates"() {
when:
def result = gradleRunner("listConsoleCandidates").build()
then:
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/AsciidocSnippetParser.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/AsciidocSnippetParser.java
index 7b35fd29fbd1..f291566d526f 100644
--- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/AsciidocSnippetParser.java
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/AsciidocSnippetParser.java
@@ -8,296 +8,84 @@ package org.elasticsearch.gradle.internal.doc;
-import org.gradle.api.InvalidUserDataException;
-
-import java.io.File;
-import java.io.IOException;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.ArrayList;
-import java.util.Collection;
import java.util.List;
import java.util.Map;
-import java.util.function.BiConsumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-public class AsciidocSnippetParser implements SnippetParser {
+public class AsciidocSnippetParser extends SnippetParser {
public static final Pattern SNIPPET_PATTERN = Pattern.compile("-{4,}\\s*");
+ public static final Pattern TEST_RESPONSE_PATTERN = Pattern.compile("\\/\\/\s*TESTRESPONSE(\\[(.+)\\])?\s*");
+ public static final Pattern SOURCE_PATTERN = Pattern.compile(
+ "\\[\"?source\"?(?:\\.[^,]+)?,\\s*\"?([-\\w]+)\"?(,((?!id=).)*(id=\"?([-\\w]+)\"?)?(.*))?].*"
+ );
- private static final String CATCH = "catch:\\s*((?:\\/[^\\/]+\\/)|[^ \\]]+)";
- private static final String SKIP_REGEX = "skip:([^\\]]+)";
- private static final String SETUP = "setup:([^ \\]]+)";
- private static final String TEARDOWN = "teardown:([^ \\]]+)";
- private static final String WARNING = "warning:(.+)";
- private static final String NON_JSON = "(non_json)";
- private static final String SCHAR = "(?:\\\\\\/|[^\\/])";
- private static final String SUBSTITUTION = "s\\/(" + SCHAR + "+)\\/(" + SCHAR + "*)\\/";
- private static final String TEST_SYNTAX = "(?:"
- + CATCH
- + "|"
- + SUBSTITUTION
- + "|"
- + SKIP_REGEX
- + "|(continued)|"
- + SETUP
- + "|"
- + TEARDOWN
- + "|"
- + WARNING
- + "|(skip_shard_failures)) ?";
-
- private final Map<String, String> defaultSubstitutions;
+ public static final String CONSOLE_REGEX = "\\/\\/\s*CONSOLE\s*";
+ public static final String NOTCONSOLE_REGEX = "\\/\\/\s*NOTCONSOLE\s*";
+ public static final String TESTSETUP_REGEX = "\\/\\/\s*TESTSETUP\s*";
+ public static final String TEARDOWN_REGEX = "\\/\\/\s*TEARDOWN\s*";
public AsciidocSnippetParser(Map<String, String> defaultSubstitutions) {
- this.defaultSubstitutions = defaultSubstitutions;
+ super(defaultSubstitutions);
}
@Override
- public List<Snippet> parseDoc(File rootDir, File docFile, List<Map.Entry<String, String>> substitutions) {
- String lastLanguage = null;
- Snippet snippet = null;
- String name = null;
- int lastLanguageLine = 0;
- StringBuilder contents = null;
- List<Snippet> snippets = new ArrayList<>();
-
- try (Stream<String> lines = Files.lines(docFile.toPath(), StandardCharsets.UTF_8)) {
- List<String> linesList = lines.collect(Collectors.toList());
- for (int lineNumber = 0; lineNumber < linesList.size(); lineNumber++) {
- String line = linesList.get(lineNumber);
- if (SNIPPET_PATTERN.matcher(line).matches()) {
- if (snippet == null) {
- Path path = rootDir.toPath().relativize(docFile.toPath());
- snippet = new Snippet(path, lineNumber + 1, name);
- snippets.add(snippet);
- if (lastLanguageLine == lineNumber - 1) {
- snippet.language = lastLanguage;
- }
- name = null;
- } else {
- snippet.end = lineNumber + 1;
- }
- continue;
- }
-
- Source source = matchSource(line);
- if (source.matches) {
- lastLanguage = source.language;
- lastLanguageLine = lineNumber;
- name = source.name;
- continue;
- }
- if (consoleHandled(docFile.getName(), lineNumber, line, snippet)) {
- continue;
- }
- if (testHandled(docFile.getName(), lineNumber, line, snippet, substitutions)) {
- continue;
- }
- if (testResponseHandled(docFile.getName(), lineNumber, line, snippet, substitutions)) {
- continue;
- }
- if (line.matches("\\/\\/\s*TESTSETUP\s*")) {
- snippet.testSetup = true;
- continue;
- }
- if (line.matches("\\/\\/\s*TEARDOWN\s*")) {
- snippet.testTearDown = true;
- continue;
- }
- if (snippet == null) {
- // Outside
- continue;
- }
- if (snippet.end == Snippet.NOT_FINISHED) {
- // Inside
- if (contents == null) {
- contents = new StringBuilder();
- }
- // We don't need the annotations
- line = line.replaceAll("<\\d+>", "");
- // Nor any trailing spaces
- line = line.replaceAll("\s+$", "");
- contents.append(line).append("\n");
- continue;
- }
- // Allow line continuations for console snippets within lists
- if (snippet != null && line.trim().equals("+")) {
- continue;
- }
- finalizeSnippet(snippet, contents.toString(), defaultSubstitutions, substitutions);
- substitutions = new ArrayList<>();
- ;
- snippet = null;
- contents = null;
- }
- if (snippet != null) {
- finalizeSnippet(snippet, contents.toString(), defaultSubstitutions, substitutions);
- contents = null;
- snippet = null;
- substitutions = new ArrayList<>();
- }
- } catch (IOException e) {
- e.printStackTrace();
- }
- return snippets;
+ protected Pattern testResponsePattern() {
+ return TEST_RESPONSE_PATTERN;
}
- static Snippet finalizeSnippet(
- final Snippet snippet,
- String contents,
- Map<String, String> defaultSubstitutions,
- Collection<Map.Entry<String, String>> substitutions
- ) {
- snippet.contents = contents.toString();
- snippet.validate();
- escapeSubstitutions(snippet, defaultSubstitutions, substitutions);
- return snippet;
+ protected Pattern testPattern() {
+ return Pattern.compile("\\/\\/\s*TEST(\\[(.+)\\])?\s*");
}
- private static void escapeSubstitutions(
- Snippet snippet,
- Map<String, String> defaultSubstitutions,
- Collection<Map.Entry<String, String>> substitutions
- ) {
- BiConsumer<String, String> doSubstitution = (pattern, subst) -> {
- /*
- * $body is really common but it looks like a
- * backreference so we just escape it here to make the
- * tests cleaner.
- */
- subst = subst.replace("$body", "\\$body");
- subst = subst.replace("$_path", "\\$_path");
- subst = subst.replace("\\n", "\n");
- snippet.contents = snippet.contents.replaceAll(pattern, subst);
- };
- defaultSubstitutions.forEach(doSubstitution);
+ private int lastLanguageLine = 0;
+ private String currentName = null;
+ private String lastLanguage = null;
- if (substitutions != null) {
- substitutions.forEach(e -> doSubstitution.accept(e.getKey(), e.getValue()));
+ protected void parseLine(List<Snippet> snippets, int lineNumber, String line) {
+ if (SNIPPET_PATTERN.matcher(line).matches()) {
+ if (snippetBuilder == null) {
+ snippetBuilder = newSnippetBuilder().withLineNumber(lineNumber + 1)
+ .withName(currentName)
+ .withSubstitutions(defaultSubstitutions);
+ if (lastLanguageLine == lineNumber - 1) {
+ snippetBuilder.withLanguage(lastLanguage);
+ }
+ currentName = null;
+ } else {
+ snippetBuilder.withEnd(lineNumber + 1);
+ }
+ return;
}
+
+ Source source = matchSource(line);
+ if (source.matches) {
+ lastLanguage = source.language;
+ lastLanguageLine = lineNumber;
+ currentName = source.name;
+ return;
+ }
+ handleCommons(snippets, line);
}
- private boolean testResponseHandled(
- String name,
- int lineNumber,
- String line,
- Snippet snippet,
- final List<Map.Entry<String, String>> substitutions
- ) {
- Matcher matcher = Pattern.compile("\\/\\/\s*TESTRESPONSE(\\[(.+)\\])?\s*").matcher(line);
- if (matcher.matches()) {
- if (snippet == null) {
- throw new InvalidUserDataException(name + ":" + lineNumber + ": TESTRESPONSE not paired with a snippet at ");
- }
- snippet.testResponse = true;
- if (matcher.group(2) != null) {
- String loc = name + ":" + lineNumber;
- ParsingUtils.parse(
- loc,
- matcher.group(2),
- "(?:" + SUBSTITUTION + "|" + NON_JSON + "|" + SKIP_REGEX + ") ?",
- (Matcher m, Boolean last) -> {
- if (m.group(1) != null) {
- // TESTRESPONSE[s/adsf/jkl/]
- substitutions.add(Map.entry(m.group(1), m.group(2)));
- } else if (m.group(3) != null) {
- // TESTRESPONSE[non_json]
- substitutions.add(Map.entry("^", "/"));
- substitutions.add(Map.entry("\n$", "\\\\s*/"));
- substitutions.add(Map.entry("( +)", "$1\\\\s+"));
- substitutions.add(Map.entry("\n", "\\\\s*\n "));
- } else if (m.group(4) != null) {
- // TESTRESPONSE[skip:reason]
- snippet.skip = m.group(4);
- }
- }
- );
- }
- return true;
- }
- return false;
+ protected String getTestSetupRegex() {
+ return TESTSETUP_REGEX;
}
- private boolean testHandled(String name, int lineNumber, String line, Snippet snippet, List<Map.Entry<String, String>> substitutions) {
- Matcher matcher = Pattern.compile("\\/\\/\s*TEST(\\[(.+)\\])?\s*").matcher(line);
- if (matcher.matches()) {
- if (snippet == null) {
- throw new InvalidUserDataException(name + ":" + lineNumber + ": TEST not paired with a snippet at ");
- }
- snippet.test = true;
- if (matcher.group(2) != null) {
- String loc = name + ":" + lineNumber;
- ParsingUtils.parse(loc, matcher.group(2), TEST_SYNTAX, (Matcher m, Boolean last) -> {
- if (m.group(1) != null) {
- snippet.catchPart = m.group(1);
- return;
- }
- if (m.group(2) != null) {
- substitutions.add(Map.entry(m.group(2), m.group(3)));
- return;
- }
- if (m.group(4) != null) {
- snippet.skip = m.group(4);
- return;
- }
- if (m.group(5) != null) {
- snippet.continued = true;
- return;
- }
- if (m.group(6) != null) {
- snippet.setup = m.group(6);
- return;
- }
- if (m.group(7) != null) {
- snippet.teardown = m.group(7);
- return;
- }
- if (m.group(8) != null) {
- snippet.warnings.add(m.group(8));
- return;
- }
- if (m.group(9) != null) {
- snippet.skipShardsFailures = true;
- return;
- }
- throw new InvalidUserDataException("Invalid test marker: " + line);
- });
- }
- return true;
- }
- return false;
+ protected String getTeardownRegex() {
+ return TEARDOWN_REGEX;
}
- private boolean consoleHandled(String fileName, int lineNumber, String line, Snippet snippet) {
- if (line.matches("\\/\\/\s*CONSOLE\s*")) {
- if (snippet == null) {
- throw new InvalidUserDataException(fileName + ":" + lineNumber + ": CONSOLE not paired with a snippet");
- }
- if (snippet.console != null) {
- throw new InvalidUserDataException(fileName + ":" + lineNumber + ": Can't be both CONSOLE and NOTCONSOLE");
- }
- snippet.console = true;
- return true;
- } else if (line.matches("\\/\\/\s*NOTCONSOLE\s*")) {
- if (snippet == null) {
- throw new InvalidUserDataException(fileName + ":" + lineNumber + ": NOTCONSOLE not paired with a snippet");
- }
- if (snippet.console != null) {
- throw new InvalidUserDataException(fileName + ":" + lineNumber + ": Can't be both CONSOLE and NOTCONSOLE");
- }
- snippet.console = false;
- return true;
- }
- return false;
+ protected String getNotconsoleRegex() {
+ return NOTCONSOLE_REGEX;
+ }
+
+ protected String getConsoleRegex() {
+ return CONSOLE_REGEX;
}
static Source matchSource(String line) {
- Pattern pattern = Pattern.compile("\\[\"?source\"?(?:\\.[^,]+)?,\\s*\"?([-\\w]+)\"?(,((?!id=).)*(id=\"?([-\\w]+)\"?)?(.*))?].*");
- Matcher matcher = pattern.matcher(line);
+ Matcher matcher = SOURCE_PATTERN.matcher(line);
if (matcher.matches()) {
return new Source(true, matcher.group(1), matcher.group(5));
}
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/DocSnippetTask.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/DocSnippetTask.java
index 87f0621d53fb..07e3bc93bb6a 100644
--- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/DocSnippetTask.java
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/DocSnippetTask.java
@@ -8,19 +8,17 @@ package org.elasticsearch.gradle.internal.doc;
-import org.apache.commons.collections.map.HashedMap;
import org.gradle.api.Action;
import org.gradle.api.DefaultTask;
import org.gradle.api.InvalidUserDataException;
import org.gradle.api.file.ConfigurableFileTree;
+import org.gradle.api.provider.MapProperty;
import org.gradle.api.tasks.Input;
import org.gradle.api.tasks.InputFiles;
import org.gradle.api.tasks.TaskAction;
import java.io.File;
-import java.util.ArrayList;
import java.util.List;
-import java.util.Map;
public abstract class DocSnippetTask extends DefaultTask {
@@ -36,7 +34,6 @@ public abstract class DocSnippetTask extends DefaultTask {
* directory.
*/
private ConfigurableFileTree docs;
- private Map<String, String> defaultSubstitutions = new HashedMap();
@InputFiles
public ConfigurableFileTree getDocs() {
@@ -51,36 +48,32 @@ public abstract class DocSnippetTask extends DefaultTask {
* Substitutions done on every snippet's contents.
*/
@Input
- public Map<String, String> getDefaultSubstitutions() {
- return defaultSubstitutions;
- }
+ abstract MapProperty<String, String> getDefaultSubstitutions();
@TaskAction
void executeTask() {
for (File file : docs) {
- List<Snippet> snippets = parseDocFile(docs.getDir(), file, new ArrayList<>());
+ List<Snippet> snippets = parseDocFile(docs.getDir(), file);
if (perSnippet != null) {
snippets.forEach(perSnippet::execute);
}
}
}
- List<Snippet> parseDocFile(File rootDir, File docFile, List<Map.Entry<String, String>> substitutions) {
+ List<Snippet> parseDocFile(File rootDir, File docFile) {
SnippetParser parser = parserForFileType(docFile);
- return parser.parseDoc(rootDir, docFile, substitutions);
+ return parser.parseDoc(rootDir, docFile);
}
private SnippetParser parserForFileType(File docFile) {
if (docFile.getName().endsWith(".asciidoc")) {
- return new AsciidocSnippetParser(defaultSubstitutions);
+ return new AsciidocSnippetParser(getDefaultSubstitutions().get());
+ } else if (docFile.getName().endsWith(".mdx")) {
+ return new MdxSnippetParser(getDefaultSubstitutions().get());
}
throw new InvalidUserDataException("Unsupported file type: " + docFile.getName());
}
- public void setDefaultSubstitutions(Map<String, String> defaultSubstitutions) {
- this.defaultSubstitutions = defaultSubstitutions;
- }
-
public void setPerSnippet(Action<Snippet> perSnippet) {
this.perSnippet = perSnippet;
}
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/DocsTestPlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/DocsTestPlugin.java
index bbb5102dd669..2504ea1e74a3 100644
--- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/DocsTestPlugin.java
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/DocsTestPlugin.java
@@ -75,14 +75,14 @@ public class DocsTestPlugin implements Plugin<Project> {
project.getTasks().register("listSnippets", DocSnippetTask.class, task -> {
task.setGroup("Docs");
task.setDescription("List each snippet");
- task.setDefaultSubstitutions(commonDefaultSubstitutions);
- task.setPerSnippet(snippet -> System.out.println(snippet));
+ task.getDefaultSubstitutions().putAll(commonDefaultSubstitutions);
+ task.setPerSnippet(System.out::println);
});
project.getTasks().register("listConsoleCandidates", DocSnippetTask.class, task -> {
task.setGroup("Docs");
task.setDescription("List snippets that probably should be marked // CONSOLE");
- task.setDefaultSubstitutions(commonDefaultSubstitutions);
+ task.getDefaultSubstitutions().putAll(commonDefaultSubstitutions);
task.setPerSnippet(snippet -> {
if (snippet.isConsoleCandidate()) {
System.out.println(snippet);
@@ -93,8 +93,9 @@ public class DocsTestPlugin implements Plugin<Project> {
Provider<Directory> restRootDir = projectLayout.getBuildDirectory().dir("rest");
TaskProvider<RestTestsFromDocSnippetTask> buildRestTests = project.getTasks()
.register("buildRestTests", RestTestsFromDocSnippetTask.class, task -> {
- task.setDefaultSubstitutions(commonDefaultSubstitutions);
+ task.getDefaultSubstitutions().putAll(commonDefaultSubstitutions);
task.getTestRoot().convention(restRootDir);
+ task.getMigrationMode().set(Boolean.getBoolean("gradle.docs.migration"));
task.doFirst(task1 -> fileOperations.delete(restRootDir.get()));
});
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/MdxSnippetParser.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/MdxSnippetParser.java
new file mode 100644
index 000000000000..0a0bb6328491
--- /dev/null
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/MdxSnippetParser.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.gradle.internal.doc;
+
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class MdxSnippetParser extends SnippetParser {
+
+ public static final Pattern SNIPPET_PATTERN = Pattern.compile("```(.*)");
+
+ public static final Pattern TEST_RESPONSE_PATTERN = Pattern.compile("\\{\\/\\*\s*TESTRESPONSE(\\[(.*)\\])?\s\\*\\/\\}");
+ public static final Pattern TEST_PATTERN = Pattern.compile("\\{\\/\\*\s*TEST(\\[(.*)\\])?\s\\*\\/\\}");
+ public static final String CONSOLE_REGEX = "\\{\\/\\*\s*CONSOLE\s\\*\\/\\}";
+ public static final String NOTCONSOLE_REGEX = "\\{\\/\\*\s*NOTCONSOLE\s\\*\\/\\}";
+ public static final String TESTSETUP_REGEX = "\\{\\/\\*\s*TESTSETUP\s\\*\\/\\}";
+ public static final String TEARDOWN_REGEX = "\\{\\/\\*\s*TEARDOWN\s\\*\\/\\}";
+
+ public MdxSnippetParser(Map<String, String> defaultSubstitutions) {
+ super(defaultSubstitutions);
+ }
+
+ @Override
+ protected void parseLine(List<Snippet> snippets, int lineNumber, String line) {
+ Matcher snippetStartMatcher = SNIPPET_PATTERN.matcher(line);
+ if (snippetStartMatcher.matches()) {
+ if (snippetBuilder == null) {
+ if (snippetStartMatcher.groupCount() == 1) {
+ String language = snippetStartMatcher.group(1);
+ snippetBuilder = newSnippetBuilder().withLineNumber(lineNumber + 1)
+ .withName(null)
+ .withSubstitutions(defaultSubstitutions)
+ .withLanguage(language);
+ }
+ } else {
+ snippetBuilder.withEnd(lineNumber + 1);
+ }
+ return;
+ }
+ handleCommons(snippets, line);
+ }
+
+ @Override
+ protected String getTestSetupRegex() {
+ return TESTSETUP_REGEX;
+ }
+
+ @Override
+ protected String getTeardownRegex() {
+ return TEARDOWN_REGEX;
+ }
+
+ @Override
+ protected String getNotconsoleRegex() {
+ return NOTCONSOLE_REGEX;
+ }
+
+ @Override
+ protected String getConsoleRegex() {
+ return CONSOLE_REGEX;
+ }
+
+ @Override
+ protected Pattern testResponsePattern() {
+ return TEST_RESPONSE_PATTERN;
+ }
+
+ @Override
+ protected Pattern testPattern() {
+ return TEST_PATTERN;
+ }
+}
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/ParsingUtils.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/ParsingUtils.java
index b17dd4c7e21d..53009e1ce597 100644
--- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/ParsingUtils.java
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/ParsingUtils.java
@@ -16,15 +16,13 @@ import java.util.regex.Pattern;
public class ParsingUtils {
- static void extraContent(String message, String content, int offset, String location, String pattern) {
+ static void extraContent(String message, String content, int offset, String pattern) {
StringBuilder cutOut = new StringBuilder();
cutOut.append(content.substring(offset - 6, offset));
cutOut.append('*');
cutOut.append(content.substring(offset, Math.min(offset + 5, content.length())));
String cutOutNoNl = cutOut.toString().replace("\n", "\\n");
- throw new InvalidUserDataException(
- location + ": Extra content " + message + " ('" + cutOutNoNl + "') matching [" + pattern + "]: " + content
- );
+ throw new InvalidUserDataException("Extra content " + message + " ('" + cutOutNoNl + "') matching [" + pattern + "]: " + content);
}
/**
@@ -33,7 +31,7 @@ public class ParsingUtils {
* match then blow up. If the closure takes two parameters then the second
* one is "is this the last match?".
*/
- static void parse(String location, String content, String pattern, BiConsumer<Matcher, Boolean> testHandler) {
+ static void parse(String content, String pattern, BiConsumer<Matcher, Boolean> testHandler) {
if (content == null) {
return; // Silly null, only real stuff gets to match!
}
@@ -41,16 +39,16 @@ public class ParsingUtils {
int offset = 0;
while (m.find()) {
if (m.start() != offset) {
- extraContent("between [$offset] and [${m.start()}]", content, offset, location, pattern);
+ extraContent("between [" + offset + "] and [" + m.start() + "]", content, offset, pattern);
}
offset = m.end();
testHandler.accept(m, offset == content.length());
}
if (offset == 0) {
- throw new InvalidUserDataException(location + ": Didn't match " + pattern + ": " + content);
+ throw new InvalidUserDataException("Didn't match " + pattern + ": " + content);
}
if (offset != content.length()) {
- extraContent("after [" + offset + "]", content, offset, location, pattern);
+ extraContent("after [" + offset + "]", content, offset, pattern);
}
}
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/RestTestsFromDocSnippetTask.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/RestTestsFromDocSnippetTask.java
index c5b1d67627dd..28733ff05277 100644
--- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/RestTestsFromDocSnippetTask.java
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/RestTestsFromDocSnippetTask.java
@@ -10,10 +10,12 @@ package org.elasticsearch.gradle.internal.doc;
import groovy.transform.PackageScope;
+import org.gradle.api.GradleException;
import org.gradle.api.InvalidUserDataException;
import org.gradle.api.file.DirectoryProperty;
-import org.gradle.api.internal.file.FileOperations;
-import org.gradle.api.model.ObjectFactory;
+import org.gradle.api.provider.ListProperty;
+import org.gradle.api.provider.MapProperty;
+import org.gradle.api.provider.Property;
import org.gradle.api.tasks.Input;
import org.gradle.api.tasks.Internal;
import org.gradle.api.tasks.OutputDirectory;
@@ -25,8 +27,8 @@ import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
-import java.util.HashMap;
import java.util.HashSet;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -36,35 +38,32 @@ import javax.inject.Inject;
public abstract class RestTestsFromDocSnippetTask extends DocSnippetTask {
- private Map<String, String> setups = new HashMap<>();
-
- private Map<String, String> teardowns = new HashMap();
+ /**
+ * For easier migration from asciidoc to mdx we support a migration mode that
+ * allows generation from the same file name but different extensions. The task
+ * will compare the generated tests from the asciidoc and mdx files and fail if
+ * they are not equal (ignoring the line numbers).
+ */
+ @Input
+ public abstract Property<Boolean> getMigrationMode();
/**
* Test setups defined in the build instead of the docs so they can be
* shared between many doc files.
*/
+ private Map<String, String> setups = new LinkedHashMap<>();
+
@Input
public Map<String, String> getSetups() {
return setups;
}
- public void setSetups(Map<String, String> setups) {
- this.setups = setups;
- }
-
/**
* Test teardowns defined in the build instead of the docs so they can be
* shared between many doc files.
*/
@Input
- public Map<String, String> getTeardowns() {
- return teardowns;
- }
-
- public void setTeardowns(Map<String, String> teardowns) {
- this.teardowns = teardowns;
- }
+ public abstract MapProperty<String, String> getTeardowns();
/**
* A list of files that contain snippets that *probably* should be
@@ -73,36 +72,8 @@ public abstract class RestTestsFromDocSnippetTask extends DocSnippetTask {
* If there are unconverted snippets not in this list then this task will
* fail. All files are paths relative to the docs dir.
*/
- private List<String> expectedUnconvertedCandidates;
-
@Input
- public List<String> getExpectedUnconvertedCandidates() {
- return expectedUnconvertedCandidates;
- }
-
- public void setExpectedUnconvertedCandidates(List<String> expectedUnconvertedCandidates) {
- this.expectedUnconvertedCandidates = expectedUnconvertedCandidates;
- }
-
- /**
- * Root directory of the tests being generated. To make rest tests happy
- * we generate them in a testRoot which is contained in this directory.
- */
- private DirectoryProperty testRoot;
-
- private Set<String> names = new HashSet<>();
-
- @Internal
- public Set<String> getNames() {
- return names;
- }
-
- public void setNames(Set<String> names) {
- this.names = names;
- }
-
- @Inject
- public abstract FileOperations getFileOperations();
+ public abstract ListProperty<String> getExpectedUnconvertedCandidates();
/**
* Root directory containing all the files generated by this task. It is
@@ -110,23 +81,27 @@ public abstract class RestTestsFromDocSnippetTask extends DocSnippetTask {
*/
@OutputDirectory
File getOutputRoot() {
- return new File(testRoot.get().getAsFile(), "/rest-api-spec/test");
+ return new File(getTestRoot().get().getAsFile(), "/rest-api-spec/test");
}
- @OutputDirectory
- DirectoryProperty getTestRoot() {
- return testRoot;
- }
+ /**
+ * Root directory of the tests being generated. To make rest tests happy
+ * we generate them in a testRoot which is contained in this directory.
+ */ + @Internal + abstract DirectoryProperty getTestRoot(); @Inject - public RestTestsFromDocSnippetTask(ObjectFactory objectFactory) { - testRoot = objectFactory.directoryProperty(); + public RestTestsFromDocSnippetTask() { TestBuilder builder = new TestBuilder(); - - setPerSnippet(snippet -> builder.handleSnippet(snippet)); + setPerSnippet(builder::handleSnippet); + getMigrationMode().convention(false); doLast(task -> { builder.finishLastTest(); builder.checkUnconverted(); + if (getMigrationMode().get()) { + assertEqualTestSnippetFromMigratedDocs(); + } }); } @@ -223,38 +198,37 @@ public abstract class RestTestsFromDocSnippetTask extends DocSnippetTask { */ public void handleSnippet(Snippet snippet) { if (snippet.isConsoleCandidate()) { - unconvertedCandidates.add(snippet.path.toString().replace('\\', '/')); + unconvertedCandidates.add(snippet.path().toString().replace('\\', '/')); } - if (BAD_LANGUAGES.contains(snippet.language)) { - throw new InvalidUserDataException(snippet + ": Use `js` instead of `" + snippet.language + "`."); + if (BAD_LANGUAGES.contains(snippet.language())) { + throw new InvalidUserDataException(snippet + ": Use `js` instead of `" + snippet.language() + "`."); } - if (snippet.testSetup) { + if (snippet.testSetup()) { testSetup(snippet); previousTest = snippet; return; } - if (snippet.testTearDown) { + if (snippet.testTearDown()) { testTearDown(snippet); previousTest = snippet; return; } - if (snippet.testResponse || snippet.language.equals("console-result")) { + if (snippet.testResponse() || snippet.language().equals("console-result")) { if (previousTest == null) { throw new InvalidUserDataException(snippet + ": No paired previous test"); } - if (previousTest.path.equals(snippet.path) == false) { + if (previousTest.path().equals(snippet.path()) == false) { throw new InvalidUserDataException(snippet + ": Result can't be first in file"); } response(snippet); return; } - if (("js".equals(snippet.language)) && snippet.console != null && snippet.console) { + if (("js".equals(snippet.language())) && snippet.console() != null && snippet.console()) { throw new InvalidUserDataException(snippet + ": Use `[source,console]` instead of `// CONSOLE`."); } - if (snippet.test || snippet.language.equals("console")) { + if (snippet.test() || snippet.language().equals("console")) { test(snippet); previousTest = snippet; - return; } // Must be an unmarked snippet.... } @@ -262,27 +236,27 @@ public abstract class RestTestsFromDocSnippetTask extends DocSnippetTask { private void test(Snippet test) { setupCurrent(test); - if (test.continued) { + if (test.continued()) { /* Catch some difficult to debug errors with // TEST[continued] * and throw a helpful error message. 
*/
- if (previousTest == null || previousTest.path.equals(test.path) == false) {
+ if (previousTest == null || previousTest.path().equals(test.path()) == false) {
throw new InvalidUserDataException("// TEST[continued] " + "cannot be on first snippet in a file: " + test);
}
- if (previousTest != null && previousTest.testSetup) {
+ if (previousTest != null && previousTest.testSetup()) {
throw new InvalidUserDataException("// TEST[continued] " + "cannot immediately follow // TESTSETUP: " + test);
}
- if (previousTest != null && previousTest.testTearDown) {
+ if (previousTest != null && previousTest.testTearDown()) {
throw new InvalidUserDataException("// TEST[continued] " + "cannot immediately follow // TEARDOWN: " + test);
}
} else {
current.println("---");
- if (test.name != null && test.name.isBlank() == false) {
- if (names.add(test.name) == false) {
- throw new InvalidUserDataException("Duplicated snippet name '" + test.name + "': " + test);
+ if (test.name() != null && test.name().isBlank() == false) {
+ if (names.add(test.name()) == false) {
+ throw new InvalidUserDataException("Duplicated snippet name '" + test.name() + "': " + test);
}
- current.println("\"" + test.name + "\":");
+ current.println("\"" + test.name() + "\":");
} else {
- current.println("\"line_" + test.start + "\":");
+ current.println("\"line_" + test.start() + "\":");
}
/* The Elasticsearch test runner doesn't support quite a few
* constructs unless we output this skip. We don't know if
@@ -296,36 +270,36 @@ public abstract class RestTestsFromDocSnippetTask extends DocSnippetTask {
current.println(" - stash_path_replace");
current.println(" - warnings");
}
- if (test.skip != null) {
- if (test.continued) {
+ if (test.skip() != null) {
+ if (test.continued()) {
throw new InvalidUserDataException("Continued snippets " + "can't be skipped");
}
current.println(" - always_skip");
- current.println(" reason: " + test.skip);
+ current.println(" reason: " + test.skip());
}
- if (test.setup != null) {
+ if (test.setup() != null) {
setup(test);
}
body(test, false);
- if (test.teardown != null) {
+ if (test.teardown() != null) {
teardown(test);
}
}
private void response(Snippet response) {
- if (null == response.skip) {
+ if (null == response.skip()) {
current.println(" - match:");
current.println(" $body:");
- replaceBlockQuote(response.contents).lines().forEach(line -> current.println(" " + line));
+ replaceBlockQuote(response.contents()).lines().forEach(line -> current.println(" " + line));
}
}
private void teardown(final Snippet snippet) {
// insert a teardown defined outside of the docs
- for (final String name : snippet.teardown.split(",")) {
- final String teardown = teardowns.get(name);
+ for (final String name : snippet.teardown().split(",")) {
+ final String teardown = getTeardowns().get().get(name);
if (teardown == null) {
throw new InvalidUserDataException("Couldn't find named teardown $name for " + snippet);
}
@@ -335,7 +309,7 @@ public abstract class RestTestsFromDocSnippetTask extends DocSnippetTask {
}
private void testTearDown(Snippet snippet) {
- if (previousTest != null && previousTest.testSetup == false && lastDocsPath == snippet.path) {
+ if (previousTest != null && previousTest.testSetup() == false && lastDocsPath.equals(snippet.path())) {
throw new InvalidUserDataException(snippet + " must follow test setup or be first");
}
setupCurrent(snippet);
@@ -411,7 +385,7 @@ public abstract class RestTestsFromDocSnippetTask extends DocSnippetTask {
}
private void body(Snippet snippet, boolean inSetup) {
- ParsingUtils.parse(snippet.getLocation(), snippet.contents, SYNTAX, (matcher, last) -> {
+ ParsingUtils.parse(snippet.contents(), SYNTAX, (matcher, last) -> {
if (matcher.group("comment") != null) {
// Comment
return;
@@ -424,30 +398,43 @@ public abstract class RestTestsFromDocSnippetTask extends DocSnippetTask {
String method = matcher.group("method");
String pathAndQuery = matcher.group("pathAndQuery");
String body = matcher.group("body");
- String catchPart = last ? snippet.catchPart : null;
+ String catchPart = last ? snippet.catchPart() : null;
if (pathAndQuery.startsWith("/")) {
// Leading '/'s break the generated paths
pathAndQuery = pathAndQuery.substring(1);
}
- emitDo(method, pathAndQuery, body, catchPart, snippet.warnings, inSetup, snippet.skipShardsFailures);
+ emitDo(method, pathAndQuery, body, catchPart, snippet.warnings(), inSetup, snippet.skipShardsFailures());
});
-
}
private PrintWriter setupCurrent(Snippet test) {
- if (test.path.equals(lastDocsPath)) {
+ if (test.path().equals(lastDocsPath)) {
return current;
}
names.clear();
finishLastTest();
- lastDocsPath = test.path;
+ lastDocsPath = test.path();
// Make the destination file:
// Shift the path into the destination directory tree
- Path dest = getOutputRoot().toPath().resolve(test.path);
+ Path dest = getOutputRoot().toPath().resolve(test.path());
// Replace the extension
String fileName = dest.getName(dest.getNameCount() - 1).toString();
- dest = dest.getParent().resolve(fileName.replace(".asciidoc", ".yml"));
+ if (hasMultipleDocImplementations(test.path())) {
+ String fileNameWithoutExt = dest.getName(dest.getNameCount() - 1).toString().replace(".asciidoc", "").replace(".mdx", "");
+
+ if (getMigrationMode().get() == false) {
+ throw new InvalidUserDataException(
+ "Found multiple files with the same name '" + fileNameWithoutExt + "' but different extensions: [asciidoc, mdx]"
+ );
+ }
+ getLogger().warn("Found multiple doc file types for " + test.path() + ". Generating tests for all of them.");
+ dest = dest.getParent().resolve(fileName + ".yml");
+
+ } else {
+ dest = dest.getParent().resolve(fileName.replace(".asciidoc", ".yml").replace(".mdx", ".yml"));
+
+ }
// Now setup the writer
try {
@@ -460,7 +447,7 @@ public abstract class RestTestsFromDocSnippetTask extends DocSnippetTask {
}
private void testSetup(Snippet snippet) {
- if (lastDocsPath == snippet.path) {
+ if (lastDocsPath == snippet.path()) {
throw new InvalidUserDataException(
snippet + ": wasn't first. TESTSETUP can only be used in the first snippet of a document."
);
@@ -468,7 +455,7 @@ public abstract class RestTestsFromDocSnippetTask extends DocSnippetTask {
setupCurrent(snippet);
current.println("---");
current.println("setup:");
- if (snippet.setup != null) {
+ if (snippet.setup() != null) {
setup(snippet);
}
body(snippet, true);
@@ -476,8 +463,8 @@ public abstract class RestTestsFromDocSnippetTask extends DocSnippetTask {
private void setup(final Snippet snippet) {
// insert a setup defined outside of the docs
- for (final String name : snippet.setup.split(",")) {
- final String setup = setups.get(name);
+ for (final String name : snippet.setup().split(",")) {
+ final String setup = getSetups().get(name);
if (setup == null) {
throw new InvalidUserDataException("Couldn't find named setup " + name + " for " + snippet);
}
@@ -488,7 +475,7 @@ public abstract class RestTestsFromDocSnippetTask extends DocSnippetTask {
public void checkUnconverted() {
List<String> listedButNotFound = new ArrayList<>();
- for (String listed : expectedUnconvertedCandidates) {
+ for (String listed : getExpectedUnconvertedCandidates().get()) {
if (false == unconvertedCandidates.remove(listed)) {
listedButNotFound.add(listed);
}
@@ -523,4 +510,54 @@ public abstract class RestTestsFromDocSnippetTask extends DocSnippetTask {
}
}
+ private void assertEqualTestSnippetFromMigratedDocs() {
+ getTestRoot().getAsFileTree().matching(patternSet -> { patternSet.include("**/*asciidoc.yml"); }).forEach(asciidocFile -> {
+ File mdxFile = new File(asciidocFile.getAbsolutePath().replace(".asciidoc.yml", ".mdx.yml"));
+ if (mdxFile.exists() == false) {
+ throw new InvalidUserDataException("Couldn't find the corresponding mdx file for " + asciidocFile.getAbsolutePath());
+ }
+ try {
+ List<String> asciidocLines = Files.readAllLines(asciidocFile.toPath());
+ List<String> mdxLines = Files.readAllLines(mdxFile.toPath());
+ if (asciidocLines.size() != mdxLines.size()) {
+ throw new GradleException(
+ "Yaml rest specs ("
+ + asciidocFile.toPath()
+ + " and "
+ + mdxFile.getAbsolutePath()
+ + ") are not equal, different line count"
+ );
+
+ }
+ for (int i = 0; i < asciidocLines.size(); i++) {
+ if (asciidocLines.get(i)
+ .replaceAll("line_\\d+", "line_0")
+ .equals(mdxLines.get(i).replaceAll("line_\\d+", "line_0")) == false) {
+ throw new GradleException(
+ "Yaml rest specs ("
+ + asciidocFile.toPath()
+ + " and "
+ + mdxFile.getAbsolutePath()
+ + ") are not equal, difference on line: "
+ + (i + 1)
+ );
+ }
+ }
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ });
+ }
+
+ private boolean hasMultipleDocImplementations(Path path) {
+ File dir = getDocs().getDir();
+ String fileName = path.getName(path.getNameCount() - 1).toString();
+ if (fileName.endsWith("asciidoc")) {
+ return new File(dir, path.toString().replace(".asciidoc", ".mdx")).exists();
+ } else if (fileName.endsWith("mdx")) {
+ return new File(dir, path.toString().replace(".mdx", ".asciidoc")).exists();
+ }
+ return false;
+ }
+
}
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/Snippet.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/Snippet.java
index b8aa864734f4..227ecbcbfd38 100644
--- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/Snippet.java
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/Snippet.java
@@ -8,113 +8,30 @@
package org.elasticsearch.gradle.internal.doc;
-import com.fasterxml.jackson.core.JsonFactory;
-import com.fasterxml.jackson.core.JsonParseException;
-import com.fasterxml.jackson.core.JsonParser;
-
-import org.gradle.api.InvalidUserDataException;
-
-import java.io.IOException;
import java.nio.file.Path;
-import java.util.ArrayList;
import java.util.List;
-public class Snippet {
- static final int NOT_FINISHED = -1;
-
- /**
- * Path to the file containing this snippet. Relative to docs.dir of the
- * SnippetsTask that created it.
- */
- Path path;
- int start;
- int end = NOT_FINISHED;
- public String contents;
-
- Boolean console = null;
- boolean test = false;
- boolean testResponse = false;
- boolean testSetup = false;
- boolean testTearDown = false;
- String skip = null;
- boolean continued = false;
- String language = null;
- String catchPart = null;
- String setup = null;
- String teardown = null;
- boolean curl;
- List<String> warnings = new ArrayList();
- boolean skipShardsFailures = false;
- String name;
-
- public Snippet(Path path, int start, String name) {
- this.path = path;
- this.start = start;
- this.name = name;
- }
-
- public void validate() {
- if (language == null) {
- throw new InvalidUserDataException(
- name
- + ": "
- + "Snippet missing a language. This is required by "
- + "Elasticsearch's doc testing infrastructure so we "
- + "be sure we don't accidentally forget to test a "
- + "snippet."
- );
- }
- assertValidCurlInput();
- assertValidJsonInput();
- }
-
- String getLocation() {
- return path + "[" + start + ":" + end + "]";
- }
-
- private void assertValidCurlInput() {
- // Try to detect snippets that contain `curl`
- if ("sh".equals(language) || "shell".equals(language)) {
- curl = contents.contains("curl");
- if (console == Boolean.FALSE && curl == false) {
- throw new InvalidUserDataException(name + ": " + "No need for NOTCONSOLE if snippet doesn't " + "contain `curl`.");
- }
- }
- }
-
- private void assertValidJsonInput() {
- if (testResponse && ("js" == language || "console-result" == language) && null == skip) {
- String quoted = contents
- // quote values starting with $
- .replaceAll("([:,])\\s*(\\$[^ ,\\n}]+)", "$1 \"$2\"")
- // quote fields starting with $
- .replaceAll("(\\$[^ ,\\n}]+)\\s*:", "\"$1\":");
-
- JsonFactory jf = new JsonFactory();
- jf.configure(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER, true);
- JsonParser jsonParser;
-
- try {
- jsonParser = jf.createParser(quoted);
- while (jsonParser.isClosed() == false) {
- jsonParser.nextToken();
- }
- } catch (JsonParseException e) {
- throw new InvalidUserDataException(
- "Invalid json in "
- + name
- + ". The error is:\n"
- + e.getMessage()
- + ".\n"
- + "After substitutions and munging, the json looks like:\n"
- + quoted,
- e
- );
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
- }
+public record Snippet(
+ Path path,
+ int start,
+ int end,
+ String contents,
+ Boolean console,
+ boolean test,
+ boolean testResponse,
+ boolean testSetup,
+ boolean testTearDown,
+ String skip,
+ boolean continued,
+ String language,
+ String catchPart,
+ String setup,
+ String teardown,
+ boolean curl,
+ List<String> warnings,
+ boolean skipShardsFailures,
+ String name
+) {
@Override
public String toString() {
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/SnippetBuilder.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/SnippetBuilder.java
new file mode 100644
index 000000000000..36d15b9eb33c
--- /dev/null
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/SnippetBuilder.java
@@ -0,0 +1,273 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.gradle.internal.doc;
+
+import com.fasterxml.jackson.core.JsonFactory;
+import com.fasterxml.jackson.core.JsonParseException;
+import com.fasterxml.jackson.core.JsonParser;
+
+import org.apache.commons.collections.map.MultiValueMap;
+import org.gradle.api.InvalidUserDataException;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+class SnippetBuilder {
+ static final int NOT_FINISHED = -1;
+
+ private Path path;
+ private int lineNumber;
+ private String name;
+ private String language;
+ private int end = NOT_FINISHED;
+ private boolean testSetup;
+ private boolean testTeardown;
+ // some tests rely on ugly regex substitutions using the same key multiple times
+ private MultiValueMap substitutions = MultiValueMap.decorate(new LinkedHashMap());
+ private String catchPart;
+ private boolean test;
+ private String skip;
+ private boolean continued;
+ private String setup;
+ private String teardown;
+ private List<String> warnings = new ArrayList<>();
+ private boolean skipShardsFailures;
+ private boolean testResponse;
+ private boolean curl;
+
+ private StringBuilder contentBuilder = new StringBuilder();
+ private Boolean console = null;
+
+ public SnippetBuilder withPath(Path path) {
+ this.path = path;
+ return this;
+ }
+
+ public SnippetBuilder withLineNumber(int lineNumber) {
+ this.lineNumber = lineNumber;
+ return this;
+ }
+
+ public SnippetBuilder withName(String currentName) {
+ this.name = currentName;
+ return this;
+ }
+
+ public SnippetBuilder withLanguage(String language) {
+ this.language = language;
+ return this;
+ }
+
+ public SnippetBuilder withEnd(int end) {
+ this.end = end;
+ return this;
+ }
+
+ public SnippetBuilder withTestSetup(boolean testSetup) {
+ this.testSetup = testSetup;
+ return this;
+ }
+
+ public SnippetBuilder withTestTearDown(boolean testTeardown) {
+ this.testTeardown = testTeardown;
+ return this;
+ }
+
+ public boolean notFinished() {
+ return end == NOT_FINISHED;
+ }
+
+ public SnippetBuilder withSubstitutions(Map<String, String> substitutions) {
+ this.substitutions.putAll(substitutions);
+ return this;
+ }
+
+ public SnippetBuilder withSubstitution(String key, String value) {
+ this.substitutions.put(key, value);
+ return this;
+ }
+
+ public SnippetBuilder withTest(boolean test) {
+ this.test = test;
+ return this;
+ }
+
+ public SnippetBuilder withCatchPart(String catchPart) {
+ this.catchPart = catchPart;
+ return this;
+ }
+
+ public SnippetBuilder withSkip(String skip) {
+ this.skip = skip;
+ return this;
+ }
+
+ public SnippetBuilder withContinued(boolean continued) {
+ this.continued = continued;
+ return this;
+ }
+
+ public SnippetBuilder withSetup(String setup) {
+ this.setup = setup;
+ return this;
+ }
+
+ public SnippetBuilder withTeardown(String teardown) {
+ this.teardown = teardown;
+ return this;
+ }
+
+ public SnippetBuilder withWarning(String warning) {
+ this.warnings.add(warning);
+ return this;
+ }
+
+ public SnippetBuilder withSkipShardsFailures(boolean skipShardsFailures) {
+ this.skipShardsFailures = skipShardsFailures;
+ return this;
+ }
+
+ public SnippetBuilder withTestResponse(boolean testResponse) {
+ this.testResponse = testResponse;
+ return this;
+ }
+
+ public SnippetBuilder withContent(String content) {
+ return withContent(content, false);
+ }
+
+ public SnippetBuilder withContent(String content, boolean newLine) {
+ contentBuilder.append(content);
+ if (newLine) {
+ contentBuilder.append("\n");
+ }
+ return this;
+ }
+
+ private String escapeSubstitutions(String contents) {
+ Set<Map.Entry<String, List<String>>> set = substitutions.entrySet();
+ for (Map.Entry<String, List<String>> substitution : set) {
+ String pattern = substitution.getKey();
+ for (String subst : substitution.getValue()) {
+ /*
+ * $body is really common, but it looks like a
+ * backreference, so we just escape it here to make the
+ * tests cleaner.
+ */
+ subst = subst.replace("$body", "\\$body");
+ subst = subst.replace("$_path", "\\$_path");
+ subst = subst.replace("\\n", "\n");
+ contents = contents.replaceAll(pattern, subst);
+ }
+ }
+ return contents;
+ }
+
+ public Snippet build() {
+ String content = contentBuilder.toString();
+ validate(content);
+ String finalContent = escapeSubstitutions(content);
+ return new Snippet(
+ path,
+ lineNumber,
+ end,
+ finalContent,
+ console,
+ test,
+ testResponse,
+ testSetup,
+ testTeardown,
+ skip,
+ continued,
+ language,
+ catchPart,
+ setup,
+ teardown,
+ curl,
+ warnings,
+ skipShardsFailures,
+ name
+ );
+ }
+
+ public void validate(String content) {
+ if (language == null) {
+ throw new InvalidUserDataException(
+ name
+ + ": "
+ + "Snippet missing a language. This is required by "
+ + "Elasticsearch's doc testing infrastructure so we "
+ + "be sure we don't accidentally forget to test a "
+ + "snippet."
+ );
+ }
+ assertValidCurlInput(content);
+ assertValidJsonInput(content);
+
+ }
+
+ private void assertValidCurlInput(String content) {
+ // Try to detect snippets that contain `curl`
+ if ("sh".equals(language) || "shell".equals(language)) {
+ curl = content.contains("curl");
+ if (console == Boolean.FALSE && curl == false) {
+ throw new InvalidUserDataException(name + ": " + "No need for NOTCONSOLE if snippet doesn't " + "contain `curl`.");
+ }
+ }
+ }
+
+ private void assertValidJsonInput(String content) {
+ if (testResponse && ("js" == language || "console-result" == language) && null == skip) {
+ String quoted = content
+ // quote values starting with $
+ .replaceAll("([:,])\\s*(\\$[^ ,\\n}]+)", "$1 \"$2\"")
+ // quote fields starting with $
+ .replaceAll("(\\$[^ ,\\n}]+)\\s*:", "\"$1\":");
+
+ JsonFactory jf = new JsonFactory();
+ jf.configure(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER, true);
+ JsonParser jsonParser;
+
+ try {
+ jsonParser = jf.createParser(quoted);
+ while (jsonParser.isClosed() == false) {
+ jsonParser.nextToken();
+ }
+ } catch (JsonParseException e) {
+ throw new InvalidUserDataException(
+ "Invalid json in "
+ + name
+ + ". The error is:\n"
+ + e.getMessage()
+ + ".\n"
+ + "After substitutions and munging, the json looks like:\n"
+ + quoted,
+ e
+ );
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ }
+
+ public SnippetBuilder withConsole(Boolean console) {
+ this.console = console;
+ return this;
+ }
+
+ public boolean consoleDefined() {
+ return console != null;
+
+ }
+}
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/SnippetParser.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/SnippetParser.java
index 064c1c460feb..c4ae0b90127a 100644
--- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/SnippetParser.java
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/SnippetParser.java
@@ -8,10 +8,251 @@
package org.elasticsearch.gradle.internal.doc;
+import org.gradle.api.InvalidUserDataException;
+
import java.io.File;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
import java.util.List;
import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.stream.Stream;
+
+abstract class SnippetParser {
+ protected static final String SCHAR = "(?:\\\\\\/|[^\\/])";
+ protected static final String NON_JSON = "(non_json)";
+ protected static final String SKIP_REGEX = "skip:([^\\]]+)";
+ protected static final String SUBSTITUTION = "s\\/(" + SCHAR + "+)\\/(" + SCHAR + "*)\\/";
+
+ private static final String CATCH = "catch:\\s*((?:\\/[^\\/]+\\/)|[^ \\]]+)";
+ private static final String SETUP = "setup:([^ \\]]+)";
+ private static final String TEARDOWN = "teardown:([^ \\]]+)";
+ private static final String WARNING = "warning:(.+)";
+ private static final String TEST_SYNTAX = "(?:"
+ + CATCH
+ + "|"
+ + SUBSTITUTION
+ + "|"
+ + SKIP_REGEX
+ + "|(continued)|"
+ + SETUP
+ + "|"
+ + TEARDOWN
+ + "|"
+ + WARNING
+ + "|(skip_shard_failures)) ?";
+
+ protected final Map<String, String> defaultSubstitutions;
+
+ protected SnippetBuilder snippetBuilder = null;
+
+ private Path currentPath;
+
+ SnippetParser(Map<String, String> defaultSubstitutions) {
+ this.defaultSubstitutions = defaultSubstitutions;
+ }
+
+ public List<Snippet> parseDoc(File rootDir, File docFile) {
+ List<Snippet> snippets = new ArrayList<>();
+ this.currentPath = rootDir.toPath().relativize(docFile.toPath());
+ try (Stream<String> lines = Files.lines(docFile.toPath(), StandardCharsets.UTF_8)) {
+ List<String> linesList = lines.toList();
+ parseLines(docFile, linesList, snippets);
+ } catch (IOException e) {
+ throw new SnippetParserException("Failed to parse file " + docFile, e);
+ } finally {
+ this.currentPath = null;
+ this.snippetBuilder = null;
+ }
+ return snippets;
+ }
+
+ void parseLines(File file, List<String> linesList, List<Snippet> snippets) {
+ for (int lineNumber = 0; lineNumber < linesList.size(); lineNumber++) {
+ String line = linesList.get(lineNumber);
+ try {
+ parseLine(snippets, lineNumber, line);
+ } catch (InvalidUserDataException e) {
+ throw new SnippetParserException(file, lineNumber, e);
+ }
+ }
+ fileParsingFinished(snippets);
+ }
+
+ protected void handleCommons(List<Snippet> snippets, String line) {
+ if (consoleHandled(line, snippetBuilder)) {
+ return;
+ }
+ if (testHandled(line, snippetBuilder)) {
+ return;
+ }
+ if (testResponseHandled(line, snippetBuilder)) {
+ return;
+ }
+ if (line.matches(getTestSetupRegex())) {
+ snippetBuilder.withTestSetup(true);
+ return;
+ }
+ if (line.matches(getTeardownRegex())) {
+ snippetBuilder.withTestTearDown(true);
+ return;
+ }
+ if (snippetBuilder == null) {
+ // Outside
+ return;
+ }
+ if (snippetBuilder.notFinished()) {
+ // Inside
+ // We don't need the annotations
+ line = line.replaceAll("<\\d+>", "");
+ // nor bookmarks
+ line = line.replaceAll("\\[\\^\\d+\\]", "");
+ // Nor any trailing spaces
+ line = line.replaceAll("\s+$", "");
+ snippetBuilder.withContent(line, true);
+ return;
+ }
+ // Allow line continuations for console snippets within lists
+ if (snippetBuilder != null && line.trim().equals("+")) {
+ return;
+ }
+ snippets.add(snippetBuilder.build());
+ snippetBuilder = null;
+ }
+
+ protected SnippetBuilder newSnippetBuilder() {
+ snippetBuilder = new SnippetBuilder().withPath(currentPath);
+ return snippetBuilder;
+ }
+
+ void fileParsingFinished(List<Snippet> snippets) {
+ if (snippetBuilder != null) {
+ snippets.add(snippetBuilder.build());
+ snippetBuilder = null;
+ }
+ }
+
+ protected abstract void parseLine(List<Snippet> snippets, int lineNumber, String line);
+
+ boolean testResponseHandled(String line, SnippetBuilder snippetBuilder) {
+ Matcher matcher = testResponsePattern().matcher(line);
+ if (matcher.matches()) {
+ if (snippetBuilder == null) {
+ throw new InvalidUserDataException("TESTRESPONSE not paired with a snippet at ");
+ }
+ snippetBuilder.withTestResponse(true);
+ if (matcher.group(2) != null) {
+ ParsingUtils.parse(
+ matcher.group(2),
+ "(?:" + SUBSTITUTION + "|" + NON_JSON + "|" + SKIP_REGEX + ") ?",
+ (Matcher m, Boolean last) -> {
+ if (m.group(1) != null) {
+ // TESTRESPONSE[s/adsf/jkl/]
+ snippetBuilder.withSubstitution(m.group(1), m.group(2));
+ } else if (m.group(3) != null) {
+ // TESTRESPONSE[non_json]
+ snippetBuilder.withSubstitution("^", "/");
+ snippetBuilder.withSubstitution("\n$", "\\\\s*/");
+ snippetBuilder.withSubstitution("( +)", "$1\\\\s+");
+ snippetBuilder.withSubstitution("\n", "\\\\s*\n ");
+ } else if (m.group(4) != null) {
+ // TESTRESPONSE[skip:reason]
+ snippetBuilder.withSkip(m.group(4));
+ }
+ }
+ );
+ }
+ return true;
+ }
+ return false;
+ }
+
+ protected boolean testHandled(String line, SnippetBuilder snippetBuilder) {
+ Matcher matcher = testPattern().matcher(line);
+ if (matcher.matches()) {
+ if (snippetBuilder == null) {
+ throw new InvalidUserDataException("TEST not paired with a snippet at ");
+ }
+ snippetBuilder.withTest(true);
+ if (matcher.group(2) != null) {
+ ParsingUtils.parse(matcher.group(2), TEST_SYNTAX, (Matcher m, Boolean last) -> {
+ if (m.group(1) != null) {
+ snippetBuilder.withCatchPart(m.group(1));
+ return;
+ }
+ if (m.group(2) != null) {
+ snippetBuilder.withSubstitution(m.group(2), m.group(3));
+ return;
+ }
+ if (m.group(4) != null) {
+ snippetBuilder.withSkip(m.group(4));
+ return;
+ }
+ if (m.group(5) != null) {
+ snippetBuilder.withContinued(true);
+ return;
+ }
+ if (m.group(6) != null) {
+ snippetBuilder.withSetup(m.group(6));
+ return;
+ }
+ if (m.group(7) != null) {
+ snippetBuilder.withTeardown(m.group(7));
+ return;
+ }
+ if (m.group(8) != null) {
+ snippetBuilder.withWarning(m.group(8));
+ return;
+ }
+ if (m.group(9) != null) {
+ snippetBuilder.withSkipShardsFailures(true);
+ return;
+ }
+ throw new InvalidUserDataException("Invalid test marker: " + line);
+ });
+ }
+ return true;
+ }
+ return false;
+ }
+
+ protected boolean consoleHandled(String line, SnippetBuilder snippet) {
+ if (line.matches(getConsoleRegex())) {
+ if (snippetBuilder == null) {
+ throw new InvalidUserDataException("CONSOLE not paired with a snippet");
+ }
+ if (snippetBuilder.consoleDefined()) {
+ throw new InvalidUserDataException("Can't be both CONSOLE and NOTCONSOLE");
+ }
+ snippetBuilder.withConsole(Boolean.TRUE);
+ return true;
+ } else if (line.matches(getNotconsoleRegex())) {
+ if (snippet == null) {
+ throw new InvalidUserDataException("NOTCONSOLE not paired with a snippet");
+ }
+ if (snippetBuilder.consoleDefined()) {
+ throw new InvalidUserDataException("Can't be both CONSOLE and NOTCONSOLE");
+ }
+ snippet.withConsole(Boolean.FALSE);
+ return true;
+ }
+ return false;
+ }
+
+ protected abstract String getTestSetupRegex();
+
+ protected abstract String getTeardownRegex();
+
+ protected abstract String getConsoleRegex();
+
+ protected abstract String getNotconsoleRegex();
+
+ protected abstract Pattern testPattern();
+
+ protected abstract Pattern testResponsePattern();
-public interface SnippetParser {
- List<Snippet> parseDoc(File rootDir, File docFile, List<Map.Entry<String, String>> substitutions);
}
diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/SnippetParserException.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/SnippetParserException.java
new file mode 100644
index 000000000000..79563a97de11
--- /dev/null
+++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/doc/SnippetParserException.java
@@ -0,0 +1,38 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.gradle.internal.doc;
+
+import org.gradle.api.InvalidUserDataException;
+
+import java.io.File;
+
+public class SnippetParserException extends RuntimeException {
+ private final File file;
+ private final int lineNumber;
+
+ public SnippetParserException(String message, Throwable cause) {
+ super(message, cause);
+ this.file = null;
+ this.lineNumber = -1;
+ }
+
+ public SnippetParserException(File file, int lineNumber, InvalidUserDataException e) {
+ super("Error parsing snippet in " + file.getName() + " at line " + lineNumber, e);
+ this.file = file;
+ this.lineNumber = lineNumber;
+ }
+
+ public File getFile() {
+ return file;
+ }
+
+ public int getLineNumber() {
+ return lineNumber;
+ }
+}
diff --git a/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/AbstractSnippetParserSpec.groovy b/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/AbstractSnippetParserSpec.groovy
new file mode 100644
index 000000000000..8690c738f0d9
--- /dev/null
+++ b/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/AbstractSnippetParserSpec.groovy
@@ -0,0 +1,191 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */ + +package org.elasticsearch.gradle.internal.doc + +import spock.lang.Specification + +import org.gradle.api.InvalidUserDataException + +import java.nio.file.Path + +abstract class AbstractSnippetParserSpec extends Specification { + + abstract SnippetParser parser() + abstract String docSnippetWithTestResponses() + abstract String docSnippetWithTest() + abstract String docSnippetWithRepetitiveSubstiutions() + abstract String docSnippetWithConsole() + abstract String docSnippetWithNotConsole() + abstract String docSnippetWithMixedConsoleNotConsole() + + def "can parse snippet with console"() { + when: + def snippets = parse(docSnippetWithConsole()) + then: + snippets*.console() == [true] + } + + def "can parse snippet with notconsole"() { + when: + def snippets = parse(docSnippetWithNotConsole()) + then: + snippets*.console() == [false] + } + + def "fails on mixing console and notconsole"() { + when: + def snippets = parse(docSnippetWithMixedConsoleNotConsole()) + then: + def e = thrown(SnippetParserException) + e.message.matches("Error parsing snippet in acme.xyz at line \\d") + e.file.name == "acme.xyz" + e.lineNumber > 0 + } + + def "can parse snippet with test"() { + when: + def snippets = parse(docSnippetWithTest()) + then: + snippets*.test() == [true] + snippets*.testResponse() == [false] + snippets*.language() == ["console"] + snippets*.catchPart() == ["/painless_explain_error/"] + snippets*.teardown() == ["some_teardown"] + snippets*.setup() == ["seats"] + snippets*.warnings() == [["some_warning"]] + snippets*.contents() == ["""PUT /hockey/_doc/1?refresh +{"first":"johnny","last":"gaudreau","goals":[9,27,1],"assists":[17,46,0],"gp":[26,82,1]} + +POST /hockey/_explain/1?error_trace=false +{ + "query": { + "script": { + "script": "Debug.explain(doc.goals)" + } + } +} +"""] + } + + def "can parse snippet with test responses"() { + when: + def snippets = parse(docSnippetWithTestResponses()) + then: + snippets*.testResponse() == [true] + snippets*.test() == [false] + snippets*.language() == ["console-result"] + snippets*.skip() == ["some_skip_message"] + snippets*.contents() == ["""{ + "docs" : [ + { + "processor_results" : [ + { + "processor_type" : "set", + "status" : "success", + "doc" : { + "_index" : "index", + "_id" : "id", + "_version": "-3", + "_source" : { + "field2" : "_value2", + "foo" : "bar" + }, + "_ingest" : { + "pipeline" : "_simulate_pipeline", + "timestamp" : \$body.docs.0.processor_results.0.doc._ingest.timestamp + } + } + }, + { + "processor_type" : "set", + "status" : "success", + "doc" : { + "_index" : "index", + "_id" : "id", + "_version": "-3", + "_source" : { + "field3" : "_value3", + "field2" : "_value2", + "foo" : "bar" + }, + "_ingest" : { + "pipeline" : "_simulate_pipeline", + "timestamp" : \$body.docs.0.processor_results.0.doc._ingest.timestamp + } + } + } + ] + }, + { + "processor_results" : [ + { + "processor_type" : "set", + "status" : "success", + "doc" : { + "_index" : "index", + "_id" : "id", + "_version": "-3", + "_source" : { + "field2" : "_value2", + "foo" : "rab" + }, + "_ingest" : { + "pipeline" : "_simulate_pipeline", + "timestamp" : \$body.docs.1.processor_results.0.doc._ingest.timestamp + } + } + }, + { + "processor_type" : "set", + "status" : "success", + "doc" : { + "_index" : "index", + "_id" : "id", + "_version": "-3", + "_source" : { + "field3" : "_value3", + "field2" : "_value2", + "foo" : "rab" + }, + "_ingest" : { + "pipeline" : "_simulate_pipeline", + "timestamp" : \$body.docs.1.processor_results.0.doc._ingest.timestamp + } + } + 
} + ] + } + ] +} +"""] + } + + def "can parse snippet with repetitive regex substitutions"() { + when: + def snippets = parse(docSnippetWithRepetitiveSubstiutions()) + then: + snippets*.test() == [true] + snippets*.testResponse() == [false] + snippets*.language() == ["console"] + snippets*.contents() == ["""PUT /_snapshot/repo1 +{"type": "fs", "settings": {"location": "repo/1"}} +PUT /_snapshot/repo1/snap2?wait_for_completion=true +PUT /_snapshot/repo1/snap1?wait_for_completion=true +GET /_cat/snapshots/repo1?v=true&s=id +"""] + } + + List parse(String docSnippet) { + List snippets = new ArrayList<>() + def lines = docSnippet.lines().toList() + parser().parseLines(new File("acme.xyz"), lines, snippets) + return snippets + } + +} diff --git a/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/AsciidocParserSpec.groovy b/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/AsciidocParserSpec.groovy index b7ac363ef7ad..a80215cd82f0 100644 --- a/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/AsciidocParserSpec.groovy +++ b/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/AsciidocParserSpec.groovy @@ -6,17 +6,11 @@ * Side Public License, v 1. */ -package org.elasticsearch.gradle.internal.doc; +package org.elasticsearch.gradle.internal.doc -import spock.lang.Specification -import spock.lang.Unroll +import static org.elasticsearch.gradle.internal.doc.AsciidocSnippetParser.matchSource -import org.gradle.api.InvalidUserDataException - -import static org.elasticsearch.gradle.internal.doc.AsciidocSnippetParser.finalizeSnippet; -import static org.elasticsearch.gradle.internal.doc.AsciidocSnippetParser.matchSource; - -class AsciidocParserSpec extends Specification { +class AsciidocParserSpec extends AbstractSnippetParserSpec { def testMatchSource() { expect: @@ -78,107 +72,174 @@ class AsciidocParserSpec extends Specification { } } - @Unroll - def "checks for valid json for #languageParam"() { - given: - def snippet = snippet() { - language = languageParam - testResponse = true - } - def json = """{ - "name": "John Doe", - "age": 30, - "isMarried": true, - "address": { - "street": "123 Main Street", - "city": "Springfield", - "state": "IL", - "zip": "62701" - }, - "hobbies": ["Reading", "Cooking", "Traveling"] -}""" - when: - def result = finalizeSnippet(snippet, json, [:], [:].entrySet()) - then: - result != null - - when: - finalizeSnippet(snippet, "some no valid json", [:], [:].entrySet()) - then: - def e = thrown(InvalidUserDataException) - e.message.contains("Invalid json in") - - when: - snippet.skip = "true" - result = finalizeSnippet(snippet, "some no valid json", [:], [:].entrySet()) - then: - result != null - - where: - languageParam << ["js", "console-result"] + @Override + SnippetParser parser() { + return new AsciidocSnippetParser([:]); } - def "test finalized snippet handles substitutions"() { - given: - def snippet = snippet() { - language = "console" - } - when: - finalizeSnippet(snippet, "snippet-content substDefault subst", [substDefault: "\$body"], [subst: 'substValue'].entrySet()) - then: - snippet.contents == "snippet-content \$body substValue" - } + @Override + String docSnippetWithTest() { + return """[source,console] +--------------------------------------------------------- +PUT /hockey/_doc/1?refresh +{"first":"johnny","last":"gaudreau","goals":[9,27,1],"assists":[17,46,0],"gp":[26,82,1]} - def snippetMustHaveLanguage() { - given: - def snippet = snippet() - when: - 
finalizeSnippet(snippet, "snippet-content", [:], []) - then: - def e = thrown(InvalidUserDataException) - e.message.contains("Snippet missing a language.") - } - - def testEmit() { - given: - def snippet = snippet() { - language = "console" - } - when: - finalizeSnippet(snippet, "snippet-content", [:], []) - then: - snippet.contents == "snippet-content" - } - - def testSnippetsWithCurl() { - given: - def snippet = snippet() { - language = "sh" - name = "snippet-name-1" - } - when: - finalizeSnippet(snippet, "curl substDefault subst", [:], [:].entrySet()) - then: - snippet.curl == true - } - - def "test snippets with no curl no console"() { - given: - def snippet = snippet() { - console = false - language = "shell" - } - when: - finalizeSnippet(snippet, "hello substDefault subst", [:], [:].entrySet()) - then: - def e = thrown(InvalidUserDataException) - e.message.contains("No need for NOTCONSOLE if snippet doesn't contain `curl`") - } - - Snippet snippet(Closure configClosure = {}) { - def snippet = new Snippet(new File("SomePath").toPath(), 0, "snippet-name-1") - configClosure.delegate = snippet - configClosure() - return snippet +POST /hockey/_explain/1 +{ + "query": { + "script": { + "script": "Debug.explain(doc.goals)" } + } +} +--------------------------------------------------------- +// TEST[s/_explain\\/1/_explain\\/1?error_trace=false/ catch:/painless_explain_error/] +// TEST[teardown:some_teardown] +// TEST[setup:seats] +// TEST[warning:some_warning] +// TEST[skip_shard_failures] + +""" + } + + @Override + String docSnippetWithRepetitiveSubstiutions() { + return """ +[source,console] +-------------------------------------------------- +GET /_cat/snapshots/repo1?v=true&s=id +-------------------------------------------------- +// TEST[s/^/PUT \\/_snapshot\\/repo1\\/snap1?wait_for_completion=true\\n/] +// TEST[s/^/PUT \\/_snapshot\\/repo1\\/snap2?wait_for_completion=true\\n/] +// TEST[s/^/PUT \\/_snapshot\\/repo1\\n{"type": "fs", "settings": {"location": "repo\\/1"}}\\n/] +""" + } + + @Override + String docSnippetWithConsole() { + return """ +[source,console] +---- +// CONSOLE +---- +""" + } + + @Override + String docSnippetWithNotConsole() { + return """ +[source,console] +---- +// NOTCONSOLE +---- +""" + } + + @Override + String docSnippetWithMixedConsoleNotConsole() { + return """ +[source,console] +---- +// NOTCONSOLE +// CONSOLE +---- +""" + } + + @Override + String docSnippetWithTestResponses() { + return """ +[source,console-result] +---- +{ + "docs" : [ + { + "processor_results" : [ + { + "processor_type" : "set", + "status" : "success", + "doc" : { + "_index" : "index", + "_id" : "id", + "_version": "-3", + "_source" : { + "field2" : "_value2", + "foo" : "bar" + }, + "_ingest" : { + "pipeline" : "_simulate_pipeline", + "timestamp" : "2020-07-30T01:21:24.251836Z" + } + } + }, + { + "processor_type" : "set", + "status" : "success", + "doc" : { + "_index" : "index", + "_id" : "id", + "_version": "-3", + "_source" : { + "field3" : "_value3", + "field2" : "_value2", + "foo" : "bar" + }, + "_ingest" : { + "pipeline" : "_simulate_pipeline", + "timestamp" : "2020-07-30T01:21:24.251836Z" + } + } + } + ] + }, + { + "processor_results" : [ + { + "processor_type" : "set", + "status" : "success", + "doc" : { + "_index" : "index", + "_id" : "id", + "_version": "-3", + "_source" : { + "field2" : "_value2", + "foo" : "rab" + }, + "_ingest" : { + "pipeline" : "_simulate_pipeline", + "timestamp" : "2020-07-30T01:21:24.251863Z" + } + } + }, + { + "processor_type" : "set", + "status" : 
"success", + "doc" : { + "_index" : "index", + "_id" : "id", + "_version": "-3", + "_source" : { + "field3" : "_value3", + "field2" : "_value2", + "foo" : "rab" + }, + "_ingest" : { + "pipeline" : "_simulate_pipeline", + "timestamp" : "2020-07-30T01:21:24.251863Z" + } + } + } + ] + } + ] +} +---- +// TESTRESPONSE[s/"2020-07-30T01:21:24.251836Z"/\$body.docs.0.processor_results.0.doc._ingest.timestamp/] +// TESTRESPONSE[s/"2020-07-30T01:21:24.251836Z"/\$body.docs.0.processor_results.1.doc._ingest.timestamp/] +// TESTRESPONSE[s/"2020-07-30T01:21:24.251863Z"/\$body.docs.1.processor_results.0.doc._ingest.timestamp/] +// TESTRESPONSE[s/"2020-07-30T01:21:24.251863Z"/\$body.docs.1.processor_results.1.doc._ingest.timestamp/] +// TESTRESPONSE[skip:some_skip_message] +""" + } + } diff --git a/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/DocSnippetTaskSpec.groovy b/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/DocSnippetTaskSpec.groovy index 894e6e9b51ab..2b6582bd633e 100644 --- a/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/DocSnippetTaskSpec.groovy +++ b/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/DocSnippetTaskSpec.groovy @@ -10,8 +10,8 @@ package org.elasticsearch.gradle.internal.doc import spock.lang.Specification import spock.lang.TempDir +import spock.lang.Unroll -import org.gradle.api.InvalidUserDataException import org.gradle.testfixtures.ProjectBuilder import static org.elasticsearch.gradle.internal.test.TestUtils.normalizeString @@ -21,559 +21,60 @@ class DocSnippetTaskSpec extends Specification { @TempDir File tempDir - def "handling test parsing multiple snippets per file"() { - given: - def project = ProjectBuilder.builder().build() - def task = project.tasks.register("docSnippetTask", DocSnippetTask).get() + @Unroll + def "handling test parsing multiple snippets per #fileType file"() { when: - def substitutions = [] - def snippets = task.parseDocFile( - tempDir, docFile( - """ -[[mapper-annotated-text]] -=== Mapper annotated text plugin + def snippets = parseFile("example-1.$fileType") -experimental[] - -The mapper-annotated-text plugin provides the ability to index text that is a -combination of free-text and special markup that is typically used to identify -items of interest such as people or organisations (see NER or Named Entity Recognition -tools). - - -The elasticsearch markup allows one or more additional tokens to be injected, unchanged, into the token -stream at the same position as the underlying text it annotates. - -:plugin_name: mapper-annotated-text -include::install_remove.asciidoc[] - -[[mapper-annotated-text-usage]] -==== Using the `annotated-text` field - -The `annotated-text` tokenizes text content as per the more common {ref}/text.html[`text`] field (see -"limitations" below) but also injects any marked-up annotation tokens directly into -the search index: - -[source,console] --------------------------- -PUT my-index-000001 -{ - "mappings": { - "properties": { - "my_field": { - "type": "annotated_text" - } - } - } -} --------------------------- - -Such a mapping would allow marked-up text eg wikipedia articles to be indexed as both text -and structured tokens. The annotations use a markdown-like syntax using URL encoding of -one or more values separated by the `&` symbol. 
- - -We can use the "_analyze" api to test how an example annotation would be stored as tokens -in the search index: - - -[source,js] --------------------------- -GET my-index-000001/_analyze -{ - "field": "my_field", - "text":"Investors in [Apple](Apple+Inc.) rejoiced." -} --------------------------- -// NOTCONSOLE - -Response: - -[source,js] --------------------------------------------------- -{ - "tokens": [ - { - "token": "investors", - "start_offset": 0, - "end_offset": 9, - "type": "", - "position": 0 - }, - { - "token": "in", - "start_offset": 10, - "end_offset": 12, - "type": "", - "position": 1 - }, - { - "token": "Apple Inc.", <1> - "start_offset": 13, - "end_offset": 18, - "type": "annotation", - "position": 2 - }, - { - "token": "apple", - "start_offset": 13, - "end_offset": 18, - "type": "", - "position": 2 - }, - { - "token": "rejoiced", - "start_offset": 19, - "end_offset": 27, - "type": "", - "position": 3 - } - ] -} --------------------------------------------------- -// NOTCONSOLE - -<1> Note the whole annotation token `Apple Inc.` is placed, unchanged as a single token in -the token stream and at the same position (position 2) as the text token (`apple`) it annotates. - - -We can now perform searches for annotations using regular `term` queries that don't tokenize -the provided search values. Annotations are a more precise way of matching as can be seen -in this example where a search for `Beck` will not match `Jeff Beck` : - -[source,console] --------------------------- -# Example documents -PUT my-index-000001/_doc/1 -{ - "my_field": "[Beck](Beck) announced a new tour"<1> -} - -PUT my-index-000001/_doc/2 -{ - "my_field": "[Jeff Beck](Jeff+Beck&Guitarist) plays a strat"<2> -} - -# Example search -GET my-index-000001/_search -{ - "query": { - "term": { - "my_field": "Beck" <3> - } - } -} --------------------------- - -<1> As well as tokenising the plain text into single words e.g. `beck`, here we -inject the single token value `Beck` at the same position as `beck` in the token stream. -<2> Note annotations can inject multiple tokens at the same position - here we inject both -the very specific value `Jeff Beck` and the broader term `Guitarist`. This enables -broader positional queries e.g. finding mentions of a `Guitarist` near to `strat`. -<3> A benefit of searching with these carefully defined annotation tokens is that a query for -`Beck` will not match document 2 that contains the tokens `jeff`, `beck` and `Jeff Beck` - -WARNING: Any use of `=` signs in annotation values eg `[Prince](person=Prince)` will -cause the document to be rejected with a parse failure. In future we hope to have a use for -the equals signs so wil actively reject documents that contain this today. - - -[[mapper-annotated-text-tips]] -==== Data modelling tips -===== Use structured and unstructured fields - -Annotations are normally a way of weaving structured information into unstructured text for -higher-precision search. - -`Entity resolution` is a form of document enrichment undertaken by specialist software or people -where references to entities in a document are disambiguated by attaching a canonical ID. -The ID is used to resolve any number of aliases or distinguish between people with the -same name. The hyperlinks connecting Wikipedia's articles are a good example of resolved -entity IDs woven into text. 
- -These IDs can be embedded as annotations in an annotated_text field but it often makes -sense to include them in dedicated structured fields to support discovery via aggregations: - -[source,console] --------------------------- -PUT my-index-000001 -{ - "mappings": { - "properties": { - "my_unstructured_text_field": { - "type": "annotated_text" - }, - "my_structured_people_field": { - "type": "text", - "fields": { - "keyword" : { - "type": "keyword" - } - } - } - } - } -} --------------------------- - -Applications would then typically provide content and discover it as follows: - -[source,console] --------------------------- -# Example documents -PUT my-index-000001/_doc/1 -{ - "my_unstructured_text_field": "[Shay](%40kimchy) created elasticsearch", - "my_twitter_handles": ["@kimchy"] <1> -} - -GET my-index-000001/_search -{ - "query": { - "query_string": { - "query": "elasticsearch OR logstash OR kibana",<2> - "default_field": "my_unstructured_text_field" - } - }, - "aggregations": { - \t"top_people" :{ - \t "significant_terms" : { <3> -\t "field" : "my_twitter_handles.keyword" - \t } - \t} - } -} --------------------------- - -<1> Note the `my_twitter_handles` contains a list of the annotation values -also used in the unstructured text. (Note the annotated_text syntax requires escaping). -By repeating the annotation values in a structured field this application has ensured that -the tokens discovered in the structured field can be used for search and highlighting -in the unstructured field. -<2> In this example we search for documents that talk about components of the elastic stack -<3> We use the `my_twitter_handles` field here to discover people who are significantly -associated with the elastic stack. - -===== Avoiding over-matching annotations -By design, the regular text tokens and the annotation tokens co-exist in the same indexed -field but in rare cases this can lead to some over-matching. - -The value of an annotation often denotes a _named entity_ (a person, place or company). -The tokens for these named entities are inserted untokenized, and differ from typical text -tokens because they are normally: - -* Mixed case e.g. `Madonna` -* Multiple words e.g. `Jeff Beck` -* Can have punctuation or numbers e.g. `Apple Inc.` or `@kimchy` - -This means, for the most part, a search for a named entity in the annotated text field will -not have any false positives e.g. when selecting `Apple Inc.` from an aggregation result -you can drill down to highlight uses in the text without "over matching" on any text tokens -like the word `apple` in this context: - - the apple was very juicy - -However, a problem arises if your named entity happens to be a single term and lower-case e.g. the -company `elastic`. In this case, a search on the annotated text field for the token `elastic` -may match a text document such as this: - - they fired an elastic band - -To avoid such false matches users should consider prefixing annotation values to ensure -they don't name clash with text tokens e.g. 
- - [elastic](Company_elastic) released version 7.0 of the elastic stack today - - - - -[[mapper-annotated-text-highlighter]] -==== Using the `annotated` highlighter - -The `annotated-text` plugin includes a custom highlighter designed to mark up search hits -in a way which is respectful of the original markup: - -[source,console] --------------------------- -# Example documents -PUT my-index-000001/_doc/1 -{ - "my_field": "The cat sat on the [mat](sku3578)" -} - -GET my-index-000001/_search -{ - "query": { - "query_string": { - "query": "cats" - } - }, - "highlight": { - "fields": { - "my_field": { - "type": "annotated", <1> - "require_field_match": false - } - } - } -} --------------------------- - -<1> The `annotated` highlighter type is designed for use with annotated_text fields - -The annotated highlighter is based on the `unified` highlighter and supports the same -settings but does not use the `pre_tags` or `post_tags` parameters. Rather than using -html-like markup such as `cat` the annotated highlighter uses the same -markdown-like syntax used for annotations and injects a key=value annotation where `_hit_term` -is the key and the matched search term is the value e.g. - - The [cat](_hit_term=cat) sat on the [mat](sku3578) - -The annotated highlighter tries to be respectful of any existing markup in the original -text: - -* If the search term matches exactly the location of an existing annotation then the -`_hit_term` key is merged into the url-like syntax used in the `(...)` part of the -existing annotation. -* However, if the search term overlaps the span of an existing annotation it would break -the markup formatting so the original annotation is removed in favour of a new annotation -with just the search hit information in the results. 
-* Any non-overlapping annotations in the original text are preserved in highlighter -selections - - -[[mapper-annotated-text-limitations]] -==== Limitations - -The annotated_text field type supports the same mapping settings as the `text` field type -but with the following exceptions: - -* No support for `fielddata` or `fielddata_frequency_filter` -* No support for `index_prefixes` or `index_phrases` indexing - -""" - ), substitutions - ) then: snippets*.test == [false, false, false, false, false, false, false] snippets*.catchPart == [null, null, null, null, null, null, null] - } + snippets*.setup == [null, null, null, null, null, null, null] + snippets*.teardown == [null, null, null, null, null, null, null] + snippets*.testResponse == [false, false, false, false, false, false, false] + snippets*.skip == [null, null, null, null, null, null, null] + snippets*.continued == [false, false, false, false, false, false, false] + snippets*.language == ["console", "js", "js", "console", "console", "console", "console"] + snippets*.contents*.empty == [false, false, false, false, false, false, false] + snippets*.start == expectedSnippetStarts + snippets*.end == expectedSnippetEnds - def "handling test parsing"() { - when: - def substitutions = [] - def snippets = task().parseDocFile( - tempDir, docFile( - """ -[source,console] ----- -POST logs-my_app-default/_rollover/ ----- -// TEST[s/_explain\\/1/_explain\\/1?error_trace=false/ catch:/painless_explain_error/] -""" - ), substitutions - ) - then: - snippets*.test == [true] - snippets*.catchPart == ["/painless_explain_error/"] - substitutions.size() == 1 - substitutions[0].key == "_explain\\/1" - substitutions[0].value == "_explain\\/1?error_trace=false" - - when: - substitutions = [] - snippets = task().parseDocFile( - tempDir, docFile( - """ - -[source,console] ----- -PUT _snapshot/my_hdfs_repository + // test two snippet explicitly for content. + // More coverage on actual parsing is done in unit tests + normalizeString(snippets[0].contents) == """PUT my-index-000001 { - "type": "hdfs", - "settings": { - "uri": "hdfs://namenode:8020/", - "path": "elasticsearch/repositories/my_hdfs_repository", - "conf.dfs.client.read.shortcircuit": "true" + "mappings": { + "properties": { + "my_field": { + "type": "annotated_text" + } + } } -} ----- -// TEST[skip:we don't have hdfs set up while testing this] -""" - ), substitutions - ) - then: - snippets*.test == [true] - snippets*.skip == ["we don't have hdfs set up while testing this"] - } +}""" - def "handling testresponse parsing"() { - when: - def substitutions = [] - def snippets = task().parseDocFile( - tempDir, docFile( - """ -[source,console] ----- -POST logs-my_app-default/_rollover/ ----- -// TESTRESPONSE[s/\\.\\.\\./"script_stack": \$body.error.caused_by.script_stack, "script": \$body.error.caused_by.script, "lang": \$body.error.caused_by.lang, "position": \$body.error.caused_by.position, "caused_by": \$body.error.caused_by.caused_by, "reason": \$body.error.caused_by.reason/] -""" - ), substitutions - ) - then: - snippets*.test == [false] - snippets*.testResponse == [true] - substitutions.size() == 1 - substitutions[0].key == "\\.\\.\\." 
- substitutions[0].value == - "\"script_stack\": \$body.error.caused_by.script_stack, \"script\": \$body.error.caused_by.script, \"lang\": \$body.error.caused_by.lang, \"position\": \$body.error.caused_by.position, \"caused_by\": \$body.error.caused_by.caused_by, \"reason\": \$body.error.caused_by.reason" - - when: - snippets = task().parseDocFile( - tempDir, docFile( - """ -[source,console] ----- -POST logs-my_app-default/_rollover/ ----- -// TESTRESPONSE[skip:no setup made for this example yet] -""" - ), [] - ) - then: - snippets*.test == [false] - snippets*.testResponse == [true] - snippets*.skip == ["no setup made for this example yet"] - - when: - substitutions = [] - snippets = task().parseDocFile( - tempDir, docFile( - """ -[source,txt] ---------------------------------------------------------------------------- -my-index-000001 0 p RELOCATING 3014 31.1mb 192.168.56.10 H5dfFeA -> -> 192.168.56.30 bGG90GE ---------------------------------------------------------------------------- -// TESTRESPONSE[non_json] -""" - ), substitutions - ) - then: - snippets*.test == [false] - snippets*.testResponse == [true] - substitutions.size() == 4 - } - - - def "handling console parsing"() { - when: - def snippets = task().parseDocFile( - tempDir, docFile( - """ -[source,console] ----- - -// $firstToken ----- -""" - ), [] - ) - then: - snippets*.console == [firstToken.equals("CONSOLE")] - - - when: - task().parseDocFile( - tempDir, docFile( - """ -[source,console] ----- -// $firstToken -// $secondToken ----- -""" - ), [] - ) - then: - def e = thrown(InvalidUserDataException) - e.message == "mapping-charfilter.asciidoc:4: Can't be both CONSOLE and NOTCONSOLE" - - when: - task().parseDocFile( - tempDir, docFile( - """ -// $firstToken -// $secondToken -""" - ), [] - ) - then: - e = thrown(InvalidUserDataException) - e.message == "mapping-charfilter.asciidoc:1: $firstToken not paired with a snippet" + normalizeString(snippets[1].contents) == """GET my-index-000001/_analyze +{ + "field": "my_field", + "text":"Investors in [Apple](Apple+Inc.) rejoiced." 
+}""" where: - firstToken << ["CONSOLE", "NOTCONSOLE"] - secondToken << ["NOTCONSOLE", "CONSOLE"] + fileType << ["asciidoc", "mdx"] + expectedSnippetStarts << [[10, 24, 36, 59, 86, 108, 135], [9, 22, 33, 55, 80, 101, 127]] + expectedSnippetEnds << [[21, 30, 55, 75, 105, 132, 158], [20, 28, 52, 71, 99, 125, 150]] } - def "test parsing snippet from doc"() { - def doc = docFile( - """ -[source,console] ----- -GET /_analyze -{ - "tokenizer": "keyword", - "char_filter": [ - { - "type": "mapping", - "mappings": [ - "e => 0", - "m => 1", - "p => 2", - "t => 3", - "y => 4" - ] - } - ], - "text": "My license plate is empty" -} ----- -""" + List parseFile(String fileName) { + def task = ProjectBuilder.builder().build().tasks.register("docSnippetTask", DocSnippetTask).get() + def docFileToParse = docFile(fileName, DocTestUtils.SAMPLE_TEST_DOCS[fileName]) + return task.parseDocFile( + tempDir, docFileToParse ) - def snippets = task().parseDocFile(tempDir, doc, []) - expect: - snippets[0].start == 3 - snippets[0].language == "console" - normalizeString(snippets[0].contents, tempDir) == """GET /_analyze -{ - "tokenizer": "keyword", - "char_filter": [ - { - "type": "mapping", - "mappings": [ - "e => 0", - "m => 1", - "p => 2", - "t => 3", - "y => 4" - ] - } - ], - "text": "My license plate is empty" -}""" } - File docFile(String docContent) { - def file = tempDir.toPath().resolve("mapping-charfilter.asciidoc").toFile() + File docFile(String filename, String docContent) { + def file = tempDir.toPath().resolve(filename).toFile() file.text = docContent return file } - - - private DocSnippetTask task() { - ProjectBuilder.builder().build().tasks.register("docSnippetTask", DocSnippetTask).get() - } - } diff --git a/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/DocTestUtils.groovy b/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/DocTestUtils.groovy new file mode 100644 index 000000000000..350d8638c800 --- /dev/null +++ b/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/DocTestUtils.groovy @@ -0,0 +1,745 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.gradle.internal.doc; + +class DocTestUtils { + public static Map SAMPLE_TEST_DOCS = Map.of( + "example-1.mdx", """ +# mapper-annotated-text +### Mapper annotated text plugin + +experimental[] + +some text + +```console +PUT my-index-000001 +{ + "mappings": { + "properties": { + "my_field": { + "type": "annotated_text" + } + } + } +} +``` + +```js +GET my-index-000001/_analyze +{ + "field": "my_field", + "text":"Investors in [Apple](Apple+Inc.) rejoiced." 
+} +``` +{/* NOTCONSOLE */} + +Response: + +```js +{ + "tokens": [ + { + "token": "investors", + "start_offset": 0, + "end_offset": 9, + "type": "", + "position": 0 + }, + { + "token": "in", + "start_offset": 10, + "end_offset": 12, + "type": "", + "position": 1 + } + ] +} +``` +{/* NOTCONSOLE */} + +```console +# Example documents +PUT my-index-000001/_doc/1 +{ + "my_field": "[Jeff Beck](Jeff+Beck&Guitarist) plays a strat"<2> +} + +# Example search +GET my-index-000001/_search +{ + "query": { + "term": { + "my_field": "Beck" <3> + } + } +} +``` + +<1> More text +<2> Even More +<3> More + +### Headline +#### a smaller headline + +```console +PUT my-index-000001 +{ + "mappings": { + "properties": { + "my_unstructured_text_field": { + "type": "annotated_text" + }, + "my_structured_people_field": { + "type": "text", + "fields": { + "keyword" : { + "type": "keyword" + } + } + } + } + } +} +``` + +```console +# Example documents +PUT my-index-000001/_doc/1 +{ + "my_unstructured_text_field": "[Shay](%40kimchy) created elasticsearch", + "my_twitter_handles": ["@kimchy"] <1> +} + +GET my-index-000001/_search +{ + "query": { + "query_string": { + "query": "elasticsearch OR logstash OR kibana",<2> + "default_field": "my_unstructured_text_field" + } + }, + "aggregations": { + \t"top_people" :{ + \t "significant_terms" : { <3> +\t "field" : "my_twitter_handles.keyword" + \t } + \t} + } +} +``` + +```console +# Example documents +PUT my-index-000001/_doc/1 +{ + "my_field": "The cat sat on the [mat](sku3578)" +} + +GET my-index-000001/_search +{ + "query": { + "query_string": { + "query": "cats" + } + }, + "highlight": { + "fields": { + "my_field": { + "type": "annotated", <1> + "require_field_match": false + } + } + } +} +``` + +* No support for `fielddata` or `fielddata_frequency_filter` +* No support for `index_prefixes` or `index_phrases` indexing + +""", + + "example-1.asciidoc", """ +[[mapper-annotated-text]] +=== Mapper annotated text plugin + +experimental[] + +some text + +[source,console] +-------------------------- +PUT my-index-000001 +{ + "mappings": { + "properties": { + "my_field": { + "type": "annotated_text" + } + } + } +} +-------------------------- + +[source,js] +-------------------------- +GET my-index-000001/_analyze +{ + "field": "my_field", + "text":"Investors in [Apple](Apple+Inc.) rejoiced." 
+} +-------------------------- +// NOTCONSOLE + +Response: + +[source,js] +-------------------------------------------------- +{ + "tokens": [ + { + "token": "investors", + "start_offset": 0, + "end_offset": 9, + "type": "", + "position": 0 + }, + { + "token": "in", + "start_offset": 10, + "end_offset": 12, + "type": "", + "position": 1 + } + ] +} +-------------------------------------------------- +// NOTCONSOLE + +[source,console] +-------------------------- +# Example documents +PUT my-index-000001/_doc/1 +{ + "my_field": "[Jeff Beck](Jeff+Beck&Guitarist) plays a strat"<2> +} + +# Example search +GET my-index-000001/_search +{ + "query": { + "term": { + "my_field": "Beck" <3> + } + } +} +-------------------------- + +<1> More text +<2> Even More +<3> More + +[[mapper-annotated-text-tips]] +==== Headline +===== a smaller headline + +[source,console] +-------------------------- +PUT my-index-000001 +{ + "mappings": { + "properties": { + "my_unstructured_text_field": { + "type": "annotated_text" + }, + "my_structured_people_field": { + "type": "text", + "fields": { + "keyword" : { + "type": "keyword" + } + } + } + } + } +} +-------------------------- + +[source,console] +-------------------------- +# Example documents +PUT my-index-000001/_doc/1 +{ + "my_unstructured_text_field": "[Shay](%40kimchy) created elasticsearch", + "my_twitter_handles": ["@kimchy"] <1> +} + +GET my-index-000001/_search +{ + "query": { + "query_string": { + "query": "elasticsearch OR logstash OR kibana",<2> + "default_field": "my_unstructured_text_field" + } + }, + "aggregations": { + \t"top_people" :{ + \t "significant_terms" : { <3> +\t "field" : "my_twitter_handles.keyword" + \t } + \t} + } +} +-------------------------- + +[source,console] +-------------------------- +# Example documents +PUT my-index-000001/_doc/1 +{ + "my_field": "The cat sat on the [mat](sku3578)" +} + +GET my-index-000001/_search +{ + "query": { + "query_string": { + "query": "cats" + } + }, + "highlight": { + "fields": { + "my_field": { + "type": "annotated", <1> + "require_field_match": false + } + } + } +} +-------------------------- + +* No support for `fielddata` or `fielddata_frequency_filter` +* No support for `index_prefixes` or `index_phrases` indexing + +""", + + + "example-2.asciidoc", """ +[[example-2]] +=== Field context + +Use a Painless script to create a +{ref}/search-fields.html#script-fields[script field] to return +a customized value for each document in the results of a query. + +*Variables* + +`params` (`Map`, read-only):: + User-defined parameters passed in as part of the query. + +`doc` (`Map`, read-only):: + Contains the fields of the specified document where each field is a + `List` of values. + +{ref}/mapping-source-field.html[`params['_source']`] (`Map`, read-only):: + Contains extracted JSON in a `Map` and `List` structure for the fields + existing in a stored document. + +*Return* + +`Object`:: + The customized value for each document. + +*API* + +Both the standard <> and +<> are available. + + +*Example* + +To run this example, first follow the steps in +<>. + +You can then use these two example scripts to compute custom information +for each search hit and output it to two new fields. + +The first script gets the doc value for the `datetime` field and calls +the `getDayOfWeekEnum` function to determine the corresponding day of the week. + +[source,Painless] +---- +doc['datetime'].value.getDayOfWeekEnum().getDisplayName(TextStyle.FULL, Locale.ROOT) +---- + +The second script calculates the number of actors. 
Actors' names are stored +as a keyword array in the `actors` field. + +[source,Painless] +---- +doc['actors'].size() <1> +---- + +<1> By default, doc values are not available for `text` fields. If `actors` was +a `text` field, you could still calculate the number of actors by extracting +values from `_source` with `params['_source']['actors'].size()`. + +The following request returns the calculated day of week and the number of +actors that appear in each play: + +[source,console] +---- +GET seats/_search +{ + "size": 2, + "query": { + "match_all": {} + }, + "script_fields": { + "day-of-week": { + "script": { + "source": "doc['datetime'].value.getDayOfWeekEnum().getDisplayName(TextStyle.FULL, Locale.ROOT)" + } + }, + "number-of-actors": { + "script": { + "source": "doc['actors'].size()" + } + } + } +} +---- +// TEST[setup:seats] + +[source,console-result] +---- +{ + "took" : 68, + "timed_out" : false, + "_shards" : { + "total" : 1, + "successful" : 1, + "skipped" : 0, + "failed" : 0 + }, + "hits" : { + "total" : { + "value" : 11, + "relation" : "eq" + }, + "max_score" : 1.0, + "hits" : [ + { + "_index" : "seats", + "_id" : "1", + "_score" : 1.0, + "fields" : { + "day-of-week" : [ + "Thursday" + ], + "number-of-actors" : [ + 4 + ] + } + }, + { + "_index" : "seats", + "_id" : "2", + "_score" : 1.0, + "fields" : { + "day-of-week" : [ + "Thursday" + ], + "number-of-actors" : [ + 1 + ] + } + } + ] + } +} +---- +// TESTRESPONSE[s/"took" : 68/"took" : "\$body.took"/] +""", + "example-2.mdx", """--- +id: enElasticsearchPainlessPainlessFieldContext +slug: /en/elasticsearch/painless/example-2 +title: Field context +description: Description to be written +tags: [] +--- + +
+ +Use a Painless script to create a +[script field](((ref))/search-fields.html#script-fields) to return +a customized value for each document in the results of a query. + +**Variables** + +`params` (`Map`, read-only) + : User-defined parameters passed in as part of the query. + +`doc` (`Map`, read-only) + : Contains the fields of the specified document where each field is a + `List` of values. + +[`params['_source']`](((ref))/mapping-source-field.html) (`Map`, read-only) + : Contains extracted JSON in a `Map` and `List` structure for the fields + existing in a stored document. + +**Return** + +`Object` + : The customized value for each document. + +**API** + +Both the standard Painless API and +Specialized Field API are available. + +**Example** + +To run this example, first follow the steps in +context examples. + +You can then use these two example scripts to compute custom information +for each search hit and output it to two new fields. + +The first script gets the doc value for the `datetime` field and calls +the `getDayOfWeekEnum` function to determine the corresponding day of the week. + +```Painless +doc['datetime'].value.getDayOfWeekEnum().getDisplayName(TextStyle.FULL, Locale.ROOT) +``` + +The second script calculates the number of actors. Actors' names are stored +as a keyword array in the `actors` field. + +```Painless +doc['actors'].size() [^1] +``` +[^1]: By default, doc values are not available for `text` fields. If `actors` was +a `text` field, you could still calculate the number of actors by extracting +values from `_source` with `params['_source']['actors'].size()`. + +The following request returns the calculated day of week and the number of +actors that appear in each play: + +```console +GET seats/_search +{ + "size": 2, + "query": { + "match_all": {} + }, + "script_fields": { + "day-of-week": { + "script": { + "source": "doc['datetime'].value.getDayOfWeekEnum().getDisplayName(TextStyle.FULL, Locale.ROOT)" + } + }, + "number-of-actors": { + "script": { + "source": "doc['actors'].size()" + } + } + } +} +``` +{/* TEST[setup:seats] */} + +```console-result +{ + "took" : 68, + "timed_out" : false, + "_shards" : { + "total" : 1, + "successful" : 1, + "skipped" : 0, + "failed" : 0 + }, + "hits" : { + "total" : { + "value" : 11, + "relation" : "eq" + }, + "max_score" : 1.0, + "hits" : [ + { + "_index" : "seats", + "_id" : "1", + "_score" : 1.0, + "fields" : { + "day-of-week" : [ + "Thursday" + ], + "number-of-actors" : [ + 4 + ] + } + }, + { + "_index" : "seats", + "_id" : "2", + "_score" : 1.0, + "fields" : { + "day-of-week" : [ + "Thursday" + ], + "number-of-actors" : [ + 1 + ] + } + } + ] + } +} +``` +{/* TESTRESPONSE[s/"took" : 68/"took" : "\$body.took"/] */} +""", + "example-2-different.mdx", """--- +id: enElasticsearchPainlessPainlessFieldContext +slug: /en/elasticsearch/painless/example-2 +title: Field context +description: Description to be written +tags: [] +--- + +
+ +Use a Painless script to create a +[script field](((ref))/search-fields.html#script-fields) to return +a customized value for each document in the results of a query. + +**Variables** + +`params` (`Map`, read-only) + : User-defined parameters passed in as part of the query. + +`doc` (`Map`, read-only) + : Contains the fields of the specified document where each field is a + `List` of values. + +[`params['_source']`](((ref))/mapping-source-field.html) (`Map`, read-only) + : Contains extracted JSON in a `Map` and `List` structure for the fields + existing in a stored document. + +**Return** + +`Object` + : The customized value for each document. + +**API** + +Both the standard Painless API and +Specialized Field API are available. + +**Example** + +To run this example, first follow the steps in +context examples. + +You can then use these two example scripts to compute custom information +for each search hit and output it to two new fields. + +The first script gets the doc value for the `datetime` field and calls +the `getDayOfWeekEnum` function to determine the corresponding day of the week. + +```Painless +doc['datetime'].value.getDayOfWeekEnum().getDisplayName(TextStyle.FULL, Locale.ROOT) +``` + +The second script calculates the number of actors. Actors' names are stored +as a keyword array in the `actors` field. + +```Painless +doc['actresses'].size() [^1] +``` +[^1]: By default, doc values are not available for `text` fields. If `actors` was +a `text` field, you could still calculate the number of actors by extracting +values from `_source` with `params['_source']['actors'].size()`. + +The following request returns the calculated day of week and the number of +actors that appear in each play: + +```console +GET seats/_search +{ + "size": 2, + "query": { + "match_all": {} + }, + "script_fields": { + "day-of-week": { + "script": { + "source": "doc['datetime'].value.getDayOfWeekEnum().getDisplayName(TextStyle.FULL, Locale.ROOT)" + } + }, + "number-of-actors": { + "script": { + "source": "doc['actors'].size()" + } + } + } +} +``` +{/* TEST[setup:seats] */} + +```console-result +{ + "took" : 68, + "timed_out" : false, + "_shards" : { + "total" : 1, + "successful" : 1, + "skipped" : 0, + "failed" : 0 + }, + "hits" : { + "total" : { + "value" : 11, + "relation" : "eq" + }, + "max_score" : 1.0, + "hits" : [ + { + "_index" : "seats", + "_id" : "1", + "_score" : 1.0, + "fields" : { + "day-of-week" : [ + "Thursday" + ], + "number-of-actors" : [ + 4 + ] + } + }, + { + "_index" : "seats", + "_id" : "2", + "_score" : 1.0, + "fields" : { + "day-of-week" : [ + "Thursday" + ], + "number-of-actors" : [ + 1 + ] + } + } + ] + } +} +``` +{/* TESTRESPONSE[s/"took" : 68/"took" : "\$body.took"/] */} +""") +} diff --git a/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/MdxSnippetParserSpec.groovy b/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/MdxSnippetParserSpec.groovy new file mode 100644 index 000000000000..020b920de3d0 --- /dev/null +++ b/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/MdxSnippetParserSpec.groovy @@ -0,0 +1,173 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.gradle.internal.doc + +class MdxSnippetParserSpec extends AbstractSnippetParserSpec { + + @Override + SnippetParser parser(Map defaultSubstitutions = [:]) { + return new MdxSnippetParser(defaultSubstitutions) + } + + @Override + String docSnippetWithTest() { + return """```console +PUT /hockey/_doc/1?refresh +{"first":"johnny","last":"gaudreau","goals":[9,27,1],"assists":[17,46,0],"gp":[26,82,1]} + +POST /hockey/_explain/1 +{ + "query": { + "script": { + "script": "Debug.explain(doc.goals)" + } + } +} +``` +{/* TEST[s/_explain\\/1/_explain\\/1?error_trace=false/ catch:/painless_explain_error/] */} +{/* TEST[teardown:some_teardown] */} +{/* TEST[setup:seats] */} +{/* TEST[warning:some_warning] */} +{/* TEST[skip_shard_failures] */} + +""" + } + + @Override + String docSnippetWithRepetitiveSubstiutions() { + return """```console +GET /_cat/snapshots/repo1?v=true&s=id +``` +{/* TEST[s/^/PUT \\/_snapshot\\/repo1\\/snap1?wait_for_completion=true\\n/] */} +{/* TEST[s/^/PUT \\/_snapshot\\/repo1\\/snap2?wait_for_completion=true\\n/] */} +{/* TEST[s/^/PUT \\/_snapshot\\/repo1\\n{"type": "fs", "settings": {"location": "repo\\/1"}}\\n/] */} +""" + } + + @Override + String docSnippetWithConsole() { + return """ +```console +{/* CONSOLE */} +``` +""" + } + + @Override + String docSnippetWithNotConsole() { + return """ +```console +{/* NOTCONSOLE */} +``` +""" } + + @Override + String docSnippetWithMixedConsoleNotConsole() { + return """ +```console +{/* CONSOLE */} +{/* NOTCONSOLE */} +``` +""" } + + @Override + String docSnippetWithTestResponses() { + return """```console-result +{ + "docs" : [ + { + "processor_results" : [ + { + "processor_type" : "set", + "status" : "success", + "doc" : { + "_index" : "index", + "_id" : "id", + "_version": "-3", + "_source" : { + "field2" : "_value2", + "foo" : "bar" + }, + "_ingest" : { + "pipeline" : "_simulate_pipeline", + "timestamp" : "2020-07-30T01:21:24.251836Z" + } + } + }, + { + "processor_type" : "set", + "status" : "success", + "doc" : { + "_index" : "index", + "_id" : "id", + "_version": "-3", + "_source" : { + "field3" : "_value3", + "field2" : "_value2", + "foo" : "bar" + }, + "_ingest" : { + "pipeline" : "_simulate_pipeline", + "timestamp" : "2020-07-30T01:21:24.251836Z" + } + } + } + ] + }, + { + "processor_results" : [ + { + "processor_type" : "set", + "status" : "success", + "doc" : { + "_index" : "index", + "_id" : "id", + "_version": "-3", + "_source" : { + "field2" : "_value2", + "foo" : "rab" + }, + "_ingest" : { + "pipeline" : "_simulate_pipeline", + "timestamp" : "2020-07-30T01:21:24.251863Z" + } + } + }, + { + "processor_type" : "set", + "status" : "success", + "doc" : { + "_index" : "index", + "_id" : "id", + "_version": "-3", + "_source" : { + "field3" : "_value3", + "field2" : "_value2", + "foo" : "rab" + }, + "_ingest" : { + "pipeline" : "_simulate_pipeline", + "timestamp" : "2020-07-30T01:21:24.251863Z" + } + } + } + ] + } + ] +} +``` +{/* TESTRESPONSE[s/"2020-07-30T01:21:24.251836Z"/\$body.docs.0.processor_results.0.doc._ingest.timestamp/] */} +{/* TESTRESPONSE[s/"2020-07-30T01:21:24.251836Z"/\$body.docs.0.processor_results.1.doc._ingest.timestamp/] */} +{/* TESTRESPONSE[s/"2020-07-30T01:21:24.251863Z"/\$body.docs.1.processor_results.0.doc._ingest.timestamp/] */} +{/* TESTRESPONSE[s/"2020-07-30T01:21:24.251863Z"/\$body.docs.1.processor_results.1.doc._ingest.timestamp/] */} +{/* TESTRESPONSE[skip:some_skip_message] */} +""" + } + +} diff --git 
a/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/RestTestsFromDocSnippetTaskSpec.groovy b/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/RestTestsFromDocSnippetTaskSpec.groovy
index 45d389212195..dde1931afaa4 100644
--- a/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/RestTestsFromDocSnippetTaskSpec.groovy
+++ b/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/RestTestsFromDocSnippetTaskSpec.groovy
@@ -11,9 +11,11 @@ package org.elasticsearch.gradle.internal.doc
 import spock.lang.Specification
 import spock.lang.TempDir
 
+import org.gradle.api.GradleException
 import org.gradle.api.InvalidUserDataException
 import org.gradle.testfixtures.ProjectBuilder
 
+import static org.elasticsearch.gradle.internal.doc.DocTestUtils.SAMPLE_TEST_DOCS
 import static org.elasticsearch.gradle.internal.doc.RestTestsFromDocSnippetTask.replaceBlockQuote
 import static org.elasticsearch.gradle.internal.doc.RestTestsFromDocSnippetTask.shouldAddShardFailureCheck
 import static org.elasticsearch.gradle.internal.test.TestUtils.normalizeString
@@ -56,303 +58,139 @@ class RestTestsFromDocSnippetTaskSpec extends Specification {
         shouldAddShardFailureCheck("_ml/datafeeds/datafeed-id/_preview") == false
     }
 
-    def "can create rest tests from docs"() {
-        def build = ProjectBuilder.builder().build()
+    def "can generate test files from asciidoc and mdx"() {
         given:
-        def task = build.tasks.create("restTestFromSnippet", RestTestsFromDocSnippetTask)
-        task.expectedUnconvertedCandidates = ["ml-update-snapshot.asciidoc", "reference/security/authorization/run-as-privilege.asciidoc"]
-        docs()
+        def build = ProjectBuilder.builder().build()
+        def task = build.tasks.register("restTestFromSnippet", RestTestsFromDocSnippetTask).get()
+        task.expectedUnconvertedCandidates = []
         task.docs = build.fileTree(new File(tempDir, "docs"))
         task.testRoot.convention(build.getLayout().buildDirectory.dir("rest-tests"));
-
+        docFile('docs/example-2-asciidoc.asciidoc', SAMPLE_TEST_DOCS['example-2.asciidoc'])
+        docFile('docs/example-2-mdx.mdx', SAMPLE_TEST_DOCS['example-2.mdx'])
+        task.getSetups().put(
+            "seats", """
+'''
+  - do:
+      indices.create:
+        index: seats
+        body:
+          settings:
+            number_of_shards: 1
+            number_of_replicas: 0
+          mappings:
+            properties:
+              theatre:
+                type: keyword
+"""
+        )
         when:
         task.getActions().forEach { it.execute(task) }
-        def restSpec = new File(task.getTestRoot().get().getAsFile(), "rest-api-spec/test/painless-debugging.yml")
 
         then:
-        restSpec.exists()
-        normalizeString(restSpec.text, tempDir) == """---
-"line_22":
-  - skip:
-      features:
-        - default_shards
-        - stash_in_key
-        - stash_in_path
-        - stash_path_replace
-        - warnings
+        def restSpecFromAsciidoc = new File(task.getTestRoot().get().getAsFile(), "rest-api-spec/test/example-2-asciidoc.yml")
+        def restSpecFromMdx = new File(task.getTestRoot().get().getAsFile(), "rest-api-spec/test/example-2-mdx.yml")
+        normalizeRestSpec(restSpecFromAsciidoc.text) == normalizeRestSpec(restSpecFromMdx.text)
+    }
+
+    def "task fails on same doc source file with supported different extension"() {
+        given:
+        def build = ProjectBuilder.builder().build()
+        def task = build.tasks.register("restTestFromSnippet", RestTestsFromDocSnippetTask).get()
+        task.expectedUnconvertedCandidates = []
+        task.docs = build.fileTree(new File(tempDir, "docs"))
+        task.testRoot.convention(build.getLayout().buildDirectory.dir("rest-tests"));
+        docFile('docs/example-2.asciidoc', SAMPLE_TEST_DOCS['example-2.asciidoc'])
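+        // a second fixture with the same basename but a different extension is exactly the collision this test expects to fail
+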
docFile('docs/example-2.mdx', SAMPLE_TEST_DOCS['example-2.mdx']) + task.getSetups().put( + "seats", """ +''' - do: - raw: - method: PUT - path: "hockey/_doc/1" - refresh: "" - body: | - {"first":"johnny","last":"gaudreau","goals":[9,27,1],"assists":[17,46,0],"gp":[26,82,1]} - - is_false: _shards.failures + indices.create: + index: seats + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + theatre: + type: keyword +""" + ) + when: + task.getActions().forEach { it.execute(task) } + + then: + def e = thrown(GradleException) + e.message == "Found multiple files with the same name 'example-2' but different extensions: [asciidoc, mdx]" + } + + def "can run in migration mode to compare same doc source file with supported different extension"() { + given: + def build = ProjectBuilder.builder().build() + def task = build.tasks.register("restTestFromSnippet", RestTestsFromDocSnippetTask).get() + task.expectedUnconvertedCandidates = [] + task.migrationMode = true + task.docs = build.fileTree(new File(tempDir, "docs")) + task.testRoot.convention(build.getLayout().buildDirectory.dir("rest-tests")); + docFile('docs/example-2.asciidoc', SAMPLE_TEST_DOCS['example-2.asciidoc']) + docFile('docs/example-2.mdx', SAMPLE_TEST_DOCS['example-2.mdx']) + task.getSetups().put( + "seats", """ +''' - do: - catch: /painless_explain_error/ - raw: - method: POST - path: "hockey/_explain/1" - error_trace: "false" - body: | - { - "query": { - "script": { - "script": "Debug.explain(doc.goals)" - } - } - } - - is_false: _shards.failures - - match: - \$body: - { - "error": { - "type": "script_exception", - "to_string": "[1, 9, 27]", - "painless_class": "org.elasticsearch.index.fielddata.ScriptDocValues.Longs", - "java_class": "org.elasticsearch.index.fielddata.ScriptDocValues\$Longs", - "script_stack": \$body.error.script_stack, "script": \$body.error.script, "lang": \$body.error.lang, "position": \$body.error.position, "caused_by": \$body.error.caused_by, "root_cause": \$body.error.root_cause, "reason": \$body.error.reason - }, - "status": 400 - } + indices.create: + index: seats + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + theatre: + type: keyword +""" + ) + when: + task.getActions().forEach { it.execute(task) } + + then: + new File(task.getTestRoot().get().getAsFile(), "rest-api-spec/test/example-2.asciidoc.yml").exists() + new File(task.getTestRoot().get().getAsFile(), "rest-api-spec/test/example-2.mdx.yml").exists() + } + + def "fails in migration mode for same doc source file with different extension generates different spec"() { + given: + def build = ProjectBuilder.builder().build() + def task = build.tasks.register("restTestFromSnippet", RestTestsFromDocSnippetTask).get() + task.getMigrationMode().set(true) + task.docs = build.fileTree(new File(tempDir, "docs")) + task.testRoot.convention(build.getLayout().buildDirectory.dir("rest-tests")); + docFile('docs/example-2.asciidoc', SAMPLE_TEST_DOCS['example-2.asciidoc']) + docFile('docs/example-2.mdx', SAMPLE_TEST_DOCS['example-2-different.mdx']) + task.getSetups().put( + "seats", """ +''' - do: - catch: /painless_explain_error/ - raw: - method: POST - path: "hockey/_update/1" - error_trace: "false" - body: | - { - "script": "Debug.explain(ctx._source)" - } - - is_false: _shards.failures - - match: - \$body: - { - "error" : { - "root_cause": \$body.error.root_cause, - "type": "illegal_argument_exception", - "reason": "failed to execute script", - "caused_by": { - "type": 
"script_exception", - "to_string": \$body.error.caused_by.to_string, - "painless_class": "java.util.LinkedHashMap", - "java_class": "java.util.LinkedHashMap", - "script_stack": \$body.error.caused_by.script_stack, "script": \$body.error.caused_by.script, "lang": \$body.error.caused_by.lang, "position": \$body.error.caused_by.position, "caused_by": \$body.error.caused_by.caused_by, "reason": \$body.error.caused_by.reason - } - }, - "status": 400 - }""" - def restSpec2 = new File(task.testRoot.get().getAsFile(), "rest-api-spec/test/ml-update-snapshot.yml") - restSpec2.exists() - normalizeString(restSpec2.text, tempDir) == """--- -"line_50": - - skip: - features: - - default_shards - - stash_in_key - - stash_in_path - - stash_path_replace - - warnings - - always_skip - reason: todo - - do: - raw: - method: POST - path: "_ml/anomaly_detectors/it_ops_new_logs/model_snapshots/1491852978/_update" - body: | - { - "description": "Snapshot 1", - "retain": true - } - - is_false: _shards.failures""" - def restSpec3 = new File(task.testRoot.get().getAsFile(), "rest-api-spec/test/reference/sql/getting-started.yml") - restSpec3.exists() - normalizeString(restSpec3.text, tempDir) == """--- -"line_10": - - skip: - features: - - default_shards - - stash_in_key - - stash_in_path - - stash_path_replace - - warnings - - do: - raw: - method: PUT - path: "library/_bulk" - refresh: "" - body: | - {"index":{"_id": "Leviathan Wakes"}} - {"name": "Leviathan Wakes", "author": "James S.A. Corey", "release_date": "2011-06-02", "page_count": 561} - {"index":{"_id": "Hyperion"}} - {"name": "Hyperion", "author": "Dan Simmons", "release_date": "1989-05-26", "page_count": 482} - {"index":{"_id": "Dune"}} - {"name": "Dune", "author": "Frank Herbert", "release_date": "1965-06-01", "page_count": 604} - - is_false: _shards.failures - - do: - raw: - method: POST - path: "_sql" - format: "txt" - body: | - { - "query": "SELECT * FROM library WHERE release_date < '2000-01-01'" - } - - is_false: _shards.failures - - match: - \$body: - / /s+author /s+/| /s+name /s+/| /s+page_count /s+/| /s+release_date/s* - ---------------/+---------------/+---------------/+------------------------/s* - Dan /s+Simmons /s+/|Hyperion /s+/|482 /s+/|1989-05-26T00:00:00.000Z/s* - Frank /s+Herbert /s+/|Dune /s+/|604 /s+/|1965-06-01T00:00:00.000Z/s*/""" - def restSpec4 = new File(task.testRoot.get().getAsFile(), "rest-api-spec/test/reference/security/authorization/run-as-privilege.yml") - restSpec4.exists() - normalizeString(restSpec4.text, tempDir) == """--- -"line_51": - - skip: - features: - - default_shards - - stash_in_key - - stash_in_path - - stash_path_replace - - warnings - - do: - raw: - method: POST - path: "_security/role/my_director" - refresh: "true" - body: | - { - "cluster": ["manage"], - "indices": [ - { - "names": [ "index1", "index2" ], - "privileges": [ "manage" ] - } - ], - "run_as": [ "jacknich", "rdeniro" ], - "metadata" : { - "version" : 1 - } - } - - is_false: _shards.failures ---- -"line_114": - - skip: - features: - - default_shards - - stash_in_key - - stash_in_path - - stash_path_replace - - warnings - - do: - raw: - method: POST - path: "_security/role/my_admin_role" - refresh: "true" - body: | - { - "cluster": ["manage"], - "indices": [ - { - "names": [ "index1", "index2" ], - "privileges": [ "manage" ] - } - ], - "applications": [ - { - "application": "myapp", - "privileges": [ "admin", "read" ], - "resources": [ "*" ] - } - ], - "run_as": [ "analyst_user" ], - "metadata" : { - "version" : 1 - } - } - - is_false: 
----
-"line_143":
-  - skip:
-      features:
-        - default_shards
-        - stash_in_key
-        - stash_in_path
-        - stash_path_replace
-        - warnings
-  - do:
-      raw:
-        method: POST
-        path: "_security/role/my_analyst_role"
-        refresh: "true"
-        body: |
-          {
-            "cluster": [ "monitor"],
-            "indices": [
-              {
-                "names": [ "index1", "index2" ],
-                "privileges": ["manage"]
-              }
-            ],
-            "applications": [
-              {
-                "application": "myapp",
-                "privileges": [ "read" ],
-                "resources": [ "*" ]
-              }
-            ],
-            "metadata" : {
-              "version" : 1
-            }
-          }
-  - is_false: _shards.failures
----
-"line_170":
-  - skip:
-      features:
-        - default_shards
-        - stash_in_key
-        - stash_in_path
-        - stash_path_replace
-        - warnings
-  - do:
-      raw:
-        method: POST
-        path: "_security/user/admin_user"
-        refresh: "true"
-        body: |
-          {
-            "password": "l0ng-r4nd0m-p@ssw0rd",
-            "roles": [ "my_admin_role" ],
-            "full_name": "Eirian Zola",
-            "metadata": { "intelligence" : 7}
-          }
-  - is_false: _shards.failures
----
-"line_184":
-  - skip:
-      features:
-        - default_shards
-        - stash_in_key
-        - stash_in_path
-        - stash_path_replace
-        - warnings
-  - do:
-      raw:
-        method: POST
-        path: "_security/user/analyst_user"
-        refresh: "true"
-        body: |
-          {
-            "password": "l0nger-r4nd0mer-p@ssw0rd",
-            "roles": [ "my_analyst_role" ],
-            "full_name": "Monday Jaffe",
-            "metadata": { "innovation" : 8}
-          }
-  - is_false: _shards.failures"""
-}
+  indices.create:
+    index: seats
+    body:
+      settings:
+        number_of_shards: 1
+        number_of_replicas: 0
+      mappings:
+        properties:
+          theatre:
+            type: keyword
+"""
+        )
+        when:
+        task.getActions().forEach { it.execute(task) }
+
+        then:
+        new File(task.getTestRoot().get().getAsFile(), "rest-api-spec/test/example-2.asciidoc.yml").exists()
+        new File(task.getTestRoot().get().getAsFile(), "rest-api-spec/test/example-2.mdx.yml").exists()
+    }

     File docFile(String fileName, String docContent) {
         def file = tempDir.toPath().resolve(fileName).toFile()
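The three tests above cover the new migration mode end to end: duplicate doc sources with different extensions fail a normal run, while a migration-mode run emits one generated spec per source extension so the two can be compared. As a minimal sketch (not part of the patch; the task name `buildRestTests` and the Groovy DSL property assignments are assumptions), a build script could opt a snippet task into the same behaviour like this:

```groovy
// Hypothetical wiring; the property names come from the tests above.
tasks.named("buildRestTests", RestTestsFromDocSnippetTask) {
    migrationMode = true                                  // emit one spec per source extension
    docs = fileTree("docs")                               // may contain both .asciidoc and .mdx twins
    testRoot = layout.buildDirectory.dir("rest-tests")    // where the generated specs land
}
```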
Corey", "release_date": "2011-06-02", "page_count": 561} -{"index":{"_id": "Hyperion"}} -{"name": "Hyperion", "author": "Dan Simmons", "release_date": "1989-05-26", "page_count": 482} -{"index":{"_id": "Dune"}} -{"name": "Dune", "author": "Frank Herbert", "release_date": "1965-06-01", "page_count": 604} --------------------------------------------------- - -And now you can execute SQL using the <>: - -[source,console] --------------------------------------------------- -POST /_sql?format=txt -{ - "query": "SELECT * FROM library WHERE release_date < '2000-01-01'" -} --------------------------------------------------- -// TEST[continued] - -Which should return something along the lines of: - -[source,text] --------------------------------------------------- - author | name | page_count | release_date ----------------+---------------+---------------+------------------------ -Dan Simmons |Hyperion |482 |1989-05-26T00:00:00.000Z -Frank Herbert |Dune |604 |1965-06-01T00:00:00.000Z --------------------------------------------------- -// TESTRESPONSE[s/\\|/\\\\|/ s/\\+/\\\\+/] -// TESTRESPONSE[non_json] - -You can also use the <>. There is a script to start it -shipped in x-pack's bin directory: - -[source,bash] --------------------------------------------------- -\$ ./bin/elasticsearch-sql-cli --------------------------------------------------- - -From there you can run the same query: - -[source,sqlcli] --------------------------------------------------- -sql> SELECT * FROM library WHERE release_date < '2000-01-01'; - author | name | page_count | release_date ----------------+---------------+---------------+------------------------ -Dan Simmons |Hyperion |482 |1989-05-26T00:00:00.000Z -Frank Herbert |Dune |604 |1965-06-01T00:00:00.000Z --------------------------------------------------- -""" - ) - docFile( - "docs/ml-update-snapshot.asciidoc", - """ -[role="xpack"] -[[ml-update-snapshot]] -= Update model snapshots API -++++ -Update model snapshots -++++ - -Updates certain properties of a snapshot. - -[[ml-update-snapshot-request]] -== {api-request-title} - -`POST _ml/anomaly_detectors//model_snapshots//_update` - -[[ml-update-snapshot-prereqs]] -== {api-prereq-title} - -Requires the `manage_ml` cluster privilege. This privilege is included in the -`machine_learning_admin` built-in role. - -[[ml-update-snapshot-path-parms]] -== {api-path-parms-title} - -``:: -(Required, string) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] - -``:: -(Required, string) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=snapshot-id] - -[[ml-update-snapshot-request-body]] -== {api-request-body-title} - -The following properties can be updated after the model snapshot is created: - -`description`:: -(Optional, string) A description of the model snapshot. - -`retain`:: -(Optional, Boolean) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=retain] - - -[[ml-update-snapshot-example]] -== {api-examples-title} - -[source,console] --------------------------------------------------- -POST -_ml/anomaly_detectors/it_ops_new_logs/model_snapshots/1491852978/_update -{ - "description": "Snapshot 1", - "retain": true -} --------------------------------------------------- -// TEST[skip:todo] - -When the snapshot is updated, you receive the following results: -[source,js] ----- -{ - "acknowledged": true, - "model": { - "job_id": "it_ops_new_logs", - "timestamp": 1491852978000, - "description": "Snapshot 1", -... 
- "retain": true - } -} ----- - -""" - ) - - docFile( - "docs/painless-debugging.asciidoc", - """ - -[[painless-debugging]] -=== Painless Debugging - -==== Debug.Explain - -Painless doesn't have a -{wikipedia}/Read%E2%80%93eval%E2%80%93print_loop[REPL] -and while it'd be nice for it to have one day, it wouldn't tell you the -whole story around debugging painless scripts embedded in Elasticsearch because -the data that the scripts have access to or "context" is so important. For now -the best way to debug embedded scripts is by throwing exceptions at choice -places. While you can throw your own exceptions -(`throw new Exception('whatever')`), Painless's sandbox prevents you from -accessing useful information like the type of an object. So Painless has a -utility method, `Debug.explain` which throws the exception for you. For -example, you can use {ref}/search-explain.html[`_explain`] to explore the -context available to a {ref}/query-dsl-script-query.html[script query]. - -[source,console] ---------------------------------------------------------- -PUT /hockey/_doc/1?refresh -{"first":"johnny","last":"gaudreau","goals":[9,27,1],"assists":[17,46,0],"gp":[26,82,1]} - -POST /hockey/_explain/1 -{ - "query": { - "script": { - "script": "Debug.explain(doc.goals)" - } - } -} ---------------------------------------------------------- -// TEST[s/_explain\\/1/_explain\\/1?error_trace=false/ catch:/painless_explain_error/] -// The test system sends error_trace=true by default for easier debugging so -// we have to override it to get a normal shaped response - -Which shows that the class of `doc.first` is -`org.elasticsearch.index.fielddata.ScriptDocValues.Longs` by responding with: - -[source,console-result] ---------------------------------------------------------- -{ - "error": { - "type": "script_exception", - "to_string": "[1, 9, 27]", - "painless_class": "org.elasticsearch.index.fielddata.ScriptDocValues.Longs", - "java_class": "org.elasticsearch.index.fielddata.ScriptDocValues\$Longs", - ... - }, - "status": 400 -} ---------------------------------------------------------- -// TESTRESPONSE[s/\\.\\.\\./"script_stack": \$body.error.script_stack, "script": \$body.error.script, "lang": \$body.error.lang, "position": \$body.error.position, "caused_by": \$body.error.caused_by, "root_cause": \$body.error.root_cause, "reason": \$body.error.reason/] - -You can use the same trick to see that `_source` is a `LinkedHashMap` -in the `_update` API: - -[source,console] ---------------------------------------------------------- -POST /hockey/_update/1 -{ - "script": "Debug.explain(ctx._source)" -} ---------------------------------------------------------- -// TEST[continued s/_update\\/1/_update\\/1?error_trace=false/ catch:/painless_explain_error/] - -The response looks like: - -[source,console-result] ---------------------------------------------------------- -{ - "error" : { - "root_cause": ..., - "type": "illegal_argument_exception", - "reason": "failed to execute script", - "caused_by": { - "type": "script_exception", - "to_string": "{gp=[26, 82, 1], last=gaudreau, assists=[17, 46, 0], first=johnny, goals=[9, 27, 1]}", - "painless_class": "java.util.LinkedHashMap", - "java_class": "java.util.LinkedHashMap", - ... 
-    }
-  },
-  "status": 400
-}
----------------------------------------------------------
-// TESTRESPONSE[s/"root_cause": \\.\\.\\./"root_cause": \$body.error.root_cause/]
-// TESTRESPONSE[s/\\.\\.\\./"script_stack": \$body.error.caused_by.script_stack, "script": \$body.error.caused_by.script, "lang": \$body.error.caused_by.lang, "position": \$body.error.caused_by.position, "caused_by": \$body.error.caused_by.caused_by, "reason": \$body.error.caused_by.reason/]
-// TESTRESPONSE[s/"to_string": ".+"/"to_string": \$body.error.caused_by.to_string/]
-
-Once you have a class you can go to <> to see a list of
-available methods.
-
-"""
-        )
-        docFile(
-            "docs/reference/security/authorization/run-as-privilege.asciidoc",
-            """[role="xpack"]
-[[run-as-privilege]]
-= Submitting requests on behalf of other users
-
-{es} roles support a `run_as` privilege that enables an authenticated user to
-submit requests on behalf of other users. For example, if your external
-application is trusted to authenticate users, {es} can authenticate the external
-application and use the _run as_ mechanism to issue authorized requests as
-other users without having to re-authenticate each user.
-
-To "run as" (impersonate) another user, the first user (the authenticating user)
-must be authenticated by a mechanism that supports run-as delegation. The second
-user (the `run_as` user) must be authorized by a mechanism that supports
-delegated run-as lookups by username.
-
-The `run_as` privilege essentially operates like a secondary form of
-<>. Delegated authorization applies
-to the authenticating user, and the `run_as` privilege applies to the user who
-is being impersonated.
-
-Authenticating user::
---
-For the authenticating user, the following realms (plus API keys) all support
-`run_as` delegation: `native`, `file`, Active Directory, JWT, Kerberos, LDAP and
-PKI.
-
-Service tokens, the {es} Token Service, SAML 2.0, and OIDC 1.0 do not
-support `run_as` delegation.
---
-
-`run_as` user::
---
-{es} supports `run_as` for any realm that supports user lookup.
-Not all realms support user lookup. Refer to the list of <>
-and ensure that the realm you wish to use is configured in a manner that
-supports user lookup.
-
-The `run_as` user must be retrieved from a <> - it is not
-possible to run as a
-<>,
-<> or
-<>.
---
-
-To submit requests on behalf of other users, you need to have the `run_as`
-privilege in your <>. For example, the following request
-creates a `my_director` role that grants permission to submit requests on behalf
-of `jacknich` or `rdeniro`:
-
-[source,console]
------
-POST /_security/role/my_director?refresh=true
-{
-  "cluster": ["manage"],
-  "indices": [
-    {
-      "names": [ "index1", "index2" ],
-      "privileges": [ "manage" ]
-    }
-  ],
-  "run_as": [ "jacknich", "rdeniro" ],
-  "metadata" : {
-    "version" : 1
-  }
-}
------
-
-To submit a request as another user, you specify the user in the
-`es-security-runas-user` request header. For example:
-
-[source,sh]
------
-curl -H "es-security-runas-user: jacknich" -u es-admin -X GET http://localhost:9200/
------
-
-The `run_as` user passed in through the `es-security-runas-user` header must be
-available from a realm that supports delegated authorization lookup by username.
-Realms that don't support user lookup can't be used by `run_as` delegation from
-other realms.
-
-For example, JWT realms can authenticate external users specified in JWTs, and
-execute requests as a `run_as` user in the `native` realm. {es} will retrieve the
-indicated `run_as` user and execute the request as that user using their roles.
-
-[[run-as-privilege-apply]]
-== Apply the `run_as` privilege to roles
-You can apply the `run_as` privilege when creating roles with the
-<>. Users who are assigned
-a role that contains the `run_as` privilege inherit all privileges from their
-role, and can also submit requests on behalf of the indicated users.
-
-NOTE: Roles for the authenticated user and the `run_as` user are not merged. If
-a user authenticates without specifying the `run_as` parameter, only the
-authenticated user's roles are used. If a user authenticates and their roles
-include the `run_as` parameter, only the `run_as` user's roles are used.
-
-After a user successfully authenticates to {es}, an authorization process
-determines whether the user behind an incoming request is allowed to run
-that request. If the authenticated user has the `run_as` privilege in their list
-of permissions and specifies the run-as header, {es} _discards_ the authenticated
-user and associated roles. It then looks in each of the configured realms in the
-realm chain until it finds the username that's associated with the `run_as` user,
-and uses those roles to execute any requests.
-
-Consider an admin role and an analyst role. The admin role has higher privileges,
-but might also want to submit requests as another user to test and verify their
-permissions.
-
-First, we'll create an admin role named `my_admin_role`. This role has `manage`
-<> on the entire cluster, and on a subset of
-indices. This role also contains the `run_as` privilege, which enables any user
-with this role to submit requests on behalf of the specified `analyst_user`.
-
-[source,console]
------
-POST /_security/role/my_admin_role?refresh=true
-{
-  "cluster": ["manage"],
-  "indices": [
-    {
-      "names": [ "index1", "index2" ],
-      "privileges": [ "manage" ]
-    }
-  ],
-  "applications": [
-    {
-      "application": "myapp",
-      "privileges": [ "admin", "read" ],
-      "resources": [ "*" ]
-    }
-  ],
-  "run_as": [ "analyst_user" ],
-  "metadata" : {
-    "version" : 1
-  }
-}
------
-
-Next, we'll create an analyst role named `my_analyst_role`, which has more
-restricted `monitor` cluster privileges and `manage` privileges on a subset of
-indices.
-
-[source,console]
------
-POST /_security/role/my_analyst_role?refresh=true
-{
-  "cluster": [ "monitor"],
-  "indices": [
-    {
-      "names": [ "index1", "index2" ],
-      "privileges": ["manage"]
-    }
-  ],
-  "applications": [
-    {
-      "application": "myapp",
-      "privileges": [ "read" ],
-      "resources": [ "*" ]
-    }
-  ],
-  "metadata" : {
-    "version" : 1
-  }
-}
------
-
-We'll create an administrator user and assign them the role named `my_admin_role`,
-which allows this user to submit requests as the `analyst_user`.
-
-[source,console]
------
-POST /_security/user/admin_user?refresh=true
-{
-  "password": "l0ng-r4nd0m-p@ssw0rd",
-  "roles": [ "my_admin_role" ],
-  "full_name": "Eirian Zola",
-  "metadata": { "intelligence" : 7}
-}
------
-
-We can also create an analyst user and assign them the role named
-`my_analyst_role`.
-
-[source,console]
------
-POST /_security/user/analyst_user?refresh=true
-{
-  "password": "l0nger-r4nd0mer-p@ssw0rd",
-  "roles": [ "my_analyst_role" ],
-  "full_name": "Monday Jaffe",
-  "metadata": { "innovation" : 8}
-}
------
-
-You can then authenticate to {es} as the `admin_user` or `analyst_user`. However,
-the `admin_user` could optionally submit requests on behalf of the `analyst_user`.
-The following request authenticates to {es} with a `Basic` authorization token
-and submits the request as the `analyst_user`:
-
-[source,sh]
------
-curl -s -X GET -H "Authorization: Basic YWRtaW5fdXNlcjpsMG5nLXI0bmQwbS1wQHNzdzByZA==" -H "es-security-runas-user: analyst_user" https://localhost:9200/_security/_authenticate
------
-
-The response indicates that the `analyst_user` submitted this request, using the
-`my_analyst_role` that's assigned to that user. When the `admin_user` submitted
-the request, {es} authenticated that user, discarded their roles, and then used
-the roles of the `run_as` user.
-
-[source,sh]
------
-{"username":"analyst_user","roles":["my_analyst_role"],"full_name":"Monday Jaffe","email":null,
-"metadata":{"innovation":8},"enabled":true,"authentication_realm":{"name":"native",
-"type":"native"},"lookup_realm":{"name":"native","type":"native"},"authentication_type":"realm"}
-%
------
-
-The `authentication_realm` and `lookup_realm` in the response both specify
-the `native` realm because both the `admin_user` and `analyst_user` are from
-that realm. If the two users are in different realms, the values for
-`authentication_realm` and `lookup_realm` are different (such as `pki` and
-`native`).
-"""
-        )
-
+    String normalizeRestSpec(String inputString) {
+        def withNormalizedLines = inputString.replaceAll(/"line_\d+":/, "\"line_0\":")
+        return withNormalizedLines
+    }
 }
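The `normalizeRestSpec` helper above strips the only part of a generated spec that legitimately differs between source formats: the `"line_<n>":` test names encode the snippet's line number in the source file, so they differ between an `.asciidoc` file and its `.mdx` twin. The helper rewrites every such key to `"line_0":`. A hypothetical follow-up assertion for the migration-mode test (file names taken from the test above) could then compare the two outputs structurally:

```groovy
// Sketch only: compare the asciidoc- and mdx-derived specs, ignoring line offsets.
def testRoot = task.getTestRoot().get().getAsFile()
def fromAsciidoc = normalizeRestSpec(new File(testRoot, "rest-api-spec/test/example-2.asciidoc.yml").text)
def fromMdx = normalizeRestSpec(new File(testRoot, "rest-api-spec/test/example-2.mdx.yml").text)
assert fromAsciidoc == fromMdx
```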
diff --git a/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/SnippetBuilderSpec.groovy b/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/SnippetBuilderSpec.groovy
new file mode 100644
index 000000000000..278728ec176c
--- /dev/null
+++ b/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/SnippetBuilderSpec.groovy
@@ -0,0 +1,107 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0 and the Server Side Public License, v 1; you may not use this file except
+ * in compliance with, at your election, the Elastic License 2.0 or the Server
+ * Side Public License, v 1.
+ */
+
+package org.elasticsearch.gradle.internal.doc
+
+import spock.lang.Specification
+import spock.lang.Unroll
+
+import org.gradle.api.InvalidUserDataException
+
+class SnippetBuilderSpec extends Specification {
+
+    @Unroll
+    def "checks for valid json for #languageParam"() {
+        when:
+        def snippet1 = snippetBuilder().withLanguage(languageParam).withTestResponse(true).withConsole(true)
+            .withContent(
+                """{
+    "name": "John Doe",
+    "age": 30,
+    "isMarried": true,
+    "address": {
+        "street": "123 Main Street",
+        "city": "Springfield",
+        "state": "IL",
+        "zip": "62701"
+    },
+    "hobbies": ["Reading", "Cooking", "Traveling"]
+}"""
+            ).build()
+        then:
+        snippet1 != null
+
+        when:
+        snippetBuilder().withLanguage(languageParam).withTestResponse(true).withConsole(true)
+            .withContent(
+                "some invalid json"
+            ).build()
+
+        then:
+        def e = thrown(InvalidUserDataException)
+        e.message.contains("Invalid json in")
+
+        when:
+        def snippet2 = snippetBuilder().withLanguage(languageParam).withTestResponse(true).withConsole(true)
+            .withSkip("skipping")
+            .withContent(
+                "some invalid json"
+            ).build()
+
+        then:
+        snippet2 != null
+
+        where:
+        languageParam << ["js", "console-result"]
+    }
+
+    def "language must be defined"() {
+        when:
+        snippetBuilder().withContent("snippet-content").build()
+        then:
+        def e = thrown(InvalidUserDataException)
+        e.message.contains("Snippet missing a language.")
+    }
+
+    def "handles snippets with curl"() {
+        expect:
+        snippetBuilder().withLanguage("sh")
+            .withName("snippet-name-1")
+            .withContent("curl substDefault subst")
+            .build()
+            .curl() == true
+    }
+
+    def "snippet builder handles substitutions"() {
+        when:
+        def snippet = snippetBuilder().withLanguage("console").withContent("snippet-content substDefault subst")
+            .withSubstitutions([substDefault: "\$body", subst: 'substValue']).build()
+
+        then:
+        snippet.contents == "snippet-content \$body substValue"
+    }
+
+    def "fails for a NOTCONSOLE snippet that does not contain curl"() {
+        when:
+        snippetBuilder()
+            .withConsole(false)
+            .withLanguage("shell")
+            .withContent("hello substDefault subst")
+            .build()
+        then:
+        def e = thrown(InvalidUserDataException)
+        e.message.contains("No need for NOTCONSOLE if snippet doesn't contain `curl`")
+    }
+
+    SnippetBuilder snippetBuilder() {
+        return new SnippetBuilder()
+    }
+
+}
diff --git a/build-tools/src/testFixtures/java/org/elasticsearch/gradle/internal/test/TestUtils.java b/build-tools/src/testFixtures/java/org/elasticsearch/gradle/internal/test/TestUtils.java
index 17d3375c4e83..e4513bfe4c0f 100644
--- a/build-tools/src/testFixtures/java/org/elasticsearch/gradle/internal/test/TestUtils.java
+++ b/build-tools/src/testFixtures/java/org/elasticsearch/gradle/internal/test/TestUtils.java
@@ -14,6 +14,10 @@ import java.util.stream.Collectors;
 
 public class TestUtils {
 
+    public static String normalizeString(String input) {
+        return normalizeString(input, new File("."));
+    }
+
     public static String normalizeString(String input, File projectRootDir) {
         try {
             String canonicalNormalizedPathPrefix = projectRootDir.getCanonicalPath().replace('\\', '/');
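The new one-argument `normalizeString` overload simply defaults the project root to the process working directory, a convenience for tests that already run from the project root; anything else should keep passing the root explicitly. A small Groovy sketch of the equivalence (assuming that working directory):

```groovy
// Both calls normalize identically when the working directory is the project root.
def output = "log line mentioning ${new File('.').canonicalPath}/build"
assert TestUtils.normalizeString(output) == TestUtils.normalizeString(output, new File("."))
```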
diff --git a/docs/build.gradle b/docs/build.gradle
index e38b0129b219..0eba980e8cc3 100644
--- a/docs/build.gradle
+++ b/docs/build.gradle
@@ -1,6 +1,6 @@
 import org.elasticsearch.gradle.Version
 import org.elasticsearch.gradle.internal.info.BuildParams
-
+import org.elasticsearch.gradle.internal.doc.DocSnippetTask
 import static org.elasticsearch.gradle.testclusters.TestDistribution.DEFAULT
 
 /*
@@ -16,6 +16,7 @@ apply plugin: 'elasticsearch.rest-resources'
 ext.docsFileTree = fileTree(projectDir) {
   include '**/*.asciidoc'
+  include '**/*.mdx'
   // That is where the snippets go, not where they come from!
   exclude 'build/**'
   exclude 'build-idea/**'
@@ -37,7 +38,7 @@ ext.docsFileTree = fileTree(projectDir) {
 /* List of files that have snippets that will not work until platinum tests can occur ... */
 tasks.named("buildRestTests").configure {
-  expectedUnconvertedCandidates = [
+  getExpectedUnconvertedCandidates().addAll(
     'reference/ml/anomaly-detection/ml-configuring-transform.asciidoc',
     'reference/ml/anomaly-detection/apis/delete-calendar-event.asciidoc',
     'reference/ml/anomaly-detection/apis/get-bucket.asciidoc',
@@ -58,7 +59,7 @@ tasks.named("buildRestTests").configure {
     'reference/rest-api/watcher/put-watch.asciidoc',
     'reference/rest-api/watcher/stats.asciidoc',
     'reference/watcher/example-watches/watching-time-series-data.asciidoc'
-  ]
+  )
 }
 
 restResources {
@@ -176,16 +177,8 @@ tasks.named("forbiddenPatterns").configure {
   exclude '**/*.mmdb'
 }
 
-tasks.named("buildRestTests").configure {
-  docs = docsFileTree
-}
-
-tasks.named("listSnippets").configure {
-  docs = docsFileTree
-}
-
-tasks.named("listConsoleCandidates").configure {
-  docs = docsFileTree
+tasks.withType(DocSnippetTask).configureEach {
+  docs = docsFileTree
 }
 
 Closure setupMyIndex = { String name, int count ->