Add date processor migration (#7275)

* add date migration with some refactoring

* Update comments

* get rid of unused imports

* #7275 fixes by @original-brownbear

Fixes #7301
Suyog Rao 2017-06-02 11:23:14 -07:00 committed by Armin Braun
parent 248cef97c3
commit b3bc2f8cea
15 changed files with 311 additions and 42 deletions

View file

@@ -0,0 +1,41 @@
package org.logstash.ingest;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import javax.script.Invocable;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import javax.script.ScriptException;
/**
* Ingest Date DSL to Logstash Date Transpiler.
*/
public final class Date {
private Date() {
// Utility Wrapper for JS Script.
}
public static void main(final String... args) throws ScriptException, NoSuchMethodException {
try (final Reader reader = new InputStreamReader(
Grok.class.getResourceAsStream("/ingest-date.js")
)
) {
final ScriptEngine engine =
new ScriptEngineManager().getEngineByName("nashorn");
engine.eval(reader);
Files.write(Paths.get(args[1]), ((String) ((Invocable) engine).invokeFunction(
"ingest_to_logstash_date",
new String(
Files.readAllBytes(Paths.get(args[0])), StandardCharsets.UTF_8
)
)).getBytes(StandardCharsets.UTF_8));
} catch (final IOException ex) {
throw new IllegalStateException(ex);
}
}
}
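
For reference, the wrapper above is the standard Nashorn Invocable pattern: evaluate a script, then call one of its functions by name. A minimal, self-contained sketch (not part of this commit; the script body and class name are illustrative):

import javax.script.Invocable;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;

public final class NashornBridgeDemo {
    public static void main(final String... args) throws Exception {
        final ScriptEngine engine = new ScriptEngineManager().getEngineByName("nashorn");
        // Stands in for the bundled ingest-date.js resource.
        engine.eval("function shout(s) { return s.toUpperCase(); }");
        // invokeFunction looks up the evaluated JS function by name.
        final String result = (String) ((Invocable) engine).invokeFunction("shout", "date");
        System.out.println(result); // DATE
    }
}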

View file

@@ -22,14 +22,14 @@ public final class Grok {
public static void main(final String... args) throws ScriptException, NoSuchMethodException {
try (final Reader reader = new InputStreamReader(
- Grok.class.getResourceAsStream("/ingest-to-grok.js")
+ Grok.class.getResourceAsStream("/ingest-grok.js")
)
) {
final ScriptEngine engine =
new ScriptEngineManager().getEngineByName("nashorn");
engine.eval(reader);
Files.write(Paths.get(args[1]), ((String) ((Invocable) engine).invokeFunction(
- "json_to_grok",
+ "ingest_to_logstash_grok",
new String(
Files.readAllBytes(Paths.get(args[0])), StandardCharsets.UTF_8
)

View file

@@ -0,0 +1,141 @@
/**
* Converts Ingest Date JSON to LS Date filter.
*/
function ingest_to_logstash_date(json) {
function quote_string(string) {
return "\"" + string.replace(/"/g, "\\\"") + "\"";
}
function wrap_in_curly(string) {
return "{\n" + string + "\n}";
}
function create_field(name, content) {
return name + " => " + content;
}
function create_hash(name, content) {
return name + " " + wrap_in_curly(content);
}
/**
* All hash fields in LS start on a new line.
* @param fields Array of Strings of Serialized Hash Fields
* @returns {string} Joined Serialization of Hash Fields
*/
function join_hash_fields(fields) {
return fields.join("\n");
}
/**
* Translates the JSON naming pattern (`name.qualifier.sub`) into the LS pattern
* [name][qualifier][sub] for all applicable tokens in the given string.
* This function correctly identifies and omits renaming of string literals.
* @param string to replace naming pattern in
* @returns {string} with Json naming translated into grok naming
*/
function dots_to_square_brackets(string) {
function token_dots_to_square_brackets(string) {
return string.replace(/(\w*)\.(\w*)/g, "$1][$2").replace(/(\w+)}/g, "$1]}")
.replace(/{(\w+):(\w+)]/g, "{$1:[$2]");
}
var literals = string.match(/\(\?:%{.*\|-\)/);
var i;
var tokens = [];
// Copy String before Manipulation
var right = string;
if (literals) {
for (i = 0; i < literals.length; ++i) {
var parts = right.split(literals[i], 2);
right = parts[1];
tokens.push(token_dots_to_square_brackets(parts[0]));
tokens.push(literals[i]);
}
}
tokens.push(token_dots_to_square_brackets(right));
return tokens.join("");
}
/**
* Converts Ingest/JSON style pattern array to LS pattern array, performing necessary variable
* name and quote escaping adjustments.
* @param patterns Pattern Array in JSON formatting
* @returns {string} Pattern array in Grok formatting
*/
function create_pattern_array(patterns) {
return "[\n" + patterns.map(dots_to_square_brackets).map(quote_string).join(",\n") + "\n]";
}
/**
* Fixes indentation in LS string.
* @param string LS string to fix indentation in, that has no indentation intentionally with
* all lines starting on a token without preceding spaces.
* @returns {string} LS string indented by 3 spaces per level
*/
function fix_indent(string) {
function indent(string, shifts) {
return new Array(shifts * 3 + 1).join(" ") + string;
}
var lines = string.split("\n");
var count = 0;
var i;
for (i = 0; i < lines.length; ++i) {
if (lines[i].match(/(\{|\[)$/)) {
lines[i] = indent(lines[i], count);
++count;
} else if (lines[i].match(/(\}|\])$/)) {
--count;
lines[i] = indent(lines[i], count);
// Only indent line if previous line ended on relevant control char.
} else if (i > 0 && lines[i - 1].match(/(=>\s+".+"|,|\{|\}|\[|\])$/)) {
lines[i] = indent(lines[i], count);
}
}
return lines.join("\n");
}
function date_hash(processor) {
var date_json = processor["date"];
var formats = date_json["formats"];
var match_contents = [dots_to_square_brackets(date_json["field"])];
for (var f in formats) {
match_contents.push(formats[f]);
}
var date_contents = create_field(
"match",
create_pattern_array(match_contents)
);
if (date_json["target_field"]) {
var target = create_field("target", quote_string(dots_to_square_brackets(date_json["target_field"])));
date_contents = join_hash_fields([date_contents, target]);
}
if (date_json["timezone"]) {
var timezone = create_field("timezone", quote_string(date_json["timezone"]));
date_contents = join_hash_fields([date_contents, timezone]);
}
if (date_json["locale"]) {
var locale = create_field("locale", quote_string(date_json["locale"]));
date_contents = join_hash_fields([date_contents, locale]);
}
return date_contents;
}
function map_processor (processor) {
return fix_indent(
create_hash(
"filter",
create_hash(
"date", date_hash(processor)
)
)
)
}
return JSON.parse(json)["processors"].map(map_processor).join("\n\n") + "\n";
}
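
The naming translation done by dots_to_square_brackets is easiest to see on a concrete token. A short Java re-statement of the same regex chain (not part of this commit; the sample pattern is illustrative):

public final class DotsToBracketsDemo {
    public static void main(final String... args) {
        final String sample = "%{IPORHOST:source.address}";
        final String out = sample
            // source.address -> source][address
            .replaceAll("(\\w*)\\.(\\w*)", "$1][$2")
            // address} -> address]}
            .replaceAll("(\\w+)\\}", "$1]}")
            // {IPORHOST:source] -> {IPORHOST:[source]
            .replaceAll("\\{(\\w+):(\\w+)\\]", "{$1:[$2]");
        System.out.println(out); // %{IPORHOST:[source][address]}
    }
}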

View file

@@ -1,7 +1,7 @@
/**
- * Converts Ingest JSON to Grok.
+ * Converts Ingest JSON to LS Grok.
*/
- function json_to_grok(json) {
+ function ingest_to_logstash_grok(json) {
function quote_string(string) {
return "\"" + string.replace(/"/g, "\\\"") + "\"";
@@ -24,7 +24,7 @@ function json_to_grok(json) {
}
/**
- * All hash fields in Grok start on a new line.
+ * All hash fields in LS start on a new line.
* @param fields Array of Strings of Serialized Hash Fields
* @returns {string} Joined Serialization of Hash Fields
*/
@@ -33,13 +33,13 @@ function json_to_grok(json) {
}
/**
- * Converts Ingest/JSON style pattern array to Grok pattern array, performing necessary variable
+ * Converts Ingest/JSON style pattern array to LS pattern array, performing necessary variable
* name and quote escaping adjustments.
* @param patterns Pattern Array in JSON formatting
- * @returns {string} Pattern array in Grok formatting
+ * @returns {string} Pattern array in LS formatting
*/
function create_pattern_array(patterns) {
/**
* Translates the JSON naming pattern (`name.qualifier.sub`) into the grok pattern
* [name][qualifier][sub] for all applicable tokens in the given string.
@@ -70,7 +70,7 @@ function json_to_grok(json) {
tokens.push(token_dots_to_square_brackets(right));
return tokens.join("");
}
return "[\n" + patterns.map(dots_to_square_brackets).map(quote_string).join(",\n") + "\n]";
}
@@ -85,17 +85,17 @@ function json_to_grok(json) {
}
/**
- * Fixes indentation in Grok string.
- * @param string Grok string to fix indentation in, that has no indentation intentionally with
+ * Fixes indentation in LS string.
+ * @param string LS string to fix indentation in, that has no indentation intentionally with
* all lines starting on a token without preceding spaces.
- * @returns {string} Grok string indented by 3 spaces per level
+ * @returns {string} LS string indented by 3 spaces per level
*/
function fix_indent(string) {
function indent(string, shifts) {
return new Array(shifts * 3 + 1).join(" ") + string;
}
var lines = string.split("\n");
var count = 0;
var i;
@@ -107,13 +107,13 @@ function json_to_grok(json) {
--count;
lines[i] = indent(lines[i], count);
// Only indent line if previous line ended on relevant control char.
- } else if (i > 0 && lines[i - 1].match(/(,|\{|\}|\[|\])$/)) {
+ } else if (i > 0 && lines[i - 1].match(/(=>\s+".+"|,|\{|\}|\[|\])$/)) {
lines[i] = indent(lines[i], count);
}
}
return lines.join("\n");
}
function grok_hash(processor) {
var grok_data = processor["grok"];
var grok_contents = create_hash_field(
@@ -125,13 +125,13 @@ function json_to_grok(json) {
);
if (grok_data["pattern_definitions"]) {
grok_contents = join_hash_fields([
- grok_contents,
+ grok_contents,
create_pattern_definition_hash(grok_data["pattern_definitions"])
])
}
return grok_contents;
}
function map_processor (processor) {
return fix_indent(
create_hash(
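
The only behavioral change to fix_indent is the widened regex above: a line now also gets indented when the previous line ends in a quoted field value such as `target => "timestamp"`, which the new date filter output produces. A quick check of both patterns (not part of this commit):

public final class IndentRegexDemo {
    public static void main(final String... args) {
        final String prev = "target => \"timestamp\"";
        // The new alternative matches a trailing quoted field value...
        System.out.println(prev.matches(".*(=>\\s+\".+\"|,|\\{|\\}|\\[|\\])$")); // true
        // ...while the old pattern only matched trailing control characters.
        System.out.println(prev.matches(".*(,|\\{|\\}|\\[|\\])$")); // false
    }
}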

View file

@@ -0,0 +1,27 @@
package org.logstash.ingest;
import org.junit.Test;
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.MatcherAssert.assertThat;
public final class DateTest extends IngestTest {
@Test
public void convertsFieldPatternsCorrectly() throws Exception {
final String date = getResultPath(temp);
Date.main(resourcePath("ingestDate.json"), date);
assertThat(
utf8File(date), is(utf8File(resourcePath("logstashDate.conf")))
);
}
@Test
public void convertsFieldDefinitionsCorrectly() throws Exception {
final String date = getResultPath(temp);
Date.main(resourcePath("ingestDateExtraFields.json"), date);
assertThat(
utf8File(date), is(utf8File(resourcePath("logstashDateExtraFields.conf")))
);
}
}

View file

@@ -1,44 +1,27 @@
package org.logstash.ingest;
- import java.io.IOException;
- import java.nio.charset.StandardCharsets;
- import java.nio.file.Files;
- import java.nio.file.Paths;
- import org.junit.Rule;
import org.junit.Test;
- import org.junit.rules.TemporaryFolder;
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.MatcherAssert.assertThat;
- public final class GrokTest {
- @Rule
- public final TemporaryFolder temp = new TemporaryFolder();
+ public final class GrokTest extends IngestTest {
@Test
public void convertsFieldPatternsCorrectly() throws Exception {
- final String grok = temp.newFolder().toPath().resolve("converted.grok").toString();
- Grok.main(resourcePath("ingestTestConfig.json"), grok);
+ final String grok = getResultPath(temp);
+ Grok.main(resourcePath("ingestGrok.json"), grok);
assertThat(
- utf8File(grok), is(utf8File(resourcePath("ingestTestConfig.grok")))
+ utf8File(grok), is(utf8File(resourcePath("logstashGrok.conf")))
);
}
@Test
public void convertsFieldDefinitionsCorrectly() throws Exception {
- final String grok = temp.newFolder().toPath().resolve("converted.grok").toString();
- Grok.main(resourcePath("ingestTestPatternDefinition.json"), grok);
+ final String grok = getResultPath(temp);
+ Grok.main(resourcePath("ingestGrokPatternDefinition.json"), grok);
assertThat(
- utf8File(grok), is(utf8File(resourcePath("ingestTestPatternDefinition.grok")))
+ utf8File(grok), is(utf8File(resourcePath("logstashGrokPatternDefinition.conf")))
);
}
- private static String utf8File(final String path) throws IOException {
- return new String(Files.readAllBytes(Paths.get(path)), StandardCharsets.UTF_8);
- }
- private static String resourcePath(final String name) {
- return Grok.class.getResource(name).getPath();
- }
}

View file

@@ -0,0 +1,29 @@
package org.logstash.ingest;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import org.junit.Rule;
import org.junit.rules.TemporaryFolder;
/**
* Base class for ingest migration tests
*/
public abstract class IngestTest {
@Rule
public TemporaryFolder temp = new TemporaryFolder();
static String utf8File(final String path) throws IOException {
return new String(Files.readAllBytes(Paths.get(path)), StandardCharsets.UTF_8);
}
static String resourcePath(final String name) {
return IngestTest.class.getResource(name).getPath();
}
static String getResultPath(TemporaryFolder temp) throws Exception {
return temp.newFolder().toPath().resolve("converted").toString();
}
}

View file

@@ -0,0 +1,12 @@
{
"description" : "...",
"processors" : [
{
"date" : {
"field" : "initial_date",
"target_field" : "timestamp",
"formats" : ["dd/MM/yyyy hh:mm:ss", "dd/MM/yyyy"]
}
}
]
}

View file

@@ -0,0 +1,14 @@
{
"description" : "...",
"processors" : [
{
"date" : {
"field" : "initial_date",
"target_field" : "timestamp",
"formats" : ["dd/MM/yyyy hh:mm:ss", "dd/MM/yyyy"],
"timezone" : "Europe/Amsterdam",
"locale": "en"
}
}
]
}

View file

@@ -0,0 +1,10 @@
filter {
date {
match => [
"initial_date",
"dd/MM/yyyy hh:mm:ss",
"dd/MM/yyyy"
]
target => "timestamp"
}
}

View file

@@ -0,0 +1,12 @@
filter {
date {
match => [
"initial_date",
"dd/MM/yyyy hh:mm:ss",
"dd/MM/yyyy"
]
target => "timestamp"
timezone => "Europe/Amsterdam"
locale => "en"
}
}
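
Putting the pieces together, a minimal end-to-end sketch mirroring DateTest (not part of this commit; it assumes the ingest-converter classes are on the classpath and uses hypothetical local paths):

import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;

public final class DateMigrationDemo {
    public static void main(final String... args) throws Exception {
        // Args to Date.main: input ingest pipeline JSON, output Logstash config path.
        org.logstash.ingest.Date.main("ingestDateExtraFields.json", "converted.conf");
        System.out.print(new String(
            Files.readAllBytes(Paths.get("converted.conf")), StandardCharsets.UTF_8
        )); // should print the date filter shown above
    }
}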