Add date processor migration (#7275)

* add date migration with some refactoring

* Update comments

* get rid of unused imports

* #7275 fixes by @original-brownbear

Fixes #7301
Suyog Rao 2017-06-02 11:23:14 -07:00 committed by Armin Braun
parent 248cef97c3
commit b3bc2f8cea
15 changed files with 311 additions and 42 deletions

View file

@@ -0,0 +1,41 @@
package org.logstash.ingest;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import javax.script.Invocable;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import javax.script.ScriptException;
/**
* Ingest Date DSL to Logstash Date Transpiler.
*/
public final class Date {
private Date() {
// Utility Wrapper for JS Script.
}
public static void main(final String... args) throws ScriptException, NoSuchMethodException {
try (final Reader reader = new InputStreamReader(
Grok.class.getResourceAsStream("/ingest-date.js")
)
) {
final ScriptEngine engine =
new ScriptEngineManager().getEngineByName("nashorn");
engine.eval(reader);
Files.write(Paths.get(args[1]), ((String) ((Invocable) engine).invokeFunction(
"ingest_to_logstash_date",
new String(
Files.readAllBytes(Paths.get(args[0])), StandardCharsets.UTF_8
)
)).getBytes(StandardCharsets.UTF_8));
} catch (final IOException ex) {
throw new IllegalStateException(ex);
}
}
}
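
For reference, the wrapper above is the standard Nashorn Invocable pattern: evaluate a script, then call one of its functions by name. A minimal, self-contained sketch (not part of this commit; the script body and class name are illustrative):

import javax.script.Invocable;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;

public final class NashornBridgeDemo {
    public static void main(final String... args) throws Exception {
        final ScriptEngine engine = new ScriptEngineManager().getEngineByName("nashorn");
        // Stands in for the bundled ingest-date.js resource.
        engine.eval("function shout(s) { return s.toUpperCase(); }");
        // invokeFunction looks up the evaluated JS function by name.
        final String result = (String) ((Invocable) engine).invokeFunction("shout", "date");
        System.out.println(result); // DATE
    }
}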

View file

@@ -22,14 +22,14 @@ public final class Grok {
public static void main(final String... args) throws ScriptException, NoSuchMethodException {
try (final Reader reader = new InputStreamReader(
- Grok.class.getResourceAsStream("/ingest-to-grok.js")
+ Grok.class.getResourceAsStream("/ingest-grok.js")
)
) {
final ScriptEngine engine =
new ScriptEngineManager().getEngineByName("nashorn");
engine.eval(reader);
Files.write(Paths.get(args[1]), ((String) ((Invocable) engine).invokeFunction(
- "json_to_grok",
+ "ingest_to_logstash_grok",
new String(
Files.readAllBytes(Paths.get(args[0])), StandardCharsets.UTF_8
)

View file

@@ -0,0 +1,141 @@
/**
* Converts Ingest Date JSON to LS Date filter.
*/
function ingest_to_logstash_date(json) {
function quote_string(string) {
return "\"" + string.replace(/"/g, "\\\"") + "\"";
}
function wrap_in_curly(string) {
return "{\n" + string + "\n}";
}
function create_field(name, content) {
return name + " => " + content;
}
function create_hash(name, content) {
return name + " " + wrap_in_curly(content);
}
/**
* All hash fields in LS start on a new line.
* @param fields Array of Strings of Serialized Hash Fields
* @returns {string} Joined Serialization of Hash Fields
*/
function join_hash_fields(fields) {
return fields.join("\n");
}
/**
* Translates the JSON naming pattern (`name.qualifier.sub`) into the LS pattern
* [name][qualifier][sub] for all applicable tokens in the given string.
* This function correctly identifies and omits renaming of string literals.
* @param string to replace naming pattern in
* @returns {string} with Json naming translated into grok naming
*/
function dots_to_square_brackets(string) {
function token_dots_to_square_brackets(string) {
return string.replace(/(\w*)\.(\w*)/g, "$1][$2").replace(/(\w+)}/g, "$1]}")
.replace(/{(\w+):(\w+)]/g, "{$1:[$2]");
}
var literals = string.match(/\(\?:%{.*\|-\)/);
var i;
var tokens = [];
// Copy String before Manipulation
var right = string;
if (literals) {
for (i = 0; i < literals.length; ++i) {
var parts = right.split(literals[i], 2);
right = parts[1];
tokens.push(token_dots_to_square_brackets(parts[0]));
tokens.push(literals[i]);
}
}
tokens.push(token_dots_to_square_brackets(right));
return tokens.join("");
}
/**
* Converts Ingest/JSON style pattern array to LS pattern array, performing necessary variable
* name and quote escaping adjustments.
* @param patterns Pattern Array in JSON formatting
* @returns {string} Pattern array in Grok formatting
*/
function create_pattern_array(patterns) {
return "[\n" + patterns.map(dots_to_square_brackets).map(quote_string).join(",\n") + "\n]";
}
/**
* Fixes indentation in LS string.
* @param string LS string to fix indentation in, that has no indentation intentionally with
* all lines starting on a token without preceding spaces.
* @returns {string} LS string indented by 3 spaces per level
*/
function fix_indent(string) {
function indent(string, shifts) {
return new Array(shifts * 3 + 1).join(" ") + string;
}
var lines = string.split("\n");
var count = 0;
var i;
for (i = 0; i < lines.length; ++i) {
if (lines[i].match(/(\{|\[)$/)) {
lines[i] = indent(lines[i], count);
++count;
} else if (lines[i].match(/(\}|\])$/)) {
--count;
lines[i] = indent(lines[i], count);
// Only indent line if previous line ended on relevant control char.
} else if (i > 0 && lines[i - 1].match(/(=>\s+".+"|,|\{|\}|\[|\])$/)) {
lines[i] = indent(lines[i], count);
}
}
return lines.join("\n");
}
function date_hash(processor) {
var date_json = processor["date"];
var formats = date_json["formats"];
var match_contents = [dots_to_square_brackets(date_json["field"])];
for (var f in formats) {
match_contents.push(formats[f]);
}
var date_contents = create_field(
"match",
create_pattern_array(match_contents)
);
if (date_json["target_field"]) {
var target = create_field("target", quote_string(dots_to_square_brackets(date_json["target_field"])));
date_contents = join_hash_fields([date_contents, target]);
}
if (date_json["timezone"]) {
var timezone = create_field("timezone", quote_string(date_json["timezone"]));
date_contents = join_hash_fields([date_contents, timezone]);
}
if (date_json["locale"]) {
var locale = create_field("locale", quote_string(date_json["locale"]));
date_contents = join_hash_fields([date_contents, locale]);
}
return date_contents;
}
function map_processor (processor) {
return fix_indent(
create_hash(
"filter",
create_hash(
"date", date_hash(processor)
)
)
)
}
return JSON.parse(json)["processors"].map(map_processor).join("\n\n") + "\n";
}
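
The naming translation done by dots_to_square_brackets is easiest to see on a concrete token. A short Java re-statement of the same regex chain (not part of this commit; the sample pattern is illustrative):

public final class DotsToBracketsDemo {
    public static void main(final String... args) {
        final String sample = "%{IPORHOST:source.address}";
        final String out = sample
            // source.address -> source][address
            .replaceAll("(\\w*)\\.(\\w*)", "$1][$2")
            // address} -> address]}
            .replaceAll("(\\w+)\\}", "$1]}")
            // {IPORHOST:source] -> {IPORHOST:[source]
            .replaceAll("\\{(\\w+):(\\w+)\\]", "{$1:[$2]");
        System.out.println(out); // %{IPORHOST:[source][address]}
    }
}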

View file

@@ -1,7 +1,7 @@
/**
- * Converts Ingest JSON to Grok.
+ * Converts Ingest JSON to LS Grok.
*/
- function json_to_grok(json) {
+ function ingest_to_logstash_grok(json) {
function quote_string(string) {
return "\"" + string.replace(/"/g, "\\\"") + "\"";
@@ -24,7 +24,7 @@ function json_to_grok(json) {
}
/**
- * All hash fields in Grok start on a new line.
+ * All hash fields in LS start on a new line.
* @param fields Array of Strings of Serialized Hash Fields
* @returns {string} Joined Serialization of Hash Fields
*/
@@ -33,13 +33,13 @@ function json_to_grok(json) {
}
/**
- * Converts Ingest/JSON style pattern array to Grok pattern array, performing necessary variable
+ * Converts Ingest/JSON style pattern array to LS pattern array, performing necessary variable
* name and quote escaping adjustments.
* @param patterns Pattern Array in JSON formatting
- * @returns {string} Pattern array in Grok formatting
+ * @returns {string} Pattern array in LS formatting
*/
function create_pattern_array(patterns) {
/**
* Translates the JSON naming pattern (`name.qualifier.sub`) into the grok pattern
* [name][qualifier][sub] for all applicable tokens in the given string.
@@ -70,7 +70,7 @@ function json_to_grok(json) {
tokens.push(token_dots_to_square_brackets(right));
return tokens.join("");
}
return "[\n" + patterns.map(dots_to_square_brackets).map(quote_string).join(",\n") + "\n]";
}
@@ -85,17 +85,17 @@ function json_to_grok(json) {
}
/**
- * Fixes indentation in Grok string.
- * @param string Grok string to fix indentation in, that has no indentation intentionally with
+ * Fixes indentation in LS string.
+ * @param string LS string to fix indentation in, that has no indentation intentionally with
* all lines starting on a token without preceding spaces.
- * @returns {string} Grok string indented by 3 spaces per level
+ * @returns {string} LS string indented by 3 spaces per level
*/
function fix_indent(string) {
function indent(string, shifts) {
return new Array(shifts * 3 + 1).join(" ") + string;
}
var lines = string.split("\n");
var count = 0;
var i;
@@ -107,13 +107,13 @@ function json_to_grok(json) {
--count;
lines[i] = indent(lines[i], count);
// Only indent line if previous line ended on relevant control char.
- } else if (i > 0 && lines[i - 1].match(/(,|\{|\}|\[|\])$/)) {
+ } else if (i > 0 && lines[i - 1].match(/(=>\s+".+"|,|\{|\}|\[|\])$/)) {
lines[i] = indent(lines[i], count);
}
}
return lines.join("\n");
}
function grok_hash(processor) {
var grok_data = processor["grok"];
var grok_contents = create_hash_field(
@@ -125,13 +125,13 @@ function json_to_grok(json) {
);
if (grok_data["pattern_definitions"]) {
grok_contents = join_hash_fields([
- grok_contents,
+ grok_contents,
create_pattern_definition_hash(grok_data["pattern_definitions"])
])
}
return grok_contents;
}
function map_processor (processor) {
return fix_indent(
create_hash(
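
The only behavioral change to fix_indent is the widened regex above: a line now also gets indented when the previous line ends in a quoted field value such as `target => "timestamp"`, which the new date filter output produces. A quick check of both patterns (not part of this commit):

public final class IndentRegexDemo {
    public static void main(final String... args) {
        final String prev = "target => \"timestamp\"";
        // The new alternative matches a trailing quoted field value...
        System.out.println(prev.matches(".*(=>\\s+\".+\"|,|\\{|\\}|\\[|\\])$")); // true
        // ...while the old pattern only matched trailing control characters.
        System.out.println(prev.matches(".*(,|\\{|\\}|\\[|\\])$")); // false
    }
}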

View file

@@ -0,0 +1,27 @@
package org.logstash.ingest;
import org.junit.Test;
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.MatcherAssert.assertThat;
public final class DateTest extends IngestTest {
@Test
public void convertsFieldPatternsCorrectly() throws Exception {
final String date = getResultPath(temp);
Date.main(resourcePath("ingestDate.json"), date);
assertThat(
utf8File(date), is(utf8File(resourcePath("logstashDate.conf")))
);
}
@Test
public void convertsFieldDefinitionsCorrectly() throws Exception {
final String date = getResultPath(temp);
Date.main(resourcePath("ingestDateExtraFields.json"), date);
assertThat(
utf8File(date), is(utf8File(resourcePath("logstashDateExtraFields.conf")))
);
}
}

View file

@@ -1,44 +1,27 @@
package org.logstash.ingest;
- import java.io.IOException;
- import java.nio.charset.StandardCharsets;
- import java.nio.file.Files;
- import java.nio.file.Paths;
- import org.junit.Rule;
import org.junit.Test;
- import org.junit.rules.TemporaryFolder;
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.MatcherAssert.assertThat;
- public final class GrokTest {
- @Rule
- public final TemporaryFolder temp = new TemporaryFolder();
+ public final class GrokTest extends IngestTest {
@Test
public void convertsFieldPatternsCorrectly() throws Exception {
- final String grok = temp.newFolder().toPath().resolve("converted.grok").toString();
- Grok.main(resourcePath("ingestTestConfig.json"), grok);
+ final String grok = getResultPath(temp);
+ Grok.main(resourcePath("ingestGrok.json"), grok);
assertThat(
- utf8File(grok), is(utf8File(resourcePath("ingestTestConfig.grok")))
+ utf8File(grok), is(utf8File(resourcePath("logstashGrok.conf")))
);
}
@Test
public void convertsFieldDefinitionsCorrectly() throws Exception {
- final String grok = temp.newFolder().toPath().resolve("converted.grok").toString();
- Grok.main(resourcePath("ingestTestPatternDefinition.json"), grok);
+ final String grok = getResultPath(temp);
+ Grok.main(resourcePath("ingestGrokPatternDefinition.json"), grok);
assertThat(
- utf8File(grok), is(utf8File(resourcePath("ingestTestPatternDefinition.grok")))
+ utf8File(grok), is(utf8File(resourcePath("logstashGrokPatternDefinition.conf")))
);
}
- private static String utf8File(final String path) throws IOException {
- return new String(Files.readAllBytes(Paths.get(path)), StandardCharsets.UTF_8);
- }
- private static String resourcePath(final String name) {
- return Grok.class.getResource(name).getPath();
- }
}

View file

@@ -0,0 +1,29 @@
package org.logstash.ingest;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import org.junit.Rule;
import org.junit.rules.TemporaryFolder;
/**
* Base class for ingest migration tests
*/
public abstract class IngestTest {
@Rule
public TemporaryFolder temp = new TemporaryFolder();
static String utf8File(final String path) throws IOException {
return new String(Files.readAllBytes(Paths.get(path)), StandardCharsets.UTF_8);
}
static String resourcePath(final String name) {
return IngestTest.class.getResource(name).getPath();
}
static String getResultPath(TemporaryFolder temp) throws Exception {
return temp.newFolder().toPath().resolve("converted").toString();
}
}

View file

@@ -0,0 +1,12 @@
{
"description" : "...",
"processors" : [
{
"date" : {
"field" : "initial_date",
"target_field" : "timestamp",
"formats" : ["dd/MM/yyyy hh:mm:ss", "dd/MM/yyyy"]
}
}
]
}

View file

@@ -0,0 +1,14 @@
{
"description" : "...",
"processors" : [
{
"date" : {
"field" : "initial_date",
"target_field" : "timestamp",
"formats" : ["dd/MM/yyyy hh:mm:ss", "dd/MM/yyyy"],
"timezone" : "Europe/Amsterdam",
"locale": "en"
}
}
]
}

View file

@@ -0,0 +1,10 @@
filter {
date {
match => [
"initial_date",
"dd/MM/yyyy hh:mm:ss",
"dd/MM/yyyy"
]
target => "timestamp"
}
}

View file

@@ -0,0 +1,12 @@
filter {
date {
match => [
"initial_date",
"dd/MM/yyyy hh:mm:ss",
"dd/MM/yyyy"
]
target => "timestamp"
timezone => "Europe/Amsterdam"
locale => "en"
}
}
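
Putting the pieces together, a minimal end-to-end sketch mirroring DateTest (not part of this commit; it assumes the ingest-converter classes are on the classpath and uses hypothetical local paths):

import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;

public final class DateMigrationDemo {
    public static void main(final String... args) throws Exception {
        // Args to Date.main: input ingest pipeline JSON, output Logstash config path.
        org.logstash.ingest.Date.main("ingestDateExtraFields.json", "converted.conf");
        System.out.print(new String(
            Files.readAllBytes(Paths.get("converted.conf")), StandardCharsets.UTF_8
        )); // should print the date filter shown above
    }
}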