Fixes #7676 and generally improves the performance of org.logstash.StringInterpolation#evaluate by:

* Don't use regex to find the contents between `%{}`, it's way too inefficient for a simple case like this with all its allocations
* Flatten logic out of the many helper classes
* Use a reusable, thread-local `StringBuilder` to serialize into
* Don't use `.substring` for comparisons
* `static` `ObjectMapper`
* `org.joda.time.format.DateTimeFormat#forPattern` not being cached is not an issue, since it's cached in `org.joda.time.format.DateTimeFormat#cPatternCache`

* Added benchmark
   * ~15% faster for a few cases I ran, even for repeated patterns with interpolation + couldn't find any case even with date formatting, where this implementation is slower than the cached one
   *  no leaking on dynamic patterns

Fixes #7692
This commit is contained in:
Armin 2017-07-14 11:46:55 +02:00 committed by Armin Braun
parent ea9f56e2fc
commit 96ff365997
9 changed files with 136 additions and 207 deletions

View file

@ -0,0 +1,61 @@
package org.logstash.benchmark;
import java.io.IOException;
import java.util.concurrent.TimeUnit;
import org.logstash.Event;
import org.logstash.Timestamp;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OperationsPerInvocation;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
@Warmup(iterations = 3, time = 100, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 10, time = 100, timeUnit = TimeUnit.MILLISECONDS)
@Fork(1)
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Thread)
public class EventSprintfBenchmark {
private static final int EVENTS_PER_INVOCATION = 10_000_000;
private static final Event EVENT = new Event();
@Setup
public void setUp() throws IOException {
EVENT.setField("Foo", "Bar");
EVENT.setField("Foo1", "Bar1");
EVENT.setField("Foo2", "Bar2");
EVENT.setField("Foo3", "Bar3");
EVENT.setField("Foo4", "Bar4");
EVENT.setTimestamp(Timestamp.now());
}
@Benchmark
@OperationsPerInvocation(EVENTS_PER_INVOCATION)
public final void sprintfField(final Blackhole blackhole) throws Exception {
for (int i = 0; i < EVENTS_PER_INVOCATION; ++i) {
blackhole.consume(EVENT.sprintf(i + "-%{[Foo]}"));
}
}
public static void main(final String... args) throws RunnerException {
Options opt = new OptionsBuilder()
.include(EventSprintfBenchmark.class.getSimpleName())
.forks(2)
.build();
new Runner(opt).run();
}
}

View file

@ -1,23 +0,0 @@
package org.logstash;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import java.io.IOException;
/**
* Created by ph on 15-05-22.
*/
public class DateNode implements TemplateNode {
private DateTimeFormatter formatter;
public DateNode(String format) {
this.formatter = DateTimeFormat.forPattern(format).withZone(DateTimeZone.UTC);
}
@Override
public String evaluate(Event event) throws IOException {
return event.getTimestamp().getTime().toString(this.formatter);
}
}

View file

@ -1,15 +0,0 @@
package org.logstash;
import java.io.IOException;
/**
* Created by ph on 15-05-22.
*/
public class EpochNode implements TemplateNode {
public EpochNode(){ }
@Override
public String evaluate(Event event) throws IOException {
return String.valueOf(event.getTimestamp().getTime().getMillis() / 1000);
}
}

View file

@ -1,43 +1,15 @@
package org.logstash; package org.logstash;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.logstash.bivalues.BiValue;
import java.io.IOException;
import java.util.List; import java.util.List;
import java.util.Map; import org.logstash.bivalues.BiValue;
/** /**
* Created by ph on 15-05-22. * Created by ph on 15-05-22.
*/ */
public class KeyNode implements TemplateNode { public class KeyNode {
private String key;
public KeyNode(String key) { private KeyNode() {
this.key = key; // Utility Class
}
/**
This will be more complicated with hash and array.
leverage jackson lib to do the actual.
*/
@Override
public String evaluate(Event event) throws IOException {
Object value = event.getField(this.key);
if (value != null) {
if (value instanceof List) {
return join((List)value, ",");
} else if (value instanceof Map) {
ObjectMapper mapper = new ObjectMapper();
return mapper.writeValueAsString(value);
} else {
return event.getField(this.key).toString();
}
} else {
return "%{" + this.key + "}";
}
} }
// TODO: (colin) this should be moved somewhere else to make it reusable // TODO: (colin) this should be moved somewhere else to make it reusable
@ -48,7 +20,7 @@ public class KeyNode implements TemplateNode {
if (len == 0) return ""; if (len == 0) return "";
StringBuilder result = new StringBuilder(toString(list.get(0), delim)); final StringBuilder result = new StringBuilder(toString(list.get(0), delim));
for (int i = 1; i < len; i++) { for (int i = 1; i < len; i++) {
result.append(delim); result.append(delim);
result.append(toString(list.get(i), delim)); result.append(toString(list.get(i), delim));

View file

@ -1,19 +0,0 @@
package org.logstash;
import java.io.IOException;
/**
* Created by ph on 15-05-22.
*/
public class StaticNode implements TemplateNode {
private String content;
public StaticNode(String content) {
this.content = content;
}
@Override
public String evaluate(Event event) throws IOException {
return this.content;
}
}

View file

@ -1,92 +1,80 @@
package org.logstash; package org.logstash;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException; import java.io.IOException;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.concurrent.ConcurrentHashMap; import org.joda.time.DateTimeZone;
import java.util.regex.Matcher; import org.joda.time.format.DateTimeFormat;
import java.util.regex.Pattern;
public class StringInterpolation { public final class StringInterpolation {
static Pattern TEMPLATE_TAG = Pattern.compile("%\\{([^}]+)\\}");
static final Map<String, TemplateNode> cache = new ConcurrentHashMap<>(); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static final ThreadLocal<StringBuilder> STRING_BUILDER =
new ThreadLocal<StringBuilder>() {
@Override
protected StringBuilder initialValue() {
return new StringBuilder();
}
@Override
public StringBuilder get() {
StringBuilder b = super.get();
b.setLength(0); // clear/reset the buffer
return b;
}
};
private StringInterpolation() { private StringInterpolation() {
// TODO: // Utility Class
// This may need some tweaking for the concurrency level to get better memory usage.
// The current implementation doesn't allow the keys to expire, I think under normal usage
// the keys will converge to a fixed number.
//
// If this code make logstash goes OOM, we have the following options:
// - If the key doesn't contains a `%` do not cache it, this will reduce the key size at a performance cost.
// - Use some kind LRU cache
// - Create a new data structure that use weakref or use Google Guava for the cache https://code.google.com/p/guava-libraries/
} }
public static void clearCache() { public static String evaluate(final Event event, final String template) throws IOException {
cache.clear(); int open = template.indexOf("%{");
} int close = template.indexOf('}', open);
if (open == -1 || close == -1) {
public static int cacheSize() { return template;
return cache.size();
}
public static String evaluate(Event event, String template) throws IOException {
TemplateNode compiledTemplate = cache.get(template);
if (compiledTemplate == null) {
compiledTemplate = compile(template);
cache.put(template, compiledTemplate);
} }
final StringBuilder builder = STRING_BUILDER.get();
return compiledTemplate.evaluate(event); int pos = 0;
} while (open > -1 && close > -1) {
if (open > 0) {
public static TemplateNode compile(String template) { builder.append(template, pos, open);
Template compiledTemplate = new Template(); }
if (template.regionMatches(open + 2, "+%s", 0, close - open - 2)) {
if (template.indexOf('%') == -1) { builder.append(event.getTimestamp().getTime().getMillis() / 1000L);
// Move the nodes to a custom instance } else if (template.charAt(open + 2) == '+') {
// so we can remove the iterator and do one `.evaluate` builder.append(
compiledTemplate.add(new StaticNode(template)); event.getTimestamp().getTime().toString(
} else { DateTimeFormat.forPattern(template.substring(open + 3, close))
Matcher matcher = TEMPLATE_TAG.matcher(template); .withZone(DateTimeZone.UTC)
String tag; ));
int pos = 0; } else {
final String found = template.substring(open + 2, close);
while (matcher.find()) { final Object value = event.getField(found);
if (matcher.start() > 0) { if (value != null) {
compiledTemplate.add(new StaticNode(template.substring(pos, matcher.start()))); if (value instanceof List) {
builder.append(KeyNode.join((List) value, ","));
} else if (value instanceof Map) {
builder.append(OBJECT_MAPPER.writeValueAsString(value));
} else {
builder.append(value.toString());
}
} else {
builder.append("%{").append(found).append('}');
} }
tag = matcher.group(1);
compiledTemplate.add(identifyTag(tag));
pos = matcher.end();
}
if(pos <= template.length() - 1) {
compiledTemplate.add(new StaticNode(template.substring(pos)));
} }
pos = close + 1;
open = template.indexOf("%{", pos);
close = template.indexOf('}', open);
} }
final int len = template.length();
// if we only have one node return the node directly if (pos < len) {
// and remove the need to loop. builder.append(template, pos, len);
if(compiledTemplate.size() == 1) {
return compiledTemplate.get(0);
} else {
return compiledTemplate;
}
}
public static TemplateNode identifyTag(String tag) {
if(tag.equals("+%s")) {
return new EpochNode();
} else if(tag.charAt(0) == '+') {
return new DateNode(tag.substring(1));
} else {
return new KeyNode(tag);
} }
return builder.toString();
} }
} }

View file

@ -1,32 +0,0 @@
package org.logstash;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class Template implements TemplateNode {
public List nodes = new ArrayList<>();
public Template() {}
public void add(TemplateNode node) {
nodes.add(node);
}
public int size() {
return nodes.size();
}
public TemplateNode get(int index) {
return (TemplateNode) nodes.get(index);
}
@Override
public String evaluate(Event event) throws IOException {
StringBuffer results = new StringBuffer();
for (int i = 0; i < nodes.size(); i++) {
results.append(((TemplateNode) nodes.get(i)).evaluate(event));
}
return results.toString();
}
}

View file

@ -1,10 +0,0 @@
package org.logstash;
import java.io.IOException;
/**
* Created by ph on 15-05-22.
*/
public interface TemplateNode {
String evaluate(Event event) throws IOException;
}

View file

@ -77,6 +77,13 @@ public class StringInterpolationTest {
assertEquals("v", StringInterpolation.evaluate(event, path)); assertEquals("v", StringInterpolation.evaluate(event, path));
} }
@Test
public void TestEpochSeconds() throws IOException {
Event event = getTestEvent();
String path = "%{+%ss}";
assertEquals("%00", StringInterpolation.evaluate(event, path));
}
@Test @Test
public void TestEpoch() throws IOException { public void TestEpoch() throws IOException {
Event event = getTestEvent(); Event event = getTestEvent();