Fixes #7676 and generally improves the performance of org.logstash.StringInterpolation#evaluate by:

* Don't use regex to find the contents between `%{}`, it's way too inefficient for a simple case like this with all its allocations
* Flatten logic out of the many helper classes
* Use a reusable, thread-local `StringBuilder` to serialize into
* Don't use `.substring` for comparisons
* `static` `ObjectMapper`
* `org.joda.time.format.DateTimeFormat#forPattern` not being cached is not an issue, since it's cached in `org.joda.time.format.DateTimeFormat#cPatternCache`

* Added benchmark
   * ~15% faster for a few cases I ran, even for repeated patterns with interpolation + couldn't find any case even with date formatting, where this implementation is slower than the cached one
   *  no leaking on dynamic patterns

Fixes #7692
This commit is contained in:
Armin 2017-07-14 11:46:55 +02:00 committed by Armin Braun
parent ea9f56e2fc
commit 96ff365997
9 changed files with 136 additions and 207 deletions

View file

@ -0,0 +1,61 @@
package org.logstash.benchmark;
import java.io.IOException;
import java.util.concurrent.TimeUnit;
import org.logstash.Event;
import org.logstash.Timestamp;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OperationsPerInvocation;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
@Warmup(iterations = 3, time = 100, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 10, time = 100, timeUnit = TimeUnit.MILLISECONDS)
@Fork(1)
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Thread)
public class EventSprintfBenchmark {
private static final int EVENTS_PER_INVOCATION = 10_000_000;
private static final Event EVENT = new Event();
@Setup
public void setUp() throws IOException {
EVENT.setField("Foo", "Bar");
EVENT.setField("Foo1", "Bar1");
EVENT.setField("Foo2", "Bar2");
EVENT.setField("Foo3", "Bar3");
EVENT.setField("Foo4", "Bar4");
EVENT.setTimestamp(Timestamp.now());
}
@Benchmark
@OperationsPerInvocation(EVENTS_PER_INVOCATION)
public final void sprintfField(final Blackhole blackhole) throws Exception {
for (int i = 0; i < EVENTS_PER_INVOCATION; ++i) {
blackhole.consume(EVENT.sprintf(i + "-%{[Foo]}"));
}
}
public static void main(final String... args) throws RunnerException {
Options opt = new OptionsBuilder()
.include(EventSprintfBenchmark.class.getSimpleName())
.forks(2)
.build();
new Runner(opt).run();
}
}

View file

@ -1,23 +0,0 @@
package org.logstash;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import java.io.IOException;
/**
* Created by ph on 15-05-22.
*/
public class DateNode implements TemplateNode {
private DateTimeFormatter formatter;
public DateNode(String format) {
this.formatter = DateTimeFormat.forPattern(format).withZone(DateTimeZone.UTC);
}
@Override
public String evaluate(Event event) throws IOException {
return event.getTimestamp().getTime().toString(this.formatter);
}
}

View file

@ -1,15 +0,0 @@
package org.logstash;
import java.io.IOException;
/**
* Created by ph on 15-05-22.
*/
public class EpochNode implements TemplateNode {
public EpochNode(){ }
@Override
public String evaluate(Event event) throws IOException {
return String.valueOf(event.getTimestamp().getTime().getMillis() / 1000);
}
}

View file

@ -1,43 +1,15 @@
package org.logstash;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.logstash.bivalues.BiValue;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import org.logstash.bivalues.BiValue;
/**
* Created by ph on 15-05-22.
*/
public class KeyNode implements TemplateNode {
private String key;
public class KeyNode {
public KeyNode(String key) {
this.key = key;
}
/**
This will be more complicated with hash and array.
leverage jackson lib to do the actual.
*/
@Override
public String evaluate(Event event) throws IOException {
Object value = event.getField(this.key);
if (value != null) {
if (value instanceof List) {
return join((List)value, ",");
} else if (value instanceof Map) {
ObjectMapper mapper = new ObjectMapper();
return mapper.writeValueAsString(value);
} else {
return event.getField(this.key).toString();
}
} else {
return "%{" + this.key + "}";
}
private KeyNode() {
// Utility Class
}
// TODO: (colin) this should be moved somewhere else to make it reusable
@ -48,7 +20,7 @@ public class KeyNode implements TemplateNode {
if (len == 0) return "";
StringBuilder result = new StringBuilder(toString(list.get(0), delim));
final StringBuilder result = new StringBuilder(toString(list.get(0), delim));
for (int i = 1; i < len; i++) {
result.append(delim);
result.append(toString(list.get(i), delim));

View file

@ -1,19 +0,0 @@
package org.logstash;
import java.io.IOException;
/**
* Created by ph on 15-05-22.
*/
public class StaticNode implements TemplateNode {
private String content;
public StaticNode(String content) {
this.content = content;
}
@Override
public String evaluate(Event event) throws IOException {
return this.content;
}
}

View file

@ -1,92 +1,80 @@
package org.logstash;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
public class StringInterpolation {
static Pattern TEMPLATE_TAG = Pattern.compile("%\\{([^}]+)\\}");
static final Map<String, TemplateNode> cache = new ConcurrentHashMap<>();
public final class StringInterpolation {
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static final ThreadLocal<StringBuilder> STRING_BUILDER =
new ThreadLocal<StringBuilder>() {
@Override
protected StringBuilder initialValue() {
return new StringBuilder();
}
@Override
public StringBuilder get() {
StringBuilder b = super.get();
b.setLength(0); // clear/reset the buffer
return b;
}
};
private StringInterpolation() {
// TODO:
// This may need some tweaking for the concurrency level to get better memory usage.
// The current implementation doesn't allow the keys to expire, I think under normal usage
// the keys will converge to a fixed number.
//
// If this code make logstash goes OOM, we have the following options:
// - If the key doesn't contains a `%` do not cache it, this will reduce the key size at a performance cost.
// - Use some kind LRU cache
// - Create a new data structure that use weakref or use Google Guava for the cache https://code.google.com/p/guava-libraries/
// Utility Class
}
public static void clearCache() {
cache.clear();
public static String evaluate(final Event event, final String template) throws IOException {
int open = template.indexOf("%{");
int close = template.indexOf('}', open);
if (open == -1 || close == -1) {
return template;
}
public static int cacheSize() {
return cache.size();
}
public static String evaluate(Event event, String template) throws IOException {
TemplateNode compiledTemplate = cache.get(template);
if (compiledTemplate == null) {
compiledTemplate = compile(template);
cache.put(template, compiledTemplate);
}
return compiledTemplate.evaluate(event);
}
public static TemplateNode compile(String template) {
Template compiledTemplate = new Template();
if (template.indexOf('%') == -1) {
// Move the nodes to a custom instance
// so we can remove the iterator and do one `.evaluate`
compiledTemplate.add(new StaticNode(template));
} else {
Matcher matcher = TEMPLATE_TAG.matcher(template);
String tag;
final StringBuilder builder = STRING_BUILDER.get();
int pos = 0;
while (matcher.find()) {
if (matcher.start() > 0) {
compiledTemplate.add(new StaticNode(template.substring(pos, matcher.start())));
while (open > -1 && close > -1) {
if (open > 0) {
builder.append(template, pos, open);
}
tag = matcher.group(1);
compiledTemplate.add(identifyTag(tag));
pos = matcher.end();
}
if(pos <= template.length() - 1) {
compiledTemplate.add(new StaticNode(template.substring(pos)));
}
}
// if we only have one node return the node directly
// and remove the need to loop.
if(compiledTemplate.size() == 1) {
return compiledTemplate.get(0);
if (template.regionMatches(open + 2, "+%s", 0, close - open - 2)) {
builder.append(event.getTimestamp().getTime().getMillis() / 1000L);
} else if (template.charAt(open + 2) == '+') {
builder.append(
event.getTimestamp().getTime().toString(
DateTimeFormat.forPattern(template.substring(open + 3, close))
.withZone(DateTimeZone.UTC)
));
} else {
return compiledTemplate;
}
}
public static TemplateNode identifyTag(String tag) {
if(tag.equals("+%s")) {
return new EpochNode();
} else if(tag.charAt(0) == '+') {
return new DateNode(tag.substring(1));
final String found = template.substring(open + 2, close);
final Object value = event.getField(found);
if (value != null) {
if (value instanceof List) {
builder.append(KeyNode.join((List) value, ","));
} else if (value instanceof Map) {
builder.append(OBJECT_MAPPER.writeValueAsString(value));
} else {
return new KeyNode(tag);
builder.append(value.toString());
}
} else {
builder.append("%{").append(found).append('}');
}
}
pos = close + 1;
open = template.indexOf("%{", pos);
close = template.indexOf('}', open);
}
final int len = template.length();
if (pos < len) {
builder.append(template, pos, len);
}
return builder.toString();
}
}

View file

@ -1,32 +0,0 @@
package org.logstash;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class Template implements TemplateNode {
public List nodes = new ArrayList<>();
public Template() {}
public void add(TemplateNode node) {
nodes.add(node);
}
public int size() {
return nodes.size();
}
public TemplateNode get(int index) {
return (TemplateNode) nodes.get(index);
}
@Override
public String evaluate(Event event) throws IOException {
StringBuffer results = new StringBuffer();
for (int i = 0; i < nodes.size(); i++) {
results.append(((TemplateNode) nodes.get(i)).evaluate(event));
}
return results.toString();
}
}

View file

@ -1,10 +0,0 @@
package org.logstash;
import java.io.IOException;
/**
* Created by ph on 15-05-22.
*/
public interface TemplateNode {
String evaluate(Event event) throws IOException;
}

View file

@ -77,6 +77,13 @@ public class StringInterpolationTest {
assertEquals("v", StringInterpolation.evaluate(event, path));
}
@Test
public void TestEpochSeconds() throws IOException {
Event event = getTestEvent();
String path = "%{+%ss}";
assertEquals("%00", StringInterpolation.evaluate(event, path));
}
@Test
public void TestEpoch() throws IOException {
Event event = getTestEvent();