mirror of
https://github.com/elastic/logstash.git
synced 2025-04-24 14:47:19 -04:00
Fixes #7676 and generally improves the performance of org.logstash.StringInterpolation#evaluate
by:
* Don't use regex to find the contents between `%{}`, it's way too inefficient for a simple case like this with all its allocations * Flatten logic out of the many helper classes * Use a reusable, thread-local `StringBuilder` to serialize into * Don't use `.substring` for comparisons * `static` `ObjectMapper` * `org.joda.time.format.DateTimeFormat#forPattern` not being cached is not an issue, since it's cached in `org.joda.time.format.DateTimeFormat#cPatternCache` * Added benchmark * ~15% faster for a few cases I ran, even for repeated patterns with interpolation + couldn't find any case even with date formatting, where this implementation is slower than the cached one * no leaking on dynamic patterns Fixes #7692
This commit is contained in:
parent
ea9f56e2fc
commit
96ff365997
9 changed files with 136 additions and 207 deletions
|
@ -0,0 +1,61 @@
|
|||
package org.logstash.benchmark;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import org.logstash.Event;
|
||||
import org.logstash.Timestamp;
|
||||
import org.openjdk.jmh.annotations.Benchmark;
|
||||
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||
import org.openjdk.jmh.annotations.Fork;
|
||||
import org.openjdk.jmh.annotations.Measurement;
|
||||
import org.openjdk.jmh.annotations.Mode;
|
||||
import org.openjdk.jmh.annotations.OperationsPerInvocation;
|
||||
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||
import org.openjdk.jmh.annotations.Scope;
|
||||
import org.openjdk.jmh.annotations.Setup;
|
||||
import org.openjdk.jmh.annotations.State;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
import org.openjdk.jmh.infra.Blackhole;
|
||||
import org.openjdk.jmh.runner.Runner;
|
||||
import org.openjdk.jmh.runner.RunnerException;
|
||||
import org.openjdk.jmh.runner.options.Options;
|
||||
import org.openjdk.jmh.runner.options.OptionsBuilder;
|
||||
|
||||
@Warmup(iterations = 3, time = 100, timeUnit = TimeUnit.MILLISECONDS)
|
||||
@Measurement(iterations = 10, time = 100, timeUnit = TimeUnit.MILLISECONDS)
|
||||
@Fork(1)
|
||||
@BenchmarkMode(Mode.Throughput)
|
||||
@OutputTimeUnit(TimeUnit.MILLISECONDS)
|
||||
@State(Scope.Thread)
|
||||
public class EventSprintfBenchmark {
|
||||
|
||||
private static final int EVENTS_PER_INVOCATION = 10_000_000;
|
||||
|
||||
private static final Event EVENT = new Event();
|
||||
|
||||
@Setup
|
||||
public void setUp() throws IOException {
|
||||
EVENT.setField("Foo", "Bar");
|
||||
EVENT.setField("Foo1", "Bar1");
|
||||
EVENT.setField("Foo2", "Bar2");
|
||||
EVENT.setField("Foo3", "Bar3");
|
||||
EVENT.setField("Foo4", "Bar4");
|
||||
EVENT.setTimestamp(Timestamp.now());
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
@OperationsPerInvocation(EVENTS_PER_INVOCATION)
|
||||
public final void sprintfField(final Blackhole blackhole) throws Exception {
|
||||
for (int i = 0; i < EVENTS_PER_INVOCATION; ++i) {
|
||||
blackhole.consume(EVENT.sprintf(i + "-%{[Foo]}"));
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(final String... args) throws RunnerException {
|
||||
Options opt = new OptionsBuilder()
|
||||
.include(EventSprintfBenchmark.class.getSimpleName())
|
||||
.forks(2)
|
||||
.build();
|
||||
new Runner(opt).run();
|
||||
}
|
||||
}
|
|
@ -1,23 +0,0 @@
|
|||
package org.logstash;
|
||||
|
||||
import org.joda.time.DateTimeZone;
|
||||
import org.joda.time.format.DateTimeFormat;
|
||||
import org.joda.time.format.DateTimeFormatter;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Created by ph on 15-05-22.
|
||||
*/
|
||||
public class DateNode implements TemplateNode {
|
||||
private DateTimeFormatter formatter;
|
||||
|
||||
public DateNode(String format) {
|
||||
this.formatter = DateTimeFormat.forPattern(format).withZone(DateTimeZone.UTC);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String evaluate(Event event) throws IOException {
|
||||
return event.getTimestamp().getTime().toString(this.formatter);
|
||||
}
|
||||
}
|
|
@ -1,15 +0,0 @@
|
|||
package org.logstash;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Created by ph on 15-05-22.
|
||||
*/
|
||||
public class EpochNode implements TemplateNode {
|
||||
public EpochNode(){ }
|
||||
|
||||
@Override
|
||||
public String evaluate(Event event) throws IOException {
|
||||
return String.valueOf(event.getTimestamp().getTime().getMillis() / 1000);
|
||||
}
|
||||
}
|
|
@ -1,43 +1,15 @@
|
|||
package org.logstash;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import org.logstash.bivalues.BiValue;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import org.logstash.bivalues.BiValue;
|
||||
|
||||
/**
|
||||
* Created by ph on 15-05-22.
|
||||
*/
|
||||
public class KeyNode implements TemplateNode {
|
||||
private String key;
|
||||
public class KeyNode {
|
||||
|
||||
public KeyNode(String key) {
|
||||
this.key = key;
|
||||
}
|
||||
|
||||
/**
|
||||
This will be more complicated with hash and array.
|
||||
leverage jackson lib to do the actual.
|
||||
*/
|
||||
@Override
|
||||
public String evaluate(Event event) throws IOException {
|
||||
Object value = event.getField(this.key);
|
||||
|
||||
if (value != null) {
|
||||
if (value instanceof List) {
|
||||
return join((List)value, ",");
|
||||
} else if (value instanceof Map) {
|
||||
ObjectMapper mapper = new ObjectMapper();
|
||||
return mapper.writeValueAsString(value);
|
||||
} else {
|
||||
return event.getField(this.key).toString();
|
||||
}
|
||||
|
||||
} else {
|
||||
return "%{" + this.key + "}";
|
||||
}
|
||||
private KeyNode() {
|
||||
// Utility Class
|
||||
}
|
||||
|
||||
// TODO: (colin) this should be moved somewhere else to make it reusable
|
||||
|
@ -48,7 +20,7 @@ public class KeyNode implements TemplateNode {
|
|||
|
||||
if (len == 0) return "";
|
||||
|
||||
StringBuilder result = new StringBuilder(toString(list.get(0), delim));
|
||||
final StringBuilder result = new StringBuilder(toString(list.get(0), delim));
|
||||
for (int i = 1; i < len; i++) {
|
||||
result.append(delim);
|
||||
result.append(toString(list.get(i), delim));
|
||||
|
|
|
@ -1,19 +0,0 @@
|
|||
package org.logstash;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Created by ph on 15-05-22.
|
||||
*/
|
||||
public class StaticNode implements TemplateNode {
|
||||
private String content;
|
||||
|
||||
public StaticNode(String content) {
|
||||
this.content = content;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String evaluate(Event event) throws IOException {
|
||||
return this.content;
|
||||
}
|
||||
}
|
|
@ -1,92 +1,80 @@
|
|||
package org.logstash;
|
||||
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import org.joda.time.DateTimeZone;
|
||||
import org.joda.time.format.DateTimeFormat;
|
||||
|
||||
public class StringInterpolation {
|
||||
static Pattern TEMPLATE_TAG = Pattern.compile("%\\{([^}]+)\\}");
|
||||
static final Map<String, TemplateNode> cache = new ConcurrentHashMap<>();
|
||||
public final class StringInterpolation {
|
||||
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
private static final ThreadLocal<StringBuilder> STRING_BUILDER =
|
||||
new ThreadLocal<StringBuilder>() {
|
||||
@Override
|
||||
protected StringBuilder initialValue() {
|
||||
return new StringBuilder();
|
||||
}
|
||||
|
||||
@Override
|
||||
public StringBuilder get() {
|
||||
StringBuilder b = super.get();
|
||||
b.setLength(0); // clear/reset the buffer
|
||||
return b;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
private StringInterpolation() {
|
||||
// TODO:
|
||||
// This may need some tweaking for the concurrency level to get better memory usage.
|
||||
// The current implementation doesn't allow the keys to expire, I think under normal usage
|
||||
// the keys will converge to a fixed number.
|
||||
//
|
||||
// If this code make logstash goes OOM, we have the following options:
|
||||
// - If the key doesn't contains a `%` do not cache it, this will reduce the key size at a performance cost.
|
||||
// - Use some kind LRU cache
|
||||
// - Create a new data structure that use weakref or use Google Guava for the cache https://code.google.com/p/guava-libraries/
|
||||
// Utility Class
|
||||
}
|
||||
|
||||
public static void clearCache() {
|
||||
cache.clear();
|
||||
}
|
||||
|
||||
public static int cacheSize() {
|
||||
return cache.size();
|
||||
}
|
||||
|
||||
public static String evaluate(Event event, String template) throws IOException {
|
||||
TemplateNode compiledTemplate = cache.get(template);
|
||||
|
||||
if (compiledTemplate == null) {
|
||||
compiledTemplate = compile(template);
|
||||
cache.put(template, compiledTemplate);
|
||||
public static String evaluate(final Event event, final String template) throws IOException {
|
||||
int open = template.indexOf("%{");
|
||||
int close = template.indexOf('}', open);
|
||||
if (open == -1 || close == -1) {
|
||||
return template;
|
||||
}
|
||||
|
||||
return compiledTemplate.evaluate(event);
|
||||
}
|
||||
|
||||
public static TemplateNode compile(String template) {
|
||||
Template compiledTemplate = new Template();
|
||||
|
||||
if (template.indexOf('%') == -1) {
|
||||
// Move the nodes to a custom instance
|
||||
// so we can remove the iterator and do one `.evaluate`
|
||||
compiledTemplate.add(new StaticNode(template));
|
||||
} else {
|
||||
Matcher matcher = TEMPLATE_TAG.matcher(template);
|
||||
String tag;
|
||||
int pos = 0;
|
||||
|
||||
while (matcher.find()) {
|
||||
if (matcher.start() > 0) {
|
||||
compiledTemplate.add(new StaticNode(template.substring(pos, matcher.start())));
|
||||
final StringBuilder builder = STRING_BUILDER.get();
|
||||
int pos = 0;
|
||||
while (open > -1 && close > -1) {
|
||||
if (open > 0) {
|
||||
builder.append(template, pos, open);
|
||||
}
|
||||
if (template.regionMatches(open + 2, "+%s", 0, close - open - 2)) {
|
||||
builder.append(event.getTimestamp().getTime().getMillis() / 1000L);
|
||||
} else if (template.charAt(open + 2) == '+') {
|
||||
builder.append(
|
||||
event.getTimestamp().getTime().toString(
|
||||
DateTimeFormat.forPattern(template.substring(open + 3, close))
|
||||
.withZone(DateTimeZone.UTC)
|
||||
));
|
||||
} else {
|
||||
final String found = template.substring(open + 2, close);
|
||||
final Object value = event.getField(found);
|
||||
if (value != null) {
|
||||
if (value instanceof List) {
|
||||
builder.append(KeyNode.join((List) value, ","));
|
||||
} else if (value instanceof Map) {
|
||||
builder.append(OBJECT_MAPPER.writeValueAsString(value));
|
||||
} else {
|
||||
builder.append(value.toString());
|
||||
}
|
||||
} else {
|
||||
builder.append("%{").append(found).append('}');
|
||||
}
|
||||
|
||||
tag = matcher.group(1);
|
||||
compiledTemplate.add(identifyTag(tag));
|
||||
pos = matcher.end();
|
||||
}
|
||||
|
||||
if(pos <= template.length() - 1) {
|
||||
compiledTemplate.add(new StaticNode(template.substring(pos)));
|
||||
}
|
||||
pos = close + 1;
|
||||
open = template.indexOf("%{", pos);
|
||||
close = template.indexOf('}', open);
|
||||
}
|
||||
|
||||
// if we only have one node return the node directly
|
||||
// and remove the need to loop.
|
||||
if(compiledTemplate.size() == 1) {
|
||||
return compiledTemplate.get(0);
|
||||
} else {
|
||||
return compiledTemplate;
|
||||
}
|
||||
}
|
||||
|
||||
public static TemplateNode identifyTag(String tag) {
|
||||
if(tag.equals("+%s")) {
|
||||
return new EpochNode();
|
||||
} else if(tag.charAt(0) == '+') {
|
||||
return new DateNode(tag.substring(1));
|
||||
|
||||
} else {
|
||||
return new KeyNode(tag);
|
||||
final int len = template.length();
|
||||
if (pos < len) {
|
||||
builder.append(template, pos, len);
|
||||
}
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,32 +0,0 @@
|
|||
package org.logstash;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class Template implements TemplateNode {
|
||||
public List nodes = new ArrayList<>();
|
||||
public Template() {}
|
||||
|
||||
public void add(TemplateNode node) {
|
||||
nodes.add(node);
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return nodes.size();
|
||||
}
|
||||
|
||||
public TemplateNode get(int index) {
|
||||
return (TemplateNode) nodes.get(index);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String evaluate(Event event) throws IOException {
|
||||
StringBuffer results = new StringBuffer();
|
||||
|
||||
for (int i = 0; i < nodes.size(); i++) {
|
||||
results.append(((TemplateNode) nodes.get(i)).evaluate(event));
|
||||
}
|
||||
return results.toString();
|
||||
}
|
||||
}
|
|
@ -1,10 +0,0 @@
|
|||
package org.logstash;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Created by ph on 15-05-22.
|
||||
*/
|
||||
public interface TemplateNode {
|
||||
String evaluate(Event event) throws IOException;
|
||||
}
|
|
@ -77,6 +77,13 @@ public class StringInterpolationTest {
|
|||
assertEquals("v", StringInterpolation.evaluate(event, path));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestEpochSeconds() throws IOException {
|
||||
Event event = getTestEvent();
|
||||
String path = "%{+%ss}";
|
||||
assertEquals("%00", StringInterpolation.evaluate(event, path));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void TestEpoch() throws IOException {
|
||||
Event event = getTestEvent();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue