Performance: improve event.clone memory usage

for Strings with copy-on-write semantics when deep cloning.

motivated by "big" events reaching plugins such as split,
which might produce several new events out of a single one.

Fixes #11794
This commit is contained in:
Karol Bucek 2020-04-15 18:44:24 +02:00
parent b3fd033e27
commit 1cc1ce390a
2 changed files with 58 additions and 7 deletions

View file

@ -42,7 +42,8 @@ public final class Cloner {
} else if (input instanceof List<?>) {
return (T) deepList((List<?>) input);
} else if (input instanceof RubyString) {
return (T) ((RubyString) input).doClone();
// new instance but sharing ByteList (until either String is modified)
return (T) ((RubyString) input).dup();
} else if (input instanceof Collection<?>) {
throw new ClassCastException("unexpected Collection type " + input.getClass());
}

View file

@ -21,6 +21,9 @@
package org.logstash;
import org.jruby.RubyString;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.util.ByteList;
import org.junit.Test;
import static org.junit.Assert.*;
@ -33,14 +36,61 @@ public class ClonerTest {
RubyString result = Cloner.deep(original);
// Check object identity
assertTrue(result != original);
// Check different underlying bytes
assertTrue(result.getByteList() != original.getByteList());
assertNotSame(original, result);
// Check string equality
assertEquals(result, original);
assertEquals(original, result);
assertEquals(javaString, result.asJavaString());
}
@Test
public void testRubyStringCloningAndAppend() {
String javaString = "fooBar";
RubyString original = RubyString.newString(RubyUtil.RUBY, javaString);
RubyString result = Cloner.deep(original);
result.append(RubyUtil.RUBY.newString("X"));
assertNotEquals(result, original);
ThreadContext context = RubyUtil.RUBY.getCurrentContext();
assertTrue(original.op_equal(context, RubyString.newString(RubyUtil.RUBY, javaString)).isTrue());
assertEquals(javaString, original.asJavaString());
}
@Test
public void testRubyStringCloningAndChangeOriginal() {
String javaString = "fooBar";
RubyString original = RubyString.newString(RubyUtil.RUBY, javaString);
RubyString result = Cloner.deep(original);
ThreadContext context = RubyUtil.RUBY.getCurrentContext();
IRubyObject index = RubyUtil.RUBY.newFixnum(5);
original.op_aset(context, index, RubyUtil.RUBY.newString("z")); // original[5] = 'z'
assertNotEquals(result, original);
assertTrue(result.op_equal(context, RubyString.newString(RubyUtil.RUBY, javaString)).isTrue());
assertEquals(javaString, result.asJavaString());
assertEquals("fooBaz", original.asJavaString());
}
@Test // @Tag("Performance Optimization")
public void testRubyStringCloningMemoryOptimization() {
ByteList bytes = ByteList.create("0123456789");
RubyString original = RubyString.newString(RubyUtil.RUBY, bytes);
RubyString result = Cloner.deep(original);
assertNotSame(original, result);
assertSame(bytes, original.getByteList());
// NOTE: this is an implementation detail or the underlying sharing :
assertSame(bytes, result.getByteList()); // bytes-list shared
// but when string is modified it will stop using the same byte container
result.concat(RubyUtil.RUBY.getCurrentContext(), RubyUtil.RUBY.newString(" "));
assertNotSame(bytes, result.getByteList()); // byte-list copied on write
}
}