diff --git a/Gemfile b/Gemfile index 9e2c0c8d8..3dd811e8b 100644 --- a/Gemfile +++ b/Gemfile @@ -3,6 +3,7 @@ source "https://rubygems.org" gem "logstash-core", :path => "./logstash-core" +gem "logstash-core-queue-jruby", :path => "./logstash-core-queue-jruby" gem "logstash-core-event-java", :path => "./logstash-core-event-java" gem "logstash-core-plugin-api", :path => "./logstash-core-plugin-api" gem "file-dependencies", "0.1.6" @@ -113,3 +114,4 @@ gem "logstash-output-stdout" gem "logstash-output-tcp" gem "logstash-output-udp" gem "logstash-output-webhdfs" +gem "logstash-filter-multiline" diff --git a/config/logstash.yml b/config/logstash.yml index bce9f417e..0910bac80 100644 --- a/config/logstash.yml +++ b/config/logstash.yml @@ -85,6 +85,28 @@ # # config.debug: false # +# ------------ Queuing Settings -------------- +# +# Internal queuing model, "memory" for legacy in-memory based queuing and +# "persisted" for disk-based acked queueing. Defaults is memory +# +# queue.type: memory +# +# If using queue.type: persisted, the directory path where the data files will be stored. +# Default is path.data/queue +# +# path.queue: +# +# If using queue.type: persisted, the page data files size. The queue data consists of +# append-only data files separated into pages. Default is 250mb +# +# queue.page_capacity: 250mb +# +# If using queue.type: persisted, the maximum number of unread events in the queue. +# Default is 0 (unlimited) +# +# queue.max_events: 0 +# # ------------ Metrics Settings -------------- # # Bind address for the metrics REST endpoint diff --git a/dripmain.rb b/dripmain.rb index 032415087..60bfc0435 100644 --- a/dripmain.rb +++ b/dripmain.rb @@ -3,6 +3,7 @@ require_relative "lib/bootstrap/environment" LogStash::Bundler.setup!({:without => [:build]}) +require "logstash-core" # typical required gems and libs require "logstash/environment" diff --git a/lib/bootstrap/rspec.rb b/lib/bootstrap/rspec.rb index 4c95f3bfc..d24c95595 100755 --- a/lib/bootstrap/rspec.rb +++ b/lib/bootstrap/rspec.rb @@ -1,6 +1,7 @@ # encoding: utf-8 require_relative "environment" LogStash::Bundler.setup!({:without => [:build]}) +require "logstash-core" require "logstash/environment" $LOAD_PATH.unshift(File.join(LogStash::Environment::LOGSTASH_CORE, "spec")) diff --git a/logstash-core-event-java/build.gradle b/logstash-core-event-java/build.gradle index d99d30476..1ff40a093 100644 --- a/logstash-core-event-java/build.gradle +++ b/logstash-core-event-java/build.gradle @@ -1,18 +1,13 @@ -buildscript { - repositories { - mavenLocal() - mavenCentral() - jcenter() - } - dependencies { - classpath 'net.saliman:gradle-cobertura-plugin:2.2.8' - } -} +group = 'org.logstash' +version = '6.0.0-alpha1' + +description = "Logstash Core Event Java" + +apply plugin: 'java' +apply plugin: 'idea' repositories { - mavenLocal() mavenCentral() - jcenter() } gradle.projectsEvaluated { @@ -22,12 +17,8 @@ gradle.projectsEvaluated { } } -apply plugin: 'java' -apply plugin: 'idea' - -group = 'org.logstash' - project.sourceCompatibility = 1.8 +project.targetCompatibility = 1.8 task sourcesJar(type: Jar, dependsOn: classes) { from sourceSets.main.allSource @@ -41,9 +32,11 @@ task javadocJar(type: Jar, dependsOn: javadoc) { extension 'jar' } +// copy jar file into the gem lib dir but without the version number in filename task copyGemjar(type: Copy, dependsOn: sourcesJar) { from project.jar into project.file('lib/logstash-core-event-java/') + rename(/(.+)-${project.version}.jar/, '$1.jar') } task cleanGemjar { @@ -55,6 +48,19 @@ task 
cleanGemjar { clean.dependsOn(cleanGemjar) jar.finalizedBy(copyGemjar) +task gemspec_jars << { + File gemspec_jars = file("./gemspec_jars.rb") + gemspec_jars.newWriter().withWriter { w -> + w << "# This file is generated by Gradle as part of the build process. It extracts the build.gradle\n" + w << "# runtime dependencies to generate this gemspec dependencies file to be eval'ed by the gemspec\n" + w << "# for the jar-dependencies requirements.\n\n" + configurations.runtime.allDependencies.each { dependency -> + w << "gem.requirements << \"jar ${dependency.group}:${dependency.name}, ${dependency.version}\"\n" + } + } +} +build.finalizedBy(gemspec_jars) + configurations.create('sources') configurations.create('javadoc') configurations.archives { @@ -94,10 +100,13 @@ idea { dependencies { compile 'com.fasterxml.jackson.core:jackson-core:2.7.3' compile 'com.fasterxml.jackson.core:jackson-databind:2.7.3' + compile 'com.fasterxml.jackson.module:jackson-module-afterburner:2.7.3' + compile 'com.fasterxml.jackson.dataformat:jackson-dataformat-cbor:2.7.3' compile 'org.apache.logging.log4j:log4j-api:2.6.2' - provided 'org.jruby:jruby-core:1.7.25' testCompile 'junit:junit:4.12' testCompile 'net.javacrumbs.json-unit:json-unit:1.9.0' + provided 'org.jruby:jruby-core:1.7.25' + provided files('../logstash-core/lib/logstash-core/logstash-core.jar') } // See http://www.gradle.org/docs/current/userguide/gradle_wrapper.html diff --git a/logstash-core-event-java/gemspec_jars.rb b/logstash-core-event-java/gemspec_jars.rb new file mode 100644 index 000000000..9c3acd021 --- /dev/null +++ b/logstash-core-event-java/gemspec_jars.rb @@ -0,0 +1,9 @@ +# This file is generated by Gradle as part of the build process. It extracts the build.gradle +# runtime dependencies to generate this gemspec dependencies file to be eval'ed by the gemspec +# for the jar-dependencies requirements. + +gem.requirements << "jar com.fasterxml.jackson.core:jackson-core, 2.7.3" +gem.requirements << "jar com.fasterxml.jackson.core:jackson-databind, 2.7.3" +gem.requirements << "jar com.fasterxml.jackson.module:jackson-module-afterburner, 2.7.3" +gem.requirements << "jar com.fasterxml.jackson.dataformat:jackson-dataformat-cbor, 2.7.3" +gem.requirements << "jar org.apache.logging.log4j:log4j-api, 2.6.2" diff --git a/logstash-core-event-java/lib/logstash-core-event-java_jars.rb b/logstash-core-event-java/lib/logstash-core-event-java_jars.rb index eda40d431..068f18262 100644 --- a/logstash-core-event-java/lib/logstash-core-event-java_jars.rb +++ b/logstash-core-event-java/lib/logstash-core-event-java_jars.rb @@ -3,12 +3,18 @@ begin require 'jar_dependencies' rescue LoadError require 'com/fasterxml/jackson/core/jackson-databind/2.7.3/jackson-databind-2.7.3.jar' + require 'org/apache/logging/log4j/log4j-api/2.6.2/log4j-api-2.6.2.jar' require 'com/fasterxml/jackson/core/jackson-annotations/2.7.0/jackson-annotations-2.7.0.jar' + require 'com/fasterxml/jackson/module/jackson-module-afterburner/2.7.3/jackson-module-afterburner-2.7.3.jar' + require 'com/fasterxml/jackson/dataformat/jackson-dataformat-cbor/2.7.3/jackson-dataformat-cbor-2.7.3.jar' require 'com/fasterxml/jackson/core/jackson-core/2.7.3/jackson-core-2.7.3.jar' end if defined? 
Jars require_jar( 'com.fasterxml.jackson.core', 'jackson-databind', '2.7.3' ) + require_jar( 'org.apache.logging.log4j', 'log4j-api', '2.6.2' ) require_jar( 'com.fasterxml.jackson.core', 'jackson-annotations', '2.7.0' ) + require_jar( 'com.fasterxml.jackson.module', 'jackson-module-afterburner', '2.7.3' ) + require_jar( 'com.fasterxml.jackson.dataformat', 'jackson-dataformat-cbor', '2.7.3' ) require_jar( 'com.fasterxml.jackson.core', 'jackson-core', '2.7.3' ) end diff --git a/logstash-core-event-java/lib/logstash/event.rb b/logstash-core-event-java/lib/logstash/event.rb index 38c5d2069..27766aca9 100644 --- a/logstash-core-event-java/lib/logstash/event.rb +++ b/logstash-core-event-java/lib/logstash/event.rb @@ -25,10 +25,14 @@ module LogStash def shutdown?; false; end; end - FLUSH = FlushEvent.new + class NoSignal < SignalEvent + def flush?; false; end; + def shutdown?; false; end; + end - # LogStash::SHUTDOWN is used by plugins + FLUSH = FlushEvent.new SHUTDOWN = ShutdownEvent.new + NO_SIGNAL = NoSignal.new class Event MSG_BRACKETS_METHOD_MISSING = "Direct event field references (i.e. event['field']) have been disabled in favor of using event get and set methods (e.g. event.get('field')). Please consult the Logstash 5.0 breaking changes documentation for more details.".freeze diff --git a/logstash-core-event-java/logstash-core-event-java.gemspec b/logstash-core-event-java/logstash-core-event-java.gemspec index 42c190b5f..89d0bf105 100644 --- a/logstash-core-event-java/logstash-core-event-java.gemspec +++ b/logstash-core-event-java/logstash-core-event-java.gemspec @@ -11,7 +11,7 @@ Gem::Specification.new do |gem| gem.homepage = "http://www.elastic.co/guide/en/logstash/current/index.html" gem.license = "Apache License (2.0)" - gem.files = Dir.glob(["logstash-core-event-java.gemspec", "lib/**/*.jar", "lib/**/*.rb", "spec/**/*.rb"]) + gem.files = Dir.glob(["logstash-core-event-java.gemspec", "gemspec_jars.rb", "lib/**/*.jar", "lib/**/*.rb", "spec/**/*.rb"]) gem.test_files = gem.files.grep(%r{^(test|spec|features)/}) gem.name = "logstash-core-event-java" gem.require_paths = ["lib"] @@ -26,6 +26,5 @@ Gem::Specification.new do |gem| # which does not have this problem. 
gem.add_runtime_dependency "ruby-maven", "~> 3.3.9" - gem.requirements << "jar com.fasterxml.jackson.core:jackson-core, 2.7.3" - gem.requirements << "jar com.fasterxml.jackson.core:jackson-databind, 2.7.3" + eval(File.read(File.expand_path("../gemspec_jars.rb", __FILE__))) end diff --git a/logstash-core-event-java/spec/event_spec.rb b/logstash-core-event-java/spec/event_spec.rb index 0b7d174e7..3402270c9 100644 --- a/logstash-core-event-java/spec/event_spec.rb +++ b/logstash-core-event-java/spec/event_spec.rb @@ -146,6 +146,17 @@ describe LogStash::Event do expect(e.get("[proxy][array][1]")).to eql("baz") expect(e.get("[proxy][hash][string]")).to eql("quux") end + + it "should fail on non UTF-8 encoding" do + # e = LogStash::Event.new + # s1 = "\xE0 Montr\xE9al".force_encoding("ISO-8859-1") + # expect(s1.encoding.name).to eq("ISO-8859-1") + # expect(s1.valid_encoding?).to eq(true) + # e.set("test", s1) + # s2 = e.get("test") + # expect(s2.encoding.name).to eq("UTF-8") + # expect(s2.valid_encoding?).to eq(true) + end end context "timestamp" do diff --git a/logstash-core-event-java/src/main/java/org/logstash/Event.java b/logstash-core-event-java/src/main/java/org/logstash/Event.java index b54806db3..a0ad214da 100644 --- a/logstash-core-event-java/src/main/java/org/logstash/Event.java +++ b/logstash-core-event-java/src/main/java/org/logstash/Event.java @@ -10,6 +10,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.joda.time.DateTime; import org.jruby.RubySymbol; +import org.logstash.ackedqueue.Queueable; import java.io.IOException; import java.io.Serializable; @@ -19,8 +20,11 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import static org.logstash.ObjectMappers.CBOR_MAPPER; +import static org.logstash.ObjectMappers.JSON_MAPPER; -public class Event implements Cloneable, Serializable { + +public class Event implements Cloneable, Serializable, Queueable { private boolean cancelled; private Map data; @@ -36,6 +40,10 @@ public class Event implements Cloneable, Serializable { public static final String TIMESTAMP_FAILURE_FIELD = "_@timestamp"; public static final String VERSION = "@version"; public static final String VERSION_ONE = "1"; + private static final String DATA_MAP_KEY = "DATA"; + private static final String META_MAP_KEY = "META"; + private static final String SEQNUM_MAP_KEY = "SEQUENCE_NUMBER"; + private static final Logger logger = LogManager.getLogger(Event.class); private static final ObjectMapper mapper = new ObjectMapper(); @@ -165,10 +173,53 @@ public class Event implements Cloneable, Serializable { } } + public byte[] toBinary() throws IOException { + return toBinaryFromMap(toSerializableMap()); + } + + private Map> toSerializableMap() { + HashMap> hashMap = new HashMap<>(); + hashMap.put(DATA_MAP_KEY, this.data); + hashMap.put(META_MAP_KEY, this.metadata); + return hashMap; + } + + private byte[] toBinaryFromMap(Map> representation) throws IOException { + return CBOR_MAPPER.writeValueAsBytes(representation); + } + + private static Event fromSerializableMap(Map> representation) throws IOException{ + if (!representation.containsKey(DATA_MAP_KEY)) { + throw new IOException("The deserialized Map must contain the \"DATA\" key"); + } + if (!representation.containsKey(META_MAP_KEY)) { + throw new IOException("The deserialized Map must contain the \"META\" key"); + } + Map dataMap = representation.get(DATA_MAP_KEY); + dataMap.put(METADATA, representation.get(META_MAP_KEY)); + return new Event(dataMap); + } + + 
public static Event fromBinary(byte[] source) throws IOException { + if (source == null || source.length == 0) { + return new Event(); + } + return fromSerializableMap(fromBinaryToMap(source)); + } + + private static Map> fromBinaryToMap(byte[] source) throws IOException { + Object o = CBOR_MAPPER.readValue(source, HashMap.class); + if (o instanceof Map) { + return (HashMap>) o; + } else { + throw new IOException("incompatible from binary object type=" + o.getClass().getName() + " , only HashMap is supported"); + } + } + public String toJson() throws IOException { - return mapper.writeValueAsString(this.data); + return JSON_MAPPER.writeValueAsString(this.data); } public static Event[] fromJson(String json) @@ -180,7 +231,7 @@ public class Event implements Cloneable, Serializable { } Event[] result; - Object o = mapper.readValue(json, Object.class); + Object o = JSON_MAPPER.readValue(json, Object.class); // we currently only support Map or Array json objects if (o instanceof Map) { result = new Event[]{ new Event((Map)o) }; @@ -304,4 +355,21 @@ public class Event implements Cloneable, Serializable { tags.add(tag); } } + + public byte[] serialize() throws IOException { + Map> dataMap = toSerializableMap(); + return toBinaryFromMap(dataMap); + } + + public byte[] serializeWithoutSeqNum() throws IOException { + return toBinary(); + } + + public static Event deserialize(byte[] data) throws IOException { + if (data == null || data.length == 0) { + return new Event(); + } + Map> dataMap = fromBinaryToMap(data); + return fromSerializableMap(dataMap); + } } diff --git a/logstash-core-event-java/src/main/java/org/logstash/ObjectMappers.java b/logstash-core-event-java/src/main/java/org/logstash/ObjectMappers.java new file mode 100644 index 000000000..55cc633b6 --- /dev/null +++ b/logstash-core-event-java/src/main/java/org/logstash/ObjectMappers.java @@ -0,0 +1,19 @@ +package org.logstash; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.cbor.CBORFactory; +import com.fasterxml.jackson.dataformat.cbor.CBORGenerator; +import com.fasterxml.jackson.module.afterburner.AfterburnerModule; + +public class ObjectMappers { + public static final ObjectMapper JSON_MAPPER = new ObjectMapper(); + public static final ObjectMapper CBOR_MAPPER = new ObjectMapper(new CBORFactory()); + + static { + JSON_MAPPER.registerModule(new AfterburnerModule()); + + CBORFactory cborf = (CBORFactory) CBOR_MAPPER.getFactory(); + cborf.configure(CBORGenerator.Feature.WRITE_MINIMAL_INTS, false); + CBOR_MAPPER.registerModule(new AfterburnerModule()); + } +} diff --git a/logstash-core-event-java/src/main/java/org/logstash/Timestamp.java b/logstash-core-event-java/src/main/java/org/logstash/Timestamp.java index f3cea5b56..15a86d1c2 100644 --- a/logstash-core-event-java/src/main/java/org/logstash/Timestamp.java +++ b/logstash-core-event-java/src/main/java/org/logstash/Timestamp.java @@ -3,14 +3,14 @@ package org.logstash; import com.fasterxml.jackson.databind.annotation.JsonSerialize; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; -import org.joda.time.LocalDateTime; import org.joda.time.Duration; +import org.joda.time.LocalDateTime; import org.joda.time.format.DateTimeFormatter; import org.joda.time.format.ISODateTimeFormat; import java.util.Date; -@JsonSerialize(using = TimestampSerializer.class) +@JsonSerialize(using = org.logstash.json.TimestampSerializer.class) public class Timestamp implements Cloneable { // all methods setting the time object must set it in the UTC 
timezone diff --git a/logstash-core-event-java/src/main/java/org/logstash/json/TimestampSerializer.java b/logstash-core-event-java/src/main/java/org/logstash/json/TimestampSerializer.java new file mode 100644 index 000000000..cc61e45c6 --- /dev/null +++ b/logstash-core-event-java/src/main/java/org/logstash/json/TimestampSerializer.java @@ -0,0 +1,18 @@ +package org.logstash.json; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.JsonSerializer; +import com.fasterxml.jackson.databind.SerializerProvider; +import org.logstash.Timestamp; + +import java.io.IOException; + +public class TimestampSerializer extends JsonSerializer { + + @Override + public void serialize(Timestamp value, JsonGenerator jgen, SerializerProvider provider) + throws IOException + { + jgen.writeString(value.toIso8601()); + } +} diff --git a/logstash-core-event-java/src/test/java/org/logstash/EventTest.java b/logstash-core-event-java/src/test/java/org/logstash/EventTest.java index a27147616..08a330e2b 100644 --- a/logstash-core-event-java/src/test/java/org/logstash/EventTest.java +++ b/logstash-core-event-java/src/test/java/org/logstash/EventTest.java @@ -14,6 +14,69 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; public class EventTest { + @Test + public void queueableInterfaceWithoutSeqNumRoundTrip() throws Exception { + Event e = new Event(); + e.setField("foo", 42L); + e.setField("bar", 42); + HashMap inner = new HashMap(2); + inner.put("innerFoo", 42L); + inner.put("innerQuux", 42.42); + e.setField("baz", inner); + e.setField("[@metadata][foo]", 42L); + byte[] binary = e.serializeWithoutSeqNum(); + Event er = Event.deserialize(binary); + assertEquals(42L, er.getField("foo")); + assertEquals(42, er.getField("bar")); + assertEquals(42L, er.getField("[baz][innerFoo]")); + assertEquals(42.42, er.getField("[baz][innerQuux]")); + assertEquals(42L, er.getField("[@metadata][foo]")); + + assertEquals(e.getTimestamp().toIso8601(), er.getTimestamp().toIso8601()); + } + + @Test + public void queueableInterfaceRoundTrip() throws Exception { + Event e = new Event(); + e.setField("foo", 42L); + e.setField("bar", 42); + HashMap inner = new HashMap(2); + inner.put("innerFoo", 42L); + inner.put("innerQuux", 42.42); + e.setField("baz", inner); + e.setField("[@metadata][foo]", 42L); + byte[] binary = e.serialize(); + Event er = Event.deserialize(binary); + assertEquals(42L, er.getField("foo")); + assertEquals(42, er.getField("bar")); + assertEquals(42L, er.getField("[baz][innerFoo]")); + assertEquals(42.42, er.getField("[baz][innerQuux]")); + assertEquals(42L, er.getField("[@metadata][foo]")); + + assertEquals(e.getTimestamp().toIso8601(), er.getTimestamp().toIso8601()); + } + + @Test + public void toBinaryRoundtrip() throws Exception { + Event e = new Event(); + e.setField("foo", 42L); + e.setField("bar", 42); + HashMap inner = new HashMap(2); + inner.put("innerFoo", 42L); + inner.put("innerQuux", 42.42); + e.setField("baz", inner); + e.setField("[@metadata][foo]", 42L); + byte[] binary = e.toBinary(); + Event er = Event.fromBinary(binary); + assertEquals(42L, er.getField("foo")); + assertEquals(42, er.getField("bar")); + assertEquals(42L, er.getField("[baz][innerFoo]")); + assertEquals(42.42, er.getField("[baz][innerQuux]")); + assertEquals(42L, er.getField("[@metadata][foo]")); + + assertEquals(e.getTimestamp().toIso8601(), er.getTimestamp().toIso8601()); + } + @Test public void testBareToJson() throws Exception { Event e = new Event(); diff --git 
a/logstash-core-queue-jruby/build.gradle b/logstash-core-queue-jruby/build.gradle new file mode 100644 index 000000000..4f0ed7269 --- /dev/null +++ b/logstash-core-queue-jruby/build.gradle @@ -0,0 +1,123 @@ +import org.yaml.snakeyaml.Yaml + +apply plugin: 'java' +apply plugin: 'idea' + +// fetch version from Logstash's master versions.yml file +def versionMap = (Map) (new Yaml()).load(new File("$projectDir/../versions.yml").text) + +description = "Logstash Core Queue JRuby" +group 'org.logstash' +version = versionMap['logstash-core-queue-jruby'] + +buildscript { + repositories { + mavenCentral() + } + dependencies { + classpath 'org.yaml:snakeyaml:1.17' + } +} + +repositories { + mavenCentral() +} + +gradle.projectsEvaluated { + tasks.withType(JavaCompile) { + options.compilerArgs << "-Xlint:unchecked" << "-Xlint:deprecation" + } +} + +project.sourceCompatibility = 1.8 +project.targetCompatibility = 1.8 + +task sourcesJar(type: org.gradle.api.tasks.bundling.Jar, dependsOn: classes) { + from sourceSets.main.allSource + classifier 'sources' + extension 'jar' +} + +task javadocJar(type: org.gradle.api.tasks.bundling.Jar, dependsOn: javadoc) { + from javadoc.destinationDir + classifier 'javadoc' + extension 'jar' +} + +// copy jar file into the gem lib dir but without the version number in filename +task copyGemjar(type: org.gradle.api.tasks.Copy, dependsOn: sourcesJar) { + from project.jar + into project.file('lib/logstash-core-queue-jruby/') + rename(/(.+)-${project.version}.jar/, '$1.jar') +} + +task cleanGemjar { + delete fileTree(project.file('lib/logstash-core-queue-jruby/')) { + include '*.jar' + } +} + +clean.dependsOn(cleanGemjar) +jar.finalizedBy(copyGemjar) + +task gemspec_jars << { + File gemspec_jars = file("./gemspec_jars.rb") + gemspec_jars.newWriter().withWriter { w -> + w << "# This file is generated by Gradle as part of the build process. 
It extracts the build.gradle\n" + w << "# runtime dependencies to generate this gemspec dependencies file to be eval'ed by the gemspec\n" + w << "# for the jar-dependencies requirements.\n\n" + configurations.runtime.allDependencies.each { dependency -> + w << "gem.requirements << \"jar ${dependency.group}:${dependency.name}, ${dependency.version}\"\n" + } + } +} +build.finalizedBy(gemspec_jars) + +configurations.create('sources') +configurations.create('javadoc') +configurations.archives { + extendsFrom configurations.sources + extendsFrom configurations.javadoc +} + +artifacts { + sources(sourcesJar) { + // Weird Gradle quirk where type will be used for the extension, but only for sources + type 'jar' + } + + javadoc(javadocJar) { + type 'javadoc' + } +} + +configurations { + provided +} + +project.sourceSets { + main.compileClasspath += project.configurations.provided + main.runtimeClasspath += project.configurations.provided + test.compileClasspath += project.configurations.provided + test.runtimeClasspath += project.configurations.provided +} +project.javadoc.classpath += project.configurations.provided + +idea { + module { + scopes.PROVIDED.plus += [project.configurations.provided] + } +} + +dependencies { + testCompile group: 'junit', name: 'junit', version: '4.12' + provided group: 'org.jruby', name: 'jruby-core', version: '1.7.25' + provided files('../logstash-core-event-java/lib/logstash-core-event-java/logstash-core-event-java.jar') + provided files('../logstash-core/lib/logstash-core/logstash-core.jar') +} + +// See http://www.gradle.org/docs/current/userguide/gradle_wrapper.html +task wrapper(type: Wrapper) { + description = 'Install Gradle wrapper' + gradleVersion = '2.8' +} diff --git a/logstash-core-queue-jruby/gemspec_jars.rb b/logstash-core-queue-jruby/gemspec_jars.rb new file mode 100644 index 000000000..80046e4e6 --- /dev/null +++ b/logstash-core-queue-jruby/gemspec_jars.rb @@ -0,0 +1,4 @@ +# This file is generated by Gradle as part of the build process. It extracts the build.gradle +# runtime dependencies to generate this gemspec dependencies file to be eval'ed by the gemspec +# for the jar-dependencies requirements. 
+ diff --git a/logstash-core-queue-jruby/lib/logstash-core-queue-jruby/logstash-core-queue-jruby.rb b/logstash-core-queue-jruby/lib/logstash-core-queue-jruby/logstash-core-queue-jruby.rb new file mode 100644 index 000000000..25c471f68 --- /dev/null +++ b/logstash-core-queue-jruby/lib/logstash-core-queue-jruby/logstash-core-queue-jruby.rb @@ -0,0 +1,24 @@ +# encoding: utf-8 + +require "java" + +module LogStash +end + +# local dev setup +classes_dir = File.expand_path("../../../build/classes/main", __FILE__) + +if File.directory?(classes_dir) + # if in local dev setup, add target to classpath + $CLASSPATH << classes_dir unless $CLASSPATH.include?(classes_dir) +else + # otherwise use included jar + begin + require "logstash-core-queue-jruby/logstash-core-queue-jruby.jar" + rescue Exception => e + raise("Error loading logstash-core-queue-jruby/logstash-core-queue-jruby.jar file, cause: #{e.message}") + end +end + +require "jruby_acked_queue_ext" +require "jruby_acked_batch_ext" diff --git a/logstash-core-queue-jruby/lib/logstash-core-queue-jruby/version.rb b/logstash-core-queue-jruby/lib/logstash-core-queue-jruby/version.rb new file mode 100644 index 000000000..62f138e2b --- /dev/null +++ b/logstash-core-queue-jruby/lib/logstash-core-queue-jruby/version.rb @@ -0,0 +1,3 @@ +# encoding: utf-8 + +LOGSTASH_CORE_QUEUE_JRUBY_VERSION = "0.0.1" diff --git a/logstash-core-queue-jruby/logstash-core-queue-jruby.gemspec b/logstash-core-queue-jruby/logstash-core-queue-jruby.gemspec new file mode 100644 index 000000000..463d1dc34 --- /dev/null +++ b/logstash-core-queue-jruby/logstash-core-queue-jruby.gemspec @@ -0,0 +1,23 @@ +# -*- encoding: utf-8 -*- +lib = File.expand_path('../lib', __FILE__) +$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) +require 'logstash-core-queue-jruby/version' + +Gem::Specification.new do |gem| + gem.authors = ["Elastic"] + gem.email = ["info@elastic.co"] + gem.description = %q{The core event component of logstash, the scalable log and event management tool} + gem.summary = %q{logstash-core-event-java - The core event component of logstash} + gem.homepage = "http://www.elastic.co/guide/en/logstash/current/index.html" + gem.license = "Apache License (2.0)" + + gem.files = Dir.glob(["logstash-core-queue-jruby.gemspec", "gemspec_jars.rb", "lib/**/*.jar", "lib/**/*.rb", "spec/**/*.rb"]) + gem.test_files = gem.files.grep(%r{^(test|spec|features)/}) + gem.name = "logstash-core-queue-jruby" + gem.require_paths = ["lib"] + gem.version = LOGSTASH_CORE_QUEUE_JRUBY_VERSION + + gem.platform = "java" + + eval(File.read(File.expand_path("../gemspec_jars.rb", __FILE__))) +end diff --git a/logstash-core-queue-jruby/settings.gradle b/logstash-core-queue-jruby/settings.gradle new file mode 100644 index 000000000..31c56eb1a --- /dev/null +++ b/logstash-core-queue-jruby/settings.gradle @@ -0,0 +1 @@ +rootProject.name = 'logstash-core-queue-jruby' diff --git a/logstash-core-queue-jruby/src/main/java/JrubyAckedBatchExtService.java b/logstash-core-queue-jruby/src/main/java/JrubyAckedBatchExtService.java new file mode 100644 index 000000000..f31aa6089 --- /dev/null +++ b/logstash-core-queue-jruby/src/main/java/JrubyAckedBatchExtService.java @@ -0,0 +1,14 @@ +import org.jruby.Ruby; +import org.jruby.runtime.load.BasicLibraryService; +import org.logstash.ackedqueue.ext.JrubyAckedBatchExtLibrary; + +import java.io.IOException; + +public class JrubyAckedBatchExtService implements BasicLibraryService { + public boolean basicLoad(final Ruby runtime) + throws IOException + { + new 
JrubyAckedBatchExtLibrary().load(runtime, false); + return true; + } +} diff --git a/logstash-core-queue-jruby/src/main/java/JrubyAckedQueueExtService.java b/logstash-core-queue-jruby/src/main/java/JrubyAckedQueueExtService.java new file mode 100644 index 000000000..8b349646e --- /dev/null +++ b/logstash-core-queue-jruby/src/main/java/JrubyAckedQueueExtService.java @@ -0,0 +1,16 @@ +import org.jruby.Ruby; +import org.jruby.runtime.load.BasicLibraryService; +import org.logstash.ackedqueue.ext.JrubyAckedQueueExtLibrary; +import org.logstash.ackedqueue.ext.JrubyAckedQueueMemoryExtLibrary; + +import java.io.IOException; + +public class JrubyAckedQueueExtService implements BasicLibraryService { + public boolean basicLoad(final Ruby runtime) + throws IOException + { + new JrubyAckedQueueExtLibrary().load(runtime, false); + new JrubyAckedQueueMemoryExtLibrary().load(runtime, false); + return true; + } +} diff --git a/logstash-core-queue-jruby/src/main/java/org/logstash/ackedqueue/ext/JrubyAckedBatchExtLibrary.java b/logstash-core-queue-jruby/src/main/java/org/logstash/ackedqueue/ext/JrubyAckedBatchExtLibrary.java new file mode 100644 index 000000000..cd858b5fa --- /dev/null +++ b/logstash-core-queue-jruby/src/main/java/org/logstash/ackedqueue/ext/JrubyAckedBatchExtLibrary.java @@ -0,0 +1,88 @@ +package org.logstash.ackedqueue.ext; + +import org.jruby.Ruby; +import org.jruby.RubyClass; +import org.jruby.RubyModule; +import org.jruby.RubyObject; +import org.jruby.RubyArray; +import org.jruby.anno.JRubyClass; +import org.jruby.anno.JRubyMethod; +import org.jruby.runtime.ObjectAllocator; +import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.builtin.IRubyObject; +import org.jruby.runtime.load.Library; +import org.logstash.ackedqueue.Batch; +import org.logstash.Event; +import org.logstash.ext.JrubyEventExtLibrary; + +import java.io.IOException; + +public class JrubyAckedBatchExtLibrary implements Library { + + public void load(Ruby runtime, boolean wrap) throws IOException { + RubyModule module = runtime.defineModule("LogStash"); + + RubyClass clazz = runtime.defineClassUnder("AckedBatch", runtime.getObject(), new ObjectAllocator() { + public IRubyObject allocate(Ruby runtime, RubyClass rubyClass) { + return new RubyAckedBatch(runtime, rubyClass); + } + }, module); + + clazz.defineAnnotatedMethods(RubyAckedBatch.class); + } + + @JRubyClass(name = "AckedBatch", parent = "Object") + public static class RubyAckedBatch extends RubyObject { + private Batch batch; + + public RubyAckedBatch(Ruby runtime, RubyClass klass) { + super(runtime, klass); + this.batch = null; + } + + public RubyAckedBatch(Ruby runtime, Batch batch) { + super(runtime, runtime.getModule("LogStash").getClass("AckedBatch")); + this.batch = batch; + } + + @SuppressWarnings("unchecked") // for the getList() calls + @JRubyMethod(name = "initialize", required = 3) + public IRubyObject ruby_initialize(ThreadContext context, IRubyObject events, IRubyObject seqNums, IRubyObject queue) + { + if (! (events instanceof RubyArray)) { + context.runtime.newArgumentError("expected events array"); + } + if (! (seqNums instanceof RubyArray)) { + context.runtime.newArgumentError("expected seqNums array"); + } + if (! 
(queue instanceof JrubyAckedQueueExtLibrary.RubyAckedQueue)) { + context.runtime.newArgumentError("expected queue AckedQueue"); + } + + this.batch = new Batch(((RubyArray)events).getList(), ((RubyArray)seqNums).getList(), ((JrubyAckedQueueExtLibrary.RubyAckedQueue)queue).getQueue()); + + return context.nil; + } + + @JRubyMethod(name = "get_elements") + public IRubyObject ruby_get_elements(ThreadContext context) + { + RubyArray result = context.runtime.newArray(); + this.batch.getElements().forEach(e -> result.add(new JrubyEventExtLibrary.RubyEvent(context.runtime, (Event)e))); + + return result; + } + + @JRubyMethod(name = "close") + public IRubyObject ruby_close(ThreadContext context) + { + try { + this.batch.close(); + } catch (IOException e) { + throw context.runtime.newIOErrorFromException(e); + } + + return context.nil; + } + } +} diff --git a/logstash-core-queue-jruby/src/main/java/org/logstash/ackedqueue/ext/JrubyAckedQueueExtLibrary.java b/logstash-core-queue-jruby/src/main/java/org/logstash/ackedqueue/ext/JrubyAckedQueueExtLibrary.java new file mode 100644 index 000000000..8b73d837c --- /dev/null +++ b/logstash-core-queue-jruby/src/main/java/org/logstash/ackedqueue/ext/JrubyAckedQueueExtLibrary.java @@ -0,0 +1,140 @@ +package org.logstash.ackedqueue.ext; + +import org.logstash.Event; +import org.logstash.ext.JrubyEventExtLibrary; +import org.jruby.Ruby; +import org.jruby.RubyClass; +import org.jruby.RubyFixnum; +import org.jruby.RubyModule; +import org.jruby.RubyObject; +import org.jruby.anno.JRubyClass; +import org.jruby.anno.JRubyMethod; +import org.jruby.runtime.Arity; +import org.jruby.runtime.ObjectAllocator; +import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.builtin.IRubyObject; +import org.jruby.runtime.load.Library; +import org.logstash.ackedqueue.Batch; +import org.logstash.ackedqueue.FileSettings; +import org.logstash.ackedqueue.Queue; +import org.logstash.ackedqueue.Settings; +import org.logstash.common.io.CheckpointIOFactory; +import org.logstash.common.io.FileCheckpointIO; +import org.logstash.common.io.MmapPageIO; +import org.logstash.common.io.PageIOFactory; + +import java.io.IOException; + +public class JrubyAckedQueueExtLibrary implements Library { + + public void load(Ruby runtime, boolean wrap) throws IOException { + RubyModule module = runtime.defineModule("LogStash"); + + RubyClass clazz = runtime.defineClassUnder("AckedQueue", runtime.getObject(), new ObjectAllocator() { + public IRubyObject allocate(Ruby runtime, RubyClass rubyClass) { + return new RubyAckedQueue(runtime, rubyClass); + } + }, module); + + clazz.defineAnnotatedMethods(RubyAckedQueue.class); + } + + // TODO: + // as a simplified first prototyping implementation, the Settings class is not exposed and the queue elements + // are assumed to be logstash Event. 
+ + + @JRubyClass(name = "AckedQueue", parent = "Object") + public static class RubyAckedQueue extends RubyObject { + private Queue queue; + + public RubyAckedQueue(Ruby runtime, RubyClass klass) { + super(runtime, klass); + } + + public Queue getQueue() { + return this.queue; + } + + // def initialize + @JRubyMethod(name = "initialize", optional = 3) + public IRubyObject ruby_initialize(ThreadContext context, IRubyObject[] args) + { + args = Arity.scanArgs(context.runtime, args, 3, 0); + + int capacity = RubyFixnum.num2int(args[1]); + int maxUnread = RubyFixnum.num2int(args[2]); + + Settings s = new FileSettings(args[0].asJavaString()); + PageIOFactory pageIOFactory = (pageNum, size, path) -> new MmapPageIO(pageNum, size, path); + CheckpointIOFactory checkpointIOFactory = (source) -> new FileCheckpointIO(source); + s.setCapacity(capacity); + s.setMaxUnread(maxUnread); + s.setElementIOFactory(pageIOFactory); + s.setCheckpointIOFactory(checkpointIOFactory); + s.setElementClass(Event.class); + + this.queue = new Queue(s); + + return context.nil; + } + + @JRubyMethod(name = "open") + public IRubyObject ruby_open(ThreadContext context) + { + try { + this.queue.open(); + } catch (IOException e) { + throw context.runtime.newIOErrorFromException(e); + } + + return context.nil; + } + + @JRubyMethod(name = {"write", "<<"}, required = 1) + public IRubyObject ruby_write(ThreadContext context, IRubyObject event) + { + if (!(event instanceof JrubyEventExtLibrary.RubyEvent)) { + throw context.runtime.newTypeError("wrong argument type " + event.getMetaClass() + " (expected LogStash::Event)"); + } + + long seqNum; + try { + seqNum = this.queue.write(((JrubyEventExtLibrary.RubyEvent) event).getEvent()); + } catch (IOException e) { + throw context.runtime.newIOErrorFromException(e); + } + + return context.runtime.newFixnum(seqNum); + } + + @JRubyMethod(name = "read_batch", required = 2) + public IRubyObject ruby_read_batch(ThreadContext context, IRubyObject limit, IRubyObject timeout) + { + Batch b; + + try { + b = this.queue.readBatch(RubyFixnum.num2int(limit), RubyFixnum.num2int(timeout)); + } catch (IOException e) { + throw context.runtime.newIOErrorFromException(e); + } + + // TODO: return proper Batch object + return (b == null) ? 
context.nil : new JrubyAckedBatchExtLibrary.RubyAckedBatch(context.runtime, b); + } + + + @JRubyMethod(name = "close") + public IRubyObject ruby_close(ThreadContext context) + { + try { + this.queue.close(); + } catch (IOException e) { + throw context.runtime.newIOErrorFromException(e); + } + + return context.nil; + } + + } +} diff --git a/logstash-core-queue-jruby/src/main/java/org/logstash/ackedqueue/ext/JrubyAckedQueueMemoryExtLibrary.java b/logstash-core-queue-jruby/src/main/java/org/logstash/ackedqueue/ext/JrubyAckedQueueMemoryExtLibrary.java new file mode 100644 index 000000000..3ba292108 --- /dev/null +++ b/logstash-core-queue-jruby/src/main/java/org/logstash/ackedqueue/ext/JrubyAckedQueueMemoryExtLibrary.java @@ -0,0 +1,141 @@ +package org.logstash.ackedqueue.ext; + +import org.logstash.Event; +import org.logstash.ext.JrubyEventExtLibrary; +import org.jruby.Ruby; +import org.jruby.RubyClass; +import org.jruby.RubyFixnum; +import org.jruby.RubyModule; +import org.jruby.RubyObject; +import org.jruby.anno.JRubyClass; +import org.jruby.anno.JRubyMethod; +import org.jruby.runtime.Arity; +import org.jruby.runtime.ObjectAllocator; +import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.builtin.IRubyObject; +import org.jruby.runtime.load.Library; +import org.logstash.ackedqueue.Batch; +import org.logstash.ackedqueue.MemorySettings; +import org.logstash.ackedqueue.Queue; +import org.logstash.ackedqueue.Settings; +import org.logstash.common.io.ByteBufferPageIO; +import org.logstash.common.io.CheckpointIOFactory; +import org.logstash.common.io.MemoryCheckpointIO; +import org.logstash.common.io.PageIOFactory; + +import java.io.IOException; + +public class JrubyAckedQueueMemoryExtLibrary implements Library { + + public void load(Ruby runtime, boolean wrap) throws IOException { + RubyModule module = runtime.defineModule("LogStash"); + + RubyClass clazz = runtime.defineClassUnder("AckedMemoryQueue", runtime.getObject(), new ObjectAllocator() { + public IRubyObject allocate(Ruby runtime, RubyClass rubyClass) { + return new RubyAckedMemoryQueue(runtime, rubyClass); + } + }, module); + + clazz.defineAnnotatedMethods(RubyAckedMemoryQueue.class); + } + + // TODO: + // as a simplified first prototyping implementation, the Settings class is not exposed and the queue elements + // are assumed to be logstash Event. 
+ + + @JRubyClass(name = "AckedMemoryQueue", parent = "Object") + public static class RubyAckedMemoryQueue extends RubyObject { + private Queue queue; + + public RubyAckedMemoryQueue(Ruby runtime, RubyClass klass) { + super(runtime, klass); + } + + public Queue getQueue() { + return this.queue; + } + + // def initialize + @JRubyMethod(name = "initialize", optional = 3) + public IRubyObject ruby_initialize(ThreadContext context, IRubyObject[] args) + { + args = Arity.scanArgs(context.runtime, args, 3, 0); + + int capacity = RubyFixnum.num2int(args[1]); + int maxUnread = RubyFixnum.num2int(args[2]); + + Settings s = new MemorySettings(args[0].asJavaString()); + PageIOFactory pageIOFactory = (pageNum, size, path) -> new ByteBufferPageIO(pageNum, size, path); + CheckpointIOFactory checkpointIOFactory = (source) -> new MemoryCheckpointIO(source); + s.setCapacity(capacity); + s.setMaxUnread(maxUnread); + s.setElementIOFactory(pageIOFactory); + s.setCheckpointIOFactory(checkpointIOFactory); + s.setElementClass(Event.class); + + this.queue = new Queue(s); + + return context.nil; + } + + @JRubyMethod(name = "open") + public IRubyObject ruby_open(ThreadContext context) + { + try { + this.queue.getCheckpointIO().purge(); + this.queue.open(); + } catch (IOException e) { + throw context.runtime.newIOErrorFromException(e); + } + + return context.nil; + } + + @JRubyMethod(name = {"write", "<<"}, required = 1) + public IRubyObject ruby_write(ThreadContext context, IRubyObject event) + { + if (!(event instanceof JrubyEventExtLibrary.RubyEvent)) { + throw context.runtime.newTypeError("wrong argument type " + event.getMetaClass() + " (expected LogStash::Event)"); + } + + long seqNum; + try { + seqNum = this.queue.write(((JrubyEventExtLibrary.RubyEvent) event).getEvent()); + } catch (IOException e) { + throw context.runtime.newIOErrorFromException(e); + } + + return context.runtime.newFixnum(seqNum); + } + + @JRubyMethod(name = "read_batch", required = 2) + public IRubyObject ruby_read_batch(ThreadContext context, IRubyObject limit, IRubyObject timeout) + { + Batch b; + + try { + b = this.queue.readBatch(RubyFixnum.num2int(limit), RubyFixnum.num2int(timeout)); + } catch (IOException e) { + throw context.runtime.newIOErrorFromException(e); + } + + // TODO: return proper Batch object + return (b == null) ? 
context.nil : new JrubyAckedBatchExtLibrary.RubyAckedBatch(context.runtime, b); + } + + + @JRubyMethod(name = "close") + public IRubyObject ruby_close(ThreadContext context) + { + try { + this.queue.close(); + } catch (IOException e) { + throw context.runtime.newIOErrorFromException(e); + } + + return context.nil; + } + + } +} diff --git a/logstash-core/build.gradle b/logstash-core/build.gradle index c45ad3492..625b73933 100644 --- a/logstash-core/build.gradle +++ b/logstash-core/build.gradle @@ -1,32 +1,98 @@ -import java.nio.file.Files import org.yaml.snakeyaml.Yaml -import static java.nio.file.StandardCopyOption.REPLACE_EXISTING apply plugin: 'java' apply plugin: 'idea' -group = 'org.logstash' -description = """Logstash Core Java""" - -sourceCompatibility = 1.8 -targetCompatibility = 1.8 - // fetch version from Logstash's master versions.yml file def versionMap = (Map) (new Yaml()).load(new File("$projectDir/../versions.yml").text) + +group = 'org.logstash' +description = """Logstash Core Java""" version = versionMap['logstash-core'] +repositories { + mavenCentral() +} + buildscript { repositories { mavenCentral() } - dependencies { classpath 'org.yaml:snakeyaml:1.17' } } +gradle.projectsEvaluated { + tasks.withType(JavaCompile) { + options.compilerArgs << "-Xlint:deprecation" +// options.compilerArgs << "-Xlint:unchecked" << "-Xlint:deprecation" + } +} + +project.sourceCompatibility = 1.8 +project.targetCompatibility = 1.8 + +task sourcesJar(type: org.gradle.api.tasks.bundling.Jar, dependsOn: classes) { + from sourceSets.main.allSource + classifier 'sources' + extension 'jar' +} + +task javadocJar(type: org.gradle.api.tasks.bundling.Jar, dependsOn: javadoc) { + from javadoc.destinationDir + classifier 'javadoc' + extension 'jar' +} + +// copy jar file into the gem lib dir but without the version number in filename +task copyGemjar(type: org.gradle.api.tasks.Copy, dependsOn: sourcesJar) { + from project.jar + into project.file('lib/logstash-core/') + rename(/(.+)-${project.version}.jar/, '$1.jar') +} + +task cleanGemjar { + delete fileTree(project.file('lib/logstash-core/')) { + include '*.jar' + } +} + +clean.dependsOn(cleanGemjar) +jar.finalizedBy(copyGemjar) + +task gemspec_jars << { + File gemspec_jars = file("./gemspec_jars.rb") + gemspec_jars.newWriter().withWriter { w -> + w << "# This file is generated by Gradle as part of the build process. 
It extracts the build.gradle\n" + w << "# runtime dependencies to generate this gemspec dependencies file to be eval'ed by the gemspec\n" + w << "# for the jar-dependencies requirements.\n\n" + configurations.runtime.allDependencies.each { dependency -> + w << "gem.requirements << \"jar ${dependency.group}:${dependency.name}, ${dependency.version}\"\n" + } + } +} +build.finalizedBy(gemspec_jars) + +configurations.create('sources') +configurations.create('javadoc') +configurations.archives { + extendsFrom configurations.sources + extendsFrom configurations.javadoc +} + +artifacts { + sources(sourcesJar) { + // Weird Gradle quirk where type will be used for the extension, but only for sources + type 'jar' + } + javadoc(javadocJar) { + type 'javadoc' + } +} + configurations { - provided + provided } project.sourceSets { @@ -37,52 +103,27 @@ project.sourceSets { } project.javadoc.classpath += project.configurations.provided -repositories { - mavenCentral() -} - -dependencies { - runtime 'org.apache.logging.log4j:log4j-1.2-api:2.6.2' - compile 'org.apache.logging.log4j:log4j-api:2.6.2' - compile 'org.apache.logging.log4j:log4j-core:2.6.2' - compile 'com.fasterxml.jackson.core:jackson-core:2.7.4' - compile 'com.fasterxml.jackson.core:jackson-databind:2.7.4' - testCompile 'org.apache.logging.log4j:log4j-core:2.6.2:tests' - testCompile 'org.apache.logging.log4j:log4j-api:2.6.2:tests' - testCompile 'junit:junit:4.12' - provided 'org.jruby:jruby-core:1.7.25' -} - idea { module { scopes.PROVIDED.plus += [project.configurations.provided] } } -task generateGemJarRequiresFile << { - File jars_file = file('lib/jars.rb') - jars_file.newWriter().withWriter { w -> - w << "require \'jar_dependencies\'\n" - configurations.runtime.allDependencies.each { - w << "require_jar(\'${it.group}\', \'${it.name}\', \'${it.version}\')\n" - } - w << "require_jar(\'${project.group}\', \'${project.name}\', \'${project.version}\')\n" - } +dependencies { + runtime 'org.apache.logging.log4j:log4j-1.2-api:2.6.2' + compile 'org.apache.logging.log4j:log4j-api:2.6.2' + compile 'org.apache.logging.log4j:log4j-core:2.6.2' + compile 'com.fasterxml.jackson.core:jackson-core:2.7.4' + compile 'com.fasterxml.jackson.core:jackson-databind:2.7.4' + testCompile 'org.apache.logging.log4j:log4j-core:2.6.2:tests' + testCompile 'org.apache.logging.log4j:log4j-api:2.6.2:tests' + testCompile 'junit:junit:4.12' + provided 'org.jruby:jruby-core:1.7.25' } -task vendor << { - String vendorPathPrefix = "vendor/jars" - configurations.runtime.allDependencies.each { dep -> - File f = configurations.runtime.filter { it.absolutePath.contains("${dep.group}/${dep.name}/${dep.version}") }.singleFile - String groupPath = dep.group.replaceAll('\\.', '/') - File newJarFile = file("${vendorPathPrefix}/${groupPath}/${dep.name}/${dep.version}/${dep.name}-${dep.version}.jar") - newJarFile.mkdirs() - Files.copy(f.toPath(), newJarFile.toPath(), REPLACE_EXISTING) - } - String projectGroupPath = project.group.replaceAll('\\.', '/') - File projectJarFile = file("${vendorPathPrefix}/${projectGroupPath}/${project.name}/${project.version}/${project.name}-${project.version}.jar") - projectJarFile.mkdirs() - Files.copy(file("$buildDir/libs/${project.name}-${project.version}.jar").toPath(), projectJarFile.toPath(), REPLACE_EXISTING) +// See http://www.gradle.org/docs/current/userguide/gradle_wrapper.html +task wrapper(type: Wrapper) { + description = 'Install Gradle wrapper' + gradleVersion = '2.8' } -vendor.dependsOn(jar, generateGemJarRequiresFile) diff --git 
a/logstash-core/gemspec_jars.rb b/logstash-core/gemspec_jars.rb new file mode 100644 index 000000000..15f2967a4 --- /dev/null +++ b/logstash-core/gemspec_jars.rb @@ -0,0 +1,9 @@ +# This file is generated by Gradle as part of the build process. It extracts the build.gradle +# runtime dependencies to generate this gemspec dependencies file to be eval'ed by the gemspec +# for the jar-dependencies requirements. + +gem.requirements << "jar org.apache.logging.log4j:log4j-1.2-api, 2.6.2" +gem.requirements << "jar org.apache.logging.log4j:log4j-api, 2.6.2" +gem.requirements << "jar org.apache.logging.log4j:log4j-core, 2.6.2" +gem.requirements << "jar com.fasterxml.jackson.core:jackson-core, 2.7.4" +gem.requirements << "jar com.fasterxml.jackson.core:jackson-databind, 2.7.4" diff --git a/logstash-core/lib/jars.rb b/logstash-core/lib/jars.rb deleted file mode 100644 index b55fbe05c..000000000 --- a/logstash-core/lib/jars.rb +++ /dev/null @@ -1,7 +0,0 @@ -require 'jar_dependencies' -require_jar('org.apache.logging.log4j', 'log4j-1.2-api', '2.6.2') -require_jar('org.apache.logging.log4j', 'log4j-api', '2.6.2') -require_jar('org.apache.logging.log4j', 'log4j-core', '2.6.2') -require_jar('com.fasterxml.jackson.core', 'jackson-core', '2.7.4') -require_jar('com.fasterxml.jackson.core', 'jackson-databind', '2.7.4') -require_jar('org.logstash', 'logstash-core', '6.0.0-alpha1') diff --git a/logstash-core/lib/logstash-core/logstash-core.rb b/logstash-core/lib/logstash-core/logstash-core.rb index 74f073326..1d39ad9e8 100644 --- a/logstash-core/lib/logstash-core/logstash-core.rb +++ b/logstash-core/lib/logstash-core/logstash-core.rb @@ -1,3 +1,23 @@ # encoding: utf-8 + +require "java" + module LogStash end + +require "logstash-core_jars" + +# local dev setup +classes_dir = File.expand_path("../../../build/classes/main", __FILE__) + +if File.directory?(classes_dir) + # if in local dev setup, add target to classpath + $CLASSPATH << classes_dir unless $CLASSPATH.include?(classes_dir) +else + # otherwise use included jar + begin + require "logstash-core/logstash-core.jar" + rescue Exception => e + raise("Error loading logstash-core/logstash-core.jar file, cause: #{e.message}") + end +end diff --git a/logstash-core/lib/logstash-core_jars.rb b/logstash-core/lib/logstash-core_jars.rb new file mode 100644 index 000000000..f548c9fd3 --- /dev/null +++ b/logstash-core/lib/logstash-core_jars.rb @@ -0,0 +1,20 @@ +# this is a generated file, to avoid over-writing it just delete this comment +begin + require 'jar_dependencies' +rescue LoadError + require 'org/apache/logging/log4j/log4j-core/2.6.2/log4j-core-2.6.2.jar' + require 'org/apache/logging/log4j/log4j-api/2.6.2/log4j-api-2.6.2.jar' + require 'com/fasterxml/jackson/core/jackson-core/2.7.4/jackson-core-2.7.4.jar' + require 'org/apache/logging/log4j/log4j-1.2-api/2.6.2/log4j-1.2-api-2.6.2.jar' + require 'com/fasterxml/jackson/core/jackson-annotations/2.7.0/jackson-annotations-2.7.0.jar' + require 'com/fasterxml/jackson/core/jackson-databind/2.7.4/jackson-databind-2.7.4.jar' +end + +if defined? 
Jars + require_jar( 'org.apache.logging.log4j', 'log4j-core', '2.6.2' ) + require_jar( 'org.apache.logging.log4j', 'log4j-api', '2.6.2' ) + require_jar( 'com.fasterxml.jackson.core', 'jackson-core', '2.7.4' ) + require_jar( 'org.apache.logging.log4j', 'log4j-1.2-api', '2.6.2' ) + require_jar( 'com.fasterxml.jackson.core', 'jackson-annotations', '2.7.0' ) + require_jar( 'com.fasterxml.jackson.core', 'jackson-databind', '2.7.4' ) +end diff --git a/logstash-core/lib/logstash/environment.rb b/logstash-core/lib/logstash/environment.rb index a918a4041..6b052c2dc 100644 --- a/logstash-core/lib/logstash/environment.rb +++ b/logstash-core/lib/logstash/environment.rb @@ -40,8 +40,15 @@ module LogStash Setting::String.new("http.host", "127.0.0.1"), Setting::PortRange.new("http.port", 9600..9700), Setting::String.new("http.environment", "production"), + Setting::String.new("queue.type", "memory", true, ["persisted", "memory", "memory_acked"]), + Setting::Bytes.new("queue.page_capacity", "250mb"), + Setting::Numeric.new("queue.max_events", 0), # 0 is unlimited ].each {|setting| SETTINGS.register(setting) } + # Compute the default queue path based on `path.data` + default_queue_file_path = ::File.join(SETTINGS.get("path.data"), "queue") + SETTINGS.register Setting::WritableDirectory.new("path.queue", default_queue_file_path) + module Environment extend self diff --git a/logstash-core/lib/logstash/instrument/metric_store.rb b/logstash-core/lib/logstash/instrument/metric_store.rb index c440e2524..3967cefbf 100644 --- a/logstash-core/lib/logstash/instrument/metric_store.rb +++ b/logstash-core/lib/logstash/instrument/metric_store.rb @@ -1,6 +1,5 @@ # encoding: utf-8 require "concurrent" -require "logstash/event" require "logstash/instrument/metric_type" require "thread" diff --git a/logstash-core/lib/logstash/instrument/metric_type/base.rb b/logstash-core/lib/logstash/instrument/metric_type/base.rb index 206f175c7..b473871b5 100644 --- a/logstash-core/lib/logstash/instrument/metric_type/base.rb +++ b/logstash-core/lib/logstash/instrument/metric_type/base.rb @@ -1,5 +1,4 @@ # encoding: utf-8 -require "logstash/event" require "logstash/util" module LogStash module Instrument module MetricType diff --git a/logstash-core/lib/logstash/instrument/snapshot.rb b/logstash-core/lib/logstash/instrument/snapshot.rb index f46068439..62a12677f 100644 --- a/logstash-core/lib/logstash/instrument/snapshot.rb +++ b/logstash-core/lib/logstash/instrument/snapshot.rb @@ -1,6 +1,5 @@ # encoding: utf-8 require "logstash/util/loggable" -require "logstash/event" module LogStash module Instrument class Snapshot diff --git a/logstash-core/lib/logstash/java_integration.rb b/logstash-core/lib/logstash/java_integration.rb index 26f9eb546..670ceaae6 100644 --- a/logstash-core/lib/logstash/java_integration.rb +++ b/logstash-core/lib/logstash/java_integration.rb @@ -1,6 +1,5 @@ # encoding: utf-8 require "java" -require "jars" # this is mainly for usage with JrJackson json parsing in :raw mode which genenerates # Java::JavaUtil::ArrayList and Java::JavaUtil::LinkedHashMap native objects for speed. 
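For reference, the settings registered in logstash/environment.rb above (queue.type, queue.page_capacity, queue.max_events, path.queue) correspond to the commented defaults added to config/logstash.yml at the top of this patch. A minimal sketch of a configuration that enables the new disk-based acked queue might look like the following; this is illustrative only and not part of the patch, the path shown is a hypothetical example, and the other values simply restate the documented defaults ("memory_acked" is also an accepted queue.type, but the pipeline code notes it is intended for tests/specs):

    # enable the disk-based acked queue (the legacy default is "memory")
    queue.type: persisted
    # directory for the queue data files; defaults to path.data/queue when unset
    path.queue: /var/lib/logstash/queue
    # size of each append-only page data file (default 250mb)
    queue.page_capacity: 250mb
    # maximum number of unread events held in the queue; 0 means unlimited (default)
    queue.max_events: 0
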
diff --git a/logstash-core/lib/logstash/outputs/base.rb b/logstash-core/lib/logstash/outputs/base.rb index 453d0cbdf..5e3ce1ad1 100644 --- a/logstash-core/lib/logstash/outputs/base.rb +++ b/logstash-core/lib/logstash/outputs/base.rb @@ -84,7 +84,7 @@ class LogStash::Outputs::Base < LogStash::Plugin end # def receive public - # To be overriden in implementations + # To be overridden in implementations def multi_receive(events) if @receives_encoded self.multi_receive_encoded(codec.multi_encode(events)) diff --git a/logstash-core/lib/logstash/pipeline.rb b/logstash-core/lib/logstash/pipeline.rb index 6b4c3b966..efb03d838 100644 --- a/logstash-core/lib/logstash/pipeline.rb +++ b/logstash-core/lib/logstash/pipeline.rb @@ -4,13 +4,15 @@ require "stud/interval" require "concurrent" require "logstash/namespace" require "logstash/errors" +require "logstash-core/logstash-core" +require "logstash/util/wrapped_acked_queue" +require "logstash/util/wrapped_synchronous_queue" require "logstash/event" require "logstash/config/file" require "logstash/filters/base" require "logstash/inputs/base" require "logstash/outputs/base" require "logstash/shutdown_watcher" -require "logstash/util/wrapped_synchronous_queue" require "logstash/pipeline_reporter" require "logstash/instrument/metric" require "logstash/instrument/namespaced_metric" @@ -92,16 +94,17 @@ module LogStash; class Pipeline rescue => e raise end - - queue = Util::WrappedSynchronousQueue.new + queue = build_queue_from_settings @input_queue_client = queue.write_client @filter_queue_client = queue.read_client - # Note that @inflight_batches as a central mechanism for tracking inflight + @signal_queue = Queue.new + # Note that @infilght_batches as a central mechanism for tracking inflight # batches will fail if we have multiple read clients here. @filter_queue_client.set_events_metric(metric.namespace([:stats, :events])) @filter_queue_client.set_pipeline_metric( metric.namespace([:stats, :pipelines, pipeline_id.to_s.to_sym, :events]) ) + @events_filtered = Concurrent::AtomicFixnum.new(0) @events_consumed = Concurrent::AtomicFixnum.new(0) @@ -112,6 +115,28 @@ module LogStash; class Pipeline @flushing = Concurrent::AtomicReference.new(false) end # def initialize + def build_queue_from_settings + queue_type = settings.get("queue.type") + queue_page_capacity = settings.get("queue.page_capacity") + max_events = settings.get("queue.max_events") + + if queue_type == "memory_acked" + # memory_acked is used in tests/specs + LogStash::Util::WrappedAckedQueue.create_memory_based("", queue_page_capacity, max_events) + elsif queue_type == "memory" + # memory is the legacy and default setting + LogStash::Util::WrappedSynchronousQueue.new() + elsif queue_type == "persisted" + # persisted is the disk based acked queue + queue_path = settings.get("path.queue") + LogStash::Util::WrappedAckedQueue.create_file_based(queue_path, queue_page_capacity, max_events) + else + raise(ConfigurationError, "invalid queue.type setting") + end + end + + private :build_queue_from_settings + def ready? @ready.value end @@ -168,6 +193,8 @@ module LogStash; class Pipeline shutdown_flusher shutdown_workers + @filter_queue_client.close + @logger.debug("Pipeline #{@pipeline_id} has been shutdown") # exit code @@ -242,12 +269,15 @@ module LogStash; class Pipeline while running batch = @filter_queue_client.take_batch + signal = @signal_queue.empty? ? NO_SIGNAL : @signal_queue.pop + running = !signal.shutdown? 
+ @events_consumed.increment(batch.size) - running = false if batch.shutdown_signal_received? + filter_batch(batch) - if batch.shutdown_signal_received? || batch.flush_signal_received? - flush_filters_to_batch(batch) + if signal.flush? || signal.shutdown? + flush_filters_to_batch(batch, :final => signal.shutdown?) end output_batch(batch) @@ -257,11 +287,9 @@ module LogStash; class Pipeline def filter_batch(batch) batch.each do |event| - if event.is_a?(Event) - filter_func(event).each do |e| - # these are both original and generated events - batch.merge(e) unless e.cancelled? - end + filter_func(event).each do |e| + #these are both original and generated events + batch.merge(e) unless e.cancelled? end end @filter_queue_client.add_filtered_metrics(batch) @@ -383,7 +411,7 @@ module LogStash; class Pipeline # Each worker thread will receive this exactly once! @worker_threads.each do |t| @logger.debug("Pushing shutdown", :thread => t.inspect) - @input_queue_client.push(SHUTDOWN) + @signal_queue.push(SHUTDOWN) end @worker_threads.each do |t| @@ -468,7 +496,7 @@ module LogStash; class Pipeline def flush if @flushing.compare_and_set(false, true) @logger.debug? && @logger.debug("Pushing flush onto pipeline") - @input_queue_client.push(FLUSH) + @signal_queue.push(FLUSH) end end @@ -486,7 +514,6 @@ module LogStash; class Pipeline # @param batch [ReadClient::ReadBatch] # @param options [Hash] def flush_filters_to_batch(batch, options = {}) - options[:final] = batch.shutdown_signal_received? flush_filters(options) do |event| unless event.cancelled? @logger.debug? and @logger.debug("Pushing flushed events", :event => event) diff --git a/logstash-core/lib/logstash/runner.rb b/logstash-core/lib/logstash/runner.rb index 4d9ca67db..531a94a36 100644 --- a/logstash-core/lib/logstash/runner.rb +++ b/logstash-core/lib/logstash/runner.rb @@ -5,11 +5,13 @@ $DEBUGLIST = (ENV["DEBUG"] || "").split(",") require "clamp" require "net/http" + +require "logstash/namespace" +require "logstash-core/logstash-core" require "logstash/environment" LogStash::Environment.load_locale! 
-require "logstash/namespace" require "logstash/agent" require "logstash/config/defaults" require "logstash/shutdown_watcher" diff --git a/logstash-core/lib/logstash/settings.rb b/logstash-core/lib/logstash/settings.rb index 10ec1f5f7..2039303ad 100644 --- a/logstash-core/lib/logstash/settings.rb +++ b/logstash-core/lib/logstash/settings.rb @@ -1,5 +1,7 @@ # encoding: utf-8 require "logstash/util/loggable" +require "fileutils" +require "logstash/util/byte_value" module LogStash class Settings @@ -434,6 +436,33 @@ module LogStash end end end + + class Bytes < Coercible + def initialize(name, default=nil, strict=true) + super(name, ::Fixnum, default, strict=true) { |value| valid?(value) } + end + + def valid?(value) + value.is_a?(Fixnum) && value >= 0 + end + + def coerce(value) + case value + when ::Numeric + value + when ::String + LogStash::Util::ByteValue.parse(value) + else + raise ArgumentError.new("Could not coerce '#{value}' into a bytes value") + end + end + + def validate(value) + unless valid?(value) + raise ArgumentError.new("Invalid byte value \"#{value}\".") + end + end + end end SETTINGS = Settings.new diff --git a/logstash-core/lib/logstash/util/byte_value.rb b/logstash-core/lib/logstash/util/byte_value.rb new file mode 100644 index 000000000..bc189a438 --- /dev/null +++ b/logstash-core/lib/logstash/util/byte_value.rb @@ -0,0 +1,61 @@ +# encoding: utf-8 +require "logstash/namespace" +require "logstash/util" + +module LogStash; module Util; module ByteValue + module_function + + B = 1 + KB = B << 10 + MB = B << 20 + GB = B << 30 + TB = B << 40 + PB = B << 50 + + def parse(text) + if !text.is_a?(String) + raise ArgumentError, "ByteValue::parse takes a String, got a `#{text.class.name}`" + end + number = text.to_f + factor = multiplier(text) + + (number * factor).to_i + end + + def multiplier(text) + case text + when /(?:k|kb)$/ + KB + when /(?:m|mb)$/ + MB + when /(?:g|gb)$/ + GB + when /(?:t|tb)$/ + TB + when /(?:p|pb)$/ + PB + when /(?:b)$/ + B + else + raise ArgumentError, "Unknown bytes value '#{text}'" + end + end + + def human_readable(number) + value, unit = if number > PB + [number / PB, "pb"] + elsif number > TB + [number / TB, "tb"] + elsif number > GB + [number / GB, "gb"] + elsif number > MB + [number / MB, "mb"] + elsif number > KB + [number / KB, "kb"] + else + [number, "b"] + end + + format("%.2d%s", value, unit) + end +end end end diff --git a/logstash-core/lib/logstash/util/wrapped_acked_queue.rb b/logstash-core/lib/logstash/util/wrapped_acked_queue.rb new file mode 100644 index 000000000..07d1978ac --- /dev/null +++ b/logstash-core/lib/logstash/util/wrapped_acked_queue.rb @@ -0,0 +1,347 @@ +# encoding: utf-8 + +require "logstash-core-queue-jruby/logstash-core-queue-jruby" +require "concurrent" +# This is an adapted copy of the wrapped_synchronous_queue file +# ideally this should be moved to Java/JRuby + +module LogStash; module Util + # Some specialized constructors. The calling code *does* need to know what kind it creates but + # not the internal implementation e.g. LogStash::AckedMemoryQueue etc. + # Note the use of allocate - this is what new does before it calls initialize. + # Note that the new method has been made private this is because there is no + # default queue implementation. + # It would be expensive to create a persistent queue in the new method + # to then throw it away in favor of a memory based one directly after. + # Especially in terms of (mmap) memory allocation and proper close sequencing. 
+ + class WrappedAckedQueue + class QueueClosedError < ::StandardError; end + class NotImplementedError < ::StandardError; end + + def self.create_memory_based(path, capacity, size) + self.allocate.with_queue( + LogStash::AckedMemoryQueue.new(path, capacity, size) + ) + end + + def self.create_file_based(path, capacity, size) + self.allocate.with_queue( + LogStash::AckedQueue.new(path, capacity, size) + ) + end + + private_class_method :new + + def with_queue(queue) + @queue = queue + @queue.open + @closed = Concurrent::AtomicBoolean.new(false) + self + end + + def closed? + @closed.true? + end + + # Push an object to the queue if the queue is full + # it will block until the object can be added to the queue. + # + # @param [Object] Object to add to the queue + def push(obj) + check_closed("write") + @queue.write(obj) + end + alias_method(:<<, :push) + + # TODO - fix doc for this noop method + # Offer an object to the queue, wait for the specified amount of time. + # If adding to the queue was successful it will return true, false otherwise. + # + # @param [Object] Object to add to the queue + # @param [Integer] Time in milliseconds to wait before giving up + # @return [Boolean] True if adding was successfull if not it return false + def offer(obj, timeout_ms) + raise NotImplementedError.new("The offer method is not implemented. There is no non blocking write operation yet.") + end + + # Blocking + def take + check_closed("read a batch") + # TODO - determine better arbitrary timeout millis + @queue.read_batch(1, 200).get_elements.first + end + + # Block for X millis + def poll(millis) + check_closed("read") + @queue.read_batch(1, millis).get_elements.first + end + + def read_batch(size, wait) + check_closed("read a batch") + @queue.read_batch(size, wait) + end + + def write_client + WriteClient.new(self) + end + + def read_client() + ReadClient.new(self) + end + + def check_closed(action) + if closed? + raise QueueClosedError.new("Attempted to #{action} on a closed AckedQueue") + end + end + + def close + @queue.close + @closed.make_true + end + + class ReadClient + # We generally only want one thread at a time able to access pop/take/poll operations + # from this queue. We also depend on this to be able to block consumers while we snapshot + # in-flight buffers + + def initialize(queue, batch_size = 125, wait_for = 250) + @queue = queue + @mutex = Mutex.new + # Note that @inflight_batches as a central mechanism for tracking inflight + # batches will fail if we have multiple read clients in the pipeline. + @inflight_batches = {} + # allow the worker thread to report the execution time of the filter + output + @inflight_clocks = {} + @batch_size = batch_size + @wait_for = wait_for + end + + def close + @queue.close + end + + def set_batch_dimensions(batch_size, wait_for) + @batch_size = batch_size + @wait_for = wait_for + end + + def set_events_metric(metric) + @event_metric = metric + end + + def set_pipeline_metric(metric) + @pipeline_metric = metric + end + + def inflight_batches + @mutex.synchronize do + yield(@inflight_batches) + end + end + + def current_inflight_batch + @inflight_batches.fetch(Thread.current, []) + end + + def take_batch + if @queue.closed? 
+ raise QueueClosedError.new("Attempt to take a batch from a closed AckedQueue") + end + @mutex.synchronize do + batch = ReadBatch.new(@queue, @batch_size, @wait_for) + add_starting_metrics(batch) + set_current_thread_inflight_batch(batch) + start_clock + batch + end + end + + def set_current_thread_inflight_batch(batch) + @inflight_batches[Thread.current] = batch + end + + def close_batch(batch) + @mutex.synchronize do + batch.close + @inflight_batches.delete(Thread.current) + stop_clock + end + end + + def start_clock + @inflight_clocks[Thread.current] = [ + @event_metric.time(:duration_in_millis), + @pipeline_metric.time(:duration_in_millis) + ] + end + + def stop_clock + @inflight_clocks[Thread.current].each(&:stop) + @inflight_clocks.delete(Thread.current) + end + + def add_starting_metrics(batch) + return if @event_metric.nil? || @pipeline_metric.nil? + @event_metric.increment(:in, batch.starting_size) + @pipeline_metric.increment(:in, batch.starting_size) + end + + def add_filtered_metrics(batch) + @event_metric.increment(:filtered, batch.filtered_size) + @pipeline_metric.increment(:filtered, batch.filtered_size) + end + + def add_output_metrics(batch) + @event_metric.increment(:out, batch.filtered_size) + @pipeline_metric.increment(:out, batch.filtered_size) + end + end + + class ReadBatch + def initialize(queue, size, wait) + @originals = Hash.new + + # TODO: disabled for https://github.com/elastic/logstash/issues/6055 - will have to properly refactor + # @cancelled = Hash.new + + @generated = Hash.new + @iterating_temp = Hash.new + @iterating = false # Atomic Boolean maybe? Although batches are not shared across threads + take_originals_from_queue(queue, size, wait) # this sets a reference to @acked_batch + end + + def close + # this will ack the whole batch, regardless of whether some + # events were cancelled or failed + return if @acked_batch.nil? + @acked_batch.close + end + + def merge(event) + return if event.nil? || @originals.key?(event) + # take care not to cause @generated to change during iteration + # @iterating_temp is merged after the iteration + if iterating? + @iterating_temp[event] = true + else + # the periodic flush could generate events outside of an each iteration + @generated[event] = true + end + end + + def cancel(event) + # TODO: disabled for https://github.com/elastic/logstash/issues/6055 - will have to properly refactor + raise("cancel is unsupported") + # @cancelled[event] = true + end + + def each(&blk) + # take care not to cause @originals or @generated to change during iteration + + # below the checks for @cancelled.include?(e) have been replaced by e.cancelled? + # TODO: for https://github.com/elastic/logstash/issues/6055 = will have to properly refactor + @iterating = true + @originals.each do |e, _| + blk.call(e) unless e.cancelled? + end + @generated.each do |e, _| + blk.call(e) unless e.cancelled? + end + @iterating = false + update_generated + end + + def size + filtered_size + end + + def starting_size + @originals.size + end + + def filtered_size + @originals.size + @generated.size + end + + def cancelled_size + # TODO: disabled for https://github.com/elastic/logstash/issues/6055 = will have to properly refactor + raise("cancelled_size is unsupported ") + # @cancelled.size + end + + def shutdown_signal_received? + false + end + + def flush_signal_received? + false + end + + private + + def iterating? 
+ @iterating + end + + def update_generated + @generated.update(@iterating_temp) + @iterating_temp.clear + end + + def take_originals_from_queue(queue, size, wait) + @acked_batch = queue.read_batch(size, wait) + return if @acked_batch.nil? + @acked_batch.get_elements.each do |e| + @originals[e] = true + end + end + end + + class WriteClient + def initialize(queue) + @queue = queue + end + + def get_new_batch + WriteBatch.new + end + + def push(event) + if @queue.closed? + raise QueueClosedError.new("Attempted to write an event to a closed AckedQueue") + end + @queue.push(event) + end + alias_method(:<<, :push) + + def push_batch(batch) + if @queue.closed? + raise QueueClosedError.new("Attempted to write a batch to a closed AckedQueue") + end + batch.each do |event| + push(event) + end + end + end + + class WriteBatch + def initialize + @events = [] + end + + def push(event) + @events.push(event) + end + alias_method(:<<, :push) + + def each(&blk) + @events.each do |e| + blk.call(e) + end + end + end + end +end end diff --git a/logstash-core/lib/logstash/util/wrapped_synchronous_queue.rb b/logstash-core/lib/logstash/util/wrapped_synchronous_queue.rb index 55bc66c23..6e4015276 100644 --- a/logstash-core/lib/logstash/util/wrapped_synchronous_queue.rb +++ b/logstash-core/lib/logstash/util/wrapped_synchronous_queue.rb @@ -5,8 +5,8 @@ module LogStash; module Util java_import java.util.concurrent.SynchronousQueue java_import java.util.concurrent.TimeUnit - def initialize() - @queue = java.util.concurrent.SynchronousQueue.new() + def initialize + @queue = java.util.concurrent.SynchronousQueue.new end # Push an object to the queue if the queue is full @@ -30,7 +30,7 @@ module LogStash; module Util # Blocking def take - @queue.take() + @queue.take end # Block for X millis @@ -42,7 +42,7 @@ module LogStash; module Util WriteClient.new(self) end - def read_client() + def read_client ReadClient.new(self) end @@ -51,7 +51,7 @@ module LogStash; module Util # from this queue. We also depend on this to be able to block consumers while we snapshot # in-flight buffers - def initialize(queue, batch_size = 125, wait_for = 5) + def initialize(queue, batch_size = 125, wait_for = 250) @queue = queue @mutex = Mutex.new # Note that @infilght_batches as a central mechanism for tracking inflight @@ -64,6 +64,10 @@ module LogStash; module Util @wait_for = wait_for end + def close + # noop, compat with acked queue read client + end + def set_batch_dimensions(batch_size, wait_for) @batch_size = batch_size @wait_for = wait_for @@ -145,8 +149,6 @@ module LogStash; module Util class ReadBatch def initialize(queue, size, wait) - @shutdown_signal_received = false - @flush_signal_received = false @originals = Hash.new # TODO: disabled for https://github.com/elastic/logstash/issues/6055 - will have to properly refactor @@ -210,14 +212,6 @@ module LogStash; module Util # @cancelled.size end - def shutdown_signal_received? - @shutdown_signal_received - end - - def flush_signal_received? - @flush_signal_received - end - private def iterating? @@ -231,24 +225,10 @@ module LogStash; module Util def take_originals_from_queue(queue, size, wait) size.times do |t| - event = (t == 0) ? queue.take : queue.poll(wait) - if event.nil? - # queue poll timed out - next - elsif event.is_a?(LogStash::SignalEvent) - # We MUST break here. If a batch consumes two SHUTDOWN events - # then another worker may have its SHUTDOWN 'stolen', thus blocking - # the pipeline. - @shutdown_signal_received = event.shutdown? 
+ event = queue.poll(wait) + return if event.nil? # queue poll timed out - # See comment above - # We should stop doing work after flush as well. - @flush_signal_received = event.flush? - - break - else - @originals[event] = true - end + @originals[event] = true end end end diff --git a/logstash-core/logstash-core.gemspec b/logstash-core/logstash-core.gemspec index 3b1ee5c62..f59d397b7 100644 --- a/logstash-core/logstash-core.gemspec +++ b/logstash-core/logstash-core.gemspec @@ -11,13 +11,16 @@ Gem::Specification.new do |gem| gem.homepage = "http://www.elastic.co/guide/en/logstash/current/index.html" gem.license = "Apache License (2.0)" - gem.files = Dir.glob(["logstash-core.gemspec", "lib/**/*.rb", "spec/**/*.rb", "locales/*", "lib/logstash/api/init.ru", "vendor/jars/**/*.jar"]) + gem.files = Dir.glob(["logstash-core.gemspec", "gemspec_jars.rb", "lib/**/*.rb", "spec/**/*.rb", "locales/*", "lib/logstash/api/init.ru"]) gem.test_files = gem.files.grep(%r{^(test|spec|features)/}) gem.name = "logstash-core" - gem.require_paths = ["lib", "vendor/jars"] + gem.require_paths = ["lib"] gem.version = LOGSTASH_CORE_VERSION - gem.add_runtime_dependency "logstash-core-event-java", "6.0.0-alpha1" + gem.platform = "java" + + gem.add_runtime_dependency "logstash-core-event-java" + gem.add_runtime_dependency "logstash-core-queue-jruby" gem.add_runtime_dependency "pry", "~> 0.10.1" #(Ruby license) gem.add_runtime_dependency "stud", "~> 0.0.19" #(Apache 2.0 license) @@ -43,21 +46,13 @@ Gem::Specification.new do |gem| gem.add_runtime_dependency "rubyzip", "~> 1.1.7" gem.add_runtime_dependency "thread_safe", "~> 0.3.5" #(Apache 2.0 license) - if RUBY_PLATFORM == 'java' - gem.platform = RUBY_PLATFORM - gem.add_runtime_dependency "jrjackson", "~> 0.4.0" #(Apache 2.0 license) - else - gem.add_runtime_dependency "oj" #(MIT-style license) - end + gem.add_runtime_dependency "jrjackson", "~> 0.4.0" #(Apache 2.0 license) - if RUBY_ENGINE == "rbx" - # rubinius puts the ruby stdlib into gems. - gem.add_runtime_dependency "rubysl" + gem.add_runtime_dependency "jar-dependencies" + # as of Feb 3rd 2016, the ruby-maven gem is resolved to version 3.3.3 and that version + # has an rdoc problem that causes a bundler exception. 3.3.9 is the current latest version + # which does not have this problem. + gem.add_runtime_dependency "ruby-maven", "~> 3.3.9" - # Include racc to make the xml tests pass. 
- # https://github.com/rubinius/rubinius/issues/2632#issuecomment-26954565 - gem.add_runtime_dependency "racc" - end - - gem.add_runtime_dependency 'jar-dependencies', '~> 0.3.4' + eval(File.read(File.expand_path("../gemspec_jars.rb", __FILE__))) end diff --git a/logstash-core/settings.gradle b/logstash-core/settings.gradle new file mode 100644 index 000000000..4da77f258 --- /dev/null +++ b/logstash-core/settings.gradle @@ -0,0 +1 @@ +rootProject.name = 'logstash-core' diff --git a/logstash-core/spec/logstash/agent_spec.rb b/logstash-core/spec/logstash/agent_spec.rb index a9ec4cde4..b46144dda 100644 --- a/logstash-core/spec/logstash/agent_spec.rb +++ b/logstash-core/spec/logstash/agent_spec.rb @@ -396,6 +396,7 @@ describe LogStash::Agent do let!(:dummy_output) { DummyOutput.new } let!(:dummy_output2) { DummyOutput2.new } + let(:initial_generator_threshold) { 1000 } before :each do allow(DummyOutput).to receive(:new).at_least(:once).with(anything).and_return(dummy_output) @@ -414,7 +415,8 @@ describe LogStash::Agent do subject.execute end - sleep(0.01) until dummy_output.events.size > 1 + # wait for some events to reach the dummy_output + sleep(0.01) until dummy_output.events.size > initial_generator_threshold end after :each do @@ -431,15 +433,7 @@ describe LogStash::Agent do let(:new_config_generator_counter) { 500 } let(:new_config) { "input { generator { count => #{new_config_generator_counter} } } output { dummyoutput2 {} }" } before :each do - # We know that the store has more events coming in. - i = 0 - while dummy_output.events.size <= new_config_generator_counter - i += 1 - raise "Waiting too long!" if i > 20 - sleep(0.1) - end - # Also force a flush to disk to make sure ruby reload it. File.open(config_path, "w") do |f| f.write(new_config) f.fsync @@ -449,21 +443,18 @@ describe LogStash::Agent do # wait until pipeline restarts sleep(0.01) until dummy_output2.events.size > 0 - - # be eventually consistent. - sleep(0.01) while dummy_output2.events.size < new_config_generator_counter end it "resets the pipeline metric collector" do snapshot = subject.metric.collector.snapshot_metric value = snapshot.metric_store.get_with_path("/stats/pipelines")[:stats][:pipelines][:main][:events][:in].value - expect(value).to eq(new_config_generator_counter) + expect(value).to be <= new_config_generator_counter end it "does not reset the global event count" do snapshot = subject.metric.collector.snapshot_metric value = snapshot.metric_store.get_with_path("/stats/events")[:stats][:events][:in].value - expect(value).to be > new_config_generator_counter + expect(value).to be > initial_generator_threshold end it "increases the successful reload count" do @@ -496,15 +487,7 @@ describe LogStash::Agent do let(:new_config) { "input { generator { count => " } let(:new_config_generator_counter) { 500 } before :each do - # We know that the store has more events coming in. - i = 0 - while dummy_output.events.size <= new_config_generator_counter - i += 1 - raise "Waiting too long!" if i > 20 - sleep(0.1) - end - # Also force a flush to disk to make sure ruby reload it. 
File.open(config_path, "w") do |f| f.write(new_config) f.fsync diff --git a/logstash-core/spec/logstash/pipeline_pq_file_spec.rb b/logstash-core/spec/logstash/pipeline_pq_file_spec.rb new file mode 100644 index 000000000..9349868ea --- /dev/null +++ b/logstash-core/spec/logstash/pipeline_pq_file_spec.rb @@ -0,0 +1,128 @@ +# encoding: utf-8 +require "spec_helper" +require "logstash/inputs/generator" +require "logstash/filters/multiline" + +class PipelinePqFileOutput < LogStash::Outputs::Base + config_name "pipelinepqfileoutput" + milestone 2 + + attr_reader :num_closes, :event_count + + def self.make_shared + @concurrency = :shared + end + + def initialize(params={}) + super + @num_closes = 0 + @event_count = 0 + @mutex = Mutex.new + end + + def register + self.class.make_shared + end + + def receive(event) + @mutex.synchronize do + @event_count = @event_count.succ + end + end + + def close + @num_closes = 1 + end +end + +describe LogStash::Pipeline do + let(:pipeline_settings_obj) { LogStash::SETTINGS } + let(:pipeline_id) { "main" } + + let(:multiline_id) { "my-multiline" } + let(:output_id) { "my-pipelinepqfileoutput" } + let(:generator_id) { "my-generator" } + let(:config) do + <<-EOS + input { + generator { + count => #{number_of_events} + id => "#{generator_id}" + } + } + filter { + multiline { + id => "#{multiline_id}" + pattern => "hello" + what => next + } + } + output { + pipelinepqfileoutput { + id => "#{output_id}" + } + } + EOS + end + + let(:pipeline_settings) { { "queue.type" => queue_type, "pipeline.workers" => worker_thread_count, "pipeline.id" => pipeline_id} } + + subject { described_class.new(config, pipeline_settings_obj, metric) } + + let(:counting_output) { PipelinePqFileOutput.new({ "id" => output_id }) } + let(:metric_store) { subject.metric.collector.snapshot_metric.metric_store } + let(:metric) { LogStash::Instrument::Metric.new(LogStash::Instrument::Collector.new) } + let(:base_queue_path) { pipeline_settings_obj.get("path.queue") } + let(:this_queue_folder) { File.join(base_queue_path, SecureRandom.hex(8)) } + + let(:worker_thread_count) { 8 } # 1 4 8 + let(:number_of_events) { 100_000 } + let(:page_capacity) { 1 * 1024 * 512 } # 1 128 + let(:queue_type) { "persisted" } # "memory" "memory_acked" + let(:times) { [] } + + before :each do + FileUtils.mkdir_p(this_queue_folder) + + pipeline_settings_obj.set("path.queue", this_queue_folder) + allow(PipelinePqFileOutput).to receive(:new).with(any_args).and_return(counting_output) + allow(LogStash::Plugin).to receive(:lookup).with("input", "generator").and_return(LogStash::Inputs::Generator) + allow(LogStash::Plugin).to receive(:lookup).with("codec", "plain").and_return(LogStash::Codecs::Plain) + allow(LogStash::Plugin).to receive(:lookup).with("filter", "multiline").and_return(LogStash::Filters::Multiline) + allow(LogStash::Plugin).to receive(:lookup).with("output", "pipelinepqfileoutput").and_return(PipelinePqFileOutput) + + pipeline_workers_setting = LogStash::SETTINGS.get_setting("pipeline.workers") + allow(pipeline_workers_setting).to receive(:default).and_return(worker_thread_count) + pipeline_settings.each {|k, v| pipeline_settings_obj.set(k, v) } + pipeline_settings_obj.set("queue.page_capacity", page_capacity) + Thread.new do + # make sure we have received all the generated events + while counting_output.event_count < number_of_events do + sleep 1 + end + subject.shutdown + end + times.push(Time.now.to_f) + subject.run + times.unshift(Time.now.to_f - times.first) + end + + after :each do + # 
Dir.rm_rf(this_queue_folder) + end + + let(:collected_metric) { metric_store.get_with_path("stats/pipelines/") } + + it "populates the pipelines core metrics" do + _metric = collected_metric[:stats][:pipelines][:main][:events] + expect(_metric[:duration_in_millis].value).not_to be_nil + expect(_metric[:in].value).to eq(number_of_events) + expect(_metric[:filtered].value).to eq(number_of_events) + expect(_metric[:out].value).to eq(number_of_events) + STDOUT.puts " queue.type: #{pipeline_settings_obj.get("queue.type")}" + STDOUT.puts " queue.page_capacity: #{pipeline_settings_obj.get("queue.page_capacity") / 1024}KB" + STDOUT.puts " workers: #{worker_thread_count}" + STDOUT.puts " events: #{number_of_events}" + STDOUT.puts " took: #{times.first}s" + end +end diff --git a/logstash-core/spec/logstash/pipeline_spec.rb b/logstash-core/spec/logstash/pipeline_spec.rb index 197f0631b..06007ecb1 100644 --- a/logstash-core/spec/logstash/pipeline_spec.rb +++ b/logstash-core/spec/logstash/pipeline_spec.rb @@ -447,6 +447,9 @@ describe LogStash::Pipeline do allow(settings).to receive(:get_value).with("pipeline.id").and_return("main") allow(settings).to receive(:get_value).with("metric.collect").and_return(false) allow(settings).to receive(:get_value).with("config.debug").and_return(false) + allow(settings).to receive(:get).with("queue.type").and_return("memory") + allow(settings).to receive(:get).with("queue.page_capacity").and_return(1024 * 1024) + allow(settings).to receive(:get).with("queue.max_events").and_return(250) pipeline = LogStash::Pipeline.new(config, settings) expect(pipeline.metric).to be_kind_of(LogStash::Instrument::NullMetric) @@ -509,7 +512,7 @@ describe LogStash::Pipeline do pipeline = LogStash::Pipeline.new(config, pipeline_settings_obj) Thread.new { pipeline.run } sleep 0.1 while !pipeline.ready? - wait(5).for do + wait(3).for do # give us a bit of time to flush the events output.events.empty? 
end.to be_falsey @@ -549,10 +552,11 @@ describe LogStash::Pipeline do end context "#started_at" do + # use a run limiting count to shutdown the pipeline automatically let(:config) do <<-EOS input { - generator {} + generator { count => 10 } } EOS end @@ -564,8 +568,7 @@ describe LogStash::Pipeline do end it "return when the pipeline started working" do - t = Thread.new { subject.run } - sleep(0.1) + subject.run expect(subject.started_at).to be < Time.now subject.shutdown end @@ -604,7 +607,7 @@ describe LogStash::Pipeline do let(:pipeline_settings) { { "pipeline.id" => pipeline_id } } let(:pipeline_id) { "main" } - let(:number_of_events) { 1000 } + let(:number_of_events) { 420 } let(:multiline_id) { "my-multiline" } let(:multiline_id_other) { "my-multiline_other" } let(:dummy_output_id) { "my-dummyoutput" } @@ -648,13 +651,10 @@ describe LogStash::Pipeline do Thread.new { subject.run } # make sure we have received all the generated events - - times = 0 - while dummyoutput.events.size < number_of_events - times += 1 - sleep 0.25 - raise "Waited too long" if times > 4 - end + wait(3).for do + # give us a bit of time to flush the events + dummyoutput.events.size < number_of_events + end.to be_falsey end after :each do diff --git a/logstash-core/spec/logstash/settings/bytes_spec.rb b/logstash-core/spec/logstash/settings/bytes_spec.rb new file mode 100644 index 000000000..b4fe0aab7 --- /dev/null +++ b/logstash-core/spec/logstash/settings/bytes_spec.rb @@ -0,0 +1,53 @@ +# encoding: utf-8 +require "spec_helper" +require "logstash/settings" + +describe LogStash::Setting::Bytes do + let(:multipliers) do + { + "b" => 1, + "kb" => 1 << 10, + "mb" => 1 << 20, + "gb" => 1 << 30, + "tb" => 1 << 40, + "pb" => 1 << 50, + } + end + + let(:number) { Flores::Random.number(0..1000) } + let(:unit) { Flores::Random.item(multipliers.keys) } + let(:default) { "0b" } + + subject { described_class.new("a byte value", default, false) } + + describe "#set" do + + # Hard-coded test just to make sure at least one known case is working + context "when given '10mb'" do + it "returns 10485760" do + expect(subject.set("10mb")).to be == 10485760 + end + end + + context "when given a string" do + context "which is a valid byte unit" do + let(:text) { "#{number}#{unit}" } + + before { subject.set(text) } + + it "should coerce it to a Fixnum" do + expect(subject.value).to be_a(Fixnum) + end + end + + context "which is not a valid byte unit" do + values = [ "hello world", "1234", "", "-__-" ] + values.each do |value| + it "should fail" do + expect { subject.set(value) }.to raise_error + end + end + end + end + end +end diff --git a/logstash-core/spec/logstash/settings/writable_directory_spec.rb b/logstash-core/spec/logstash/settings/writable_directory_spec.rb index be46b28a0..2e3221073 100644 --- a/logstash-core/spec/logstash/settings/writable_directory_spec.rb +++ b/logstash-core/spec/logstash/settings/writable_directory_spec.rb @@ -79,12 +79,15 @@ describe LogStash::Setting::WritableDirectory do it_behaves_like "failure" end - context "but is a socket" do + # Skip this test due to a testing bug on OSX. 
+ # `path` is rejected on OSX because it is too long (but passes on Linux) + xcontext "but is a socket" do let(:socket) { UNIXServer.new(path) } before { socket } # realize `socket` value after { socket.close } it_behaves_like "failure" end + context "but is a symlink" do before { File::symlink("whatever", path) } it_behaves_like "failure" diff --git a/logstash-core/spec/logstash/util/byte_value_spec.rb b/logstash-core/spec/logstash/util/byte_value_spec.rb new file mode 100644 index 000000000..a18e4ff11 --- /dev/null +++ b/logstash-core/spec/logstash/util/byte_value_spec.rb @@ -0,0 +1,33 @@ +require "logstash/util/byte_value" +require "flores/random" + +describe LogStash::Util::ByteValue do + let(:multipliers) do + { + "b" => 1, + "kb" => 1 << 10, + "mb" => 1 << 20, + "gb" => 1 << 30, + "tb" => 1 << 40, + "pb" => 1 << 50, + } + end + + let(:number) { Flores::Random.number(0..100000000000) } + let(:unit) { Flores::Random.item(multipliers.keys) } + let(:text) { "#{number}#{unit}" } + + describe "#parse" do + # Expect a whole-unit byte value. Fractions of a byte don't make sense here. :) + let(:expected) { (number * multipliers[unit]).to_i } + subject { described_class.parse(text) } + + it "should return a Numeric" do + expect(subject).to be_a(Numeric) + end + + it "should have an expected byte value" do + expect(subject).to be == expected + end + end +end diff --git a/logstash-core/src/main/java/org/logstash/ackedqueue/Batch.java b/logstash-core/src/main/java/org/logstash/ackedqueue/Batch.java new file mode 100644 index 000000000..7ff83a1c7 --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/ackedqueue/Batch.java @@ -0,0 +1,44 @@ +package org.logstash.ackedqueue; + +import java.io.Closeable; +import java.io.IOException; +import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; + +public class Batch implements Closeable { + + private final List elements; + + private final List seqNums; + private final Queue queue; + private final AtomicBoolean closed; + + public Batch(List elements, List seqNums, Queue q) { + this.elements = elements; + this.seqNums = seqNums; + this.queue = q; + this.closed = new AtomicBoolean(false); + } + + // close acks the batch ackable events + public void close() throws IOException { + if (closed.getAndSet(true) == false) { + this.queue.ack(this.seqNums); + } else { + // TODO: how should we handle double-closing? 
+ throw new IOException("double closing batch"); + } + } + + public int size() { + return elements.size(); + } + + public List getElements() { + return elements; + } + + public Queue getQueue() { + return queue; + } +} diff --git a/logstash-core/src/main/java/org/logstash/ackedqueue/Checkpoint.java b/logstash-core/src/main/java/org/logstash/ackedqueue/Checkpoint.java new file mode 100644 index 000000000..4cee0611d --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/ackedqueue/Checkpoint.java @@ -0,0 +1,47 @@ +package org.logstash.ackedqueue; + +public class Checkpoint { +// Checkpoint file structure see FileCheckpointIO + + public static final int VERSION = 1; + + private final int pageNum; // local per-page page number + private final int firstUnackedPageNum; // queue-wide global pointer, only valid in the head checkpoint + private final long firstUnackedSeqNum; // local per-page unacknowledged tracking + private final long minSeqNum; // local per-page minimum seqNum + private final int elementCount; // local per-page element count + + + public Checkpoint(int pageNum, int firstUnackedPageNum, long firstUnackedSeqNum, long minSeqNum, int elementCount) { + this.pageNum = pageNum; + this.firstUnackedPageNum = firstUnackedPageNum; + this.firstUnackedSeqNum = firstUnackedSeqNum; + this.minSeqNum = minSeqNum; + this.elementCount = elementCount; + } + + public int getPageNum() { + return this.pageNum; + } + + public int getFirstUnackedPageNum() { + return this.firstUnackedPageNum; + } + + public long getFirstUnackedSeqNum() { + return this.firstUnackedSeqNum; + } + + public long getMinSeqNum() { + return this.minSeqNum; + } + + public int getElementCount() { + return this.elementCount; + } + + public String toString() { + return "pageNum=" + this.pageNum + ", firstUnackedPageNum=" + this.firstUnackedPageNum + ", firstUnackedSeqNum=" + this.firstUnackedSeqNum + ", minSeqNum=" + this.minSeqNum + ", elementCount=" + this.elementCount; + } + +} diff --git a/logstash-core/src/main/java/org/logstash/ackedqueue/FileSettings.java b/logstash-core/src/main/java/org/logstash/ackedqueue/FileSettings.java new file mode 100644 index 000000000..cb0024e28 --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/ackedqueue/FileSettings.java @@ -0,0 +1,79 @@ +package org.logstash.ackedqueue; + +import org.logstash.common.io.CheckpointIOFactory; +import org.logstash.common.io.PageIOFactory; + +public class FileSettings implements Settings { + private String dirForFiles; + private CheckpointIOFactory checkpointIOFactory; + private PageIOFactory pageIOFactory; + private Class elementClass; + private int capacity; + private int maxUnread; + + private FileSettings() { this(""); } + + public FileSettings(String dirPath) { + this.dirForFiles = dirPath; + this.maxUnread = 0; + } + + @Override + public Settings setCheckpointIOFactory(CheckpointIOFactory factory) { + this.checkpointIOFactory = factory; + return this; + } + + @Override + public Settings setElementIOFactory(PageIOFactory factory) { + this.pageIOFactory = factory; + return this; + } + + @Override + public Settings setElementClass(Class elementClass) { + this.elementClass = elementClass; + return this; + } + + @Override + public Settings setCapacity(int capacity) { + this.capacity = capacity; + return this; + } + + @Override + public Settings setMaxUnread(int maxUnread) { + this.maxUnread = maxUnread; + return this; + } + + @Override + public CheckpointIOFactory getCheckpointIOFactory() { + return checkpointIOFactory; + } + + public PageIOFactory 
getPageIOFactory() { + return pageIOFactory; + } + + @Override + public Class getElementClass() { + return this.elementClass; + } + + @Override + public String getDirPath() { + return dirForFiles; + } + + @Override + public int getCapacity() { + return capacity; + } + + @Override + public int getMaxUnread() { + return this.maxUnread; + } +} diff --git a/logstash-core/src/main/java/org/logstash/ackedqueue/HeadPage.java b/logstash-core/src/main/java/org/logstash/ackedqueue/HeadPage.java new file mode 100644 index 000000000..301f083b5 --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/ackedqueue/HeadPage.java @@ -0,0 +1,106 @@ +package org.logstash.ackedqueue; + +import org.logstash.common.io.CheckpointIO; +import org.logstash.common.io.PageIO; + +import java.io.IOException; +import java.util.BitSet; + +public class HeadPage extends Page { + + // create a new HeadPage object and new page.{pageNum} empty valid data file + public HeadPage(int pageNum, Queue queue, PageIO pageIO) throws IOException { + super(pageNum, queue, 0, 0, 0, new BitSet(), pageIO); + pageIO.create(); + } + + // create a new HeadPage object from an existing checkpoint and open page.{pageNum} empty valid data file + public HeadPage(Checkpoint checkpoint, Queue queue, PageIO pageIO) throws IOException { + super(checkpoint.getPageNum(), queue, checkpoint.getMinSeqNum(), checkpoint.getElementCount(), checkpoint.getFirstUnackedSeqNum(), new BitSet(), pageIO); + + // open the data file and reconstruct the IO object internal state + pageIO.open(checkpoint.getMinSeqNum(), checkpoint.getElementCount()); + + // this page ackedSeqNums bitset is a new empty bitset, if we have some acked elements, set them in the bitset + if (checkpoint.getFirstUnackedSeqNum() > checkpoint.getMinSeqNum()) { + this.ackedSeqNums.flip(0, (int) (checkpoint.getFirstUnackedSeqNum() - checkpoint.getMinSeqNum())); + } + } + + // verify if data size plus overhead is not greater than the page capacity + public boolean hasCapacity(int byteSize) { + return this.pageIO.persistedByteCount(byteSize) <= this.pageIO.getCapacity(); + } + + public boolean hasSpace(int byteSize) { + return this.pageIO.hasSpace((byteSize)); + } + + // NOTE: + // we have a page concern inconsistency where readBatch() takes care of the + // deserialization and returns a Batch object which contains the deserialized + // elements objects of the proper elementClass but HeadPage.write() deals with + // a serialized element byte[] and serialization is done at the Queue level to + // be able to use the Page.hasSpace() method with the serialized element byte size. + // + public void write(byte[] bytes, long seqNum) throws IOException { + this.pageIO.write(bytes, seqNum); + + if (this.minSeqNum <= 0) { + this.minSeqNum = seqNum; + this.firstUnreadSeqNum = seqNum; + } + this.elementCount++; + } + + public void ensurePersistedUpto(long seqNum) throws IOException { + long lastCheckpointUptoSeqNum = this.lastCheckpoint.getMinSeqNum() + this.lastCheckpoint.getElementCount(); + + // if the last checkpoint for this headpage already included the given seqNum, no need to fsync/checkpoint + if (seqNum > lastCheckpointUptoSeqNum) { + // head page checkpoint does a data file fsync + checkpoint(); + } + } + + + public TailPage behead() throws IOException { + // first do we need to checkpoint+fsync the headpage a last time? 
+ if (this.elementCount > this.lastCheckpoint.getElementCount()) { + checkpoint(); + } + + TailPage tailPage = new TailPage(this); + + // first thing that must be done after beheading is to create a new checkpoint for that new tail page + // tail page checkpoint does NOT includes a fsync + tailPage.checkpoint(); + + // TODO: should we have a better deactivation strategy to avoid too rapid reactivation scenario? + Page firstUnreadPage = queue.firstUnreadPage(); + if (firstUnreadPage == null || (tailPage.getPageNum() > firstUnreadPage.getPageNum())) { + // deactivate if this new tailPage is not where the read is occuring + tailPage.getPageIO().deactivate(); + } + + return tailPage; + } + + public void checkpoint() throws IOException { + // TODO: not concurrent for first iteration: + + // first fsync data file + this.pageIO.ensurePersisted(); + + // then write new checkpoint + + CheckpointIO io = queue.getCheckpointIO(); + this.lastCheckpoint = io.write(io.headFileName(), this.pageNum, this.queue.firstUnackedPageNum(), firstUnackedSeqNum(), this.minSeqNum, this.elementCount); + } + + public void close() throws IOException { + checkpoint(); + this.pageIO.close(); + } + +} diff --git a/logstash-core/src/main/java/org/logstash/ackedqueue/MemorySettings.java b/logstash-core/src/main/java/org/logstash/ackedqueue/MemorySettings.java new file mode 100644 index 000000000..c7e3dd8c4 --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/ackedqueue/MemorySettings.java @@ -0,0 +1,81 @@ +package org.logstash.ackedqueue; + +import org.logstash.common.io.CheckpointIOFactory; +import org.logstash.common.io.PageIOFactory; + +public class MemorySettings implements Settings { + private CheckpointIOFactory checkpointIOFactory; + private PageIOFactory pageIOFactory; + private Class elementClass; + private int capacity; + private final String dirPath; + private int maxUnread; + + public MemorySettings() { + this(""); + } + + public MemorySettings(String dirPath) { + this.dirPath = dirPath; + this.maxUnread = 0; + } + + @Override + public Settings setCheckpointIOFactory(CheckpointIOFactory factory) { + this.checkpointIOFactory = factory; + return this; + } + + @Override + public Settings setElementIOFactory(PageIOFactory factory) { + this.pageIOFactory = factory; + return this; + } + + @Override + public Settings setElementClass(Class elementClass) { + this.elementClass = elementClass; + return this; + } + + @Override + public Settings setCapacity(int capacity) { + this.capacity = capacity; + return this; + } + + @Override + public Settings setMaxUnread(int maxUnread) { + this.maxUnread = maxUnread; + return this; + } + + @Override + public CheckpointIOFactory getCheckpointIOFactory() { + return checkpointIOFactory; + } + + public PageIOFactory getPageIOFactory() { + return pageIOFactory; + } + + @Override + public Class getElementClass() { + return this.elementClass; + } + + @Override + public String getDirPath() { + return this.dirPath; + } + + @Override + public int getCapacity() { + return this.capacity; + } + + @Override + public int getMaxUnread() { + return this.maxUnread; + } +} diff --git a/logstash-core/src/main/java/org/logstash/ackedqueue/Page.java b/logstash-core/src/main/java/org/logstash/ackedqueue/Page.java new file mode 100644 index 000000000..ab3f0611b --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/ackedqueue/Page.java @@ -0,0 +1,151 @@ +package org.logstash.ackedqueue; + +import org.logstash.common.io.PageIO; + +import java.io.Closeable; +import java.io.IOException; +import 
java.util.BitSet; +import java.util.List; +import java.util.stream.Collectors; + +public abstract class Page implements Closeable { + protected final int pageNum; + protected long minSeqNum; // TODO: see if we can meke it final? + protected int elementCount; + protected long firstUnreadSeqNum; + protected final Queue queue; + protected PageIO pageIO; + + // bit 0 is minSeqNum + // TODO: go steal LocalCheckpointService in feature/seq_no from ES + // TODO: https://github.com/elastic/elasticsearch/blob/feature/seq_no/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointService.java + protected BitSet ackedSeqNums; + protected Checkpoint lastCheckpoint; + + public Page(int pageNum, Queue queue, long minSeqNum, int elementCount, long firstUnreadSeqNum, BitSet ackedSeqNums, PageIO pageIO) { + this.pageNum = pageNum; + this.queue = queue; + + this.minSeqNum = minSeqNum; + this.elementCount = elementCount; + this.firstUnreadSeqNum = firstUnreadSeqNum; + this.ackedSeqNums = ackedSeqNums; + this.lastCheckpoint = new Checkpoint(0, 0, 0, 0, 0); + this.pageIO = pageIO; + } + + public String toString() { + return "pageNum=" + this.pageNum + ", minSeqNum=" + this.minSeqNum + ", elementCount=" + this.elementCount + ", firstUnreadSeqNum=" + this.firstUnreadSeqNum; + } + + // NOTE: + // we have a page concern inconsistency where readBatch() takes care of the + // deserialization and returns a Batch object which contains the deserialized + // elements objects of the proper elementClass but HeadPage.write() deals with + // a serialized element byte[] and serialization is done at the Queue level to + // be able to use the Page.hasSpace() method with the serialized element byte size. + // + // @param limit the batch size limit + // @param elementClass the concrete element class for deserialization + // @return Batch batch of elements read when the number of elements can be <= limit + public Batch readBatch(int limit) throws IOException { + + // first make sure this page is activated, activating previously activated is harmless + this.pageIO.activate(); + + SequencedList serialized = this.pageIO.read(this.firstUnreadSeqNum, limit); + List deserialized = serialized.getElements().stream().map(e -> this.queue.deserialize(e)).collect(Collectors.toList()); + + assert serialized.getSeqNums().get(0) == this.firstUnreadSeqNum : + String.format("firstUnreadSeqNum=%d != first result seqNum=%d", this.firstUnreadSeqNum, serialized.getSeqNums().get(0)); + + Batch batch = new Batch(deserialized, serialized.getSeqNums(), this.queue); + + this.firstUnreadSeqNum += deserialized.size(); + + return batch; + } + + public boolean isFullyRead() { + return unreadCount() <= 0; +// return this.elementCount <= 0 || this.firstUnreadSeqNum > maxSeqNum(); + } + + public boolean isFullyAcked() { + // TODO: it should be something similar to this when we use a proper bitset class like ES + // this.ackedSeqNum.firstUnackedBit >= this.elementCount; + // TODO: for now use a naive & inneficient mechanism with a simple Bitset + return this.elementCount > 0 && this.ackedSeqNums.cardinality() >= this.elementCount; + } + + public long unreadCount() { + return this.elementCount <= 0 ? 
0 : Math.max(0, (maxSeqNum() - this.firstUnreadSeqNum) + 1); + } + + public void ack(List seqNums) throws IOException { + for (long seqNum : seqNums) { + // TODO: eventually refactor to use new bit handling class + + assert seqNum >= this.minSeqNum : + String.format("seqNum=%d is smaller than minSeqnum=%d", seqNum, this.minSeqNum); + + assert seqNum < this.minSeqNum + this.elementCount: + String.format("seqNum=%d is greater than minSeqnum=%d + elementCount=%d = %d", seqNum, this.minSeqNum, this.elementCount, this.minSeqNum + this.elementCount); + int index = (int)(seqNum - this.minSeqNum); + + this.ackedSeqNums.set(index); + } + + // checkpoint if totally acked or we acked more than 1024 elements in this page since last checkpoint + long firstUnackedSeqNum = firstUnackedSeqNum(); + + if (isFullyAcked()) { + // TODO: here if consumer is faster than producer, the head page may be always fully acked and we may end up fsync'ing too ofter? + checkpoint(); + + assert firstUnackedSeqNum >= this.minSeqNum + this.elementCount - 1: + String.format("invalid firstUnackedSeqNum=%d for minSeqNum=%d and elementCount=%d and cardinality=%d", firstUnackedSeqNum, this.minSeqNum, this.elementCount, this.ackedSeqNums.cardinality()); + + } else if (firstUnackedSeqNum > this.lastCheckpoint.getFirstUnackedSeqNum() + 1024) { + // did we acked more that 1024 elements? if so we should checkpoint now + checkpoint(); + } + } + + public abstract void checkpoint() throws IOException; + + public abstract void close() throws IOException; + + public int getPageNum() { + return pageNum; + } + + public long getMinSeqNum() { + return this.minSeqNum; + } + + public int getElementCount() { + return elementCount; + } + + public Queue getQueue() { + return queue; + } + + public PageIO getPageIO() { + return pageIO; + } + + protected long maxSeqNum() { + return this.minSeqNum + this.elementCount - 1; + } + + protected long firstUnackedSeqNum() { + // TODO: eventually refactor to use new bithandling class + return this.ackedSeqNums.nextClearBit(0) + this.minSeqNum; + } + + protected int firstUnackedPageNumFromQueue() { + return queue.firstUnackedPageNum(); + } +} diff --git a/logstash-core/src/main/java/org/logstash/ackedqueue/Queue.java b/logstash-core/src/main/java/org/logstash/ackedqueue/Queue.java new file mode 100644 index 000000000..57c138bfa --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/ackedqueue/Queue.java @@ -0,0 +1,519 @@ +package org.logstash.ackedqueue; + +import org.logstash.common.io.CheckpointIO; +import org.logstash.common.io.PageIO; +import org.logstash.common.io.PageIOFactory; + +import java.io.Closeable; +import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.nio.file.NoSuchFileException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + + +// TODO: Notes +// +// - time-based fsync +// +// - tragic errors handling +// - what errors cause whole queue to be broken +// - where to put try/catch for these errors + + +public class Queue implements Closeable { + protected long seqNum; + protected HeadPage headPage; + + // complete list of all non fully acked pages. 
note that exact sequenciality by pageNum cannot be assumed + // because any fully acked page will be removed from this list potentially creating pageNum gaps in the list. + protected final List tailPages; + + // this list serves the only purpose of quickly retrieving the first unread page, operation necessary on every read + // reads will simply remove the first page from the list when fully read and writes will append new pages upon beheading + protected final List unreadTailPages; + + protected volatile long unreadCount; + + private final CheckpointIO checkpointIO; + private final PageIOFactory pageIOFactory; + private final int pageCapacity; + private final String dirPath; + private final int maxUnread; + + private final AtomicBoolean closed; + + // deserialization + private final Class elementClass; + private final Method deserializeMethod; + + // thread safety + final Lock lock = new ReentrantLock(); + final Condition notFull = lock.newCondition(); + final Condition notEmpty = lock.newCondition(); + + public Queue(Settings settings) { + this(settings.getDirPath(), settings.getCapacity(), settings.getCheckpointIOFactory().build(settings.getDirPath()), settings.getPageIOFactory(), settings.getElementClass(), settings.getMaxUnread()); + } + + public Queue(String dirPath, int pageCapacity, CheckpointIO checkpointIO, PageIOFactory pageIOFactory, Class elementClass, int maxUnread) { + this.dirPath = dirPath; + this.pageCapacity = pageCapacity; + this.checkpointIO = checkpointIO; + this.pageIOFactory = pageIOFactory; + this.elementClass = elementClass; + this.tailPages = new ArrayList<>(); + this.unreadTailPages = new ArrayList<>(); + this.closed = new AtomicBoolean(true); // not yet opened + this.maxUnread = maxUnread; + this.unreadCount = 0; + + // retrieve the deserialize method + try { + Class[] cArg = new Class[1]; + cArg[0] = byte[].class; + this.deserializeMethod = this.elementClass.getDeclaredMethod("deserialize", cArg); + } catch (NoSuchMethodException e) { + throw new QueueRuntimeException("cannot find deserialize method on class " + this.elementClass.getName(), e); + } + } + + // moved queue opening logic in open() method until we have something in place to used in-memory checkpoints for testing + // because for now we need to pass a Queue instance to the Page and we don't want to trigger a Queue recovery when + // testing Page + public void open() throws IOException { + final int headPageNum; + + if (!this.closed.get()) { throw new IOException("queue already opened"); } + + Checkpoint headCheckpoint; + try { + headCheckpoint = checkpointIO.read(checkpointIO.headFileName()); + } catch (NoSuchFileException e) { + headCheckpoint = null; + } + + // if there is no head checkpoint, create a new headpage and checkpoint it and exit method + if (headCheckpoint == null) { + this.seqNum = 0; + headPageNum = 0; + + newCheckpointedHeadpage(headPageNum); + this.closed.set(false); + + return; + } + + // at this point we have a head checkpoint to figure queue recovery + + // reconstruct all tail pages state upto but excluding the head page + for (int pageNum = headCheckpoint.getFirstUnackedPageNum(); pageNum < headCheckpoint.getPageNum(); pageNum++) { + Checkpoint tailCheckpoint = checkpointIO.read(checkpointIO.tailFileName(pageNum)); + + if (tailCheckpoint == null) { throw new IOException(checkpointIO.tailFileName(pageNum) + " not found"); } + + PageIO pageIO = this.pageIOFactory.build(pageNum, this.pageCapacity, this.dirPath); + TailPage tailPage = new TailPage(tailCheckpoint, this, pageIO); + 
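The open() path above rebuilds tail pages from their checkpoints, which is what lets a persisted queue hand back events that were written but never acked before a restart. A Ruby-level sketch of that behaviour through the wrapper added earlier in this patch, assuming unacked elements become readable again when the same directory is reopened; the path and capacity are illustrative:

    require "fileutils"
    require "logstash/util/wrapped_acked_queue"
    require "logstash/event"

    dir      = "/tmp/example-queue"   # illustrative path
    capacity = 1024 * 1024
    FileUtils.mkdir_p(dir)

    q = LogStash::Util::WrappedAckedQueue.create_file_based(dir, capacity, 0)
    q.push(LogStash::Event.new("message" => "survives a restart"))
    q.close                           # head page is checkpointed on close

    # reopening the same directory replays the head/tail checkpoints;
    # the element above was never acked, so it is served again
    q = LogStash::Util::WrappedAckedQueue.create_file_based(dir, capacity, 0)
    batch = q.read_batch(10, 200)
    batch.get_elements.each { |e| puts e.get("message") }
    batch.close                       # ack it so it is not replayed next time
    q.close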
+ // if this page is not the first tail page, deactivate it + // we keep the first tail page activated since we know the next read operation will be in that one + if (pageNum > headCheckpoint.getFirstUnackedPageNum()) { pageIO.deactivate(); } + + // track the seqNum as we rebuild tail pages, prevent empty pages with a minSeqNum of 0 to reset seqNum + if (tailPage.maxSeqNum() > this.seqNum) { this.seqNum = tailPage.maxSeqNum(); } + + insertTailPage(tailPage); + } + + // transform the head page into a tail page only if the headpage is non-empty + + if (headCheckpoint.getMinSeqNum() <= 0 && headCheckpoint.getElementCount() <= 0) { + PageIO headPageIO = this.pageIOFactory.build(headCheckpoint.getPageNum(), this.pageCapacity, this.dirPath); + this.headPage = new HeadPage(headCheckpoint, this, headPageIO); + } else { + PageIO tailPageIO = this.pageIOFactory.build(headCheckpoint.getPageNum(), this.pageCapacity, this.dirPath); + TailPage tailPage = new TailPage(headCheckpoint, this, tailPageIO); + + // track the seqNum as we add this new tail page, prevent empty tailPage with a minSeqNum of 0 to reset seqNum + if (tailPage.maxSeqNum() > this.seqNum) { this.seqNum = tailPage.maxSeqNum(); } + + insertTailPage(tailPage); + + headPageNum = headCheckpoint.getPageNum() + 1; + newCheckpointedHeadpage(headPageNum); + } + + // TODO: here do directory traversal and cleanup lingering pages? could be a background operations to not delay queue start? + + this.closed.set(false); + } + + // insert a recovered tail page into the tail pages state tracking + // and purge it if it is found to be fully acked + private void insertTailPage(TailPage p) throws IOException { + if (!p.isFullyAcked()) { + this.tailPages.add(p); + if (!p.isFullyRead()) { + this.unreadTailPages.add(p); + this.unreadCount += p.unreadCount(); + } + } else { + // for some reason we found a fully acked page, let's purge it. + p.purge(); + } + } + + // create a new empty headpage for the given pageNum and imidiately checkpoint it + // @param pageNum the page number of the new head page + private void newCheckpointedHeadpage(int pageNum) throws IOException { + PageIO headPageIO = this.pageIOFactory.build(pageNum, this.pageCapacity, this.dirPath); + this.headPage = new HeadPage(pageNum, this, headPageIO); + this.headPage.checkpoint(); + + } + + // @param element the Queueable object to write to the queue + // @return long written sequence number + public long write(Queueable element) throws IOException { + long seqNum = nextSeqNum(); + byte[] data = element.serialize(); + + if (! this.headPage.hasCapacity(data.length)) { + throw new IOException("data to be written is bigger than page capacity"); + } + + // the write strategy with regard to the isFull() state is to assume there is space for this element + // and write it, then after write verify if we just filled the queue and wait on the notFull condition + // *after* the write which is both safer for a crash condition, and the queue closing sequence. In the former case + // holding an element in memory while wainting for the notFull condition would mean always having the current write + // element at risk in the always-full queue state. In the later, when closing a full queue, it would be impossible + // to write the current element. + + lock.lock(); + try { + boolean wasEmpty = (firstUnreadPage() == null); + + // create a new head page if the current does not have suffient space left for data to be written + if (! 
this.headPage.hasSpace(data.length)) { + // beheading includes checkpoint+fsync if required + TailPage tailPage = this.headPage.behead(); + + this.tailPages.add(tailPage); + if (! tailPage.isFullyRead()) { + this.unreadTailPages.add(tailPage); + } + + // create new head page + int headPageNum = tailPage.pageNum + 1; + PageIO pageIO = this.pageIOFactory.build(headPageNum, this.pageCapacity, this.dirPath); + this.headPage = new HeadPage(headPageNum, this, pageIO); + this.headPage.checkpoint(); + } + + this.headPage.write(data, seqNum); + this.unreadCount++; + + // if the queue was empty before write, signal non emptiness + if (wasEmpty) { notEmpty.signal(); } + + // now check if we reached a queue full state and block here until it is not full + // for the next write or the queue was closed. + while (isFull() && !isClosed()) { + try { + notFull.await(); + } catch (InterruptedException e) { + // the thread interrupt() has been called while in the await() blocking call. + // at this point the interrupted flag is reset and Thread.interrupted() will return false + // to any upstream calls on it. for now our choice is to return normally and set back + // the Thread.interrupted() flag so it can be checked upstream. + + // this is a bit tricky in the case of the queue full condition blocking state. + // TODO: we will want to avoid initiating a new write operation if Thread.interrupted() was called. + + // set back the interrupted flag + Thread.currentThread().interrupt(); + + return seqNum; + } + } + + return seqNum; + } finally { + lock.unlock(); + } + } + + // @return true if the queue is deemed at full capacity + public boolean isFull() { + // TODO: I am not sure if having unreadCount as volatile is sufficient here. all unreadCount updates are done inside syncronized + // TODO: sections, I believe that to only read the value here, having it as volatile is sufficient? + return (this.maxUnread > 0) ? this.unreadCount >= this.maxUnread : false; + } + + // @param seqNum the element sequence number upper bound for which persistence should be garanteed (by fsync'ing) + public void ensurePersistedUpto(long seqNum) throws IOException{ + lock.lock(); + try { + this.headPage.ensurePersistedUpto(seqNum); + } finally { + lock.unlock(); + } + } + + // non-blockin queue read + // @param limit read the next bach of size up to this limit. the returned batch size can be smaller than than the requested limit if fewer elements are available + // @return Batch the batch containing 1 or more element up to the required limit or null of no elements were available + public Batch nonBlockReadBatch(int limit) throws IOException { + lock.lock(); + try { + Page p = firstUnreadPage(); + if (p == null) { + return null; + } + + return _readPageBatch(p, limit); + } finally { + lock.unlock(); + } + } + + // blocking readBatch notes: + // the queue close() notifies all pending blocking read so that they unblock if the queue is being closed. + // this means that all blocking read methods need to verify for the queue close condition. + // + // blocking queue read until elements are available for read + // @param limit read the next bach of size up to this limit. 
the returned batch size can be smaller than than the requested limit if fewer elements are available + // @return Batch the batch containing 1 or more element up to the required limit or null if no elements were available or the blocking call was interrupted + public Batch readBatch(int limit) throws IOException { + Page p; + + lock.lock(); + try { + while ((p = firstUnreadPage()) == null && !isClosed()) { + try { + notEmpty.await(); + } catch (InterruptedException e) { + // the thread interrupt() has been called while in the await() blocking call. + // at this point the interrupted flag is reset and Thread.interrupted() will return false + // to any upstream calls on it. for now our choice is to simply return null and set back + // the Thread.interrupted() flag so it can be checked upstream. + + // set back the interrupted flag + Thread.currentThread().interrupt(); + + return null; + } + } + + // need to check for close since it is a condition for exiting the while loop + if (isClosed()) { return null; } + + return _readPageBatch(p, limit); + } finally { + lock.unlock(); + } + } + + // blocking queue read until elements are available for read or the given timeout is reached. + // @param limit read the next batch of size up to this limit. the returned batch size can be smaller than than the requested limit if fewer elements are available + // @param timeout the maximum time to wait in milliseconds + // @return Batch the batch containing 1 or more element up to the required limit or null if no elements were available or the blocking call was interrupted + public Batch readBatch(int limit, long timeout) throws IOException { + Page p; + + lock.lock(); + try { + // wait only if queue is empty + if ((p = firstUnreadPage()) == null) { + try { + notEmpty.await(timeout, TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + // the thread interrupt() has been called while in the await() blocking call. + // at this point the interrupted flag is reset and Thread.interrupted() will return false + // to any upstream calls on it. for now our choice is to simply return null and set back + // the Thread.interrupted() flag so it can be checked upstream. 
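+ // note: same handling as the untimed readBatch(limit) above; await() cleared the interrupt
+ // status, so it is re-armed below before returning null to the caller.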
+ + // set back the interrupted flag + Thread.currentThread().interrupt(); + + return null; + } + + // if after returnining from wait queue is still empty, or the queue was closed return null + if ((p = firstUnreadPage()) == null || isClosed()) { return null; } + } + + return _readPageBatch(p, limit); + } finally { + lock.unlock(); + } + } + + private Batch _readPageBatch(Page p, int limit) throws IOException { + boolean wasFull = isFull(); + + Batch b = p.readBatch(limit); + this.unreadCount -= b.size(); + + if (p.isFullyRead()) { + removeUnreadPage(p); + } + + if (wasFull) { notFull.signal(); } + + return b; + } + + private static class TailPageResult { + public TailPage page; + public int index; + + public TailPageResult(TailPage page, int index) { + this.page = page; + this.index = index; + } + } + + // perform a binary search through tail pages to find in which page this seqNum falls into + private TailPageResult binaryFindPageForSeqnum(long seqNum) { + int lo = 0; + int hi = this.tailPages.size() - 1; + while (lo <= hi) { + int mid = lo + (hi - lo) / 2; + TailPage p = this.tailPages.get(mid); + + if (seqNum < p.getMinSeqNum()) { + hi = mid - 1; + } else if (seqNum >= (p.getMinSeqNum() + p.getElementCount())) { + lo = mid + 1; + } else { + return new TailPageResult(p, mid); + } + } + return null; + } + + // perform a linear search through tail pages to find in which page this seqNum falls into + private TailPageResult linearFindPageForSeqnum(long seqNum) { + for (int i = 0; i < this.tailPages.size(); i++) { + TailPage p = this.tailPages.get(i); + if (p.getMinSeqNum() > 0 && seqNum >= p.getMinSeqNum() && seqNum < p.getMinSeqNum() + p.getElementCount()) { + return new TailPageResult(p, i); + } + } + return null; + } + + public void ack(List seqNums) throws IOException { + // as a first implementation we assume that all batches are created from the same page + // so we will avoid multi pages acking here for now + + // find the page to ack by travesing from oldest tail page + long firstAckSeqNum = seqNums.get(0); + + lock.lock(); + try { + // dual search strategy: if few tail pages search linearily otherwise perform binary search + TailPageResult result = (this.tailPages.size() > 3) ? binaryFindPageForSeqnum(firstAckSeqNum) : linearFindPageForSeqnum(firstAckSeqNum); + + if (result == null) { + // if not found then it is in head page + assert this.headPage.getMinSeqNum() > 0 && firstAckSeqNum >= this.headPage.getMinSeqNum() && firstAckSeqNum < this.headPage.getMinSeqNum() + this.headPage.getElementCount(): + String.format("seqNum=%d is not in head page with minSeqNum=%d", firstAckSeqNum, this.headPage.getMinSeqNum()); + this.headPage.ack(seqNums); + } else { + result.page.ack(seqNums); + + // cleanup fully acked tail page + if (result.page.isFullyAcked()) { + this.tailPages.remove(result.index); + this.headPage.checkpoint(); + result.page.purge(); + } + } + } finally { + lock.unlock(); + } + } + + public CheckpointIO getCheckpointIO() { + return this.checkpointIO; + } + + // deserialize a byte array into the required element class. 
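+ // this dispatches through the deserializeMethod reflection handle to the element class's
+ // own deserialize(byte[]) (cf. Queueable.deserialize and StringElement in the tests).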
+ // @param bytes the byte array to deserialize + // @return Queueable the deserialized byte array into the required Queuable interface implementation concrete class + public Queueable deserialize(byte[] bytes) { + try { + return (Queueable)this.deserializeMethod.invoke(this.elementClass, bytes); + } catch (IllegalAccessException|InvocationTargetException e) { + throw new QueueRuntimeException("deserialize invocation error", e); + } + } + + public void close() throws IOException { + // TODO: review close strategy and exception handling and resiliency of first closing tail pages if crash in the middle + + if (closed.getAndSet(true) == false) { + lock.lock(); + try { + // TODO: not sure if we need to do this here since the headpage close will also call ensurePersited + ensurePersistedUpto(this.seqNum); + + for (TailPage p : this.tailPages) { p.close(); } + this.headPage.close(); + + // release all referenced objects + this.tailPages.clear(); + this.unreadTailPages.clear(); + this.headPage = null; + + // unblock blocked reads which will return null by checking of isClosed() + // no data will be lost because the actual read has not been performed + notEmpty.signalAll(); + + + // unblock blocked writes. a write is blocked *after* the write has been performed so + // unblocking is safe and will return from the write call + notFull.signalAll(); + } finally { + lock.unlock(); + } + } + } + + protected Page firstUnreadPage() throws IOException { + // look at head page if no unreadTailPages + return (this.unreadTailPages.isEmpty()) ? (this.headPage.isFullyRead() ? null : this.headPage) : this.unreadTailPages.get(0); + } + + private void removeUnreadPage(Page p) { + // HeadPage is not part of the unreadTailPages, just ignore + if (p instanceof TailPage){ + // the page to remove should always be the first one + assert this.unreadTailPages.get(0) == p : String.format("unread page is not first in unreadTailPages list"); + this.unreadTailPages.remove(0); + } + } + + protected int firstUnackedPageNum() { + if (this.tailPages.isEmpty()) { + return this.headPage.getPageNum(); + } + return this.tailPages.get(0).getPageNum(); + } + + protected long nextSeqNum() { + return this.seqNum += 1; + } + + protected boolean isClosed() { + return this.closed.get(); + } +} diff --git a/logstash-core/src/main/java/org/logstash/ackedqueue/QueueRuntimeException.java b/logstash-core/src/main/java/org/logstash/ackedqueue/QueueRuntimeException.java new file mode 100644 index 000000000..06b8639d5 --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/ackedqueue/QueueRuntimeException.java @@ -0,0 +1,29 @@ +package org.logstash.ackedqueue; + +public class QueueRuntimeException extends RuntimeException { + + public static QueueRuntimeException newFormatMessage(String fmt, Object... 
args) { + return new QueueRuntimeException( + String.format(fmt, args) + ); + } + + public QueueRuntimeException() { + } + + public QueueRuntimeException(String message) { + super(message); + } + + public QueueRuntimeException(String message, Throwable cause) { + super(message, cause); + } + + public QueueRuntimeException(Throwable cause) { + super(cause); + } + + public QueueRuntimeException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); + } +} diff --git a/logstash-core/src/main/java/org/logstash/ackedqueue/Queueable.java b/logstash-core/src/main/java/org/logstash/ackedqueue/Queueable.java new file mode 100644 index 000000000..2becec11d --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/ackedqueue/Queueable.java @@ -0,0 +1,10 @@ +package org.logstash.ackedqueue; + +import java.io.IOException; + +public interface Queueable { + + byte[] serialize() throws IOException; + + static Object deserialize(byte[] bytes) { throw new RuntimeException("please implement deserialize"); }; +} diff --git a/logstash-core/src/main/java/org/logstash/ackedqueue/SequencedList.java b/logstash-core/src/main/java/org/logstash/ackedqueue/SequencedList.java new file mode 100644 index 000000000..8bb580fe0 --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/ackedqueue/SequencedList.java @@ -0,0 +1,21 @@ +package org.logstash.ackedqueue; + +import java.util.List; + +public class SequencedList { + private final List elements; + private final List seqNums; + + public SequencedList(List elements, List seqNums) { + this.elements = elements; + this.seqNums = seqNums; + } + + public List getElements() { + return elements; + } + + public List getSeqNums() { + return seqNums; + } +} diff --git a/logstash-core/src/main/java/org/logstash/ackedqueue/Settings.java b/logstash-core/src/main/java/org/logstash/ackedqueue/Settings.java new file mode 100644 index 000000000..1b847e0fc --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/ackedqueue/Settings.java @@ -0,0 +1,28 @@ +package org.logstash.ackedqueue; + +import org.logstash.common.io.CheckpointIOFactory; +import org.logstash.common.io.PageIOFactory; + +public interface Settings { + Settings setCheckpointIOFactory(CheckpointIOFactory factory); + + Settings setElementIOFactory(PageIOFactory factory); + + Settings setElementClass(Class elementClass); + + Settings setCapacity(int capacity); + + Settings setMaxUnread(int maxUnread); + + CheckpointIOFactory getCheckpointIOFactory(); + + PageIOFactory getPageIOFactory(); + + Class getElementClass(); + + String getDirPath(); + + int getCapacity(); + + int getMaxUnread(); +} diff --git a/logstash-core/src/main/java/org/logstash/ackedqueue/TailPage.java b/logstash-core/src/main/java/org/logstash/ackedqueue/TailPage.java new file mode 100644 index 000000000..8a8b2b721 --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/ackedqueue/TailPage.java @@ -0,0 +1,48 @@ +package org.logstash.ackedqueue; + +import org.logstash.common.io.CheckpointIO; +import org.logstash.common.io.PageIO; + +import java.io.IOException; +import java.util.BitSet; + +public class TailPage extends Page { + + // create a new TailPage object from a HeadPage object + public TailPage(HeadPage page) { + super(page.pageNum, page.queue, page.minSeqNum, page.elementCount, page.firstUnreadSeqNum, page.ackedSeqNums, page.pageIO); + } + + // create a new TailPage object for an exiting Checkpoint and data file + public TailPage(Checkpoint 
checkpoint, Queue queue, PageIO pageIO) throws IOException { + super(checkpoint.getPageNum(), queue, checkpoint.getMinSeqNum(), checkpoint.getElementCount(), checkpoint.getFirstUnackedSeqNum(), new BitSet(), pageIO); + + // open the data file and reconstruct the IO object internal state + pageIO.open(checkpoint.getMinSeqNum(), checkpoint.getElementCount()); + + // this page ackedSeqNums bitset is a new empty bitset, if we have some acked elements, set them in the bitset + if (checkpoint.getFirstUnackedSeqNum() > checkpoint.getMinSeqNum()) { + this.ackedSeqNums.flip(0, (int) (checkpoint.getFirstUnackedSeqNum() - checkpoint.getMinSeqNum())); + } + } + + public void checkpoint() throws IOException { + // TODO: not concurrent for first iteration: + + // since this is a tail page and no write can happen in this page, there is no point in performing a fsync on this page, just stamp checkpoint + CheckpointIO io = queue.getCheckpointIO(); + this.lastCheckpoint = io.write(io.tailFileName(this.pageNum), this.pageNum, this.queue.firstUnackedPageNum(), firstUnackedSeqNum(), this.minSeqNum, this.elementCount); + } + + // delete all IO files associated with this page + public void purge() throws IOException { + this.pageIO.purge(); + CheckpointIO io = queue.getCheckpointIO(); + io.purge(io.tailFileName(this.pageNum)); + } + + public void close() throws IOException { + checkpoint(); + this.pageIO.close(); + } +} \ No newline at end of file diff --git a/logstash-core/src/main/java/org/logstash/common/io/BufferedChecksum.java b/logstash-core/src/main/java/org/logstash/common/io/BufferedChecksum.java new file mode 100644 index 000000000..79bbd7af3 --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/common/io/BufferedChecksum.java @@ -0,0 +1,67 @@ +package org.logstash.common.io; + +import java.util.zip.Checksum; + +/** + * Wraps another {@link Checksum} with an internal buffer + * to speed up checksum calculations. 
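+ * Bytes are staged in a fixed-size internal buffer and only pushed to the wrapped
+ * checksum when the buffer fills up or {@link #getValue()} is called.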
+ */ +public class BufferedChecksum implements Checksum { + private final Checksum in; + private final byte buffer[]; + private int upto; + /** Default buffer size: 256 */ + public static final int DEFAULT_BUFFERSIZE = 256; + + /** Create a new BufferedChecksum with {@link #DEFAULT_BUFFERSIZE} */ + public BufferedChecksum(Checksum in) { + this(in, DEFAULT_BUFFERSIZE); + } + + /** Create a new BufferedChecksum with the specified bufferSize */ + public BufferedChecksum(Checksum in, int bufferSize) { + this.in = in; + this.buffer = new byte[bufferSize]; + } + + @Override + public void update(int b) { + if (upto == buffer.length) { + flush(); + } + buffer[upto++] = (byte) b; + } + + @Override + public void update(byte[] b, int off, int len) { + if (len >= buffer.length) { + flush(); + in.update(b, off, len); + } else { + if (upto + len > buffer.length) { + flush(); + } + System.arraycopy(b, off, buffer, upto, len); + upto += len; + } + } + + @Override + public long getValue() { + flush(); + return in.getValue(); + } + + @Override + public void reset() { + upto = 0; + in.reset(); + } + + private void flush() { + if (upto > 0) { + in.update(buffer, 0, upto); + } + upto = 0; + } +} diff --git a/logstash-core/src/main/java/org/logstash/common/io/BufferedChecksumStreamInput.java b/logstash-core/src/main/java/org/logstash/common/io/BufferedChecksumStreamInput.java new file mode 100644 index 000000000..beed52387 --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/common/io/BufferedChecksumStreamInput.java @@ -0,0 +1,104 @@ +package org.logstash.common.io; + +import java.io.IOException; +import java.util.zip.CRC32; +import java.util.zip.Checksum; + +/** + * Similar to Lucene's BufferedChecksumIndexInput, however this wraps a + * {@link StreamInput} so anything read will update the checksum + */ +public final class BufferedChecksumStreamInput extends StreamInput { + private static final int SKIP_BUFFER_SIZE = 1024; + private byte[] skipBuffer; + private final StreamInput in; + private final Checksum digest; + + public BufferedChecksumStreamInput(StreamInput in) { + this.in = in; + this.digest = new BufferedChecksum(new CRC32()); + } + + public BufferedChecksumStreamInput(StreamInput in, BufferedChecksumStreamInput reuse) { + this.in = in; + if (reuse == null ) { + this.digest = new BufferedChecksum(new CRC32()); + } else { + this.digest = reuse.digest; + digest.reset(); + this.skipBuffer = reuse.skipBuffer; + } + } + + public long getChecksum() { + return this.digest.getValue(); + } + + @Override + public byte readByte() throws IOException { + final byte b = in.readByte(); + digest.update(b); + return b; + } + + @Override + public void readBytes(byte[] b, int offset, int len) throws IOException { + in.readBytes(b, offset, len); + digest.update(b, offset, len); + } + + @Override + public void reset() throws IOException { + in.reset(); + digest.reset(); + } + + @Override + public int read() throws IOException { + return readByte() & 0xFF; + } + + @Override + public void close() throws IOException { + in.close(); + } + + @Override + public boolean markSupported() { + return in.markSupported(); + } + + + @Override + public long skip(long numBytes) throws IOException { + if (numBytes < 0) { + throw new IllegalArgumentException("numBytes must be >= 0, got " + numBytes); + } + if (skipBuffer == null) { + skipBuffer = new byte[SKIP_BUFFER_SIZE]; + } + assert skipBuffer.length == SKIP_BUFFER_SIZE; + long skipped = 0; + for (; skipped < numBytes; ) { + final int step = (int) Math.min(SKIP_BUFFER_SIZE, 
numBytes - skipped); + readBytes(skipBuffer, 0, step); + skipped += step; + } + return skipped; + } + + @Override + public int available() throws IOException { + return in.available(); + } + + @Override + public synchronized void mark(int readlimit) { + in.mark(readlimit); + } + + public void resetDigest() { + digest.reset(); + } +} + diff --git a/logstash-core/src/main/java/org/logstash/common/io/BufferedChecksumStreamOutput.java b/logstash-core/src/main/java/org/logstash/common/io/BufferedChecksumStreamOutput.java new file mode 100644 index 000000000..f37b71f92 --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/common/io/BufferedChecksumStreamOutput.java @@ -0,0 +1,57 @@ +package org.logstash.common.io; + + +import java.io.IOException; +import java.util.zip.CRC32; +import java.util.zip.Checksum; + +/** + * Similar to Lucene's BufferedChecksumIndexOutput, however this wraps a + * {@link StreamOutput} so anything written will update the checksum + */ +public final class BufferedChecksumStreamOutput extends StreamOutput { + private final StreamOutput out; + private final Checksum digest; + + public BufferedChecksumStreamOutput(StreamOutput out) { + this.out = out; + this.digest = new BufferedChecksum(new CRC32()); + } + + public long getChecksum() { + return this.digest.getValue(); + } + + @Override + public void writeByte(byte b) throws IOException { + out.writeByte(b); + digest.update(b); + } + + @Override + public void writeBytes(byte[] b, int offset, int length) throws IOException { + out.writeBytes(b, offset, length); + digest.update(b, offset, length); + } + + @Override + public void flush() throws IOException { + out.flush(); + } + + @Override + public void close() throws IOException { + out.close(); + } + + @Override + public void reset() throws IOException { + out.reset(); + digest.reset(); + } + + public void resetDigest() { + digest.reset(); + } +} + diff --git a/logstash-core/src/main/java/org/logstash/common/io/ByteArrayStreamOutput.java b/logstash-core/src/main/java/org/logstash/common/io/ByteArrayStreamOutput.java new file mode 100644 index 000000000..6f49581e9 --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/common/io/ByteArrayStreamOutput.java @@ -0,0 +1,65 @@ +package org.logstash.common.io; + +import java.nio.ByteBuffer; + +public class ByteArrayStreamOutput extends StreamOutput { + private byte[] bytes; + + private int pos; + private int limit; + + public ByteArrayStreamOutput(byte[] bytes) { + reset(bytes); + } + + public ByteArrayStreamOutput(ByteBuffer bytebuffer) { + reset(bytebuffer.array()); + } + + public ByteArrayStreamOutput(ByteBuffer bytebuffer, int offset, int len) { + reset(bytebuffer.array(), offset, len); + } + + public ByteArrayStreamOutput(byte[] bytes, int offset, int len) { + reset(bytes, offset, len); + } + + public void reset(byte[] bytes) { + reset(bytes, 0, bytes.length); + } + + public void reset(byte[] bytes, int offset, int len) { + this.bytes = bytes; + pos = offset; + limit = offset + len; + } + + public void setWriteWindow(int offset, int len) { + pos = offset; + limit = offset + len; + } + + public void reset() { + } + + public void reset(int offset) { + pos = offset; + } + + public int getPosition() { + return pos; + } + + @Override + public void writeByte(byte b) { + assert pos < limit : String.format("ByteArrayStreamOutput#writeByte pos=%d !< limit=%d", pos, limit); + bytes[pos++] = b; + } + + @Override + public void writeBytes(byte[] b, int offset, int length) { + assert pos + length <= limit; + System.arraycopy(b, 
offset, bytes, pos, length); + pos += length; + } +} diff --git a/logstash-core/src/main/java/org/logstash/common/io/ByteBufferPageIO.java b/logstash-core/src/main/java/org/logstash/common/io/ByteBufferPageIO.java new file mode 100644 index 000000000..e2619a5b3 --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/common/io/ByteBufferPageIO.java @@ -0,0 +1,248 @@ +package org.logstash.common.io; + +import org.logstash.ackedqueue.Queueable; +import org.logstash.ackedqueue.SequencedList; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; +import java.util.zip.CRC32; +import java.util.zip.Checksum; + +// TODO: currently assuming continuous seqNum is the byte buffer where we can deduct the maxSeqNum from the min + count. +// TODO: we could change this and support non-continuous seqNums but I am not sure we should. +// TODO: checksum is not currently computed. + +public class ByteBufferPageIO implements PageIO { + public static final byte VERSION = 1; + public static final int CHECKSUM_SIZE = Integer.BYTES; + public static final int LENGTH_SIZE = Integer.BYTES; + public static final int SEQNUM_SIZE = Long.BYTES; + public static final int MIN_RECORD_SIZE = SEQNUM_SIZE + LENGTH_SIZE + CHECKSUM_SIZE; + public static final int HEADER_SIZE = 1; // version byte + static final List EMPTY_READ = new ArrayList<>(0); + + private final int capacity; + private final List offsetMap; // has to be extendable + private final ByteBuffer buffer; + private long minSeqNum; // TODO: to make minSeqNum final we have to pass in the minSeqNum in the constructor and not set it on first write + private int elementCount; + private int head; + private byte version; + private Checksum checkSummer; + + public ByteBufferPageIO(int pageNum, int capacity, String path) throws IOException { + this(capacity, new byte[0]); + } + + public ByteBufferPageIO(int capacity) throws IOException { + this(capacity, new byte[0]); + } + + public ByteBufferPageIO(int capacity, byte[] initialBytes) throws IOException { + this.capacity = capacity; + if (initialBytes.length > capacity) { + throw new IOException("initial bytes greater than capacity"); + } + + this.buffer = ByteBuffer.allocate(capacity); + this.buffer.put(initialBytes); + + this.offsetMap = new ArrayList<>(); + this.checkSummer = new CRC32(); + } + + @Override + public void open(long minSeqNum, int elementCount) throws IOException { + this.minSeqNum = minSeqNum; + this.elementCount = elementCount; + + this.buffer.position(0); + this.version = this.buffer.get(); + this.head = 1; + + if (this.elementCount > 0) { + + // TODO: refactor the read logic below to DRY with the read() method. 
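+ // the page starts with a single version byte, followed by records laid out as
+ // [seqNum: 8 bytes][length: 4 bytes][data: length bytes][checksum: 4 bytes]; the loop below
+ // walks these records to rebuild offsetMap and leave head positioned after the last element.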
+ + // set head by skipping over all elements + for (int i = 0; i < this.elementCount; i++) { + if (this.head + SEQNUM_SIZE + LENGTH_SIZE > capacity) { + throw new IOException(String.format("cannot read seqNum and length bytes past buffer capacity")); + } + + long seqNum = this.buffer.getLong(); + + if (i == 0 && seqNum != this.minSeqNum) { + throw new IOException(String.format("first seqNum=%d is different than minSeqNum=%d", seqNum, this.minSeqNum)); + } + + this.offsetMap.add(head); + this.head += SEQNUM_SIZE; + + + int length = this.buffer.getInt(); + this.head += LENGTH_SIZE; + + if (this.head + length + CHECKSUM_SIZE > capacity) { + throw new IOException(String.format("cannot read element payload and checksum past buffer capacity")); + } + + // skip over data + this.head += length; + this.head += CHECKSUM_SIZE; + + this.buffer.position(head); + } + } + } + + @Override + public void create() throws IOException { + this.buffer.position(0); + this.buffer.put(VERSION); + this.head = 1; + this.minSeqNum = 0L; + this.elementCount = 0; + } + + @Override + public int getCapacity() { + return this.capacity; + } + + public long getMinSeqNum() { + return this.minSeqNum; + } + + @Override + public boolean hasSpace(int bytes) { + int bytesLeft = this.capacity - this.head; + return persistedByteCount(bytes) <= bytesLeft; + } + + @Override + public void write(byte[] bytes, long seqNum) throws IOException { + // since writes always happen at head, we can just append head to the offsetMap + assert this.offsetMap.size() == this.elementCount : + String.format("offsetMap size=%d != elementCount=%d", this.offsetMap.size(), this.elementCount); + + int initialHead = this.head; + + this.buffer.position(this.head); + this.buffer.putLong(seqNum); + this.buffer.putInt(bytes.length); + this.buffer.put(bytes); + this.buffer.putInt(checksum(bytes)); + this.head += persistedByteCount(bytes.length); + assert this.head == this.buffer.position() : + String.format("head=%d != buffer position=%d", this.head, this.buffer.position()); + + if (this.elementCount <= 0) { + this.minSeqNum = seqNum; + } + this.offsetMap.add(initialHead); + this.elementCount++; + } + + @Override + public SequencedList read(long seqNum, int limit) throws IOException { + assert seqNum >= this.minSeqNum : + String.format("seqNum=%d < minSeqNum=%d", seqNum, this.minSeqNum); + assert seqNum <= maxSeqNum() : + String.format("seqNum=%d is > maxSeqNum=%d", seqNum, maxSeqNum()); + + List elements = new ArrayList<>(); + List seqNums = new ArrayList<>(); + + int offset = this.offsetMap.get((int)(seqNum - this.minSeqNum)); + + this.buffer.position(offset); + + for (int i = 0; i < limit; i++) { + long readSeqNum = this.buffer.getLong(); + + assert readSeqNum == (seqNum + i) : + String.format("unmatched seqNum=%d to readSeqNum=%d", seqNum + i, readSeqNum); + + int readLength = this.buffer.getInt(); + byte[] readBytes = new byte[readLength]; + this.buffer.get(readBytes); + int checksum = this.buffer.getInt(); + int computedChecksum = checksum(readBytes); + if (computedChecksum != checksum) { + throw new IOException(String.format("computed checksum=%d != checksum for file=%d", computedChecksum, checksum)); + } + + elements.add(readBytes); + seqNums.add(readSeqNum); + + if (seqNum + i >= maxSeqNum()) { + break; + } + } + + return new SequencedList<>(elements, seqNums); + } + + @Override + public void deactivate() { + // nothing to do + } + + @Override + public void activate() { + // nothing to do + } + + @Override + public void ensurePersisted() { + // nothing 
to do + } + + @Override + public void purge() throws IOException { + // do nothing + } + + @Override + public void close() throws IOException { + // TODO: not sure if we need to do something here since in-memory pages are ephemeral + } + + private int checksum(byte[] bytes) { + checkSummer.reset(); + checkSummer.update(bytes, 0, bytes.length); + return (int) checkSummer.getValue(); + } + + // TODO: static method for tests - should refactor + public static int _persistedByteCount(int byteCount) { + return SEQNUM_SIZE + LENGTH_SIZE + byteCount + CHECKSUM_SIZE; + } + + @Override + public int persistedByteCount(int byteCount) { + return ByteBufferPageIO._persistedByteCount(byteCount); + } + + private long maxSeqNum() { + return this.minSeqNum + this.elementCount - 1; + } + + + // below public methods only used by tests + + public int getWritePosition() { + return this.head; + } + + public int getElementCount() { + return this.elementCount; + } + + public byte[] dump() { + return this.buffer.array(); + } +} diff --git a/logstash-core/src/main/java/org/logstash/common/io/ByteBufferStreamInput.java b/logstash-core/src/main/java/org/logstash/common/io/ByteBufferStreamInput.java new file mode 100644 index 000000000..8afeb4eef --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/common/io/ByteBufferStreamInput.java @@ -0,0 +1,93 @@ +package org.logstash.common.io; + +import java.io.EOFException; +import java.io.IOException; +import java.nio.ByteBuffer; + +public class ByteBufferStreamInput extends StreamInput { + + private final ByteBuffer buffer; + + public ByteBufferStreamInput(ByteBuffer buffer) { + this.buffer = buffer; + } + + @Override + public int read() throws IOException { + if (!buffer.hasRemaining()) { + return -1; + } + return buffer.get() & 0xFF; + } + + @Override + public byte readByte() throws IOException { + if (!buffer.hasRemaining()) { + throw new EOFException(); + } + return buffer.get(); + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + if (!buffer.hasRemaining()) { + return -1; + } + + len = Math.min(len, buffer.remaining()); + buffer.get(b, off, len); + return len; + } + + @Override + public long skip(long n) throws IOException { + if (n > buffer.remaining()) { + int ret = buffer.position(); + buffer.position(buffer.limit()); + return ret; + } + buffer.position((int) (buffer.position() + n)); + return n; + } + + @Override + public void readBytes(byte[] b, int offset, int len) throws IOException { + if (buffer.remaining() < len) { + throw new EOFException(); + } + buffer.get(b, offset, len); + } + + @Override + public void reset() throws IOException { + buffer.reset(); + } + + public void movePosition(int position) { + buffer.position(position); + } + + public void rewind() throws IOException { + buffer.rewind(); + } + + @Override + public int available() throws IOException { + return buffer.remaining(); + } + + @Override + public void mark(int readlimit) { + buffer.mark(); + } + + @Override + public boolean markSupported() { + return true; + } + + @Override + public void close() throws IOException { + } +} + diff --git a/logstash-core/src/main/java/org/logstash/common/io/CheckpointIO.java b/logstash-core/src/main/java/org/logstash/common/io/CheckpointIO.java new file mode 100644 index 000000000..cd28cf66e --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/common/io/CheckpointIO.java @@ -0,0 +1,22 @@ +package org.logstash.common.io; + +import org.logstash.ackedqueue.Checkpoint; +import java.io.IOException; + +public 
interface CheckpointIO { + + // @return Checkpoint the written checkpoint object + Checkpoint write(String fileName, int pageNum, int firstUnackedPageNum, long firstUnackedSeqNum, long minSeqNum, int elementCount) throws IOException; + + Checkpoint read(String fileName) throws IOException; + + void purge(String fileName) throws IOException; + + void purge() throws IOException; + + // @return the head page checkpoint file name + String headFileName(); + + // @return the tail page checkpoint file name for given page number + String tailFileName(int pageNum); +} diff --git a/logstash-core/src/main/java/org/logstash/common/io/CheckpointIOFactory.java b/logstash-core/src/main/java/org/logstash/common/io/CheckpointIOFactory.java new file mode 100644 index 000000000..574ca42d3 --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/common/io/CheckpointIOFactory.java @@ -0,0 +1,6 @@ +package org.logstash.common.io; + +@FunctionalInterface +public interface CheckpointIOFactory { + CheckpointIO build(String dirPath); +} diff --git a/logstash-core/src/main/java/org/logstash/common/io/FileCheckpointIO.java b/logstash-core/src/main/java/org/logstash/common/io/FileCheckpointIO.java new file mode 100644 index 000000000..12e8e1977 --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/common/io/FileCheckpointIO.java @@ -0,0 +1,108 @@ +package org.logstash.common.io; + +import org.logstash.ackedqueue.Checkpoint; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +public class FileCheckpointIO implements CheckpointIO { +// Checkpoint file structure +// +// byte version; +// int pageNum; +// int firstUnackedPageNum; +// long firstUnackedSeqNum; +// long minSeqNum; +// int elementCount; + + public static final int BUFFER_SIZE = Short.BYTES // version + + Integer.BYTES // pageNum + + Integer.BYTES // firstUnackedPageNum + + Long.BYTES // firstUnackedSeqNum + + Long.BYTES // minSeqNum + + Integer.BYTES // eventCount + + Integer.BYTES; // checksum + + private final String dirPath; + private final String HEAD_CHECKPOINT = "checkpoint.head"; + private final String TAIL_CHECKPOINT = "checkpoint."; + + public FileCheckpointIO(String dirPath) { + this.dirPath = dirPath; + } + + @Override + public Checkpoint read(String fileName) throws IOException { + Path path = Paths.get(dirPath, fileName); + InputStream is = Files.newInputStream(path); + return read(new BufferedChecksumStreamInput(new InputStreamStreamInput(is))); + } + + @Override + public Checkpoint write(String fileName, int pageNum, int firstUnackedPageNum, long firstUnackedSeqNum, long minSeqNum, int elementCount) throws IOException { + Path path = Paths.get(dirPath, fileName); + Checkpoint checkpoint = new Checkpoint(pageNum, firstUnackedPageNum, firstUnackedSeqNum, minSeqNum, elementCount); + final byte[] buffer = new byte[BUFFER_SIZE]; + write(checkpoint, buffer); + Files.write(path, buffer); + return checkpoint; + } + + @Override + public void purge(String fileName) throws IOException { + Path path = Paths.get(dirPath, fileName); + Files.delete(path); + } + + @Override + public void purge() throws IOException { + // TODO: dir traversal and delete all checkpoints? 
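+ // (this would mean listing dirPath for the HEAD_CHECKPOINT file and every TAIL_CHECKPOINT-prefixed
+ // file and deleting each; MemoryCheckpointIO.purge() is the in-memory equivalent.)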
+ } + + // @return the head page checkpoint file name + @Override + public String headFileName() { + return HEAD_CHECKPOINT; + } + + // @return the tail page checkpoint file name for given page number + @Override + public String tailFileName(int pageNum) { + return TAIL_CHECKPOINT + pageNum; + } + + private Checkpoint read(BufferedChecksumStreamInput crcsi) throws IOException { + int version = (int) crcsi.readShort(); + // TODO - build reader for this version + int pageNum = crcsi.readInt(); + int firstUnackedPageNum = crcsi.readInt(); + long firstUnackedSeqNum = crcsi.readLong(); + long minSeqNum = crcsi.readLong(); + int elementCount = crcsi.readInt(); + + int calcCrc32 = (int)crcsi.getChecksum(); + int readCrc32 = crcsi.readInt(); + if (readCrc32 != calcCrc32) { + throw new IOException(String.format("Checkpoint checksum mismatch, expected: %d, actual: %d", calcCrc32, readCrc32)); + } + if (version != Checkpoint.VERSION) { + throw new IOException("Unknown file format version: " + version); + } + + return new Checkpoint(pageNum, firstUnackedPageNum, firstUnackedSeqNum, minSeqNum, elementCount); + } + + private void write(Checkpoint checkpoint, byte[] buf) throws IOException { + BufferedChecksumStreamOutput output = new BufferedChecksumStreamOutput(new ByteArrayStreamOutput(buf)); + output.writeShort((short)Checkpoint.VERSION); + output.writeInt(checkpoint.getPageNum()); + output.writeInt(checkpoint.getFirstUnackedPageNum()); + output.writeLong(checkpoint.getFirstUnackedSeqNum()); + output.writeLong(checkpoint.getMinSeqNum()); + output.writeInt(checkpoint.getElementCount()); + output.writeInt((int)output.getChecksum()); + } +} diff --git a/logstash-core/src/main/java/org/logstash/common/io/InputStreamStreamInput.java b/logstash-core/src/main/java/org/logstash/common/io/InputStreamStreamInput.java new file mode 100644 index 000000000..712e42b87 --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/common/io/InputStreamStreamInput.java @@ -0,0 +1,77 @@ +package org.logstash.common.io; + +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; + +public class InputStreamStreamInput extends StreamInput { + + private final InputStream is; + + public InputStreamStreamInput(InputStream is) { + this.is = is; + } + + @Override + public byte readByte() throws IOException { + int ch = is.read(); + if (ch < 0) + throw new EOFException(); + return (byte) (ch); + } + + @Override + public void readBytes(byte[] b, int offset, int len) throws IOException { + if (len < 0) + throw new IndexOutOfBoundsException(); + final int read = Streams.readFully(is, b, offset, len); + if (read != len) { + throw new EOFException(); + } + } + + @Override + public void reset() throws IOException { + is.reset(); + } + + @Override + public boolean markSupported() { + return is.markSupported(); + } + + @Override + public void mark(int readlimit) { + is.mark(readlimit); + } + + @Override + public void close() throws IOException { + is.close(); + } + + @Override + public int available() throws IOException { + return is.available(); + } + + @Override + public int read() throws IOException { + return is.read(); + } + + @Override + public int read(byte[] b) throws IOException { + return is.read(b); + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + return is.read(b, off, len); + } + + @Override + public long skip(long n) throws IOException { + return is.skip(n); + } +} diff --git a/logstash-core/src/main/java/org/logstash/common/io/MemoryCheckpointIO.java 
b/logstash-core/src/main/java/org/logstash/common/io/MemoryCheckpointIO.java new file mode 100644 index 000000000..0dc5b8b63 --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/common/io/MemoryCheckpointIO.java @@ -0,0 +1,60 @@ +package org.logstash.common.io; + +import org.logstash.ackedqueue.Checkpoint; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +public class MemoryCheckpointIO implements CheckpointIO { + + private final String HEAD_CHECKPOINT = "checkpoint.head"; + private final String TAIL_CHECKPOINT = "checkpoint."; + + private static final Map sources = new HashMap<>(); + + private final String dirPath; + + public static void clearSources() { + sources.clear(); + } + + public MemoryCheckpointIO(String dirPath) { + this.dirPath = dirPath; + } + + @Override + public Checkpoint read(String fileName) throws IOException { + return this.sources.get(fileName); + } + + @Override + public Checkpoint write(String fileName, int pageNum, int firstUnackedPageNum, long firstUnackedSeqNum, long minSeqNum, int elementCount) throws IOException { + Checkpoint checkpoint = new Checkpoint(pageNum, firstUnackedPageNum, firstUnackedSeqNum, minSeqNum, elementCount); + this.sources.put(fileName, checkpoint); + return checkpoint; + } + + @Override + public void purge(String fileName) { + this.sources.remove(fileName); + } + + @Override + public void purge() { + this.sources.clear(); + } + + // @return the head page checkpoint file name + @Override + public String headFileName() { + return HEAD_CHECKPOINT; + } + + // @return the tail page checkpoint file name for given page number + @Override + public String tailFileName(int pageNum) { + return TAIL_CHECKPOINT + pageNum; + } + +} diff --git a/logstash-core/src/main/java/org/logstash/common/io/MmapPageIO.java b/logstash-core/src/main/java/org/logstash/common/io/MmapPageIO.java new file mode 100644 index 000000000..0b86477f8 --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/common/io/MmapPageIO.java @@ -0,0 +1,254 @@ +package org.logstash.common.io; + +import org.logstash.ackedqueue.Queueable; +import org.logstash.ackedqueue.SequencedList; + +import java.io.File; +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.zip.CRC32; +import java.util.zip.Checksum; + +// TODO: this essentially a copy of ByteBufferPageIO and should be DRY'ed - temp impl to test file based stress test + +public class MmapPageIO implements PageIO { + public static final byte VERSION = 1; + public static final int CHECKSUM_SIZE = Integer.BYTES; + public static final int LENGTH_SIZE = Integer.BYTES; + public static final int SEQNUM_SIZE = Long.BYTES; + public static final int MIN_RECORD_SIZE = SEQNUM_SIZE + CHECKSUM_SIZE; + public static final int HEADER_SIZE = 1; // version byte + static final List EMPTY_READ = new ArrayList<>(0); + + private final int capacity; + private final String dirPath; + private final int pageNum; + private final List offsetMap; // has to be extendable + + private MappedByteBuffer buffer; + private File file; + private FileChannel channel; + + private long minSeqNum; // TODO: to make minSeqNum final we have to pass in the minSeqNum in the constructor and not set it on first write + private int elementCount; + private int head; + private byte version; + private Checksum 
checkSummer; + + public MmapPageIO(int pageNum, int capacity, String dirPath) throws IOException { + this.pageNum = pageNum; + this.capacity = capacity; + this.dirPath = dirPath; + this.offsetMap = new ArrayList<>(); + this.checkSummer = new CRC32(); + } + + @Override + public void open(long minSeqNum, int elementCount) throws IOException { + this.minSeqNum = minSeqNum; + this.elementCount = elementCount; + + this.file = buildPath().toFile(); + RandomAccessFile raf = new RandomAccessFile(this.file, "rw"); + this.channel = raf.getChannel(); + this.buffer = this.channel.map(FileChannel.MapMode.READ_WRITE, 0, this.capacity); + raf.close(); + this.buffer.load(); + + this.buffer.position(0); + this.version = this.buffer.get(); + this.head = 1; + + if (this.elementCount > 0) { + + // TODO: refactor the read logic below to DRY with the read() method. + + // set head by skipping over all elements + for (int i = 0; i < this.elementCount; i++) { + if (this.head + SEQNUM_SIZE + LENGTH_SIZE > capacity) { + throw new IOException(String.format("cannot read seqNum and length bytes past buffer capacity")); + } + + long seqNum = this.buffer.getLong(); + + if (i == 0 && seqNum != this.minSeqNum) { + throw new IOException(String.format("first seqNum=%d is different than minSeqNum=%d", seqNum, this.minSeqNum)); + } + + this.offsetMap.add(head); + this.head += SEQNUM_SIZE; + + + int length = this.buffer.getInt(); + this.head += LENGTH_SIZE; + + if (this.head + length + CHECKSUM_SIZE > capacity) { + throw new IOException(String.format("cannot read element payload and checksum past buffer capacity")); + } + + // skip over data + this.head += length; + this.head += CHECKSUM_SIZE; + + this.buffer.position(head); + } + } + } + + @Override + public void create() throws IOException { + this.file = buildPath().toFile(); + RandomAccessFile raf = new RandomAccessFile(this.file, "rw"); + this.channel = raf.getChannel(); + this.buffer = this.channel.map(FileChannel.MapMode.READ_WRITE, 0, this.capacity); + raf.close(); + + this.buffer.position(0); + this.buffer.put(VERSION); + this.head = 1; + this.minSeqNum = 0; + this.elementCount = 0; + } + + @Override + public int getCapacity() { + return this.capacity; + } + + public long getMinSeqNum() { + return this.minSeqNum; + } + + @Override + public boolean hasSpace(int bytes) { + int bytesLeft = this.capacity - this.head; + return persistedByteCount(bytes) <= bytesLeft; + } + + @Override + public void write(byte[] bytes, long seqNum) throws IOException { + // since writes always happen at head, we can just append head to the offsetMap + assert this.offsetMap.size() == this.elementCount : + String.format("offsetMap size=%d != elementCount=%d", this.offsetMap.size(), this.elementCount); + + int initialHead = this.head; + + this.buffer.position(this.head); + this.buffer.putLong(seqNum); + this.buffer.putInt(bytes.length); + this.buffer.put(bytes); + this.buffer.putInt(checksum(bytes)); + this.head += persistedByteCount(bytes.length); + assert this.head == this.buffer.position() : + String.format("head=%d != buffer position=%d", this.head, this.buffer.position()); + + if (this.elementCount <= 0) { + this.minSeqNum = seqNum; + } + this.offsetMap.add(initialHead); + this.elementCount++; + } + + @Override + public SequencedList read(long seqNum, int limit) throws IOException { + assert seqNum >= this.minSeqNum : + String.format("seqNum=%d < minSeqNum=%d", seqNum, this.minSeqNum); + assert seqNum <= maxSeqNum() : + String.format("seqNum=%d is > maxSeqNum=%d", seqNum, maxSeqNum()); + + 
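+ // the offsetMap gives the byte offset of the first requested record; from there records are
+ // decoded sequentially as [seqNum][length][data][checksum], verifying each checksum as it is read.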
List elements = new ArrayList<>(); + List seqNums = new ArrayList<>(); + + int offset = this.offsetMap.get((int)(seqNum - this.minSeqNum)); + + this.buffer.position(offset); + + for (int i = 0; i < limit; i++) { + long readSeqNum = this.buffer.getLong(); + + assert readSeqNum == (seqNum + i) : + String.format("unmatched seqNum=%d to readSeqNum=%d", seqNum + i, readSeqNum); + + int readLength = this.buffer.getInt(); + byte[] readBytes = new byte[readLength]; + this.buffer.get(readBytes); + int checksum = this.buffer.getInt(); + int computedChecksum = checksum(readBytes); + if (computedChecksum != checksum) { + throw new IOException(String.format("computed checksum=%d != checksum for file=%d", computedChecksum, checksum)); + } + + elements.add(readBytes); + seqNums.add(readSeqNum); + + if (seqNum + i >= maxSeqNum()) { + break; + } + } + + return new SequencedList(elements, seqNums); + } + + @Override + public void deactivate() throws IOException { + close(); // close can be called multiple times + } + + @Override + public void activate() throws IOException { + if (this.channel == null) { + RandomAccessFile raf = new RandomAccessFile(this.file, "rw"); + this.channel = raf.getChannel(); + this.buffer = this.channel.map(FileChannel.MapMode.READ_WRITE, 0, this.capacity); + raf.close(); + this.buffer.load(); + } + // TODO: do we need to check is the channel is still open? not sure how it could be closed + } + + @Override + public void ensurePersisted() { + this.buffer.force(); + } + + @Override + public void purge() throws IOException { + close(); + Files.delete(buildPath()); + } + + @Override + public void close() throws IOException { + if (this.channel != null && this.channel.isOpen()) { + this.channel.close(); + } + this.channel = null; + this.buffer = null; + } + + private int checksum(byte[] bytes) { + checkSummer.reset(); + checkSummer.update(bytes, 0, bytes.length); + return (int) checkSummer.getValue(); + } + + @Override + public int persistedByteCount(int byteCount) { + return SEQNUM_SIZE + LENGTH_SIZE + byteCount + CHECKSUM_SIZE; + } + + private long maxSeqNum() { + return this.minSeqNum + this.elementCount - 1; + } + + private Path buildPath() { + return Paths.get(this.dirPath, "page." 
+ this.pageNum); + } +} diff --git a/logstash-core/src/main/java/org/logstash/common/io/PageIO.java b/logstash-core/src/main/java/org/logstash/common/io/PageIO.java new file mode 100644 index 000000000..0fa3ede20 --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/common/io/PageIO.java @@ -0,0 +1,47 @@ +package org.logstash.common.io; + +import org.logstash.ackedqueue.Queueable; +import org.logstash.ackedqueue.SequencedList; + +import java.io.Closeable; +import java.io.IOException; +import java.util.List; + +public interface PageIO extends Closeable { + + // the concrete class should be constructed with the pageNum, capacity and dirPath attributes + + // open an existing data container and reconstruct internal state if required + void open(long minSeqNum, int elementCount) throws IOException; + + // create a new empty data file + void create() throws IOException; + + // verify if the data container has space for the given number of bytes + boolean hasSpace(int bytes); + + // write the given bytes to the data container + void write(byte[] bytes, long seqNum) throws IOException; + + // read up to limit number of items starting at give seqNum + SequencedList read(long seqNum, int limit) throws IOException; + + // @return the data container total capacity in bytes + int getCapacity(); + + // @return the actual persisted byte count (with overhead) for the given data bytes + int persistedByteCount(int bytes); + + // signal that this data page is not active and resources can be released + void deactivate() throws IOException; + + // signal that this data page is active will be read or written to + // should do nothing if page is aready active + void activate() throws IOException; + + // issue the proper data container "fsync" sematic + void ensurePersisted(); + + // delete/unlink/remove data file + void purge() throws IOException; +} diff --git a/logstash-core/src/main/java/org/logstash/common/io/PageIOFactory.java b/logstash-core/src/main/java/org/logstash/common/io/PageIOFactory.java new file mode 100644 index 000000000..ca9d7fa20 --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/common/io/PageIOFactory.java @@ -0,0 +1,8 @@ +package org.logstash.common.io; + +import java.io.IOException; + +@FunctionalInterface +public interface PageIOFactory { + PageIO build(int pageNum, int capacity, String dirPath) throws IOException; +} diff --git a/logstash-core/src/main/java/org/logstash/common/io/StreamInput.java b/logstash-core/src/main/java/org/logstash/common/io/StreamInput.java new file mode 100644 index 000000000..c387657a3 --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/common/io/StreamInput.java @@ -0,0 +1,86 @@ +package org.logstash.common.io; + +import java.io.IOException; +import java.io.InputStream; + +public abstract class StreamInput extends InputStream { + /** + * Reads and returns a single byte. + */ + public abstract byte readByte() throws IOException; + + /** + * Reads a specified number of bytes into an array at the specified offset. + * + * @param b the array to read bytes into + * @param offset the offset in the array to start storing bytes + * @param len the number of bytes to read + */ + public abstract void readBytes(byte[] b, int offset, int len) throws IOException; + + /** + * Reads four bytes and returns an int. + */ + public int readInt() throws IOException { + return ((readByte() & 0xFF) << 24) | ((readByte() & 0xFF) << 16) + | ((readByte() & 0xFF) << 8) | (readByte() & 0xFF); + } + + /** + * Reads an int stored in variable-length format. 
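+ * Each byte stores seven payload bits, least-significant group first, with the high bit set
+ * on every byte except the last (for example, 300 is encoded as the two bytes 0xAC 0x02).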
Reads between one and + * five bytes. Smaller values take fewer bytes. Negative numbers + * will always use all 5 bytes and are therefore better serialized + * using {@link #readInt} + */ + public int readVInt() throws IOException { + byte b = readByte(); + int i = b & 0x7F; + if ((b & 0x80) == 0) { + return i; + } + b = readByte(); + i |= (b & 0x7F) << 7; + if ((b & 0x80) == 0) { + return i; + } + b = readByte(); + i |= (b & 0x7F) << 14; + if ((b & 0x80) == 0) { + return i; + } + b = readByte(); + i |= (b & 0x7F) << 21; + if ((b & 0x80) == 0) { + return i; + } + b = readByte(); + assert (b & 0x80) == 0; + return i | ((b & 0x7F) << 28); + } + + /** + * Reads two bytes and returns a short. + */ + public short readShort() throws IOException { + int i = ((readByte() & 0xFF) << 8); + int j = (readByte() & 0xFF); + return (short) (i | j); + } + + /** + * Reads eight bytes and returns a long. + */ + public long readLong() throws IOException { + return (((long) readInt()) << 32) | (readInt() & 0xFFFFFFFFL); + } + + public byte[] readByteArray() throws IOException { + int length = readInt(); + byte[] values = new byte[length]; + for (int i = 0; i < length; i++) { + values[i] = readByte(); + } + return values; + } + +} diff --git a/logstash-core/src/main/java/org/logstash/common/io/StreamOutput.java b/logstash-core/src/main/java/org/logstash/common/io/StreamOutput.java new file mode 100644 index 000000000..76c4271e4 --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/common/io/StreamOutput.java @@ -0,0 +1,74 @@ +package org.logstash.common.io; + +import java.io.IOException; +import java.io.OutputStream; + +public abstract class StreamOutput extends OutputStream { + @Override + public void write(int b) throws IOException { + writeByte((byte) b); + } + + public abstract void writeByte(byte b) throws IOException; + + public abstract void writeBytes(byte[] b, int offset, int length) throws IOException; + + public abstract void reset() throws IOException; + + /** + * Writes an int in a variable-length format. Writes between one and + * five bytes. Smaller values take fewer bytes. Negative numbers + * will always use all 5 bytes and are therefore better serialized + * using {@link #writeInt} + */ + public void writeVInt(int i) throws IOException { + while ((i & ~0x7F) != 0) { + writeByte((byte) ((i & 0x7f) | 0x80)); + i >>>= 7; + } + writeByte((byte) i); + } + + /** + * Writes a short as two bytes. + */ + public void writeShort(short i) throws IOException { + writeByte((byte)(i >> 8)); + writeByte((byte) i); + } + + /** + * Writes an int as four bytes. + */ + public void writeInt(int i) throws IOException { + writeByte((byte) (i >> 24)); + writeByte((byte) (i >> 16)); + writeByte((byte) (i >> 8)); + writeByte((byte) i); + } + + public void writeIntArray(int[] values) throws IOException { + writeVInt(values.length); + for (int value : values) { + writeInt(value); + } + } + + /** + * Writes a long as eight bytes. + */ + public void writeLong(long i) throws IOException { + writeInt((int) (i >> 32)); + writeInt((int) i); + } + + /** + * Writes an array of bytes. 
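+ * The array is written as a four-byte big-endian length followed by the raw bytes,
+ * matching {@link StreamInput#readByteArray()}.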
+ * + * @param b the bytes to write + */ + public void writeByteArray(byte[] b) throws IOException { + writeInt(b.length); + writeBytes(b, 0, b.length); + } +} diff --git a/logstash-core/src/main/java/org/logstash/common/io/Streams.java b/logstash-core/src/main/java/org/logstash/common/io/Streams.java new file mode 100644 index 000000000..a156640ab --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/common/io/Streams.java @@ -0,0 +1,60 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.logstash.common.io; + +import java.io.IOException; +import java.io.InputStream; +import java.io.Reader; + +public abstract class Streams { + + public static int readFully(Reader reader, char[] dest) throws IOException { + return readFully(reader, dest, 0, dest.length); + } + + public static int readFully(Reader reader, char[] dest, int offset, int len) throws IOException { + int read = 0; + while (read < len) { + final int r = reader.read(dest, offset + read, len - read); + if (r == -1) { + break; + } + read += r; + } + return read; + } + + public static int readFully(InputStream reader, byte[] dest) throws IOException { + return readFully(reader, dest, 0, dest.length); + } + + public static int readFully(InputStream reader, byte[] dest, int offset, int len) throws IOException { + int read = 0; + while (read < len) { + final int r = reader.read(dest, offset + read, len - read); + if (r == -1) { + break; + } + read += r; + } + return read; + } +} + diff --git a/logstash-core/src/main/java/org/logstash/common/io/wip/MemoryPageIOStream.java b/logstash-core/src/main/java/org/logstash/common/io/wip/MemoryPageIOStream.java new file mode 100644 index 000000000..5c8ec1537 --- /dev/null +++ b/logstash-core/src/main/java/org/logstash/common/io/wip/MemoryPageIOStream.java @@ -0,0 +1,279 @@ +package org.logstash.common.io.wip; + +import org.logstash.ackedqueue.Checkpoint; +import org.logstash.ackedqueue.Queueable; +import org.logstash.ackedqueue.SequencedList; +import org.logstash.common.io.BufferedChecksumStreamInput; +import org.logstash.common.io.BufferedChecksumStreamOutput; +import org.logstash.common.io.ByteArrayStreamOutput; +import org.logstash.common.io.ByteBufferStreamInput; +import org.logstash.common.io.PageIO; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; + +public class MemoryPageIOStream implements PageIO { + static final int CHECKSUM_SIZE = Integer.BYTES; + static final int LENGTH_SIZE = Integer.BYTES; + static final int SEQNUM_SIZE = Long.BYTES; + static final int MIN_RECORD_SIZE = SEQNUM_SIZE + LENGTH_SIZE + CHECKSUM_SIZE; + static final int VERSION_SIZE = Integer.BYTES; + + private final byte[] buffer; + private final int capacity; + private int writePosition; + private 
int readPosition; + private int elementCount; + private long minSeqNum; + private ByteBufferStreamInput streamedInput; + private ByteArrayStreamOutput streamedOutput; + private BufferedChecksumStreamOutput crcWrappedOutput; + private final List offsetMap; + private String dirPath = ""; + private String headerDetails = ""; + + public int persistedByteCount(byte[] data) { + return persistedByteCount(data.length); + } + + @Override + public int persistedByteCount(int length) { + return MIN_RECORD_SIZE + length; + } + + public MemoryPageIOStream(int pageNum, int capacity, String dirPath) throws IOException { + this(capacity, new byte[capacity]); + this.dirPath = dirPath; + } + + public MemoryPageIOStream(int capacity, String dirPath) throws IOException { + this(capacity, new byte[capacity]); + this.dirPath = dirPath; + } + + public MemoryPageIOStream(int capacity) throws IOException { + this(capacity, new byte[capacity]); + } + + public MemoryPageIOStream(int capacity, byte[] initialBytes) throws IOException { + this.capacity = capacity; + if (initialBytes.length > capacity) { + throw new IOException("initial bytes greater than capacity"); + } + buffer = initialBytes; + offsetMap = new ArrayList<>(); + streamedInput = new ByteBufferStreamInput(ByteBuffer.wrap(buffer)); + streamedOutput = new ByteArrayStreamOutput(buffer); + crcWrappedOutput = new BufferedChecksumStreamOutput(streamedOutput); + } + + @Override + public void open(long minSeqNum, int elementCount) throws IOException { + this.minSeqNum = minSeqNum; + this.elementCount = elementCount; + writePosition = verifyHeader(); + readPosition = writePosition; + if (elementCount > 0) { + long seqNumRead; + BufferedChecksumStreamInput in = new BufferedChecksumStreamInput(streamedInput); + for (int i = 0; i < this.elementCount; i++) { + if (writePosition + SEQNUM_SIZE + LENGTH_SIZE > capacity) { + throw new IOException(String.format("cannot read seqNum and length bytes past buffer capacity")); + } + + seqNumRead = in.readLong(); + + //verify that the buffer starts with the min sequence number + if (i == 0 && seqNumRead != this.minSeqNum) { + String msg = String.format("Page minSeqNum mismatch, expected: %d, actual: %d", this.minSeqNum, seqNumRead); + throw new IOException(msg); + } + + in.resetDigest(); + byte[] bytes = in.readByteArray(); + int actualChecksum = (int) in.getChecksum(); + int expectedChecksum = in.readInt(); + + if (actualChecksum != expectedChecksum) { + // explode with tragic error + } + + offsetMap.add(writePosition); + writePosition += persistedByteCount(bytes); + } + setReadPoint(this.minSeqNum); + } + } + + @Override + public void create() throws IOException { + writePosition = addHeader(); + readPosition = writePosition; + this.minSeqNum = 1L; + this.elementCount = 0; + } + + @Override + public boolean hasSpace(int byteSize) { + return this.capacity >= writePosition + persistedByteCount(byteSize); + } + + @Override + public void write(byte[] bytes, long seqNum) throws IOException { + int pos = this.writePosition; + int writeLength = persistedByteCount(bytes); + writeToBuffer(seqNum, bytes, writeLength); + writePosition += writeLength; + assert writePosition == streamedOutput.getPosition() : + String.format("writePosition=%d != streamedOutput position=%d", writePosition, streamedOutput.getPosition()); + if (elementCount <= 0) { + this.minSeqNum = seqNum; + } + this.offsetMap.add(pos); + elementCount++; + } + + @Override + public SequencedList read(long seqNum, int limit) throws IOException { + if (elementCount == 0) { + 
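+ // nothing has been written yet, so return an empty batch rather than consulting the offsetMap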
return new SequencedList<>(new ArrayList<>(), new ArrayList<>()); + } + setReadPoint(seqNum); + return read(limit); + } + + @Override + public int getCapacity() { + return capacity; + } + + @Override + public void deactivate() { + // do nothing + } + + @Override + public void activate() { + // do nothing + } + + @Override + public void ensurePersisted() { + // do nothing + } + + @Override + public void purge() throws IOException { + // do nothing + } + + @Override + public void close() throws IOException { + // TBD + } + + //@Override + public void setPageHeaderDetails(String details) { + headerDetails = details; + } + + public int getWritePosition() { + return writePosition; + } + + public int getElementCount() { + return elementCount; + } + + public long getMinSeqNum() { + return minSeqNum; + } + + // used in tests + public byte[] getBuffer() { + return buffer; + } + + // used in tests + public String readHeaderDetails() throws IOException { + int tempPosition = readPosition; + streamedInput.movePosition(0); + int ver = streamedInput.readInt(); + String details = new String(streamedInput.readByteArray()); + streamedInput.movePosition(tempPosition); + return details; + } + + private void setReadPoint(long seqNum) throws IOException { + int readPosition = offsetMap.get(calcRelativeSeqNum(seqNum)); + streamedInput.movePosition(readPosition); + } + + private int calcRelativeSeqNum(long seqNum) { + return (int) (seqNum - minSeqNum); + } + + private int addHeader() throws IOException { + streamedOutput.writeInt(Checkpoint.VERSION); + byte[] details = headerDetails.getBytes(); + streamedOutput.writeByteArray(details); + return VERSION_SIZE + LENGTH_SIZE + details.length; + } + + private int verifyHeader() throws IOException { + int ver = streamedInput.readInt(); + if (ver != Checkpoint.VERSION) { + String msg = String.format("Page version mismatch, expecting: %d, this version: %d", Checkpoint.VERSION, ver); + throw new IOException(msg); + } + int len = streamedInput.readInt(); + streamedInput.skip(len); + return VERSION_SIZE + LENGTH_SIZE + len; + } + + private void writeToBuffer(long seqNum, byte[] data, int len) throws IOException { + streamedOutput.setWriteWindow(writePosition, len); + crcWrappedOutput.writeLong(seqNum); + crcWrappedOutput.resetDigest(); + crcWrappedOutput.writeByteArray(data); + long checksum = crcWrappedOutput.getChecksum(); + crcWrappedOutput.writeInt((int) checksum); + crcWrappedOutput.flush(); + crcWrappedOutput.close(); + } + + private SequencedList read(int limit) throws IOException { + List elements = new ArrayList<>(); + List seqNums = new ArrayList<>(); + + int upto = available(limit); + for (int i = 0; i < upto; i++) { + long seqNum = readSeqNum(); + byte[] data = readData(); + skipChecksum(); + elements.add(data); + seqNums.add(seqNum); + } + return new SequencedList<>(elements, seqNums); + } + + private long readSeqNum() throws IOException { + return streamedInput.readLong(); + } + + private byte[] readData() throws IOException { + return streamedInput.readByteArray(); + } + + private void skipChecksum() throws IOException { + streamedInput.skip(CHECKSUM_SIZE); + } + + private int available(int sought) { + if (elementCount < 1) return 0; + if (elementCount < sought) return elementCount; + return sought; + } +} diff --git a/logstash-core/src/test/java/org/logstash/ackedqueue/CheckpointTest.java b/logstash-core/src/test/java/org/logstash/ackedqueue/CheckpointTest.java new file mode 100644 index 000000000..83d1ac5aa --- /dev/null +++ 
b/logstash-core/src/test/java/org/logstash/ackedqueue/CheckpointTest.java @@ -0,0 +1,21 @@ +package org.logstash.ackedqueue; + +import org.junit.Test; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.MatcherAssert.assertThat; + +public class CheckpointTest { + + @Test + public void newInstance() { + Checkpoint checkpoint = new Checkpoint(1, 2, 3, 4, 5); + + assertThat(checkpoint.getPageNum(), is(equalTo(1))); + assertThat(checkpoint.getFirstUnackedPageNum(), is(equalTo(2))); + assertThat(checkpoint.getFirstUnackedSeqNum(), is(equalTo(3L))); + assertThat(checkpoint.getMinSeqNum(), is(equalTo(4L))); + assertThat(checkpoint.getElementCount(), is(equalTo(5))); + } +} \ No newline at end of file diff --git a/logstash-core/src/test/java/org/logstash/ackedqueue/HeadPageTest.java b/logstash-core/src/test/java/org/logstash/ackedqueue/HeadPageTest.java new file mode 100644 index 000000000..0477b1b20 --- /dev/null +++ b/logstash-core/src/test/java/org/logstash/ackedqueue/HeadPageTest.java @@ -0,0 +1,116 @@ +package org.logstash.ackedqueue; + +import org.junit.Test; +import org.logstash.common.io.ByteBufferPageIO; +import org.logstash.common.io.FileCheckpointIOTest; +import org.logstash.common.io.PageIO; + +import java.io.IOException; +import java.net.URL; +import java.nio.file.NoSuchFileException; +import java.nio.file.Paths; + +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.MatcherAssert.assertThat; + +public class HeadPageTest { + + @Test + public void newHeadPage() throws IOException { + Settings s = TestSettings.getSettings(100); + Queue q = new Queue(s); + PageIO pageIO = s.getPageIOFactory().build(0, 100, "dummy"); + HeadPage p = new HeadPage(0, q, pageIO); + + assertThat(p.getPageNum(), is(equalTo(0))); + assertThat(p.isFullyRead(), is(true)); + assertThat(p.isFullyAcked(), is(false)); + assertThat(p.hasSpace(10), is(true)); + assertThat(p.hasSpace(100), is(false)); + } + + @Test + public void pageWrite() throws IOException { + Queueable element = new StringElement("foobarbaz"); + int singleElementCapacity = ByteBufferPageIO.HEADER_SIZE + ByteBufferPageIO._persistedByteCount(element.serialize().length); + + Settings s = TestSettings.getSettings(singleElementCapacity); + Queue q = new Queue(s); + PageIO pageIO = s.getPageIOFactory().build(0, singleElementCapacity, "dummy"); + HeadPage p = new HeadPage(0, q, pageIO); + + assertThat(p.hasSpace(element.serialize().length), is(true)); + p.write(element.serialize(), 0); + + assertThat(p.hasSpace(element.serialize().length), is(false)); + assertThat(p.isFullyRead(), is(false)); + assertThat(p.isFullyAcked(), is(false)); + } + + @Test + public void pageWriteAndReadSingle() throws IOException { + long seqNum = 1L; + Queueable element = new StringElement("foobarbaz"); + int singleElementCapacity = ByteBufferPageIO.HEADER_SIZE + ByteBufferPageIO._persistedByteCount(element.serialize().length); + + Settings s = TestSettings.getSettings(singleElementCapacity); + Queue q = new Queue(s); + PageIO pageIO = s.getPageIOFactory().build(0, singleElementCapacity, "dummy"); + HeadPage p = new HeadPage(0, q, pageIO); + + assertThat(p.hasSpace(element.serialize().length), is(true)); + p.write(element.serialize(), seqNum); + + Batch b = p.readBatch(1); + + assertThat(b.getElements().size(), is(equalTo(1))); + assertThat(b.getElements().get(0).toString(), 
is(equalTo(element.toString()))); + + assertThat(p.hasSpace(element.serialize().length), is(false)); + assertThat(p.isFullyRead(), is(true)); + assertThat(p.isFullyAcked(), is(false)); + } + + @Test + public void pageWriteAndReadMulti() throws IOException { + long seqNum = 1L; + Queueable element = new StringElement("foobarbaz"); + int singleElementCapacity = ByteBufferPageIO.HEADER_SIZE + ByteBufferPageIO._persistedByteCount(element.serialize().length); + + Settings s = TestSettings.getSettings(singleElementCapacity); + Queue q = new Queue(s); + PageIO pageIO = s.getPageIOFactory().build(0, singleElementCapacity, "dummy"); + HeadPage p = new HeadPage(0, q, pageIO); + + assertThat(p.hasSpace(element.serialize().length), is(true)); + p.write(element.serialize(), seqNum); + + Batch b = p.readBatch(10); + + assertThat(b.getElements().size(), is(equalTo(1))); + assertThat(b.getElements().get(0).toString(), is(equalTo(element.toString()))); + + assertThat(p.hasSpace(element.serialize().length), is(false)); + assertThat(p.isFullyRead(), is(true)); + assertThat(p.isFullyAcked(), is(false)); + } + + @Test + public void pageViaQueueOpenForHeadCheckpointWithoutSupportingPageFiles() throws Exception { + URL url = FileCheckpointIOTest.class.getResource("checkpoint.head"); + String dirPath = Paths.get(url.toURI()).getParent().toString(); + Queueable element = new StringElement("foobarbaz"); + int singleElementCapacity = ByteBufferPageIO.HEADER_SIZE + ByteBufferPageIO._persistedByteCount(element.serialize().length); + Settings s = TestSettings.getSettingsCheckpointFilePageMemory(singleElementCapacity, dirPath); + TestQueue q = new TestQueue(s); + try { + q.open(); + } catch (NoSuchFileException e) { + assertThat(e.getMessage(), containsString("checkpoint.2")); + } + HeadPage p = q.getHeadPage(); + assertThat(p, is(equalTo(null))); + } +} diff --git a/logstash-core/src/test/java/org/logstash/ackedqueue/QueueTest.java b/logstash-core/src/test/java/org/logstash/ackedqueue/QueueTest.java new file mode 100644 index 000000000..31024220d --- /dev/null +++ b/logstash-core/src/test/java/org/logstash/ackedqueue/QueueTest.java @@ -0,0 +1,392 @@ +package org.logstash.ackedqueue; + +import org.junit.Test; +import org.logstash.common.io.ByteBufferPageIO; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Random; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.CoreMatchers.nullValue; +import static org.hamcrest.CoreMatchers.notNullValue; +import static org.hamcrest.MatcherAssert.assertThat; + +public class QueueTest { + + @Test + public void newQueue() throws IOException { + Queue q = new TestQueue(TestSettings.getSettings(10)); + q.open(); + + assertThat(q.nonBlockReadBatch(1), is(equalTo(null))); + } + + @Test + public void singleWriteRead() throws IOException { + Queue q = new TestQueue(TestSettings.getSettings(100)); + q.open(); + + Queueable element = new StringElement("foobarbaz"); + q.write(element); + + Batch b = q.nonBlockReadBatch(1); + + assertThat(b.getElements().size(), is(equalTo(1))); + assertThat(b.getElements().get(0).toString(), is(equalTo(element.toString()))); + assertThat(q.nonBlockReadBatch(1), 
is(equalTo(null))); + } + + @Test + public void singleWriteMultiRead() throws IOException { + Queue q = new TestQueue(TestSettings.getSettings(100)); + q.open(); + + Queueable element = new StringElement("foobarbaz"); + q.write(element); + + Batch b = q.nonBlockReadBatch(2); + + assertThat(b.getElements().size(), is(equalTo(1))); + assertThat(b.getElements().get(0).toString(), is(equalTo(element.toString()))); + assertThat(q.nonBlockReadBatch(2), is(equalTo(null))); + } + + @Test + public void multiWriteSamePage() throws IOException { + Queue q = new TestQueue(TestSettings.getSettings(100)); + q.open(); + + List elements = Arrays.asList(new StringElement("foobarbaz1"), new StringElement("foobarbaz2"), new StringElement("foobarbaz3")); + + for (Queueable e : elements) { + q.write(e); + } + + Batch b = q.nonBlockReadBatch(2); + + assertThat(b.getElements().size(), is(equalTo(2))); + assertThat(b.getElements().get(0).toString(), is(equalTo(elements.get(0).toString()))); + assertThat(b.getElements().get(1).toString(), is(equalTo(elements.get(1).toString()))); + + b = q.nonBlockReadBatch(2); + + assertThat(b.getElements().size(), is(equalTo(1))); + assertThat(b.getElements().get(0).toString(), is(equalTo(elements.get(2).toString()))); + } + + @Test + public void writeMultiPage() throws IOException { + List elements = Arrays.asList(new StringElement("foobarbaz1"), new StringElement("foobarbaz2"), new StringElement("foobarbaz3"), new StringElement("foobarbaz4")); + int singleElementCapacity = ByteBufferPageIO.HEADER_SIZE + ByteBufferPageIO._persistedByteCount(elements.get(0).serialize().length); + + TestQueue q = new TestQueue(TestSettings.getSettings(2 * singleElementCapacity)); + q.open(); + + for (Queueable e : elements) { + q.write(e); + } + + // total of 2 pages: 1 head and 1 tail + assertThat(q.getTailPages().size(), is(equalTo(1))); + + assertThat(q.getTailPages().get(0).isFullyRead(), is(equalTo(false))); + assertThat(q.getTailPages().get(0).isFullyAcked(), is(equalTo(false))); + assertThat(q.getHeadPage().isFullyRead(), is(equalTo(false))); + assertThat(q.getHeadPage().isFullyAcked(), is(equalTo(false))); + + Batch b = q.nonBlockReadBatch(10); + assertThat(b.getElements().size(), is(equalTo(2))); + + assertThat(q.getTailPages().size(), is(equalTo(1))); + + assertThat(q.getTailPages().get(0).isFullyRead(), is(equalTo(true))); + assertThat(q.getTailPages().get(0).isFullyAcked(), is(equalTo(false))); + assertThat(q.getHeadPage().isFullyRead(), is(equalTo(false))); + assertThat(q.getHeadPage().isFullyAcked(), is(equalTo(false))); + + b = q.nonBlockReadBatch(10); + assertThat(b.getElements().size(), is(equalTo(2))); + + assertThat(q.getTailPages().get(0).isFullyRead(), is(equalTo(true))); + assertThat(q.getTailPages().get(0).isFullyAcked(), is(equalTo(false))); + assertThat(q.getHeadPage().isFullyRead(), is(equalTo(true))); + assertThat(q.getHeadPage().isFullyAcked(), is(equalTo(false))); + + b = q.nonBlockReadBatch(10); + assertThat(b, is(equalTo(null))); + } + + + @Test + public void writeMultiPageWithInOrderAcking() throws IOException { + List elements = Arrays.asList(new StringElement("foobarbaz1"), new StringElement("foobarbaz2"), new StringElement("foobarbaz3"), new StringElement("foobarbaz4")); + int singleElementCapacity = ByteBufferPageIO.HEADER_SIZE + ByteBufferPageIO._persistedByteCount(elements.get(0).serialize().length); + + TestQueue q = new TestQueue(TestSettings.getSettings(2 * singleElementCapacity)); + q.open(); + + for (Queueable e : elements) { + q.write(e); + } + + Batch 
b = q.nonBlockReadBatch(10); + + assertThat(b.getElements().size(), is(equalTo(2))); + assertThat(q.getTailPages().size(), is(equalTo(1))); + + // lets keep a ref to that tail page before acking + TailPage tailPage = q.getTailPages().get(0); + + assertThat(tailPage.isFullyRead(), is(equalTo(true))); + + // ack first batch which includes all elements from tailPages + b.close(); + + assertThat(q.getTailPages().size(), is(equalTo(0))); + assertThat(tailPage.isFullyRead(), is(equalTo(true))); + assertThat(tailPage.isFullyAcked(), is(equalTo(true))); + + b = q.nonBlockReadBatch(10); + + assertThat(b.getElements().size(), is(equalTo(2))); + assertThat(q.getHeadPage().isFullyRead(), is(equalTo(true))); + assertThat(q.getHeadPage().isFullyAcked(), is(equalTo(false))); + + b.close(); + + assertThat(q.getHeadPage().isFullyAcked(), is(equalTo(true))); + } + + @Test + public void writeMultiPageWithInOrderAckingCheckpoints() throws IOException { + List elements1 = Arrays.asList(new StringElement("foobarbaz1"), new StringElement("foobarbaz2")); + List elements2 = Arrays.asList(new StringElement("foobarbaz3"), new StringElement("foobarbaz4")); + int singleElementCapacity = ByteBufferPageIO.HEADER_SIZE + ByteBufferPageIO._persistedByteCount(elements1.get(0).serialize().length); + + Settings settings = TestSettings.getSettings(2 * singleElementCapacity); + TestQueue q = new TestQueue(settings); + q.open(); + + assertThat(q.getHeadPage().getPageNum(), is(equalTo(0))); + Checkpoint c = q.getCheckpointIO().read("checkpoint.head"); + assertThat(c.getPageNum(), is(equalTo(0))); + assertThat(c.getElementCount(), is(equalTo(0))); + assertThat(c.getMinSeqNum(), is(equalTo(0L))); + assertThat(c.getFirstUnackedSeqNum(), is(equalTo(0L))); + assertThat(c.getFirstUnackedPageNum(), is(equalTo(0))); + + for (Queueable e : elements1) { + q.write(e); + } + + c = q.getCheckpointIO().read("checkpoint.head"); + assertThat(c.getPageNum(), is(equalTo(0))); + assertThat(c.getElementCount(), is(equalTo(0))); + assertThat(c.getMinSeqNum(), is(equalTo(0L))); + assertThat(c.getFirstUnackedSeqNum(), is(equalTo(0L))); + assertThat(c.getFirstUnackedPageNum(), is(equalTo(0))); + +// assertThat(elements1.get(1).getSeqNum(), is(equalTo(2L))); + q.ensurePersistedUpto(2); + + c = q.getCheckpointIO().read("checkpoint.head"); + assertThat(c.getPageNum(), is(equalTo(0))); + assertThat(c.getElementCount(), is(equalTo(2))); + assertThat(c.getMinSeqNum(), is(equalTo(1L))); + assertThat(c.getFirstUnackedSeqNum(), is(equalTo(1L))); + assertThat(c.getFirstUnackedPageNum(), is(equalTo(0))); + + for (Queueable e : elements2) { + q.write(e); + } + + c = q.getCheckpointIO().read("checkpoint.head"); + assertThat(c.getPageNum(), is(equalTo(1))); + assertThat(c.getElementCount(), is(equalTo(0))); + assertThat(c.getMinSeqNum(), is(equalTo(0L))); + assertThat(c.getFirstUnackedSeqNum(), is(equalTo(0L))); + assertThat(c.getFirstUnackedPageNum(), is(equalTo(0))); + + c = q.getCheckpointIO().read("checkpoint.0"); + assertThat(c.getPageNum(), is(equalTo(0))); + assertThat(c.getElementCount(), is(equalTo(2))); + assertThat(c.getMinSeqNum(), is(equalTo(1L))); + assertThat(c.getFirstUnackedSeqNum(), is(equalTo(1L))); + + Batch b = q.nonBlockReadBatch(10); + b.close(); + + assertThat(q.getCheckpointIO().read("checkpoint.0"), is(nullValue())); + + c = q.getCheckpointIO().read("checkpoint.head"); + assertThat(c.getPageNum(), is(equalTo(1))); + assertThat(c.getElementCount(), is(equalTo(2))); + assertThat(c.getMinSeqNum(), is(equalTo(3L))); + 
assertThat(c.getFirstUnackedSeqNum(), is(equalTo(3L))); + assertThat(c.getFirstUnackedPageNum(), is(equalTo(1))); + + b = q.nonBlockReadBatch(10); + b.close(); + + c = q.getCheckpointIO().read("checkpoint.head"); + assertThat(c.getPageNum(), is(equalTo(1))); + assertThat(c.getElementCount(), is(equalTo(2))); + assertThat(c.getMinSeqNum(), is(equalTo(3L))); + assertThat(c.getFirstUnackedSeqNum(), is(equalTo(5L))); + assertThat(c.getFirstUnackedPageNum(), is(equalTo(1))); + } + + @Test + public void randomAcking() throws IOException { + Random random = new Random(); + + // 10 tests of random queue sizes + for (int loop = 0; loop < 10; loop++) { + int page_count = random.nextInt(10000) + 1; + int digits = new Double(Math.ceil(Math.log10(page_count))).intValue(); + + // create a queue with a single element per page + List elements = new ArrayList<>(); + for (int i = 0; i < page_count; i++) { + elements.add(new StringElement(String.format("%0" + digits + "d", i))); + } + int singleElementCapacity = ByteBufferPageIO.HEADER_SIZE + ByteBufferPageIO._persistedByteCount(elements.get(0).serialize().length); + + TestQueue q = new TestQueue(TestSettings.getSettings(singleElementCapacity)); + q.open(); + + for (Queueable e : elements) { + q.write(e); + } + + assertThat(q.getTailPages().size(), is(equalTo(page_count - 1))); + + // first read all elements + List batches = new ArrayList<>(); + for (Batch b = q.nonBlockReadBatch(1); b != null; b = q.nonBlockReadBatch(1)) { + batches.add(b); + } + assertThat(batches.size(), is(equalTo(page_count))); + + // then ack randomly + Collections.shuffle(batches); + for (Batch b : batches) { + b.close(); + } + + assertThat(q.getTailPages().size(), is(equalTo(0))); + } + } + + @Test(timeout = 5000) + public void reachMaxUnread() throws IOException, InterruptedException, ExecutionException { + Queueable element = new StringElement("foobarbaz"); + int singleElementCapacity = ByteBufferPageIO.HEADER_SIZE + ByteBufferPageIO._persistedByteCount(element.serialize().length); + + Settings settings = TestSettings.getSettings(singleElementCapacity); + settings.setMaxUnread(2); // 2 so we know the first write should not block and the second should + TestQueue q = new TestQueue(settings); + q.open(); + + + long seqNum = q.write(element); + assertThat(seqNum, is(equalTo(1L))); + assertThat(q.isFull(), is(false)); + + int ELEMENT_COUNT = 1000; + for (int i = 0; i < ELEMENT_COUNT; i++) { + + // we expect the next write call to block so let's wrap it in a Future + Callable write = () -> { + return q.write(element); + }; + + ExecutorService executor = Executors.newFixedThreadPool(1); + Future future = executor.submit(write); + + while (!q.isFull()) { + // spin wait until data is written and write blocks + Thread.sleep(1); + } + assertThat(q.unreadCount, is(equalTo(2L))); + assertThat(future.isDone(), is(false)); + + // read one element, which will unblock the last write + Batch b = q.nonBlockReadBatch(1); + assertThat(b.getElements().size(), is(equalTo(1))); + + // future result is the blocked write seqNum for the second element + assertThat(future.get(), is(equalTo(2L + i))); + assertThat(q.isFull(), is(false)); + + executor.shutdown(); + } + + // since we did not ack and pages hold a single item + assertThat(q.getTailPages().size(), is(equalTo(ELEMENT_COUNT))); + } + + @Test + public void reachMaxUnreadWithAcking() throws IOException, InterruptedException, ExecutionException { + Queueable element = new StringElement("foobarbaz"); + + // TODO: add randomized testing on the page size 
(but must be > single element size) + Settings settings = TestSettings.getSettings(256); // 256 is arbitrary, large enough to hold a few elements + + settings.setMaxUnread(2); // 2 so we know the first write should not block and the second should + TestQueue q = new TestQueue(settings); + q.open(); + + // perform first non-blocking write + long seqNum = q.write(element); + + assertThat(seqNum, is(equalTo(1L))); + assertThat(q.isFull(), is(false)); + + int ELEMENT_COUNT = 1000; + for (int i = 0; i < ELEMENT_COUNT; i++) { + + // we expect this next write call to block so let's wrap it in a Future + Callable write = () -> { + return q.write(element); + }; + + ExecutorService executor = Executors.newFixedThreadPool(1); + Future future = executor.submit(write); + + // spin wait until data is written and write blocks + while (!q.isFull()) { Thread.sleep(1); } + + // read one element, which will unblock the last write + Batch b = q.nonBlockReadBatch(1); + assertThat(b, is(notNullValue())); + assertThat(b.getElements().size(), is(equalTo(1))); + b.close(); + + // future result is the blocked write seqNum for the second element + assertThat(future.get(), is(equalTo(2L + i))); + assertThat(q.isFull(), is(false)); + + executor.shutdown(); + } + + // all batches are acked, no tail pages should exist + assertThat(q.getTailPages().size(), is(equalTo(0))); + + // the last read unblocked the last write so some elements (1 unread and maybe some acked) should be in the head page + assertThat(q.getHeadPage().getElementCount() > 0L, is(true)); + assertThat(q.getHeadPage().unreadCount(), is(equalTo(1L))); + assertThat(q.unreadCount, is(equalTo(1L))); + } + +} \ No newline at end of file diff --git a/logstash-core/src/test/java/org/logstash/ackedqueue/StringElement.java b/logstash-core/src/test/java/org/logstash/ackedqueue/StringElement.java new file mode 100644 index 000000000..99092a908 --- /dev/null +++ b/logstash-core/src/test/java/org/logstash/ackedqueue/StringElement.java @@ -0,0 +1,58 @@ +package org.logstash.ackedqueue; + +import java.nio.ByteBuffer; + +public class StringElement implements Queueable { + private final String content; + + public StringElement(String content) { + this.content = content; + } + + @Override + public byte[] serialize() { + byte[] contentBytes = this.content.getBytes(); + ByteBuffer buffer = ByteBuffer.allocate(contentBytes.length); + buffer.put(contentBytes); + return buffer.array(); + } + + public static StringElement deserialize(byte[] bytes) { + ByteBuffer buffer = ByteBuffer.allocate(bytes.length); + buffer.put(bytes); + + buffer.position(0); + byte[] content = new byte[bytes.length]; + buffer.get(content); + return new StringElement(new String(content)); + } + + @Override + public String toString() { + return content; + } + + + @Override + public boolean equals(Object other) { + if (other == null) { + return false; + } + if (!StringElement.class.isAssignableFrom(other.getClass())) { + return false; + } + + final StringElement element = (StringElement)other; + if ((this.content == null) ? (element.content != null) : !this.content.equals(element.content)) { + return false; + } + return true; + } + + @Override + public int hashCode() { + int hash = 13; + hash = 53 * hash + (this.content != null ? 
this.content.hashCode() : 0); + return hash; + } +} diff --git a/logstash-core/src/test/java/org/logstash/ackedqueue/TestQueue.java b/logstash-core/src/test/java/org/logstash/ackedqueue/TestQueue.java new file mode 100644 index 000000000..16d53ef7b --- /dev/null +++ b/logstash-core/src/test/java/org/logstash/ackedqueue/TestQueue.java @@ -0,0 +1,17 @@ +package org.logstash.ackedqueue; + +import java.util.List; + +public class TestQueue extends Queue { + public TestQueue(Settings settings) { + super(settings); + } + + public HeadPage getHeadPage() { + return this.headPage; + } + + public List getTailPages() { + return this.tailPages; + } +} diff --git a/logstash-core/src/test/java/org/logstash/ackedqueue/TestSettings.java b/logstash-core/src/test/java/org/logstash/ackedqueue/TestSettings.java new file mode 100644 index 000000000..7478c2ec7 --- /dev/null +++ b/logstash-core/src/test/java/org/logstash/ackedqueue/TestSettings.java @@ -0,0 +1,33 @@ +package org.logstash.ackedqueue; + +import org.logstash.common.io.ByteBufferPageIO; +import org.logstash.common.io.CheckpointIOFactory; +import org.logstash.common.io.FileCheckpointIO; +import org.logstash.common.io.MemoryCheckpointIO; +import org.logstash.common.io.PageIOFactory; + +public class TestSettings { + + public static Settings getSettings(int capacity) { + MemoryCheckpointIO.clearSources(); + Settings s = new MemorySettings(); + PageIOFactory pageIOFactory = (pageNum, size, path) -> new ByteBufferPageIO(pageNum, size, path); + CheckpointIOFactory checkpointIOFactory = (source) -> new MemoryCheckpointIO(source); + s.setCapacity(capacity); + s.setElementIOFactory(pageIOFactory); + s.setCheckpointIOFactory(checkpointIOFactory); + s.setElementClass(StringElement.class); + return s; + } + + public static Settings getSettingsCheckpointFilePageMemory(int capacity, String folder) { + Settings s = new FileSettings(folder); + PageIOFactory pageIOFactory = (pageNum, size, path) -> new ByteBufferPageIO(pageNum, size, path); + CheckpointIOFactory checkpointIOFactory = (source) -> new FileCheckpointIO(source); + s.setCapacity(capacity); + s.setElementIOFactory(pageIOFactory); + s.setCheckpointIOFactory(checkpointIOFactory); + s.setElementClass(StringElement.class); + return s; + } +} diff --git a/logstash-core/src/test/java/org/logstash/common/io/ByteBufferPageIOTest.java b/logstash-core/src/test/java/org/logstash/common/io/ByteBufferPageIOTest.java new file mode 100644 index 000000000..9edd134bb --- /dev/null +++ b/logstash-core/src/test/java/org/logstash/common/io/ByteBufferPageIOTest.java @@ -0,0 +1,157 @@ +package org.logstash.common.io; + +import org.junit.Test; +import org.logstash.ackedqueue.Queueable; +import org.logstash.ackedqueue.SequencedList; +import org.logstash.ackedqueue.StringElement; + +import java.io.IOException; +import java.util.List; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.MatcherAssert.assertThat; + + +public class ByteBufferPageIOTest { + + private final int CAPACITY = 1024; + private int MIN_CAPACITY = ByteBufferPageIO.HEADER_SIZE + ByteBufferPageIO._persistedByteCount(0); + + private ByteBufferPageIO subject() throws IOException { + return subject(CAPACITY); + } + + private ByteBufferPageIO subject(int capacity) throws IOException { + ByteBufferPageIO io = new ByteBufferPageIO(capacity); + io.create(); + return io; + } + + private ByteBufferPageIO subject(int capacity, byte[] bytes) throws IOException { + return new ByteBufferPageIO(capacity, 
bytes); + } + + private Queueable buildStringElement(String str) { + return new StringElement(str); + } + + @Test + public void getWritePosition() throws IOException { + assertThat(subject().getWritePosition(), is(equalTo(1))); + } + + @Test + public void getElementCount() throws IOException { + assertThat(subject().getElementCount(), is(equalTo(0))); + } + + @Test + public void getStartSeqNum() throws IOException { + assertThat(subject().getMinSeqNum(), is(equalTo(0L))); + } + + @Test + public void hasSpace() throws IOException { + assertThat(subject(MIN_CAPACITY).hasSpace(0), is(true)); + assertThat(subject(MIN_CAPACITY).hasSpace(1), is(false)); + } + + @Test + public void hasSpaceAfterWrite() throws IOException { + Queueable element = new StringElement("foobarbaz"); + int singleElementCapacity = ByteBufferPageIO.HEADER_SIZE + ByteBufferPageIO._persistedByteCount(element.serialize().length); + long seqNum = 1L; + + ByteBufferPageIO subject = subject(singleElementCapacity); + + assertThat(subject.hasSpace(element.serialize().length), is(true)); + subject.write(element.serialize(), seqNum); + assertThat(subject.hasSpace(element.serialize().length), is(false)); + assertThat(subject.hasSpace(1), is(false)); + } + + @Test + public void write() throws IOException { + Queueable element = new StringElement("foobarbaz"); + long seqNum = 42L; + ByteBufferPageIO subj = subject(); + subj.create(); + subj.write(element.serialize(), seqNum); + assertThat(subj.getWritePosition(), is(equalTo(ByteBufferPageIO.HEADER_SIZE + ByteBufferPageIO._persistedByteCount(element.serialize().length)))); + assertThat(subj.getElementCount(), is(equalTo(1))); + assertThat(subj.getMinSeqNum(), is(equalTo(seqNum))); + } + + @Test + public void recoversValidState() throws IOException { + Queueable element = new StringElement("foobarbaz"); + long seqNum = 42L; + ByteBufferPageIO subject = subject(); + subject.create(); + subject.write(element.serialize(), seqNum); + + byte[] initialState = subject.dump(); + subject = subject(initialState.length, initialState); + subject.open(seqNum, 1); + assertThat(subject.getElementCount(), is(equalTo(1))); + assertThat(subject.getMinSeqNum(), is(equalTo(seqNum))); + } + + @Test(expected = IOException.class) + public void recoversInvalidState() throws IOException { + Queueable element = new StringElement("foobarbaz"); + long seqNum = 42L; + ByteBufferPageIO subject = subject(); + subject.create(); + subject.write(element.serialize(), seqNum); + + byte[] initialState = subject.dump(); + subject = subject(initialState.length, initialState); + subject.open(1L, 1); + } + + // TODO: add other invalid initial states + + @Test + public void writeRead() throws IOException { + long seqNum = 42L; + Queueable element = buildStringElement("foobarbaz"); + ByteBufferPageIO subj = subject(); + subj.create(); + subj.write(element.serialize(), seqNum); + SequencedList<byte[]> result = subj.read(seqNum, 1); + assertThat(result.getElements().size(), is(equalTo(1))); + Queueable readElement = StringElement.deserialize(result.getElements().get(0)); + assertThat(result.getSeqNums().get(0), is(equalTo(seqNum))); + assertThat(readElement.toString(), is(equalTo(element.toString()))); + } + + @Test + public void writeReadMulti() throws IOException { + Queueable element1 = buildStringElement("foo"); + Queueable element2 = buildStringElement("bar"); + Queueable element3 = buildStringElement("baz"); + Queueable element4 = buildStringElement("quux"); + ByteBufferPageIO subj = subject(); + subj.create(); + 
subj.write(element1.serialize(), 40L); + subj.write(element2.serialize(), 41L); + subj.write(element3.serialize(), 42L); + subj.write(element4.serialize(), 43L); + int batchSize = 11; + SequencedList result = subj.read(40L, batchSize); + assertThat(result.getElements().size(), is(equalTo(4))); + + assertThat(result.getSeqNums().get(0), is(equalTo(40L))); + assertThat(result.getSeqNums().get(1), is(equalTo(41L))); + assertThat(result.getSeqNums().get(2), is(equalTo(42L))); + assertThat(result.getSeqNums().get(3), is(equalTo(43L))); + + assertThat(StringElement.deserialize(result.getElements().get(0)).toString(), is(equalTo(element1.toString()))); + assertThat(StringElement.deserialize(result.getElements().get(1)).toString(), is(equalTo(element2.toString()))); + assertThat(StringElement.deserialize(result.getElements().get(2)).toString(), is(equalTo(element3.toString()))); + assertThat(StringElement.deserialize(result.getElements().get(3)).toString(), is(equalTo(element4.toString()))); + } + +} \ No newline at end of file diff --git a/logstash-core/src/test/java/org/logstash/common/io/FileCheckpointIOTest.java b/logstash-core/src/test/java/org/logstash/common/io/FileCheckpointIOTest.java new file mode 100644 index 000000000..86e860aad --- /dev/null +++ b/logstash-core/src/test/java/org/logstash/common/io/FileCheckpointIOTest.java @@ -0,0 +1,53 @@ +package org.logstash.common.io; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.logstash.ackedqueue.Checkpoint; + +import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.MatcherAssert.assertThat; + +public class FileCheckpointIOTest { + private String checkpointFolder; + private CheckpointIO io; + + @Rule + public TemporaryFolder temporaryFolder = new TemporaryFolder(); + + @Before + public void setUp() throws Exception { + checkpointFolder = temporaryFolder + .newFolder("checkpoints") + .getPath(); + io = new FileCheckpointIO(checkpointFolder); + } + + @Test + public void read() throws Exception { + URL url = this.getClass().getResource("checkpoint.head"); + String dirPath = Paths.get(url.toURI()).getParent().toString(); + io = new FileCheckpointIO(dirPath); + Checkpoint chk = io.read("checkpoint.head"); + assertThat(chk.getMinSeqNum(), is(8L)); + } + + @Test + public void write() throws Exception { + io.write("checkpoint.head", 6, 2, 10L, 8L, 200); + io.write("checkpoint.head", 6, 2, 10L, 8L, 200); + Path fullFileName = Paths.get(checkpointFolder, "checkpoint.head"); + byte[] contents = Files.readAllBytes(fullFileName); + URL url = this.getClass().getResource("checkpoint.head"); + Path path = Paths.get(url.getPath()); + byte[] compare = Files.readAllBytes(path); + assertThat(contents, is(equalTo(compare))); + } +} \ No newline at end of file diff --git a/logstash-core/src/test/java/org/logstash/common/io/FileMmapIOTest.java b/logstash-core/src/test/java/org/logstash/common/io/FileMmapIOTest.java new file mode 100644 index 000000000..01bcfe6a4 --- /dev/null +++ b/logstash-core/src/test/java/org/logstash/common/io/FileMmapIOTest.java @@ -0,0 +1,55 @@ +package org.logstash.common.io; + +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.logstash.ackedqueue.SequencedList; +import org.logstash.ackedqueue.StringElement; + 
+import java.util.ArrayList; +import java.util.List; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.MatcherAssert.assertThat; + +public class FileMmapIOTest { + private String folder; + private MmapPageIO writeIo; + private MmapPageIO readIo; + private int pageNum; + + @Rule + public TemporaryFolder temporaryFolder = new TemporaryFolder(); + + @Before + public void setUp() throws Exception { + pageNum = 0; + folder = temporaryFolder + .newFolder("pages") + .getPath(); + writeIo = new MmapPageIO(pageNum, 1024, folder); + readIo = new MmapPageIO(pageNum, 1024, folder); + } + + @Test + public void roundTrip() throws Exception { + List list = new ArrayList<>(); + List readList = new ArrayList<>(); + writeIo.create(); + for (int i = 1; i < 17; i++) { + StringElement input = new StringElement("element-" + i); + list.add(input); + writeIo.write(input.serialize(), i); + } + writeIo.close(); + readIo.open(1, 16); + SequencedList result = readIo.read(1, 16); + for (byte[] bytes : result.getElements()) { + StringElement element = StringElement.deserialize(bytes); + readList.add(element); + } + assertThat(readList, is(equalTo(list))); + } +} \ No newline at end of file diff --git a/logstash-core/src/test/java/org/logstash/common/io/MemoryCheckpointTest.java b/logstash-core/src/test/java/org/logstash/common/io/MemoryCheckpointTest.java new file mode 100644 index 000000000..d2629bd4b --- /dev/null +++ b/logstash-core/src/test/java/org/logstash/common/io/MemoryCheckpointTest.java @@ -0,0 +1,45 @@ +package org.logstash.common.io; + +import org.junit.Before; +import org.junit.Test; +import org.logstash.ackedqueue.Checkpoint; +import org.logstash.ackedqueue.MemorySettings; +import org.logstash.ackedqueue.Settings; + +import java.io.IOException; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.MatcherAssert.assertThat; + +public class MemoryCheckpointTest { + + private CheckpointIO io; + + @Before + public void setUp() { + Settings settings = new MemorySettings(); + CheckpointIOFactory factory = (dirPath) -> new MemoryCheckpointIO(dirPath); + settings.setCheckpointIOFactory(factory); + this.io = settings.getCheckpointIOFactory().build(settings.getDirPath()); + } + + @Test + public void writeNewReadExisting() throws IOException { + io.write("checkpoint.head", 1, 2, 3, 4, 5); + + Checkpoint checkpoint = io.read("checkpoint.head"); + + assertThat(checkpoint.getPageNum(), is(equalTo(1))); + assertThat(checkpoint.getFirstUnackedPageNum(), is(equalTo(2))); + assertThat(checkpoint.getFirstUnackedSeqNum(), is(equalTo(3L))); + assertThat(checkpoint.getMinSeqNum(), is(equalTo(4L))); + assertThat(checkpoint.getElementCount(), is(equalTo(5))); + } + + @Test + public void readInnexisting() throws IOException { + Checkpoint checkpoint = io.read("checkpoint.invalid"); + assertThat(checkpoint, is(equalTo(null))); + } +} diff --git a/logstash-core/src/test/java/org/logstash/common/io/wip/MemoryPageIOStreamTest.java b/logstash-core/src/test/java/org/logstash/common/io/wip/MemoryPageIOStreamTest.java new file mode 100644 index 000000000..363de4149 --- /dev/null +++ b/logstash-core/src/test/java/org/logstash/common/io/wip/MemoryPageIOStreamTest.java @@ -0,0 +1,188 @@ +package org.logstash.common.io.wip; + +import org.junit.Test; +import org.logstash.ackedqueue.Queueable; +import org.logstash.ackedqueue.SequencedList; +import org.logstash.ackedqueue.StringElement; +import 
org.logstash.common.io.wip.MemoryPageIOStream; + +import java.io.IOException; +import java.nio.ByteBuffer; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.MatcherAssert.assertThat; + +public class MemoryPageIOStreamTest { + + private final int CAPACITY = 1024; + private final int EMPTY_HEADER_SIZE = Integer.BYTES + Integer.BYTES; + + private byte[] empty_page_with_header() { + byte[] result = new byte[CAPACITY]; + // version = 1, details = ABC + ByteBuffer.wrap(result).put(new byte[]{0, 0, 0, 1, 0, 0, 0, 3, 65, 66, 67}); + return result; + } + + private MemoryPageIOStream subject() throws IOException { + return subject(CAPACITY); + } + + private MemoryPageIOStream subject(int size) throws IOException { + MemoryPageIOStream io = new MemoryPageIOStream(size); + io.create(); + return io; + } + + private MemoryPageIOStream subject(byte[] bytes, long seqNum, int count) throws IOException { + MemoryPageIOStream io = new MemoryPageIOStream(bytes.length, bytes); + io.open(seqNum, count); + return io; + } + + private Queueable buildStringElement(String str) { + return new StringElement(str); + } + + @Test + public void getWritePosition() throws Exception { + assertThat(subject().getWritePosition(), is(equalTo(EMPTY_HEADER_SIZE))); + assertThat(subject(empty_page_with_header(), 1L, 0).getWritePosition(), is(equalTo(EMPTY_HEADER_SIZE + 3))); + } + + @Test + public void getElementCount() throws Exception { + assertThat(subject().getElementCount(), is(equalTo(0))); + assertThat(subject(empty_page_with_header(), 1L, 0).getElementCount(), is(equalTo(0))); + } + + @Test + public void getStartSeqNum() throws Exception { + assertThat(subject().getMinSeqNum(), is(equalTo(1L))); + assertThat(subject(empty_page_with_header(), 1L, 0).getMinSeqNum(), is(equalTo(1L))); + } + + @Test + public void readHeaderDetails() throws Exception { + MemoryPageIOStream io = new MemoryPageIOStream(CAPACITY); + io.setPageHeaderDetails("ABC"); + io.create(); + assertThat(io.readHeaderDetails(), is(equalTo("ABC"))); + assertThat(io.getWritePosition(), is(equalTo(EMPTY_HEADER_SIZE + 3))); + } + + @Test + public void hasSpace() throws Exception { + assertThat(subject().hasSpace(10), is(true)); + } + + @Test + public void write() throws Exception { + long seqNum = 42L; + Queueable element = new StringElement("foobarbaz"); + MemoryPageIOStream subj = subject(); + subj.write(element.serialize(), seqNum); + assertThat(subj.getElementCount(), is(equalTo(1))); + assertThat(subj.getMinSeqNum(), is(equalTo(seqNum))); + } + + @Test + public void writeUntilFull() throws Exception { + long seqNum = 42L; + Queueable element = new StringElement("foobarbaz"); + byte[] data = element.serialize(); + int bufferSize = 120; + MemoryPageIOStream subj = subject(bufferSize); + while (subj.hasSpace(data.length)) { + subj.write(data, seqNum); + seqNum++; + } + int recordSize = subj.persistedByteCount(data.length); + int remains = bufferSize - subj.getWritePosition(); + assertThat(recordSize, is(equalTo(25))); // element=9 + seqnum=8 + length=4 + crc=4 + assertThat(subj.getElementCount(), is(equalTo(4))); + boolean noSpaceLeft = remains < recordSize; + assertThat(noSpaceLeft, is(true)); + } + + @Test + public void read() throws Exception { + MemoryPageIOStream subj = subject(); + SequencedList result = subj.read(1L, 1); + assertThat(result.getElements().isEmpty(), is(true)); + } + + @Test + public void writeRead() throws Exception { + long seqNum = 42L; + Queueable element = 
buildStringElement("foobarbaz"); + MemoryPageIOStream subj = subject(); + subj.write(element.serialize(), seqNum); + SequencedList result = subj.read(seqNum, 1); + assertThat(result.getElements().size(), is(equalTo(1))); + Queueable readElement = StringElement.deserialize(result.getElements().get(0)); + assertThat(result.getSeqNums().get(0), is(equalTo(seqNum))); + assertThat(readElement.toString(), is(equalTo(element.toString()))); + } + + @Test + public void writeReadEmptyElement() throws Exception { + long seqNum = 1L; + Queueable element = buildStringElement(""); + MemoryPageIOStream subj = subject(); + subj.write(element.serialize(), seqNum); + SequencedList result = subj.read(seqNum, 1); + assertThat(result.getElements().size(), is(equalTo(1))); + Queueable readElement = StringElement.deserialize(result.getElements().get(0)); + assertThat(result.getSeqNums().get(0), is(equalTo(seqNum))); + assertThat(readElement.toString(), is(equalTo(element.toString()))); + } + + @Test + public void writeReadMulti() throws Exception { + Queueable element1 = buildStringElement("foo"); + Queueable element2 = buildStringElement("bar"); + Queueable element3 = buildStringElement("baz"); + Queueable element4 = buildStringElement("quux"); + MemoryPageIOStream subj = subject(); + subj.write(element1.serialize(), 40L); + subj.write(element2.serialize(), 42L); + subj.write(element3.serialize(), 44L); + subj.write(element4.serialize(), 46L); + int batchSize = 11; + SequencedList result = subj.read(40L, batchSize); + assertThat(result.getElements().size(), is(equalTo(4))); + + assertThat(result.getSeqNums().get(0), is(equalTo(40L))); + assertThat(result.getSeqNums().get(1), is(equalTo(42L))); + assertThat(result.getSeqNums().get(2), is(equalTo(44L))); + assertThat(result.getSeqNums().get(3), is(equalTo(46L))); + + assertThat(StringElement.deserialize(result.getElements().get(0)).toString(), is(equalTo(element1.toString()))); + assertThat(StringElement.deserialize(result.getElements().get(1)).toString(), is(equalTo(element2.toString()))); + assertThat(StringElement.deserialize(result.getElements().get(2)).toString(), is(equalTo(element3.toString()))); + assertThat(StringElement.deserialize(result.getElements().get(3)).toString(), is(equalTo(element4.toString()))); + } + + @Test + public void readFromFirstUnackedSeqNum() throws Exception { + long seqNum = 10L; + String[] values = new String[]{"aaa", "bbb", "ccc", "ddd", "eee", "fff", "ggg", "hhh", "iii", "jjj"}; + MemoryPageIOStream stream = subject(300); + for (String val : values) { + Queueable element = buildStringElement(val); + stream.write(element.serialize(), seqNum); + seqNum++; + } + MemoryPageIOStream subj = subject(stream.getBuffer(), 10L, 10); + int batchSize = 3; + seqNum = 13L; + SequencedList result = subj.read(seqNum, batchSize); + for (int i = 0; i < 3; i++) { + Queueable ele = StringElement.deserialize(result.getElements().get(i)); + assertThat(result.getSeqNums().get(i), is(equalTo(seqNum + i))); + assertThat(ele.toString(), is(equalTo(values[i + 3]))); + } + } +} \ No newline at end of file diff --git a/logstash-core/src/test/java/org/logstash/stress/Concurent.java b/logstash-core/src/test/java/org/logstash/stress/Concurent.java new file mode 100644 index 000000000..48a4a5385 --- /dev/null +++ b/logstash-core/src/test/java/org/logstash/stress/Concurent.java @@ -0,0 +1,183 @@ +package org.logstash.stress; + +import org.logstash.ackedqueue.*; +import org.logstash.common.io.*; + +import java.io.IOException; +import java.time.Duration; +import 
java.time.Instant; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.stream.Collectors; + +public class Concurent { + final static int ELEMENT_COUNT = 2000000; + final static int BATCH_SIZE = 1000; + static Settings settings; + + public static Settings memorySettings(int capacity) { + Settings s = new MemorySettings(); + PageIOFactory pageIOFactory = (pageNum, size, path) -> new ByteBufferPageIO(pageNum, size, path); + CheckpointIOFactory checkpointIOFactory = (source) -> new MemoryCheckpointIO(source); + s.setCapacity(capacity); + s.setElementIOFactory(pageIOFactory); + s.setCheckpointIOFactory(checkpointIOFactory); + s.setElementClass(StringElement.class); + return s; + } + + public static Settings fileSettings(int capacity) { + Settings s = new MemorySettings("/tmp/queue"); + PageIOFactory pageIOFactory = (pageNum, size, path) -> new MmapPageIO(pageNum, size, path); + CheckpointIOFactory checkpointIOFactory = (source) -> new FileCheckpointIO(source); + s.setCapacity(capacity); + s.setElementIOFactory(pageIOFactory); + s.setCheckpointIOFactory(checkpointIOFactory); + s.setElementClass(StringElement.class); + return s; + } + + public static Thread producer(Queue q, List<StringElement> input) { + return new Thread(() -> { + try { + for (StringElement element : input) { + q.write(element); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + } + + public static void oneProducersOneConsumer() throws IOException, InterruptedException { + List<StringElement> input = new ArrayList<>(); + List<StringElement> output = new ArrayList<>(); + + Instant start = Instant.now(); + + Queue q = new Queue(settings); + q.getCheckpointIO().purge(); + q.open(); + + System.out.print("starting single producer and single consumer stress test... "); + + for (int i = 0; i < ELEMENT_COUNT; i++) { + input.add(new StringElement(new Integer(i).toString())); + } + + Thread consumer = new Thread(() -> { + int consumedCount = 0; + + try { + while (consumedCount < ELEMENT_COUNT) { + Batch b = q.readBatch(BATCH_SIZE); +// if (b.getElements().size() < BATCH_SIZE) { +// System.out.println("read small batch=" + b.getElements().size()); +// } else { +// System.out.println("read batch size=" + b.getElements().size()); +// } + output.addAll((List) b.getElements()); + b.close(); + consumedCount += b.getElements().size(); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + consumer.start(); + + Thread producer = producer(q, input); + producer.start(); + + consumer.join(); + q.close(); + + Instant end = Instant.now(); + + if (! input.equals(output)) { + System.out.println("ERROR: input and output are not equal"); + } else { + System.out.println("SUCCESS, result size=" + output.size() + ", elapsed=" + Duration.between(start, end) + ", rate=" + (new Float(ELEMENT_COUNT) / Duration.between(start, end).toMillis()) * 1000); + } + } + + public static void oneProducersOneMultipleConsumer() throws IOException, InterruptedException { + final List<StringElement> input = new ArrayList<>(); + final Collection<StringElement> output = new ConcurrentLinkedQueue<>(); + final int CONSUMERS = 5; + List<Thread> consumers = new ArrayList<>(); + + Instant start = Instant.now(); + + Queue q = new Queue(settings); + q.getCheckpointIO().purge(); + q.open(); + + System.out.print("starting single producer and multiple consumers stress test... 
"); + + for (int i = 0; i < ELEMENT_COUNT; i++) { + input.add(new StringElement(new Integer(i).toString())); + } + + for (int i = 0; i < CONSUMERS; i++) { + consumers.add(new Thread(() -> { + try { + while (output.size() < ELEMENT_COUNT) { + Batch b = q.readBatch(BATCH_SIZE); +// if (b.getElements().size() < BATCH_SIZE) { +// System.out.println("read small batch=" + b.getElements().size()); +// } else { +// System.out.println("read batch size=" + b.getElements().size()); +// } + output.addAll((List) b.getElements()); + b.close(); + } + // everything is read, close queue here since other consumers might be blocked trying to get next batch + q.close(); + } catch (IOException e) { + throw new RuntimeException(e); + } + })); + } + + consumers.forEach(c -> c.start()); + + Thread producer = producer(q, input); + producer.start(); + + // gotta hate exception handling in lambdas + consumers.forEach(c -> {try{c.join();} catch(InterruptedException e) {throw new RuntimeException(e);}}); + + Instant end = Instant.now(); + + List result = output.stream().collect(Collectors.toList()); + Collections.sort(result, (p1, p2) -> Integer.valueOf(p1.toString()).compareTo(Integer.valueOf(p2.toString()))); + + if (! input.equals(result)) { + System.out.println("ERROR: input and output are not equal"); + } else { + System.out.println("SUCCESS, result size=" + output.size() + ", elapsed=" + Duration.between(start, end) + ", rate=" + (new Float(ELEMENT_COUNT) / Duration.between(start, end).toMillis()) * 1000); + } + } + + + public static void main(String[] args) throws IOException, InterruptedException { + System.out.println(">>> starting in-memory stress test"); + + settings = memorySettings(1024 * 1024); // 1MB + oneProducersOneConsumer(); + oneProducersOneMultipleConsumer(); + + System.out.println("\n>>> starting file-based stress test in /tmp/queue"); + + settings = fileSettings(1024 * 1024); // 1MB + + oneProducersOneConsumer(); + oneProducersOneMultipleConsumer(); + } + +} diff --git a/logstash-core/src/test/resources/org/logstash/common/io/checkpoint.head b/logstash-core/src/test/resources/org/logstash/common/io/checkpoint.head new file mode 100644 index 000000000..d189f0a7f Binary files /dev/null and b/logstash-core/src/test/resources/org/logstash/common/io/checkpoint.head differ diff --git a/rakelib/artifacts.rake b/rakelib/artifacts.rake index c00202a4d..8c33df075 100644 --- a/rakelib/artifacts.rake +++ b/rakelib/artifacts.rake @@ -14,17 +14,28 @@ namespace "artifact" do "bin/**/*", "config/**/*", "data", + "lib/bootstrap/**/*", "lib/pluginmanager/**/*", "lib/systeminstall/**/*", + "logstash-core/lib/**/*", "logstash-core/locales/**/*", "logstash-core/vendor/**/*", "logstash-core/*.gemspec", + "logstash-core/gemspec_jars.rb", + "logstash-core-event-java/lib/**/*", "logstash-core-event-java/*.gemspec", + "logstash-core-event-java/gemspec_jars.rb", + + "logstash-core-queue-jruby/lib/**/*", + "logstash-core-queue-jruby/*.gemspec", + "logstash-core-queue-jruby/gemspec_jars.rb", + "logstash-core-plugin-api/lib/**/*", "logstash-core-plugin-api/*.gemspec", + "patterns/**/*", "vendor/??*/**/*", # To include ruby-maven's hidden ".mvn" directory, we need to diff --git a/rakelib/compile.rake b/rakelib/compile.rake index e57301258..b2df88ec2 100644 --- a/rakelib/compile.rake +++ b/rakelib/compile.rake @@ -13,7 +13,7 @@ namespace "compile" do task "logstash-core-java" do puts("Building logstash-core using gradle") - system("./gradlew", "vendor", "-p", "./logstash-core") + system("./gradlew", "jar", "-p", 
"./logstash-core") end task "logstash-core-event-java" do @@ -21,6 +21,11 @@ namespace "compile" do system("./gradlew", "jar", "-p", "./logstash-core-event-java") end + task "logstash-core-queue-jruby" do + puts("Building logstash-core-queue-jruby using gradle") + system("./gradlew", "jar", "-p", "./logstash-core-queue-jruby") + end + desc "Build everything" - task "all" => ["grammar", "logstash-core-java", "logstash-core-event-java"] + task "all" => ["grammar", "logstash-core-java", "logstash-core-event-java", "logstash-core-queue-jruby"] end diff --git a/rakelib/test.rake b/rakelib/test.rake index a02a5c5d6..503bec6c6 100644 --- a/rakelib/test.rake +++ b/rakelib/test.rake @@ -6,6 +6,12 @@ require "pluginmanager/util" namespace "test" do task "setup" do + + # make sure we have a ./data/queue dir here + # temporary wiring until we figure proper queue initialization sequence and in test context etc. + mkdir "data" unless File.directory?("data") + mkdir "data/queue" unless File.directory?("data/queue") + # Need to be run here as because if run aftewarse (after the bundler.setup task) then the report got wrong # numbers and misses files. There is an issue with our setup! method as this does not happen with the regular # bundler.setup used in regular bundler flows. @@ -13,6 +19,7 @@ namespace "test" do require "bootstrap/environment" LogStash::Bundler.setup!({:without => [:build]}) + require "logstash-core" require "rspec/core/runner" require "rspec" diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 9bec90f64..4e4975816 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -21,6 +21,27 @@ end RSpec.configure do |c| Flores::RSpec.configure(c) + c.before do + # TODO: commented out on post-merged in master - the logger has moved to log4j + # + # + # Force Cabin to always have a JSON subscriber. The main purpose of this + # is to catch crashes in json serialization for our logs. JSONIOThingy + # exists to validate taht what LogStash::Logging::JSON emits is always + # valid JSON. + # jsonvalidator = JSONIOThingy.new + # allow(Cabin::Channel).to receive(:new).and_wrap_original do |m, *args| + # logger = m.call(*args) + # logger.level = :debug + # logger.subscribe(LogStash::Logging::JSON.new(jsonvalidator)) + # + # logger + # end + + LogStash::SETTINGS.set("queue.type", "memory_acked") + LogStash::SETTINGS.set("queue.page_capacity", 1024 * 1024) + LogStash::SETTINGS.set("queue.max_events", 250) + end end def installed_plugins diff --git a/spec/unit/license_spec.rb b/spec/unit/license_spec.rb index 9425cfc91..88fe74d4b 100644 --- a/spec/unit/license_spec.rb +++ b/spec/unit/license_spec.rb @@ -14,7 +14,8 @@ describe "Project licenses" do /bsd/, /artistic 2.*/, /ruby/, - /lgpl/]) + /lgpl/, + /epl/]) } ## diff --git a/versions.yml b/versions.yml index 87670bec2..1c44f7e4c 100644 --- a/versions.yml +++ b/versions.yml @@ -3,4 +3,5 @@ logstash: 6.0.0-alpha1 logstash-core: 6.0.0-alpha1 logstash-core-event: 6.0.0-alpha1 logstash-core-event-java: 6.0.0-alpha1 +logstash-core-queue-jruby: 6.0.0-alpha1 logstash-core-plugin-api: 2.1.16