fix accessors field references caching performance regression

correct use of @lut and cleanups

leftover

added docs and renamed a few identifiers for clarity

typos

remove strict_set stuff since its not used anymore

removed unused specs

PathCache thread safety
This commit is contained in:
Colin Surprenant 2015-05-20 23:36:22 -04:00
parent 37d1033b88
commit 3d1847aecc
3 changed files with 74 additions and 87 deletions

View file

@ -2,90 +2,120 @@
require "logstash/namespace"
require "logstash/util"
require "thread_safe"
module LogStash::Util
# PathCache is a singleton which globally caches a parsed fields path for the path to the
# container hash and key in that hash.
# PathCache is a singleton which globally caches the relation between a field reference and its
# decomposition into a [key, path array] tuple. For example the field reference [foo][bar][baz]
# is decomposed into ["baz", ["foo", "bar"]].
module PathCache
extend self
def get(accessor)
@cache ||= {}
@cache[accessor] ||= parse(accessor)
# requiring libraries and defining constants is thread safe in JRuby so
# PathCache::CACHE will be corretly initialized, once, when accessors.rb
# will be first required
CACHE = ThreadSafe::Cache.new
def get(field_reference)
# the "get_or_default(x, nil) || put(x, parse(x))" is ~2x faster than "get || put" because the get call is
# proxied through the JRuby JavaProxy op_aref method. the correct idiom here would be to use
# "compute_if_absent(x){parse(x)}" but because of the closure creation, it is ~1.5x slower than
# "get_or_default || put".
# this "get_or_default || put" is obviously non-atomic which is not really important here
# since all threads will set the same value and this cache will stabilize very quickly after the first
# few events.
CACHE.get_or_default(field_reference, nil) || CACHE.put(field_reference, parse(field_reference))
end
def parse(accessor)
path = accessor.split(/[\[\]]/).select{|s| !s.empty?}
def parse(field_reference)
path = field_reference.split(/[\[\]]/).select{|s| !s.empty?}
[path.pop, path]
end
end
# Accessors uses a lookup table to speedup access of an accessor field of the type
# Accessors uses a lookup table to speedup access of a field reference of the form
# "[hello][world]" to the underlying store hash into {"hello" => {"world" => "foo"}}
class Accessors
# @param store [Hash] the backing data store field refereces point to
def initialize(store)
@store = store
# @lut is a lookup table between a field reference and a [target, key] tuple
# where target is the containing Hash or Array for key in @store.
# this allows us to directly access the containing object for key instead of
# walking the field reference path into the inner @store objects
@lut = {}
end
def get(accessor)
target, key = lookup(accessor)
unless target.nil?
target.is_a?(Array) ? target[key.to_i] : target[key]
end
# @param field_reference [String] the field reference
# @return [Object] the value in @store for this field reference
def get(field_reference)
target, key = lookup(field_reference)
return nil unless target
target.is_a?(Array) ? target[key.to_i] : target[key]
end
def set(accessor, value)
target, key = store_and_lookup(accessor)
# @param field_reference [String] the field reference
# @param value [Object] the value to set in @store for this field reference
# @return [Object] the value set
def set(field_reference, value)
target, key = lookup_or_create(field_reference)
target[target.is_a?(Array) ? key.to_i : key] = value
end
def strict_set(accessor, value)
set(accessor, LogStash::Event.validate_value(value))
# @param field_reference [String] the field reference to remove
# @return [Object] the removed value in @store for this field reference
def del(field_reference)
target, key = lookup(field_reference)
return nil unless target
target.is_a?(Array) ? target.delete_at(key.to_i) : target.delete(key)
end
def del(accessor)
target, key = lookup(accessor)
unless target.nil?
target.is_a?(Array) ? target.delete_at(key.to_i) : target.delete(key)
end
end
def include?(accessor)
target, key = lookup_path(accessor)
# @param field_reference [String] the field reference to test for inclusion in the store
# @return [Boolean] true if the store contains a value for this field reference
def include?(field_reference)
target, key = lookup(field_reference)
return false unless target
target.is_a?(Array) ? !target[key.to_i].nil? : target.include?(key)
end
private
def lookup(accessor)
target, key = lookup_path(accessor)
if target.nil?
[target, key]
else
@lut[accessor] = [target, key]
end
# retrieve the [target, key] tuple associated with this field reference
# @param field_reference [String] the field referece
# @return [[Object, String]] the [target, key] tuple associated with this field reference
def lookup(field_reference)
@lut[field_reference] ||= find_target(field_reference)
end
def store_and_lookup(accessor)
@lut[accessor] ||= store_path(accessor)
# retrieve the [target, key] tuple associated with this field reference and create inner
# container objects if they do not exists
# @param field_reference [String] the field referece
# @return [[Object, String]] the [target, key] tuple associated with this field reference
def lookup_or_create(field_reference)
@lut[field_reference] ||= find_or_create_target(field_reference)
end
def lookup_path(accessor)
key, path = PathCache.get(accessor)
# find the target container object in store for this field reference
# @param field_reference [String] the field referece
# @return [Object] the target container object in store associated with this field reference
def find_target(field_reference)
key, path = PathCache.get(field_reference)
target = path.inject(@store) do |r, k|
if r.nil?
return nil
end
return nil unless r
r[r.is_a?(Array) ? k.to_i : k]
end
[target, key]
target ? [target, key] : nil
end
def store_path(accessor)
# find the target container object in store for this field reference and create inner
# container objects if they do not exists
# @param field_reference [String] the field referece
# @return [Object] the target container object in store associated with this field reference
def find_or_create_target(accessor)
key, path = PathCache.get(accessor)
target = path.inject(@store) {|r, k| r[r.is_a?(Array) ? k.to_i : k] ||= {}}
[target, key]

View file

@ -33,6 +33,8 @@ Gem::Specification.new do |gem|
# filetools and rakelib
gem.add_runtime_dependency "minitar", "~> 0.5.4"
gem.add_runtime_dependency "thread_safe", "~> 0.3.5" #(Apache 2.0 license)
if RUBY_PLATFORM == 'java'
gem.platform = RUBY_PLATFORM
gem.add_runtime_dependency "jrjackson", "~> 0.2.8" #(Apache 2.0 license)

View file

@ -121,14 +121,6 @@ describe LogStash::Util::Accessors, :if => true do
expect(data).to eq({ "hello" => "foo" })
end
it "should strict_set shallow string value" do
str = "[hello]"
data = {}
accessors = LogStash::Util::Accessors.new(data)
expect(accessors.strict_set(str, "foo")).to eq("foo")
expect(data).to eq({ "hello" => "foo"})
end
it "should set deep string value" do
str = "[hello][world]"
data = {}
@ -145,14 +137,6 @@ describe LogStash::Util::Accessors, :if => true do
expect(data).to eq({ "hello" => { "world" => ["foo", "bar"] } })
end
it "should strict_set deep array value" do
str = "[hello][world]"
data = {}
accessors = LogStash::Util::Accessors.new(data)
expect(accessors.strict_set(str, ["foo", "bar"]) ).to eq(["foo", "bar"])
expect(data).to eq({ "hello" => { "world" => ["foo", "bar"] } })
end
it "should set element within array value" do
str = "[hello][0]"
data = {"hello" => ["foo", "bar"]}
@ -181,35 +165,6 @@ describe LogStash::Util::Accessors, :if => true do
accessors = LogStash::Util::Accessors.new(data)
expect(accessors.del(str)).to eq(4)
expect(data).to eq({ "geocoords" => [2] })
end end
context "using invalid encoding" do
it "strinct_set should raise on non UTF-8 string encoding" do
str = "[hello]"
data = {}
accessors = LogStash::Util::Accessors.new(data)
expect { accessors.strict_set(str, "foo".encode("US-ASCII")) }.to raise_error
end
it "strinct_set should raise on non UTF-8 string encoding in array" do
str = "[hello]"
data = {}
accessors = LogStash::Util::Accessors.new(data)
expect { accessors.strict_set(str, ["foo", "bar".encode("US-ASCII")]) }.to raise_error
end
it "strinct_set should raise on invalid UTF-8 string encoding" do
str = "[hello]"
data = {}
accessors = LogStash::Util::Accessors.new(data)
expect { accessors.strict_set(str, "foo \xED\xB9\x81\xC3") }.to raise_error
end
it "strinct_set should raise on invalid UTF-8 string encoding in array" do
str = "[hello]"
data = {}
accessors = LogStash::Util::Accessors.new(data)
expect { accessors.strict_set(str, ["foo", "bar \xED\xB9\x81\xC3"]) }.to raise_error
end
end
end