mirror of
https://github.com/elastic/logstash.git
synced 2025-04-24 22:57:16 -04:00
fix accessors field references caching performance regression
correct use of @lut and cleanups leftover added docs and renamed a few identifiers for clarity typos remove strict_set stuff since its not used anymore removed unused specs PathCache thread safety
This commit is contained in:
parent
37d1033b88
commit
3d1847aecc
3 changed files with 74 additions and 87 deletions
|
@ -2,90 +2,120 @@
|
|||
|
||||
require "logstash/namespace"
|
||||
require "logstash/util"
|
||||
require "thread_safe"
|
||||
|
||||
module LogStash::Util
|
||||
|
||||
# PathCache is a singleton which globally caches a parsed fields path for the path to the
|
||||
# container hash and key in that hash.
|
||||
# PathCache is a singleton which globally caches the relation between a field reference and its
|
||||
# decomposition into a [key, path array] tuple. For example the field reference [foo][bar][baz]
|
||||
# is decomposed into ["baz", ["foo", "bar"]].
|
||||
module PathCache
|
||||
extend self
|
||||
|
||||
def get(accessor)
|
||||
@cache ||= {}
|
||||
@cache[accessor] ||= parse(accessor)
|
||||
# requiring libraries and defining constants is thread safe in JRuby so
|
||||
# PathCache::CACHE will be corretly initialized, once, when accessors.rb
|
||||
# will be first required
|
||||
CACHE = ThreadSafe::Cache.new
|
||||
|
||||
def get(field_reference)
|
||||
# the "get_or_default(x, nil) || put(x, parse(x))" is ~2x faster than "get || put" because the get call is
|
||||
# proxied through the JRuby JavaProxy op_aref method. the correct idiom here would be to use
|
||||
# "compute_if_absent(x){parse(x)}" but because of the closure creation, it is ~1.5x slower than
|
||||
# "get_or_default || put".
|
||||
# this "get_or_default || put" is obviously non-atomic which is not really important here
|
||||
# since all threads will set the same value and this cache will stabilize very quickly after the first
|
||||
# few events.
|
||||
CACHE.get_or_default(field_reference, nil) || CACHE.put(field_reference, parse(field_reference))
|
||||
end
|
||||
|
||||
def parse(accessor)
|
||||
path = accessor.split(/[\[\]]/).select{|s| !s.empty?}
|
||||
def parse(field_reference)
|
||||
path = field_reference.split(/[\[\]]/).select{|s| !s.empty?}
|
||||
[path.pop, path]
|
||||
end
|
||||
end
|
||||
|
||||
# Accessors uses a lookup table to speedup access of an accessor field of the type
|
||||
# Accessors uses a lookup table to speedup access of a field reference of the form
|
||||
# "[hello][world]" to the underlying store hash into {"hello" => {"world" => "foo"}}
|
||||
class Accessors
|
||||
|
||||
# @param store [Hash] the backing data store field refereces point to
|
||||
def initialize(store)
|
||||
@store = store
|
||||
|
||||
# @lut is a lookup table between a field reference and a [target, key] tuple
|
||||
# where target is the containing Hash or Array for key in @store.
|
||||
# this allows us to directly access the containing object for key instead of
|
||||
# walking the field reference path into the inner @store objects
|
||||
@lut = {}
|
||||
end
|
||||
|
||||
def get(accessor)
|
||||
target, key = lookup(accessor)
|
||||
unless target.nil?
|
||||
target.is_a?(Array) ? target[key.to_i] : target[key]
|
||||
end
|
||||
# @param field_reference [String] the field reference
|
||||
# @return [Object] the value in @store for this field reference
|
||||
def get(field_reference)
|
||||
target, key = lookup(field_reference)
|
||||
return nil unless target
|
||||
target.is_a?(Array) ? target[key.to_i] : target[key]
|
||||
end
|
||||
|
||||
def set(accessor, value)
|
||||
target, key = store_and_lookup(accessor)
|
||||
# @param field_reference [String] the field reference
|
||||
# @param value [Object] the value to set in @store for this field reference
|
||||
# @return [Object] the value set
|
||||
def set(field_reference, value)
|
||||
target, key = lookup_or_create(field_reference)
|
||||
target[target.is_a?(Array) ? key.to_i : key] = value
|
||||
end
|
||||
|
||||
def strict_set(accessor, value)
|
||||
set(accessor, LogStash::Event.validate_value(value))
|
||||
# @param field_reference [String] the field reference to remove
|
||||
# @return [Object] the removed value in @store for this field reference
|
||||
def del(field_reference)
|
||||
target, key = lookup(field_reference)
|
||||
return nil unless target
|
||||
target.is_a?(Array) ? target.delete_at(key.to_i) : target.delete(key)
|
||||
end
|
||||
|
||||
def del(accessor)
|
||||
target, key = lookup(accessor)
|
||||
unless target.nil?
|
||||
target.is_a?(Array) ? target.delete_at(key.to_i) : target.delete(key)
|
||||
end
|
||||
end
|
||||
|
||||
def include?(accessor)
|
||||
target, key = lookup_path(accessor)
|
||||
# @param field_reference [String] the field reference to test for inclusion in the store
|
||||
# @return [Boolean] true if the store contains a value for this field reference
|
||||
def include?(field_reference)
|
||||
target, key = lookup(field_reference)
|
||||
return false unless target
|
||||
|
||||
target.is_a?(Array) ? !target[key.to_i].nil? : target.include?(key)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def lookup(accessor)
|
||||
target, key = lookup_path(accessor)
|
||||
if target.nil?
|
||||
[target, key]
|
||||
else
|
||||
@lut[accessor] = [target, key]
|
||||
end
|
||||
# retrieve the [target, key] tuple associated with this field reference
|
||||
# @param field_reference [String] the field referece
|
||||
# @return [[Object, String]] the [target, key] tuple associated with this field reference
|
||||
def lookup(field_reference)
|
||||
@lut[field_reference] ||= find_target(field_reference)
|
||||
end
|
||||
|
||||
def store_and_lookup(accessor)
|
||||
@lut[accessor] ||= store_path(accessor)
|
||||
# retrieve the [target, key] tuple associated with this field reference and create inner
|
||||
# container objects if they do not exists
|
||||
# @param field_reference [String] the field referece
|
||||
# @return [[Object, String]] the [target, key] tuple associated with this field reference
|
||||
def lookup_or_create(field_reference)
|
||||
@lut[field_reference] ||= find_or_create_target(field_reference)
|
||||
end
|
||||
|
||||
def lookup_path(accessor)
|
||||
key, path = PathCache.get(accessor)
|
||||
# find the target container object in store for this field reference
|
||||
# @param field_reference [String] the field referece
|
||||
# @return [Object] the target container object in store associated with this field reference
|
||||
def find_target(field_reference)
|
||||
key, path = PathCache.get(field_reference)
|
||||
target = path.inject(@store) do |r, k|
|
||||
if r.nil?
|
||||
return nil
|
||||
end
|
||||
return nil unless r
|
||||
r[r.is_a?(Array) ? k.to_i : k]
|
||||
end
|
||||
[target, key]
|
||||
target ? [target, key] : nil
|
||||
end
|
||||
|
||||
def store_path(accessor)
|
||||
# find the target container object in store for this field reference and create inner
|
||||
# container objects if they do not exists
|
||||
# @param field_reference [String] the field referece
|
||||
# @return [Object] the target container object in store associated with this field reference
|
||||
def find_or_create_target(accessor)
|
||||
key, path = PathCache.get(accessor)
|
||||
target = path.inject(@store) {|r, k| r[r.is_a?(Array) ? k.to_i : k] ||= {}}
|
||||
[target, key]
|
||||
|
|
|
@ -33,6 +33,8 @@ Gem::Specification.new do |gem|
|
|||
# filetools and rakelib
|
||||
gem.add_runtime_dependency "minitar", "~> 0.5.4"
|
||||
|
||||
gem.add_runtime_dependency "thread_safe", "~> 0.3.5" #(Apache 2.0 license)
|
||||
|
||||
if RUBY_PLATFORM == 'java'
|
||||
gem.platform = RUBY_PLATFORM
|
||||
gem.add_runtime_dependency "jrjackson", "~> 0.2.8" #(Apache 2.0 license)
|
||||
|
|
|
@ -121,14 +121,6 @@ describe LogStash::Util::Accessors, :if => true do
|
|||
expect(data).to eq({ "hello" => "foo" })
|
||||
end
|
||||
|
||||
it "should strict_set shallow string value" do
|
||||
str = "[hello]"
|
||||
data = {}
|
||||
accessors = LogStash::Util::Accessors.new(data)
|
||||
expect(accessors.strict_set(str, "foo")).to eq("foo")
|
||||
expect(data).to eq({ "hello" => "foo"})
|
||||
end
|
||||
|
||||
it "should set deep string value" do
|
||||
str = "[hello][world]"
|
||||
data = {}
|
||||
|
@ -145,14 +137,6 @@ describe LogStash::Util::Accessors, :if => true do
|
|||
expect(data).to eq({ "hello" => { "world" => ["foo", "bar"] } })
|
||||
end
|
||||
|
||||
it "should strict_set deep array value" do
|
||||
str = "[hello][world]"
|
||||
data = {}
|
||||
accessors = LogStash::Util::Accessors.new(data)
|
||||
expect(accessors.strict_set(str, ["foo", "bar"]) ).to eq(["foo", "bar"])
|
||||
expect(data).to eq({ "hello" => { "world" => ["foo", "bar"] } })
|
||||
end
|
||||
|
||||
it "should set element within array value" do
|
||||
str = "[hello][0]"
|
||||
data = {"hello" => ["foo", "bar"]}
|
||||
|
@ -181,35 +165,6 @@ describe LogStash::Util::Accessors, :if => true do
|
|||
accessors = LogStash::Util::Accessors.new(data)
|
||||
expect(accessors.del(str)).to eq(4)
|
||||
expect(data).to eq({ "geocoords" => [2] })
|
||||
end end
|
||||
|
||||
context "using invalid encoding" do
|
||||
it "strinct_set should raise on non UTF-8 string encoding" do
|
||||
str = "[hello]"
|
||||
data = {}
|
||||
accessors = LogStash::Util::Accessors.new(data)
|
||||
expect { accessors.strict_set(str, "foo".encode("US-ASCII")) }.to raise_error
|
||||
end
|
||||
|
||||
it "strinct_set should raise on non UTF-8 string encoding in array" do
|
||||
str = "[hello]"
|
||||
data = {}
|
||||
accessors = LogStash::Util::Accessors.new(data)
|
||||
expect { accessors.strict_set(str, ["foo", "bar".encode("US-ASCII")]) }.to raise_error
|
||||
end
|
||||
|
||||
it "strinct_set should raise on invalid UTF-8 string encoding" do
|
||||
str = "[hello]"
|
||||
data = {}
|
||||
accessors = LogStash::Util::Accessors.new(data)
|
||||
expect { accessors.strict_set(str, "foo \xED\xB9\x81\xC3") }.to raise_error
|
||||
end
|
||||
|
||||
it "strinct_set should raise on invalid UTF-8 string encoding in array" do
|
||||
str = "[hello]"
|
||||
data = {}
|
||||
accessors = LogStash::Util::Accessors.new(data)
|
||||
expect { accessors.strict_set(str, ["foo", "bar \xED\xB9\x81\xC3"]) }.to raise_error
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue