Accessor#strict_set for encoding validation in specs

This commit is contained in:
Colin Surprenant 2014-03-22 01:06:25 -04:00
parent c3f826cef7
commit 3ed6078a65
4 changed files with 105 additions and 14 deletions

View file

@ -129,12 +129,12 @@ class LogStash::Event
end # def []
public
# keep []= implementation in sync with spec/test_utils.rb monkey patch
# which redefines []= but using @accessors.strict_set
# Assigns +value+ to the field referenced by +str+ through @accessors.
#
# @param str [String] field name or field reference to assign
# @param value [Object] value to store; must be a Time when +str+ is TIMESTAMP
# @return whatever @accessors.set returns
# @raise [TypeError] when +str+ is TIMESTAMP and +value+ is not a Time
def []=(str, value)
  if str == TIMESTAMP
    # the timestamp field only ever accepts Time instances
    raise TypeError, "The field '@timestamp' must be a Time, not a #{value.class} (#{value})" unless value.is_a?(Time)
  end
  @accessors.set(str, value)
end # def []=

View file

@ -1,8 +1,8 @@
# encoding: utf-8
require "logstash/namespace"
require "logstash/util"
module LogStash::Util
# PathCache is a singleton which globally caches a parsed fields path for the path to the
@ -21,7 +21,6 @@ module LogStash::Util
end
end
# Accessors uses a lookup table to speedup access of an accessor field of the type
# "[hello][world]" to the underlying store hash into {"hello" => {"world" => "foo"}}
class Accessors
@ -41,6 +40,10 @@ module LogStash::Util
target[key] = value
end
# Same as #set, except that +value+ is first passed through #strict_value,
# which raises when a String (or a String inside an Array) is not valid UTF-8.
#
# @param accessor [String] field name or field reference to assign
# @param value [Object] value to validate and store
# @return whatever #set returns for the validated value
def strict_set(accessor, value)
  checked = strict_value(value)
  set(accessor, checked)
end
def del(accessor)
target, key = lookup(accessor)
target.delete(key)
@ -58,5 +61,19 @@ module LogStash::Util
[target, key]
end
end
end # module LogStash::Util::Accessors
# Checks that +value+ only carries valid UTF-8 strings and returns it untouched.
#
# Strings must be tagged as UTF-8 and contain a valid UTF-8 byte sequence.
# Arrays are checked element by element (recursively). Any other object is
# returned without inspection.
#
# @param value [Object] the value to validate
# @return [Object] the very same +value+ object (never a copy)
# @raise [RuntimeError] when a String is not UTF-8 encoded or holds invalid UTF-8 bytes
def strict_value(value)
  case value
  when String
    # Build the messages from value.inspect rather than the raw string:
    # interpolating a string in an ASCII-incompatible encoding (e.g. UTF-16)
    # into the UTF-8 message raises Encoding::CompatibilityError and would
    # hide the error we actually want to report.
    raise("expected UTF-8 encoding for value=#{value.inspect}, encoding=#{value.encoding.inspect}") unless value.encoding == Encoding::UTF_8
    raise("invalid UTF-8 encoding for value=#{value.inspect}, encoding=#{value.encoding.inspect}") unless value.valid_encoding?
  when Array
    value.each { |v| strict_value(v) } # don't map, return original object
  end
  value
end
end # class Accessors
end # module LogStash::Util

View file

@ -37,6 +37,18 @@ else
$logger.level = :error
end
puts("Using Accessor#strict_set for specs")
# monkey patch LogStash::Event to use strict_set in tests
# ugly, I know, but this avoids adding conditionals in performance critical section
# Spec-only reopening of LogStash::Event: field assignment is routed through
# @accessors.strict_set instead of @accessors.set, so values written during
# specs are run through the accessors' strict validation.
class LogStash::Event
  # @param str [String] field name or field reference to assign
  # @param value [Object] value to store; must be a Time when +str+ is TIMESTAMP
  # @return whatever @accessors.strict_set returns
  # @raise [TypeError] when +str+ is TIMESTAMP and +value+ is not a Time
  def []=(str, value)
    if str == TIMESTAMP
      # the timestamp field only ever accepts Time instances
      raise TypeError, "The field '@timestamp' must be a Time, not a #{value.class} (#{value})" unless value.is_a?(Time)
    end
    @accessors.strict_set(str, value)
  end # def []=
end
# Hand RSpec the set of tags passed to filter_run_excluding for this suite.
RSpec.configure do |config|
  excluded_tags = {
    :redis => true,
    :socket => true,
    :performance => true,
    :elasticsearch => true,
    :broken => true
  }
  config.filter_run_excluding(excluded_tags)
end

View file

@ -36,46 +36,54 @@ describe LogStash::Util::Accessors, :if => true do
insist { data }.empty?
end
it "should set value" do
it "should set string value" do
str = "simple"
data = {}
accessors = LogStash::Util::Accessors.new(data)
insist { accessors.set(str, "things") } == "things"
insist { data } == { "simple" => "things" }
end
# A bare key (no brackets) set to an array returns the array and stores it under that key.
it "should set array value" do
  store = {}
  field_accessors = LogStash::Util::Accessors.new(store)
  field = "simple"
  insist { field_accessors.set(field, ["foo", "bar"]) } == ["foo", "bar"]
  insist { store } == { "simple" => ["foo", "bar"] }
end
end
context "using field path" do
it "should get shallow value of word key" do
it "should get shallow string value of word key" do
str = "[hello]"
data = { "hello" => "world" }
accessors = LogStash::Util::Accessors.new(data)
insist { accessors.get(str) } == "world"
end
it "should get shallow value of key with spaces" do
it "should get shallow string value of key with spaces" do
str = "[hel lo]"
data = { "hel lo" => "world" }
accessors = LogStash::Util::Accessors.new(data)
insist { accessors.get(str) } == "world"
end
it "should get shallow value of numeric key string" do
it "should get shallow string value of numeric key string" do
str = "[1]"
data = { "1" => "world" }
accessors = LogStash::Util::Accessors.new(data)
insist { accessors.get(str) } == "world"
end
it "should get deep value" do
it "should get deep string value" do
str = "[hello][world]"
data = { "hello" => { "world" => "foo", "bar" => "baz" } }
accessors = LogStash::Util::Accessors.new(data)
insist { accessors.get(str) } == data["hello"]["world"]
end
it "should get deep value" do
it "should get deep string value" do
str = "[hello][world]"
data = { "hello" => { "world" => "foo", "bar" => "baz" } }
accessors = LogStash::Util::Accessors.new(data)
@ -92,7 +100,7 @@ describe LogStash::Util::Accessors, :if => true do
insist { data["hello"] } == { "bar" => "baz" }
end
it "should set shallow value" do
it "should set shallow string value" do
str = "[hello]"
data = {}
accessors = LogStash::Util::Accessors.new(data)
@ -100,7 +108,15 @@ describe LogStash::Util::Accessors, :if => true do
insist { data } == { "hello" => "foo" }
end
it "should set deep value" do
# strict_set with a valid string on a single-level field path returns the string and stores it.
it "should strict_set shallow string value" do
  store = {}
  field_accessors = LogStash::Util::Accessors.new(store)
  path = "[hello]"
  insist { field_accessors.strict_set(path, "foo") } == "foo"
  insist { store } == { "hello" => "foo" }
end
it "should set deep string value" do
str = "[hello][world]"
data = {}
accessors = LogStash::Util::Accessors.new(data)
@ -108,6 +124,22 @@ describe LogStash::Util::Accessors, :if => true do
insist { data } == { "hello" => { "world" => "foo" } }
end
# Setting an array on a two-level field path returns the array and builds the nested hash.
it "should set deep array value" do
  store = {}
  field_accessors = LogStash::Util::Accessors.new(store)
  path = "[hello][world]"
  insist { field_accessors.set(path, ["foo", "bar"]) } == ["foo", "bar"]
  insist { store } == { "hello" => { "world" => ["foo", "bar"] } }
end
# strict_set with an array of valid strings on a two-level path behaves like set.
it "should strict_set deep array value" do
  store = {}
  field_accessors = LogStash::Util::Accessors.new(store)
  path = "[hello][world]"
  insist { field_accessors.strict_set(path, ["foo", "bar"]) } == ["foo", "bar"]
  insist { store } == { "hello" => { "world" => ["foo", "bar"] } }
end
it "should retrieve array item" do
data = { "hello" => { "world" => ["a", "b"], "bar" => "baz" } }
accessors = LogStash::Util::Accessors.new(data)
@ -115,4 +147,34 @@ describe LogStash::Util::Accessors, :if => true do
insist { accessors.get("[hello][world][1]") } == data["hello"]["world"][1]
end
end
# strict_set must reject strings that are not tagged as UTF-8 or that hold
# invalid UTF-8 bytes, whether passed directly or nested inside an array.
# The expectations name RuntimeError (what a bare `raise "msg"` produces) so
# that an unrelated NoMethodError/NameError cannot make these examples pass.
context "using invalid encoding" do
  it "strict_set should raise on non UTF-8 string encoding" do
    str = "[hello]"
    data = {}
    accessors = LogStash::Util::Accessors.new(data)
    expect { accessors.strict_set(str, "foo".encode("US-ASCII")) }.to raise_error(RuntimeError)
  end

  it "strict_set should raise on non UTF-8 string encoding in array" do
    str = "[hello]"
    data = {}
    accessors = LogStash::Util::Accessors.new(data)
    expect { accessors.strict_set(str, ["foo", "bar".encode("US-ASCII")]) }.to raise_error(RuntimeError)
  end

  it "strict_set should raise on invalid UTF-8 string encoding" do
    str = "[hello]"
    data = {}
    accessors = LogStash::Util::Accessors.new(data)
    expect { accessors.strict_set(str, "foo \xED\xB9\x81\xC3") }.to raise_error(RuntimeError)
  end

  it "strict_set should raise on invalid UTF-8 string encoding in array" do
    str = "[hello]"
    data = {}
    accessors = LogStash::Util::Accessors.new(data)
    expect { accessors.strict_set(str, ["foo", "bar \xED\xB9\x81\xC3"]) }.to raise_error(RuntimeError)
  end
end
end