mirror of
https://github.com/elastic/logstash.git
synced 2025-04-24 06:37:19 -04:00
Grok update.
- Add docs for feature-specific libgrok version requirements - Add 'match' config; takes a hash. - Allow anything that looks like a field name as a config attribute. - Deprecate 'pattern' config. It now is the equivalent of: pattern => "mypattern" match => [ "@message", "mypattern" ] - Add some new tests to verify new libgrok inline-patterns and such.
This commit is contained in:
parent
f37c8b8a9e
commit
919122dcbc
2 changed files with 134 additions and 61 deletions
|
@ -21,16 +21,35 @@ require "set"
|
|||
# * libpcre >= 7.6
|
||||
# * libevent >= 1.3 (though older versions may work)
|
||||
#
|
||||
# Feature requirements:
|
||||
#
|
||||
# * Int/float coercion requires >= 1.20110223.*
|
||||
# * In-line pattern definitions >= 1.20110630.*
|
||||
#
|
||||
#
|
||||
# Note:
|
||||
# CentOS 5 ships with an ancient version of pcre that does not work with grok.
|
||||
class LogStash::Filters::Grok < LogStash::Filters::Base
|
||||
config_name "grok"
|
||||
|
||||
# Specify a pattern to parse with.
|
||||
# Specify a pattern to parse with. This will match the '@message' field.
|
||||
# Multiple patterns are fine. Matching stops at the first pattern that matches.
|
||||
config :pattern, :validate => :array, :required => true
|
||||
config :pattern, :validate => :array, :deprecated => true
|
||||
|
||||
# Specify a path to a directory with grok pattern files in it
|
||||
# A hash of matches of field => value
|
||||
config :match, :validate => :hash, :default => {}
|
||||
|
||||
# Any existing field name can be used as a config name here for matching
|
||||
# against.
|
||||
#
|
||||
# # this config:
|
||||
# foo => "some pattern"
|
||||
#
|
||||
# # same as:
|
||||
# match => [ "foo", "some pattern" ]
|
||||
config /[A-Za-z0-9_-]+/, :validate => :string
|
||||
|
||||
#
|
||||
# logstash ships by default with a bunch of patterns, so you don't
|
||||
# necessarily need to define this yourself unless you are adding additional
|
||||
|
@ -43,7 +62,7 @@ class LogStash::Filters::Grok < LogStash::Filters::Base
|
|||
# For example:
|
||||
#
|
||||
# NUMBER \d+
|
||||
config :patterns_dir, :validate => :array
|
||||
config :patterns_dir, :validate => :array, :default => []
|
||||
|
||||
# Drop if matched. Note, this feature may not stay. It is preferable to combine
|
||||
# grok + grep filters to do parsing + dropping.
|
||||
|
@ -67,16 +86,12 @@ class LogStash::Filters::Grok < LogStash::Filters::Base
|
|||
@@patterns_path += val.split(":")
|
||||
end
|
||||
|
||||
@@grokpiles = Hash.new { |h, k| h[k] = [] }
|
||||
@@grokpiles_lock = Mutex.new
|
||||
|
||||
public
|
||||
def register
|
||||
gem "jls-grok", ">=0.4.3"
|
||||
require "grok" # rubygem 'jls-grok'
|
||||
|
||||
@pile = Grok::Pile.new
|
||||
@patterns_dir ||= []
|
||||
@patternfiles = []
|
||||
@patterns_dir += @@patterns_path.to_a
|
||||
@logger.info("Grok patterns path: #{@patterns_dir.join(":")}")
|
||||
@patterns_dir.each do |path|
|
||||
|
@ -92,77 +107,93 @@ class LogStash::Filters::Grok < LogStash::Filters::Base
|
|||
end
|
||||
|
||||
Dir.glob(path).each do |file|
|
||||
@logger.info("Grok loading patterns from #{file}")
|
||||
add_patterns_from_file(file)
|
||||
#@logger.info("Grok loading patterns from #{file}")
|
||||
@patternfiles << file
|
||||
end
|
||||
end
|
||||
|
||||
@pattern.each do |pattern|
|
||||
groks = @pile.compile(pattern)
|
||||
@logger.debug(["Compiled pattern", pattern, groks[-1].expanded_pattern])
|
||||
end
|
||||
@patterns = Hash.new { |h,k| h[k] = [] }
|
||||
|
||||
@@grokpiles_lock.synchronize do
|
||||
@@grokpiles[@type] << @pile
|
||||
end
|
||||
@match["@message"] = []
|
||||
@match["@message"] += @pattern if @pattern # the config 'pattern' value (array)
|
||||
|
||||
# TODO(sissel): Hash.merge actually overrides, not merges arrays.
|
||||
# Work around it by implementing our own?
|
||||
# TODO(sissel): Check if 'match' is empty?
|
||||
@match.merge(@config).each do |field, patterns|
|
||||
# Skip known config names
|
||||
next if ["add_tag", "add_field", "type", "match", "patterns_dir",
|
||||
"drop_if_match", "named_captures_only", "pattern" ].include?(field)
|
||||
if !@patterns.include?(field)
|
||||
@patterns[field] = Grok::Pile.new
|
||||
add_patterns_from_files(@patternfiles, @patterns[field])
|
||||
end
|
||||
patterns.each do |pattern|
|
||||
@logger.debug(["regexp: #{@type}/#{field}", pattern])
|
||||
@patterns[field].compile(pattern)
|
||||
end
|
||||
end # @config.each
|
||||
end # def register
|
||||
|
||||
public
|
||||
def filter(event)
|
||||
# parse it with grok
|
||||
match = false
|
||||
matched = false
|
||||
|
||||
# Only filter events we are configured for
|
||||
if event.type != @type
|
||||
return
|
||||
end
|
||||
|
||||
if @@grokpiles[event.type].length == 0
|
||||
@logger.debug("Skipping grok for event type=#{event.type} (no grokpiles defined)")
|
||||
if @type != event.type
|
||||
@logger.debug("Skipping grok for event type=#{event.type} (wanted '#{@type}')")
|
||||
return
|
||||
end
|
||||
|
||||
if !event.message.is_a?(Array)
|
||||
messages = [event.message]
|
||||
else
|
||||
messages = event.message
|
||||
end
|
||||
|
||||
messages.each do |message|
|
||||
@logger.debug(["Running grok filter", event])
|
||||
|
||||
@@grokpiles[event.type].each do |pile|
|
||||
@logger.debug(["Trying pattern for type #{event.type}", pile])
|
||||
grok, match = @pile.match(message)
|
||||
@logger.debug(["Result", { :grok => grok, :match => match }])
|
||||
break if match
|
||||
@logger.debug(["Running grok filter", event])
|
||||
@patterns.each do |field, pile|
|
||||
if !event[field]
|
||||
@logger.debug(["Skipping match object, field not present", field,
|
||||
event, event[field]])
|
||||
next
|
||||
end
|
||||
|
||||
if match
|
||||
@logger.debug(["Trying pattern for type #{event.type}", pile])
|
||||
(event[field].is_a?(Array) ? event[field] : [event[field]]).each do |fieldvalue|
|
||||
grok, match = pile.match(fieldvalue)
|
||||
next unless match
|
||||
matched = true
|
||||
|
||||
match.each_capture do |key, value|
|
||||
match_type = nil
|
||||
type_coerce = nil
|
||||
if key.include?(":")
|
||||
name, key, match_type = key.split(":")
|
||||
name, key, type_coerce = key.split(":")
|
||||
end
|
||||
|
||||
# http://code.google.com/p/logstash/issues/detail?id=45
|
||||
# Permit typing of captures by giving an additional colon and a type,
|
||||
# like: %{FOO:name:int} for int coercion.
|
||||
case match_type
|
||||
if type_coerce
|
||||
@logger.info("Match type coerce: #{type_coerce}")
|
||||
@logger.info("Patt: #{grok.pattern}")
|
||||
end
|
||||
|
||||
case type_coerce
|
||||
when "int"
|
||||
value = value.to_i
|
||||
when "float"
|
||||
value = value.to_f
|
||||
end
|
||||
|
||||
if event.message == value
|
||||
# Skip patterns that match the entire line
|
||||
if fieldvalue == value and field == "@message"
|
||||
# Skip patterns that match the entire message
|
||||
@logger.debug("Skipping capture '#{key}' since it matches the whole line.")
|
||||
next
|
||||
end
|
||||
|
||||
if @named_captures_only && key.upcase == key
|
||||
@logger.debug("Skipping capture '#{key}' since it is not a named capture and named_captures_only is true.")
|
||||
if @named_captures_only && key =~ /^[A-Z]+/
|
||||
@logger.debug("Skipping capture '#{key}' since it is not a named " \
|
||||
"capture and named_captures_only is true.")
|
||||
next
|
||||
end
|
||||
|
||||
|
@ -177,38 +208,43 @@ class LogStash::Filters::Grok < LogStash::Filters::Base
|
|||
if !value.nil? && (!value.empty? rescue true)
|
||||
event.fields[key] << value
|
||||
end
|
||||
end
|
||||
filter_matched(event)
|
||||
else
|
||||
# Tag this event if we can't parse it. We can use this later to
|
||||
# reparse+reindex logs if we improve the patterns given .
|
||||
event.tags << "_grokparsefailure"
|
||||
end
|
||||
end # message.each
|
||||
end # match.each_capture
|
||||
|
||||
filter_matched(event)
|
||||
end # event[field]
|
||||
end # patterns.each
|
||||
|
||||
if !matched
|
||||
# Tag this event if we can't parse it. We can use this later to
|
||||
# reparse+reindex logs if we improve the patterns given.
|
||||
event.tags << "_grokparsefailure"
|
||||
end
|
||||
|
||||
#if !event.cancelled?
|
||||
#filter_matched(event)
|
||||
#end
|
||||
@logger.debug(["Event now: ", event.to_hash])
|
||||
end # def filter
|
||||
|
||||
private
|
||||
def add_patterns_from_file(file)
|
||||
# Load every pattern file listed in +paths+ into +pile+ by handing each path,
# together with the same pile, to add_patterns_from_file.
def add_patterns_from_files(paths, pile)
|
||||
paths.each { |path| add_patterns_from_file(path, pile) }
|
||||
end
|
||||
|
||||
private
|
||||
# Load grok pattern definitions from +path+ into +pile+.
#
# path - location of a grok patterns file; a path that looks like it points
#        inside a jar (file:/...jar!...) is read line by line here.
# pile - receiver of the patterns; must respond to add_pattern(name, pattern)
#        and add_patterns_from_file(path).
def add_patterns_from_file(path, pile)
  # Check if the file path is a jar, if so, we'll have to read it ourselves
  # since libgrok won't know what to do with it.
  if path =~ /file:\/.*\.jar!.*/
    # Block form so the handle is closed even if a line fails to parse.
    File.open(path) do |file|
      file.each do |line|
        # Skip comment lines and blank lines.
        next if line =~ /^(?:\s*#|\s*$)/
        # In some cases I have seen 'file.each' yield lines with newlines at
        # the end. I don't know if this is a bug or intentional, but we need
        # to chomp it.
        name, pattern = line.chomp.split(/\s+/, 2)
        @logger.debug "Adding pattern '#{name}' from file #{path}"
        @logger.debug name => pattern
        pile.add_pattern(name, pattern)
      end
    end
  else
    pile.add_patterns_from_file(path)
  end
end # def add_patterns_from_file
|
||||
end # class LogStash::Filters::Grok
|
||||
|
|
|
@ -24,7 +24,10 @@ class TestFilterGrok < Test::Unit::TestCase
|
|||
end
|
||||
|
||||
@filter = LogStash::Filters::Grok.new(cfg)
|
||||
p :config => cfg, :id => @filter.object_id
|
||||
p :fizzle => @filter.pattern
|
||||
@filter.register
|
||||
#p :newfilter => @filter
|
||||
end
|
||||
|
||||
def test_grok_normal
|
||||
|
@ -110,8 +113,12 @@ class TestFilterGrok < Test::Unit::TestCase
|
|||
event.message = "#{expect}"
|
||||
|
||||
@filter.filter(event)
|
||||
assert_equal(expect.class, event.fields["foo"].first.class, "Expected field 'foo' to be of type #{expect.class.name} but got #{event.fields["foo"].first.class.name}")
|
||||
assert_equal([expect], event.fields["foo"], "Expected field 'foo' to be [#{expect.inspect}], is #{event.fields["expect"].inspect}")
|
||||
assert_equal(expect.class, event.fields["foo"].first.class,
|
||||
"Expected field 'foo' to be of type #{expect.class.name} " \
|
||||
"but got #{event.fields["foo"].first.class.name}")
|
||||
assert_equal([expect], event.fields["foo"],
|
||||
"Expected field 'foo' to be [#{expect.inspect}], is " \
|
||||
"#{event.fields["expect"].inspect}")
|
||||
end # def test_grok_type_hinting_int
|
||||
|
||||
def test_grok_type_hinting_float
|
||||
|
@ -128,4 +135,34 @@ class TestFilterGrok < Test::Unit::TestCase
|
|||
assert_equal(expect.class, event.fields["foo"].first.class, "Expected field 'foo' to be of type #{expect.class.name} but got #{event.fields["foo"].first.class.name}")
|
||||
assert_equal([expect], event.fields["foo"], "Expected field 'foo' to be [#{expect.inspect}], is #{event.fields["expect"].inspect}")
|
||||
end # def test_grok_type_hinting_float
|
||||
|
||||
# Verify that an inline pattern definition (%{FIZZLE=\d+}) supplied through
# the 'pattern' setting is matched against the event message and that the
# capture is stored, as a String, under the field named 'FIZZLE'.
def test_grok_inline_define
  test_name "grok_inline_define"
  config "pattern" => [ "%{FIZZLE=\\d+}" ]

  event = LogStash::Event.new
  event.type = @typename

  expect = "1234"
  event.message = "hello #{expect}"

  @filter.filter(event)
  assert_equal(expect.class, event.fields["FIZZLE"].first.class,
               "Expected field 'FIZZLE' to be of type #{expect.class.name} " \
               "but got #{event.fields["FIZZLE"].first.class.name}")
  # Report the field actually under test in the failure message.
  assert_equal([expect], event.fields["FIZZLE"],
               "Expected field 'FIZZLE' to be [#{expect.inspect}], is " \
               "#{event.fields["FIZZLE"].inspect}")
end # def test_grok_inline_define
|
||||
|
||||
# Verify that a config attribute named after an event field ('rum') is
# matched against that field's value, and that the inline-defined capture is
# stored, as a String, under the field named 'FIZZLE'.
def test_grok_field_name_attribute
  test_name "grok_field_name_attribute"
  config "rum" => [ "%{FIZZLE=\\d+}" ]

  event = LogStash::Event.new
  event.type = @typename

  expect = "1234"
  event.fields["rum"] = "hello #{expect}"

  @filter.filter(event)
  assert_equal(expect.class, event.fields["FIZZLE"].first.class,
               "Expected field 'FIZZLE' to be of type #{expect.class.name} " \
               "but got #{event.fields["FIZZLE"].first.class.name}")
  # Report the field actually under test in the failure message.
  assert_equal([expect], event.fields["FIZZLE"],
               "Expected field 'FIZZLE' to be [#{expect.inspect}], is " \
               "#{event.fields["FIZZLE"].inspect}")
end # def test_grok_field_name_attribute
|
||||
end
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue