filter to prune fields from events

This commit is contained in:
Piavlo 2013-04-11 15:50:26 +03:00 committed by Jordan Sissel
parent 301c8c091a
commit 42935e78c8
2 changed files with 436 additions and 0 deletions

View file

@ -0,0 +1,114 @@
require "logstash/filters/base"
require "logstash/namespace"
# The prune filter is for pruning event data from @fields based on whitelist/blacklist
# of field names or their values (names and values can also be regular expressions).
#
# Fields are selected for removal in four passes (name whitelist, name blacklist,
# value whitelist, value blacklist); every field selected by any pass is removed
# once all passes have run.
class LogStash::Filters::Prune < LogStash::Filters::Base
  config_name "prune"
  plugin_status "experimental"

  # Trigger whether configuration fields and values should be interpolated for dynamic values.
  # Probably adds some performance overhead. Defaults to false.
  config :interpolate, :validate => :boolean, :default => false

  # Include fields only if their names match the specified regexps. Defaults to an empty list, which means include everything.
  #
  #     filter {
  #       %PLUGIN% {
  #         tags => [ "apache-accesslog" ]
  #         whitelist_names => [ "method", "(referrer|status)", "%{some}_field" ]
  #       }
  #     }
  config :whitelist_names, :validate => :array, :default => []

  # Exclude fields whose names match the specified regexps. By default, exclude unresolved %{field} strings.
  #
  #     filter {
  #       %PLUGIN% {
  #         tags => [ "apache-accesslog" ]
  #         blacklist_names => [ "method", "(referrer|status)", "%{some}_field" ]
  #       }
  #     }
  config :blacklist_names, :validate => :array, :default => [ "%\{[^}]+\}" ]

  # Include specified fields only if their values match regexps.
  #
  #     filter {
  #       %PLUGIN% {
  #         tags => [ "apache-accesslog" ]
  #         whitelist_values => [ "uripath", "/index.php", "method", "(GET|POST)", "status", "^[^2]" ]
  #       }
  #     }
  config :whitelist_values, :validate => :hash, :default => {}

  # Exclude specified fields if their values match regexps.
  #
  #     filter {
  #       %PLUGIN% {
  #         tags => [ "apache-accesslog" ]
  #         blacklist_values => [ "uripath", "/index.php", "method", "(HEAD|OPTIONS)", "status", "^[^2]" ]
  #       }
  #     }
  config :blacklist_values, :validate => :hash, :default => {}

  # Precompiles every configured pattern when interpolation is disabled.
  # With interpolation enabled nothing is compiled here, because the patterns
  # depend on each event and are built inside #filter instead.
  # Raises RegexpError if a configured pattern is not a valid regular expression.
  public
  def register
    return if @interpolate

    @whitelist_names_regexp = Regexp.union(@whitelist_names.map { |pattern| Regexp.new(pattern) })
    @blacklist_names_regexp = Regexp.union(@blacklist_names.map { |pattern| Regexp.new(pattern) })

    # Replace the pattern strings with compiled regexps in place. Only the
    # values of existing keys are reassigned, which is safe while iterating.
    [@whitelist_values, @blacklist_values].each do |rules|
      rules.each { |field, pattern| rules[field] = Regexp.new(pattern) }
    end
  end # def register

  # Removes from the event every field selected by the configured whitelists
  # and blacklists, then marks the event as matched.
  public
  def filter(event)
    return unless filter?(event)

    # Collect the fields to remove first and remove them only at the end.
    # In interpolation mode, removing a field early would break later patterns
    # whose dynamic values refer to that field.
    fields_to_remove = []

    unless @whitelist_names.empty?
      whitelist = names_regexp(@whitelist_names, @whitelist_names_regexp, event)
      fields_to_remove.concat(event.fields.keys.reject { |field| field =~ whitelist })
    end

    unless @blacklist_names.empty?
      blacklist = names_regexp(@blacklist_names, @blacklist_names_regexp, event)
      fields_to_remove.concat(event.fields.keys.select { |field| field =~ blacklist })
    end

    @whitelist_values.each do |name, pattern|
      field, regexp = value_rule(name, pattern, event)
      value = event.fields[field]
      fields_to_remove << field if value && !value_matches?(value, regexp)
    end

    @blacklist_values.each do |name, pattern|
      field, regexp = value_rule(name, pattern, event)
      value = event.fields[field]
      fields_to_remove << field if value && value_matches?(value, regexp)
    end

    # A field may have been selected by more than one pass; remove it once.
    fields_to_remove.uniq.each do |field|
      event.remove(field)
    end

    filter_matched(event)
  end # def filter

  private

  # Returns the regexp to test field names against: the one precompiled in
  # #register, or, in interpolation mode, a union built from this event.
  # The per-event regexp is returned rather than stored on the instance so
  # that no per-event state is kept on the plugin object.
  # NOTE(review): this matters if one filter instance can be used by several
  # worker threads at once - confirm against the pipeline implementation.
  # NOTE(review): interpolated event data is compiled as-is, so regexp
  # metacharacters in field values are not escaped and may raise RegexpError.
  def names_regexp(patterns, precompiled, event)
    return precompiled unless @interpolate

    Regexp.union(patterns.map { |pattern| Regexp.new(event.sprintf(pattern)) })
  end # def names_regexp

  # Returns the [field name, regexp] pair for one value rule, interpolating
  # both parts against the event when interpolation is enabled. Without
  # interpolation the pattern is already the regexp compiled in #register.
  def value_rule(name, pattern, event)
    return [name, pattern] unless @interpolate

    [event.sprintf(name), Regexp.new(event.sprintf(pattern))]
  end # def value_rule

  # True when the field value matches the regexp. Non-string scalars are
  # compared through their string form instead of raising NoMethodError, and
  # an array value matches when at least one of its elements matches.
  def value_matches?(value, regexp)
    candidates = value.is_a?(Array) ? value : [value]
    candidates.any? { |candidate| candidate.to_s =~ regexp }
  end # def value_matches?
end # class LogStash::Filters::Prune

322
spec/filters/prune.rb Normal file
View file

@ -0,0 +1,322 @@
require "test_utils"
require "logstash/filters/prune"
# Specs for the prune filter. Every scenario feeds the same sample event
# through a different prune configuration and checks which fields survive.
# Scenarios come in pairs: one without interpolation, where "%{...}" in the
# config is used literally as part of a pattern, and one with
# interpolate => true, where "%{...}" is first resolved against the event.
describe LogStash::Filters::Prune do
extend LogStash::RSpec
# With no options only the default blacklist_names pattern applies, so the
# field literally named "%{hmm}" is dropped and everything else is kept.
describe "defaults" do
config <<-CONFIG
filter {
prune { }
}
CONFIG
sample "@fields" => {
"firstname" => "Borat",
"lastname" => "Sagdiyev",
"fullname" => "Borat Sagdiyev",
"country" => "Kazakhstan",
"location" => "Somethere in Kazakhstan",
"hobby" => "Cloud",
"status" => "200",
"Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
"%{hmm}" => "doh"
} do
insist { subject["firstname"] } == "Borat"
insist { subject["lastname"] } == "Sagdiyev"
insist { subject["fullname"] } == "Borat Sagdiyev"
insist { subject["country"] } == "Kazakhstan"
insist { subject["location"] } == "Somethere in Kazakhstan"
insist { subject["hobby"] } == "Cloud"
insist { subject["status"] } == "200"
insist { subject["Borat_saying"] } == "Cloud is not ready for enterprise if is not integrate with single server running Active Directory."
insist { subject["%{hmm}"] } == nil
end
end
# Name whitelist without interpolation: only fields whose names match one of
# the patterns are kept. "%{firstname}_saying" is not resolved, so
# "Borat_saying" is expected to be removed.
describe "whitelist field names" do
config <<-CONFIG
filter {
prune {
whitelist_names => [ "firstname", "(hobby|status)", "%{firstname}_saying" ]
}
}
CONFIG
sample "@fields" => {
"firstname" => "Borat",
"lastname" => "Sagdiyev",
"fullname" => "Borat Sagdiyev",
"country" => "Kazakhstan",
"location" => "Somethere in Kazakhstan",
"hobby" => "Cloud",
"status" => "200",
"Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
"%{hmm}" => "doh"
} do
insist { subject["firstname"] } == "Borat"
insist { subject["lastname"] } == nil
insist { subject["fullname"] } == nil
insist { subject["country"] } == nil
insist { subject["location"] } == nil
insist { subject["hobby"] } == "Cloud"
insist { subject["status"] } == "200"
insist { subject["Borat_saying"] } == nil
insist { subject["%{hmm}"] } == nil
end
end
# Name whitelist with interpolation: "%{firstname}_saying" resolves to
# "Borat_saying", so that field is expected to be kept as well.
# NOTE(review): the label says "aggregator" but this scenario exercises
# interpolate => true; the sibling scenarios are labelled "... interpolate".
describe "whitelist field names aggregator" do
config <<-CONFIG
filter {
prune {
whitelist_names => [ "firstname", "(hobby|status)", "%{firstname}_saying" ]
interpolate => true
}
}
CONFIG
sample "@fields" => {
"firstname" => "Borat",
"lastname" => "Sagdiyev",
"fullname" => "Borat Sagdiyev",
"country" => "Kazakhstan",
"location" => "Somethere in Kazakhstan",
"hobby" => "Cloud",
"status" => "200",
"Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
"%{hmm}" => "doh"
} do
insist { subject["firstname"] } == "Borat"
insist { subject["lastname"] } == nil
insist { subject["fullname"] } == nil
insist { subject["country"] } == nil
insist { subject["location"] } == nil
insist { subject["hobby"] } == "Cloud"
insist { subject["status"] } == "200"
insist { subject["Borat_saying"] } == "Cloud is not ready for enterprise if is not integrate with single server running Active Directory."
insist { subject["%{hmm}"] } == nil
end
end
# Name blacklist without interpolation: fields whose names match are removed.
# Setting blacklist_names explicitly replaces the default pattern, so the
# "%{hmm}" field is expected to survive here.
describe "blacklist field names" do
config <<-CONFIG
filter {
prune {
blacklist_names => [ "firstname", "(hobby|status)", "%{firstname}_saying" ]
}
}
CONFIG
sample "@fields" => {
"firstname" => "Borat",
"lastname" => "Sagdiyev",
"fullname" => "Borat Sagdiyev",
"country" => "Kazakhstan",
"location" => "Somethere in Kazakhstan",
"hobby" => "Cloud",
"status" => "200",
"Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
"%{hmm}" => "doh"
} do
insist { subject["firstname"] } == nil
insist { subject["lastname"] } == "Sagdiyev"
insist { subject["fullname"] } == "Borat Sagdiyev"
insist { subject["country"] } == "Kazakhstan"
insist { subject["location"] } == "Somethere in Kazakhstan"
insist { subject["hobby"] } == nil
insist { subject["status"] } == nil
insist { subject["Borat_saying"] } == "Cloud is not ready for enterprise if is not integrate with single server running Active Directory."
insist { subject["%{hmm}"] } == "doh"
end
end
# Name blacklist with interpolation: "%{firstname}_saying" resolves to
# "Borat_saying", so that field is expected to be removed too.
describe "blacklist field names interpolate" do
config <<-CONFIG
filter {
prune {
blacklist_names => [ "firstname", "(hobby|status)", "%{firstname}_saying" ]
interpolate => true
}
}
CONFIG
sample "@fields" => {
"firstname" => "Borat",
"lastname" => "Sagdiyev",
"fullname" => "Borat Sagdiyev",
"country" => "Kazakhstan",
"location" => "Somethere in Kazakhstan",
"hobby" => "Cloud",
"status" => "200",
"Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
"%{hmm}" => "doh"
} do
insist { subject["firstname"] } == nil
insist { subject["lastname"] } == "Sagdiyev"
insist { subject["fullname"] } == "Borat Sagdiyev"
insist { subject["country"] } == "Kazakhstan"
insist { subject["location"] } == "Somethere in Kazakhstan"
insist { subject["hobby"] } == nil
insist { subject["status"] } == nil
insist { subject["Borat_saying"] } == nil
insist { subject["%{hmm}"] } == "doh"
end
end
# Value whitelist without interpolation: a listed field is removed when its
# value does not match its pattern. "%{firstname} Sagdiyev" is used literally,
# so "fullname" is expected to be removed; the "%{firstname}_saying" key is
# not resolved, so "Borat_saying" is left alone. "%{hmm}" is still dropped by
# the default blacklist_names pattern.
describe "whitelist field values" do
config <<-CONFIG
filter {
prune {
whitelist_values => [ "firstname", "^Borat$",
"fullname", "%{firstname} Sagdiyev",
"location", "no no no",
"status", "^2",
"%{firstname}_saying", "%{hobby}.*Active" ]
}
}
CONFIG
sample "@fields" => {
"firstname" => "Borat",
"lastname" => "Sagdiyev",
"fullname" => "Borat Sagdiyev",
"country" => "Kazakhstan",
"location" => "Somethere in Kazakhstan",
"hobby" => "Cloud",
"status" => "200",
"Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
"%{hmm}" => "doh"
} do
insist { subject["firstname"] } == "Borat"
insist { subject["lastname"] } == "Sagdiyev"
insist { subject["fullname"] } == nil
insist { subject["country"] } == "Kazakhstan"
insist { subject["location"] } == nil
insist { subject["hobby"] } == "Cloud"
insist { subject["status"] } == "200"
insist { subject["Borat_saying"] } == "Cloud is not ready for enterprise if is not integrate with single server running Active Directory."
insist { subject["%{hmm}"] } == nil
end
end
# Value whitelist with interpolation: the patterns resolve to
# "Borat Sagdiyev" and "Cloud.*Active", so "fullname" and "Borat_saying" both
# match and are kept; only "location" fails its pattern.
describe "whitelist field values interpolate" do
config <<-CONFIG
filter {
prune {
whitelist_values => [ "firstname", "^Borat$",
"fullname", "%{firstname} Sagdiyev",
"location", "no no no",
"status", "^2",
"%{firstname}_saying", "%{hobby}.*Active" ]
interpolate => true
}
}
CONFIG
sample "@fields" => {
"firstname" => "Borat",
"lastname" => "Sagdiyev",
"fullname" => "Borat Sagdiyev",
"country" => "Kazakhstan",
"location" => "Somethere in Kazakhstan",
"hobby" => "Cloud",
"status" => "200",
"Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
"%{hmm}" => "doh"
} do
insist { subject["firstname"] } == "Borat"
insist { subject["lastname"] } == "Sagdiyev"
insist { subject["fullname"] } == "Borat Sagdiyev"
insist { subject["country"] } == "Kazakhstan"
insist { subject["location"] } == nil
insist { subject["hobby"] } == "Cloud"
insist { subject["status"] } == "200"
insist { subject["Borat_saying"] } == "Cloud is not ready for enterprise if is not integrate with single server running Active Directory."
insist { subject["%{hmm}"] } == nil
end
end
# Value blacklist without interpolation: a listed field is removed when its
# value matches its pattern, so "firstname" and "status" go. The literal
# "%{firstname} Sagdiyev" pattern does not match, so "fullname" stays.
describe "blacklist field values" do
config <<-CONFIG
filter {
prune {
blacklist_values => [ "firstname", "^Borat$",
"fullname", "%{firstname} Sagdiyev",
"location", "no no no",
"status", "^2",
"%{firstname}_saying", "%{hobby}.*Active" ]
}
}
CONFIG
sample "@fields" => {
"firstname" => "Borat",
"lastname" => "Sagdiyev",
"fullname" => "Borat Sagdiyev",
"country" => "Kazakhstan",
"location" => "Somethere in Kazakhstan",
"hobby" => "Cloud",
"status" => "200",
"Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
"%{hmm}" => "doh"
} do
insist { subject["firstname"] } == nil
insist { subject["lastname"] } == "Sagdiyev"
insist { subject["fullname"] } == "Borat Sagdiyev"
insist { subject["country"] } == "Kazakhstan"
insist { subject["location"] } == "Somethere in Kazakhstan"
insist { subject["hobby"] } == "Cloud"
insist { subject["status"] } == nil
insist { subject["Borat_saying"] } == "Cloud is not ready for enterprise if is not integrate with single server running Active Directory."
insist { subject["%{hmm}"] } == nil
end
end
# Value blacklist with interpolation: the resolved patterns now also match
# "fullname" and "Borat_saying", so both are expected to be removed in
# addition to "firstname" and "status".
describe "blacklist field values interpolate" do
config <<-CONFIG
filter {
prune {
blacklist_values => [ "firstname", "^Borat$",
"fullname", "%{firstname} Sagdiyev",
"location", "no no no",
"status", "^2",
"%{firstname}_saying", "%{hobby}.*Active" ]
interpolate => true
}
}
CONFIG
sample "@fields" => {
"firstname" => "Borat",
"lastname" => "Sagdiyev",
"fullname" => "Borat Sagdiyev",
"country" => "Kazakhstan",
"location" => "Somethere in Kazakhstan",
"hobby" => "Cloud",
"status" => "200",
"Borat_saying" => "Cloud is not ready for enterprise if is not integrate with single server running Active Directory.",
"%{hmm}" => "doh"
} do
insist { subject["firstname"] } == nil
insist { subject["lastname"] } == "Sagdiyev"
insist { subject["fullname"] } == nil
insist { subject["country"] } == "Kazakhstan"
insist { subject["location"] } == "Somethere in Kazakhstan"
insist { subject["hobby"] } == "Cloud"
insist { subject["status"] } == nil
insist { subject["Borat_saying"] } == nil
insist { subject["%{hmm}"] } == nil
end
end
end