Initial commit on anonymizing filter

Allows for selecting different algorithms
This commit is contained in:
Richard Pijnenburg 2012-12-20 02:05:25 +01:00
parent f7eb132544
commit 141f6fd17b
2 changed files with 214 additions and 0 deletions

View file

@ -0,0 +1,63 @@
require "logstash/filters/base"
require "logstash/namespace"
# Anonymize fields using by replacing values with a consistent hash.
class LogStash::Filters::Anonymize < LogStash::Filters::Base
config_name "anonymize"
plugin_status "experimental"
# The fields to be anonymized
config :fields, :validate => :array, :required => true
# Hashing key
config :key, :validate => :string, :required => true
# digest type
config :algorithm, :validate => ['SHA', 'SHA1', 'SHA224', 'SHA256', 'SHA384', 'SHA512', 'MD4', 'MD5'], :required => true, :default => 'SHA1'
public
def register
# require any library
require 'openssl'
end # def register
public
def filter(event)
return unless filter?(event)
@fields.each do |field|
event[field] = anonymize(event[field])
end
end # def filter
private
def anonymize(data)
digest = algorithm()
OpenSSL::HMAC.hexdigest(digest, @key, data)
end
private
def algorithm
case @algorithm
when 'SHA'
return OpenSSL::Digest::SHA.new
when 'SHA1'
return OpenSSL::Digest::SHA1.new
when 'SHA224'
return OpenSSL::Digest::SHA224.new
when 'SHA256'
return OpenSSL::Digest::SHA256.new
when 'SHA384'
return OpenSSL::Digest::SHA384.new
when 'SHA512'
return OpenSSL::Digest::SHA512.new
when 'MD4'
return OpenSSL::Digest::MD4.new
when 'MD5'
return OpenSSL::Digest::MD5.new
else
@logger.error("Unknown algorithm")
end
end
end # class LogStash::Filters::Anonymize

151
spec/filters/anonymize.rb Normal file
View file

@ -0,0 +1,151 @@
require "test_utils"
require "logstash/filters/anonymize"
describe LogStash::Filters::Anonymize do
extend LogStash::RSpec
describe "anonymize string with SHA alogrithm" do
# The logstash config goes here.
# At this time, only filters are supported.
config <<-CONFIG
filter {
anonymize {
fields => ["clientip"]
key => "longencryptionkey"
algorithm => 'SHA'
}
}
CONFIG
sample "@fields" => {"clientip" => "123.123.123.123"} do
insist { subject["clientip"] } == "0d01b2191194d261fa1a2e7c18a38d44953ab4e2"
end
end
describe "anonymize string with SHA1 alogrithm" do
# The logstash config goes here.
# At this time, only filters are supported.
config <<-CONFIG
filter {
anonymize {
fields => ["clientip"]
key => "longencryptionkey"
algorithm => 'SHA1'
}
}
CONFIG
sample "@fields" => {"clientip" => "123.123.123.123"} do
insist { subject["clientip"] } == "fdc60acc4773dc5ac569ffb78fcb93c9630797f4"
end
end
describe "anonymize string with SHA224 alogrithm" do
# The logstash config goes here.
# At this time, only filters are supported.
config <<-CONFIG
filter {
anonymize {
fields => ["clientip"]
key => "longencryptionkey"
algorithm => 'SHA224'
}
}
CONFIG
sample "@fields" => {"clientip" => "123.123.123.123"} do
insist { subject["clientip"] } == "5744bbcc4f64acb6a805b7fee3013a8958cc8782d3fb0fb318cec915"
end
end
describe "anonymize string with SHA256 alogrithm" do
# The logstash config goes here.
# At this time, only filters are supported.
config <<-CONFIG
filter {
anonymize {
fields => ["clientip"]
key => "longencryptionkey"
algorithm => 'SHA256'
}
}
CONFIG
sample "@fields" => {"clientip" => "123.123.123.123"} do
insist { subject["clientip"] } == "345bec3eff242d53b568916c2610b3e393d885d6b96d643f38494fd74bf4a9ca"
end
end
describe "anonymize string with SHA384 alogrithm" do
# The logstash config goes here.
# At this time, only filters are supported.
config <<-CONFIG
filter {
anonymize {
fields => ["clientip"]
key => "longencryptionkey"
algorithm => 'SHA384'
}
}
CONFIG
sample "@fields" => {"clientip" => "123.123.123.123"} do
insist { subject["clientip"] } == "22d4c0e8c4fbcdc4887d2038fca7650f0e2e0e2457ff41c06eb2a980dded6749561c814fe182aff93e2538d18593947a"
end
end
describe "anonymize string with SHA512 alogrithm" do
# The logstash config goes here.
# At this time, only filters are supported.
config <<-CONFIG
filter {
anonymize {
fields => ["clientip"]
key => "longencryptionkey"
algorithm => 'SHA512'
}
}
CONFIG
sample "@fields" => {"clientip" => "123.123.123.123"} do
insist { subject["clientip"] } == "11c19b326936c08d6c50a3c847d883e5a1362e6a64dd55201a25f2c1ac1b673f7d8bf15b8f112a4978276d573275e3b14166e17246f670c2a539401c5bfdace8"
end
end
describe "anonymize string with MD4 alogrithm" do
# The logstash config goes here.
# At this time, only filters are supported.
config <<-CONFIG
filter {
anonymize {
fields => ["clientip"]
key => "longencryptionkey"
algorithm => 'MD4'
}
}
CONFIG
sample "@fields" => {"clientip" => "123.123.123.123"} do
insist { subject["clientip"] } == "0845cb571ab3646e51a07bcabf05e33d"
end
end
describe "anonymize string with MD5 alogrithm" do
# The logstash config goes here.
# At this time, only filters are supported.
config <<-CONFIG
filter {
anonymize {
fields => ["clientip"]
key => "longencryptionkey"
algorithm => 'MD5'
}
}
CONFIG
sample "@fields" => {"clientip" => "123.123.123.123"} do
insist { subject["clientip"] } == "9336c879e305c9604a3843fc3e75948f"
end
end
end