diff --git a/Gemfile b/Gemfile
index 2e74234e4..32a8aff91 100755
--- a/Gemfile
+++ b/Gemfile
@@ -13,6 +13,7 @@ gem "json" # Ruby license
 #gem "awesome_print" # MIT License
 gem "jruby-openssl", :platforms => :jruby # For enabling SSL support, CPL/GPL 2.0
 gem "mail" #outputs/email, # License: MIT License
+gem "xml-simple" # unknown license: http://xml-simple.rubyforge.org/

 gem "minitest" # License: Ruby
 gem "rack" # License: MIT
diff --git a/lib/logstash/filters/xml.rb b/lib/logstash/filters/xml.rb
new file mode 100644
index 000000000..f6693f3ef
--- /dev/null
+++ b/lib/logstash/filters/xml.rb
@@ -0,0 +1,152 @@
+require "logstash/filters/base"
+require "logstash/namespace"
+require "xmlsimple"
+require "rexml/document"
+
+# XML filter. Takes a field that contains XML and expands it into
+# an actual datastructure.
+class LogStash::Filters::Xml < LogStash::Filters::Base
+
+  config_name "xml"
+  plugin_status "experimental"
+
+  # Config for xml to hash is:
+  #
+  #     source => dest
+  #
+  # XML in the value of the source field will be expanded into a
+  # datastructure in the "dest" field. Note: if the "dest" field
+  # already exists, it will be overridden.
+  config /[A-Za-z0-9_-]+/, :validate => :string
+
+  # xpath will additionally select string values (.to_s on whatever is selected)
+  # from parsed XML (using each source field defined using the method above)
+  # and place those values in the destination fields. Configuration:
+  #
+  #     xpath => [ "xpath-syntax", "destination-field" ]
+  #
+  # Values returned by XPath parsing from xpath-syntax will be put in the
+  # destination field. Multiple values returned will be pushed onto the
+  # destination field as an array. As such, multiple matches across
+  # multiple source fields will produce duplicate entries in the field.
+  #
+  # More on xpath: http://www.w3schools.com/xpath/
+  #
+  # The xpath functions are particularly powerful:
+  # http://www.w3schools.com/xpath/xpath_functions.asp
+  #
+  config :xpath, :validate => :hash, :default => {}
+
+  # By default the filter will store the whole parsed xml in the destination
+  # field as described above. Setting this to false will prevent that.
+  config :store_xml, :validate => :boolean, :default => true
+
+  # Collects every "source => dest" pair from the plugin configuration into
+  # @xml, skipping the names in RESERVED and this filter's own "xpath" and
+  # "store_xml" settings.
+  public
+  def register
+    non_source_keys = RESERVED + ["xpath", "store_xml"]
+
+    @xml = {}
+    @config.each do |field, dest|
+      next if non_source_keys.member?(field)
+
+      @xml[field] = dest
+    end
+  end # def register
+
+  # Expands the XML found in each configured source field of the event.
+  #
+  # For every "source => dest" pair the raw value is parsed with REXML, the
+  # configured xpath expressions are evaluated against it and, when
+  # store_xml is true, the XmlSimple result is written to "dest". A parse
+  # failure tags the event with "_xmlparsefailure" and skips that field.
+  public
+  def filter(event)
+    return unless filter?(event)
+
+    matched = false
+
+    @logger.debug("Running xml filter", :event => event)
+
+    @xml.each do |key, dest|
+      next unless event.fields[key]
+
+      if event.fields[key].is_a?(String)
+        event.fields[key] = [event.fields[key]]
+      end
+
+      if event.fields[key].length > 1
+        @logger.warn("XML filter only works on fields of length 1",
+                     :key => key, :value => event.fields[key])
+        next
+      end
+
+      raw = event.fields[key].first
+
+      # an empty field has no first element; and for some reason, an empty
+      # string is considered valid XML
+      next if raw.nil? || raw.strip.empty?
+
+      # Always parse with REXML, even when no xpath is configured (@xpath
+      # defaults to {}), so input REXML cannot parse is tagged up front.
+      begin
+        doc = REXML::Document.new(raw)
+      rescue => e
+        parse_failure(event, "REXML::Document", key, raw, e)
+        next
+      end
+
+      matched = true if apply_xpath(event, doc)
+
+      # NOTE(review): XmlSimple.xml_in treats a string without markup as a
+      # file name; presumably the REXML parse above filters such input out,
+      # confirm for the REXML versions in use.
+      if @store_xml
+        begin
+          event[dest] = XmlSimple.xml_in(raw)
+          matched = true
+        rescue => e
+          parse_failure(event, "XmlSimple", key, raw, e)
+          next
+        end
+      end
+    end
+
+    # Report the match once per event; "matched" is shared by all source
+    # fields, so doing this inside the loop would repeat it for later fields.
+    filter_matched(event) if matched
+
+    @logger.debug("Event after xml filter", :event => event)
+  end # def filter
+
+  # Evaluates every configured xpath expression against doc and appends the
+  # string form of each result to its destination field. Returns true when
+  # at least one value was stored.
+  private
+  def apply_xpath(event, doc)
+    stored = false
+
+    @xpath.each do |xpath_src, xpath_dest|
+      REXML::XPath.each(doc, xpath_src) do |value|
+        # some XPath functions return empty arrays as string
+        next if value.nil? || (value.is_a?(Array) && value.empty?)
+
+        event[xpath_dest] ||= []
+        event[xpath_dest] << value.to_s
+        stored = true
+      end
+    end
+
+    stored
+  end # def apply_xpath
+
+  # Tags the event with "_xmlparsefailure" and logs the parser error.
+  private
+  def parse_failure(event, parser, key, raw, error)
+    event.tags << "_xmlparsefailure"
+    @logger.warn("Trouble parsing xml with #{parser}", :key => key, :raw => raw,
+                 :exception => error, :backtrace => error.backtrace)
+  end # def parse_failure
+end # class LogStash::Filters::Xml
diff --git a/test/logstash/filters/test_xml.rb b/test/logstash/filters/test_xml.rb
new file mode 100644
index 000000000..aba2b2fae
--- /dev/null
+++ b/test/logstash/filters/test_xml.rb
@@ -0,0 +1,106 @@
+require "rubygems"
+require File.join(File.dirname(__FILE__), "..", "minitest")
+
+require "logstash/loadlibs"
+require "logstash"
+require "logstash/filters"
+require "logstash/filters/xml"
+require "logstash/event"
+
+# NOTE(review): the XML literals below (and the whole tail of the "bad xml"
+# test) were missing from the copy of this patch under review. They are
+# reconstructed from the asserted results; confirm against the original commit.
+describe LogStash::Filters::Xml do
+  before do
+    @filter = LogStash::Filters.from_name("xml", {})
+    @typename = "xml"
+  end
+
+  def config(cfg)
+    cfg["type"] = @typename
+    cfg.each_key do |key|
+      if cfg[key].is_a?(String)
+        cfg[key] = [cfg[key]]
+      end
+    end
+
+    @filter = LogStash::Filters::Xml.new(cfg)
+    @filter.register
+  end # def config
+
+  test "parse standard xml" do
+    config "raw" => "data"
+
+    event = LogStash::Event.new
+    event.type = @typename
+    event["raw"] = '<foo key="value"/>'
+    @filter.filter(event)
+    assert_equal({"key" => "value"}, event["data"])
+  end # parse standard xml
+
+  test "parse xml but do not store" do
+    config "raw" => "data",
+           "store_xml" => "false"
+
+    event = LogStash::Event.new
+    event.type = @typename
+    event["raw"] = '<foo key="value"/>'
+    @filter.filter(event)
+    assert_nil(event["data"])
+  end # parse xml but do not store
+
+  test "parse xml with array as a value" do
+    config "raw" => "data"
+
+    event = LogStash::Event.new
+    event.type = @typename
+    event["raw"] = '<foo><key>value1</key><key>value2</key></foo>'
+    @filter.filter(event)
+    assert_equal({"key" => ["value1", "value2"]}, event["data"])
+  end # parse xml with array as a value
+
+  test "parse xml with hash as a value" do
+    config "raw" => "data"
+
+    event = LogStash::Event.new
+    event.type = @typename
+    event["raw"] = '<foo><key1><key2>value</key2></key1></foo>'
+    @filter.filter(event)
+    assert_equal({"key1" => [{"key2" => ["value"]}]}, event["data"])
+  end # parse xml with hash as a value
+
+  test "bad xml" do
+    config "raw" => "data"
+
+    event = LogStash::Event.new
+    event.type = @typename
+    event["raw"] = '<foo /'
+    @filter.filter(event)
+    assert(event.tags.include?("_xmlparsefailure"))
+  end # bad xml
+
+  test "parse xml and store single value with xpath" do
+    config "raw" => "data",
+           "xpath" => [ "/foo/key/text()", "xpath_field" ]
+
+    event = LogStash::Event.new
+    event.type = @typename
+    event["raw"] = '<foo><key>value</key></foo>'
+    @filter.filter(event)
+    assert_equal(1, event["xpath_field"].length)
+    assert_equal(["value"], event["xpath_field"])
+  end # parse xml and store single value with xpath
+
+  test "parse xml and store mulitple values with xpath" do
+    config "raw" => "data",
+           "xpath" => [ "/foo/key/text()", "xpath_field" ]
+
+    event = LogStash::Event.new
+    event.type = @typename
+    event["raw"] = '<foo><key>value1</key><key>value2</key></foo>'
+    @filter.filter(event)
+    assert_equal(2, event["xpath_field"].length)
+    assert_equal(["value1","value2"], event["xpath_field"])
+  end # parse xml and store mulitple values with xpath
+
+end # Test 'xml' filter