diff --git a/CHANGELOG b/CHANGELOG index ca77cd8c3..26155be3e 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -16,6 +16,8 @@ warn you if you use it. (LOGSTASH-803) - feature: grok: Adds tag_on_failure setting so you can prevent grok from tagging events on failure. (#328, patch by Neil Prosser) + - new: uaparser: parses user agent strings in to structured data based on + BrowserScope data (#347, patch by Dan Everton) ## outputs - fix bug in mongo output that would fail to load bson_java support diff --git a/Makefile b/Makefile index 1e95017bb..7744a431a 100644 --- a/Makefile +++ b/Makefile @@ -208,6 +208,7 @@ build/flatgems: | build vendor/bundle @# all the gem specs. rsync -av $(VENDOR_DIR)/gems/jruby-openssl-*/lib/shared/jopenssl.jar $@/lib rsync -av $(VENDOR_DIR)/gems/sys-uname-*/lib/unix/ $@/lib + rsync -av $(VENDOR_DIR)/gems/user_agent_parser-*/vendor/ua-parser $@/vendor flatjar-test: GEM_HOME= GEM_PATH= java -jar build/logstash-$(VERSION)-flatjar.jar rspec $(TESTS) @@ -227,7 +228,7 @@ flatjar: build/logstash-$(VERSION)-flatjar.jar build/jar: | build build/flatgems build/monolith $(QUIET)mkdir build/jar $(QUIET)rsync -av --delete build/flatgems/lib/ build/monolith/ build/ruby/ patterns build/jar/ - $(QUIET)rsync -av --delete build/flatgems/data build/jar/ + $(QUIET)rsync -av --delete build/flatgems/data build/flatgems/vendor build/jar/ $(QUIET)(cd lib; rsync -av --delete logstash/web/public ../build/jar/logstash/web/public) $(QUIET)(cd lib; rsync -av --delete logstash/web/views ../build/jar/logstash/web/views) $(QUIET)(cd lib; rsync -av --delete logstash/certs ../build/jar/logstash/certs) diff --git a/lib/logstash/filters/uaparser.rb b/lib/logstash/filters/uaparser.rb index 00804bc4d..cacffaac2 100644 --- a/lib/logstash/filters/uaparser.rb +++ b/lib/logstash/filters/uaparser.rb @@ -1,62 +1,87 @@ require "logstash/filters/base" require "logstash/namespace" +require "tempfile" -# This filter can parse user agent strings in to structured data +# Parse user agent strings into structured data based on BrowserScope data +# +# UserAgent filter, adds information about user agent like family, operating +# system, version, and device +# +# Logstash releases ship with the regexes.yaml database made available from +# ua-parser with an Apache 2.0 license. For more details on ua-parser, see +# . class LogStash::Filters::UAParser < LogStash::Filters::Base config_name "uaparser" plugin_status "experimental" - # The field containing the user agent string to parse - config :field, :validate => :string + # The field containing the user agent string. If this field is an + # array, only the first value will be used. + config :source, :validate => :string, :required => true # The name of the field to assign the UA data hash to - config :container, :validate => :string, :default => "ua" + config :target, :validate => :string, :default => "ua" - # name with full path of BrowserScope YAML file. - # One is shipped with this filter, but you can reference - # updated versions here. See https://github.com/tobie/ua-parser - # for updates - config :regexes_path, :validate => :string + # regexes.yaml file to use + # + # If not specified, this will default to the regexes.yaml that ships + # with logstash. + config :regexes, :validate => :string public def register require 'user_agent_parser' - - if @regexes_path.nil? - @parser = UserAgentParser::Parser.new + if @regexes.nil? + begin + @parser = UserAgentParser::Parser.new() + rescue Exception => e + begin + # Running from a flatjar which has a different layout + jar_path = [__FILE__.split("!").first, "/vendor/ua-parser/regexes.yaml"].join("!") + tmp_file = Tempfile.new('logstash-uaparser-regexes') + tmp_file.write(File.read(jar_path)) + tmp_file.close # this file is reaped when ruby exits + @parser = UserAgentParser::Parser.new(tmp_file.path) + rescue => ex + raise "Failed to cache, due to: #{ex}\n#{ex.backtrace}" + end + end else - @logger.info? and @logger.info "Using regexes from", :regexes_path => @regexes_path - @parser = UserAgentParser::Parser.new(regexes_path) + @logger.info("Using user agent regexes", :regexes => @regexes) + @parser = UserAgentParser::Parser.new(@regexes) end end #def register public def filter(event) return unless filter?(event) + ua_data = nil - result = @parser.parse event[@field] + useragent = event[@source] + useragent = useragent.first if useragent.is_a? Array - ua = {} - ua['name'] = result.name - - if not result.os.nil? - ua['os'] = result.os + begin + ua_data = @parser.parse(useragent) + rescue Exception => e + @logger.error("Uknown error while parsing user agent data", :exception => e, :field => @source, :event => event) end - if not result.device.nil? - ua['device'] = result.device + unless ua_data.nil? + event[@target] = {} if event[@target].nil? + + event[@target]["name"] = ua_data.name + event[@target]["os"] = ua_data.os if not ua_data.os.nil? + event[@target]["device"] = ua_data.device if not ua_data.device.nil? + + if not ua_data.version.nil? + ua_version = ua_data.version + + event[@target]["major"] = ua_version.major + event[@target]["minor"] = ua_version.minor + end + + filter_matched(event) end - if not result.version.nil? - ua_version = result.version - - ua['major'] = ua_version.major - ua['minor'] = ua_version.minor - end - - event[@container] = ua - - filter_matched(event) end # def filter end # class LogStash::Filters::UAParser