Incorporate uaparser feedback, make flatjar work.

* Handle running inside a flatjar. (technique taken from geoip filter)
* Rename field/container to source/target
* Allow target to be an existing hash in which case data is merged
* Better comments
* Allow arrays for input (though only parse the first element)
* s/'/"/ and add parens to function calls
* Changelog entry
This commit is contained in:
Dan Everton 2013-02-06 12:23:39 +10:00
parent 3e70230c55
commit f3fa75a8fd
3 changed files with 61 additions and 33 deletions

View file

@ -16,6 +16,8 @@
warn you if you use it. (LOGSTASH-803)
- feature: grok: Adds tag_on_failure setting so you can prevent grok from
tagging events on failure. (#328, patch by Neil Prosser)
- new: uaparser: parses user agent strings into structured data based on
BrowserScope data (#347, patch by Dan Everton)
## outputs
- fix bug in mongo output that would fail to load bson_java support

View file

@ -208,6 +208,7 @@ build/flatgems: | build vendor/bundle
@# all the gem specs.
rsync -av $(VENDOR_DIR)/gems/jruby-openssl-*/lib/shared/jopenssl.jar $@/lib
rsync -av $(VENDOR_DIR)/gems/sys-uname-*/lib/unix/ $@/lib
rsync -av $(VENDOR_DIR)/gems/user_agent_parser-*/vendor/ua-parser $@/vendor
flatjar-test:
GEM_HOME= GEM_PATH= java -jar build/logstash-$(VERSION)-flatjar.jar rspec $(TESTS)
@ -227,7 +228,7 @@ flatjar: build/logstash-$(VERSION)-flatjar.jar
build/jar: | build build/flatgems build/monolith
$(QUIET)mkdir build/jar
$(QUIET)rsync -av --delete build/flatgems/lib/ build/monolith/ build/ruby/ patterns build/jar/
$(QUIET)rsync -av --delete build/flatgems/data build/jar/
$(QUIET)rsync -av --delete build/flatgems/data build/flatgems/vendor build/jar/
$(QUIET)(cd lib; rsync -av --delete logstash/web/public ../build/jar/logstash/web/public)
$(QUIET)(cd lib; rsync -av --delete logstash/web/views ../build/jar/logstash/web/views)
$(QUIET)(cd lib; rsync -av --delete logstash/certs ../build/jar/logstash/certs)

View file

@ -1,62 +1,87 @@
require "logstash/filters/base"
require "logstash/namespace"
require "tempfile"
# This filter can parse user agent strings in to structured data
# Parse user agent strings into structured data based on BrowserScope data
#
# UserAgent filter, adds information about user agent like family, operating
# system, version, and device
#
# Logstash releases ship with the regexes.yaml database made available from
# ua-parser with an Apache 2.0 license. For more details on ua-parser, see
# <https://github.com/tobie/ua-parser/>.
# Filter plugin, registered under the config name "uaparser", that runs a
# user agent string taken from an event field through UserAgentParser and
# stores the resulting name / os / device / major / minor values on the event.
#
# NOTE(review): this listing appears to interleave the pre-change and
# post-change lines of a diff (e.g. :field next to :source, :container next
# to :target, @regexes_path next to @regexes), and the if/begin/end nesting
# below does not balance as shown -- confirm against the real file before
# relying on any single line here.
class LogStash::Filters::UAParser < LogStash::Filters::Base
config_name "uaparser"
plugin_status "experimental"
# The field containing the user agent string to parse
# NOTE(review): presumably the older name of the :source setting below -- confirm.
config :field, :validate => :string
# The field containing the user agent string. If this field is an
# array, only the first value will be used.
config :source, :validate => :string, :required => true
# The name of the field to assign the UA data hash to
# NOTE(review): :container and :target carry the same default ("ua");
# presumably :container is the older name of :target -- confirm.
config :container, :validate => :string, :default => "ua"
config :target, :validate => :string, :default => "ua"
# name with full path of BrowserScope YAML file.
# One is shipped with this filter, but you can reference
# updated versions here. See https://github.com/tobie/ua-parser
# for updates
config :regexes_path, :validate => :string
# regexes.yaml file to use
#
# If not specified, this will default to the regexes.yaml that ships
# with logstash.
config :regexes, :validate => :string
# Loads the user_agent_parser library and builds @parser.
#
# When no regexes file is configured the parser is constructed with no
# arguments; if that raises, regexes.yaml is read from a path derived from
# __FILE__, copied into a Tempfile, and the parser is built from that copy.
# When a regexes file is configured, the parser is built from it directly.
#
# Raises a RuntimeError ("Failed to cache, due to: ...") when the fallback
# copy itself fails.
public
def register
require 'user_agent_parser'
if @regexes_path.nil?
@parser = UserAgentParser::Parser.new
if @regexes.nil?
begin
@parser = UserAgentParser::Parser.new()
# NOTE(review): rescuing Exception catches every exception class, not only
# StandardError; the rescued value `e` is not used afterwards.
rescue Exception => e
begin
# Running from a flatjar which has a different layout
# The text of __FILE__ before the first "!" is rejoined with the in-jar
# regexes.yaml path using "!" as the separator.
# NOTE(review): presumably __FILE__ looks like "<jar>!<path inside jar>"
# when running from the flatjar -- confirm.
jar_path = [__FILE__.split("!").first, "/vendor/ua-parser/regexes.yaml"].join("!")
# Copy the regexes out to a temp file and hand that file's path to the parser.
tmp_file = Tempfile.new('logstash-uaparser-regexes')
tmp_file.write(File.read(jar_path))
tmp_file.close # this file is reaped when ruby exits
@parser = UserAgentParser::Parser.new(tmp_file.path)
rescue => ex
raise "Failed to cache, due to: #{ex}\n#{ex.backtrace}"
end
end
else
# An explicit regexes file was configured: log it and build the parser from it.
@logger.info? and @logger.info "Using regexes from", :regexes_path => @regexes_path
@parser = UserAgentParser::Parser.new(regexes_path)
@logger.info("Using user agent regexes", :regexes => @regexes)
@parser = UserAgentParser::Parser.new(@regexes)
end
end #def register
# Parses the user agent string found on the event and writes the parsed
# values onto the event.
#
# event - the event being filtered; it is read and written with [] / []=.
#
# Returns early, doing nothing, when filter?(event) is false.
public
def filter(event)
return unless filter?(event)
ua_data = nil
result = @parser.parse event[@field]
# Read the source field; when it holds an Array only the first element is parsed.
useragent = event[@source]
useragent = useragent.first if useragent.is_a? Array
ua = {}
ua['name'] = result.name
if not result.os.nil?
ua['os'] = result.os
# A parse failure is logged and leaves ua_data nil, so the event is left
# without parsed fields rather than the error propagating.
begin
ua_data = @parser.parse(useragent)
rescue Exception => e
@logger.error("Uknown error while parsing user agent data", :exception => e, :field => @source, :event => event)
end
if not result.device.nil?
ua['device'] = result.device
unless ua_data.nil?
# Create the target hash only when the event has none, so values already
# stored under @target are kept and the parsed keys are written into it.
event[@target] = {} if event[@target].nil?
event[@target]["name"] = ua_data.name
event[@target]["os"] = ua_data.os if not ua_data.os.nil?
event[@target]["device"] = ua_data.device if not ua_data.device.nil?
if not ua_data.version.nil?
ua_version = ua_data.version
event[@target]["major"] = ua_version.major
event[@target]["minor"] = ua_version.minor
end
filter_matched(event)
end
if not result.version.nil?
ua_version = result.version
ua['major'] = ua_version.major
ua['minor'] = ua_version.minor
end
# Assigns the locally built `ua` hash to the @container field, replacing
# whatever was stored there.
event[@container] = ua
filter_matched(event)
end # def filter
end # class LogStash::Filters::UAParser