From d96ac92295a98da4a60a18f8a44e912bfe5b670b Mon Sep 17 00:00:00 2001 From: Rui Alves Date: Thu, 9 Aug 2012 00:14:46 +0100 Subject: [PATCH 001/105] Committer: Rui Alves On branch master Changes to be committed: new file: lib/logstash/outputs/syslog.rb --- lib/logstash/outputs/syslog.rb | 141 +++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 lib/logstash/outputs/syslog.rb diff --git a/lib/logstash/outputs/syslog.rb b/lib/logstash/outputs/syslog.rb new file mode 100644 index 000000000..c099cc970 --- /dev/null +++ b/lib/logstash/outputs/syslog.rb @@ -0,0 +1,141 @@ +require "logstash/outputs/base" +require "logstash/namespace" +require "date" + +# Send events to syslog server +# You can send messages compliant with RFC3164 or RFC5424 +# UDP or TCP syslog transport is supported + +class LogStash::Outputs::Syslog < LogStash::Outputs::Base + + config_name "syslog" + plugin_status "beta" + + FACILITY_LABELS = [ + "kernel", + "user-level", + "mail", + "daemon", + "security/authorization", + "syslogd", + "line printer", + "network news", + "uucp", + "clock", + "security/authorization", + "ftp", + "ntp", + "log audit", + "log alert", + "clock", + "local0", + "local1", + "local2", + "local3", + "local4", + "local5", + "local6", + "local7", + ] + + SEVERITY_LABELS = [ + "emergency", + "alert", + "critical", + "error", + "warning", + "notice", + "informational", + "debug", + ] + + # syslog server address to connect to + config :host, :validate => :string, :required => true + + # syslog server port to connect to + config :port, :validate => :number, :required => true + + # syslog server protocol. you can choose between udp and tcp + config :protocol, :validate => ["tcp", "udp"], :default => "udp" + + # facility label for syslog message + config :facility, :validate => FACILITY_LABELS, :required => true + + # severity label for syslog message + config :severity, :validate => SEVERITY_LABELS, :required => true + + # source host for syslog message + config :sourcehost, :validate => :string, :default => "%{@source_host}" + + # timestamp for syslog message + config :timestamp, :validate => :string, :default => "%{@timestamp}" + + # application name for syslog message + config :appname, :validate => :string, :default => "LOGSTASH" + + # process id for syslog message + config :procid, :validate => :string, :default => "-" + + # message id for syslog message + config :msgid, :validate => :string, :default => "-" + + # syslog message format: you can choose between rfc3164 or rfc5424 + config :rfc, :validate => ["rfc3164", "rfc5424"], :default => "rfc3164" + + + public + def register + @client_socket = nil + end + + private + def udp? + @protocol == "udp" + end + + private + def rfc3164? + @rfc == "rfc3164" + end + + private + def connect + if udp? + @client_socket = UDPSocket.new + @client_socket.connect(@host, @port) + else + @client_socket = TCPSocket.new(@host, @port) + end + end + + public + def receive(event) + return unless output?(event) + + sourcehost = event.sprintf(@sourcehost) + + timestamp = event.sprintf(@timestamp) + + facility_code = FACILITY_LABELS.index(@facility) + + severity_code = SEVERITY_LABELS.index(@severity) + + priority = (facility_code * 8) + severity_code + + if rfc3164? 
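+        # RFC3164 layout: "<PRI>TIMESTAMP HOSTNAME APPNAME[PROCID]: MSG"
+        # (the else branch below builds the RFC5424 form: "<PRI>1 TIMESTAMP HOSTNAME APPNAME PROCID MSGID - MSG")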
+        syslog_msg = "<"+priority.to_s()+">"+timestamp+" "+sourcehost+" "+@appname+"["+@procid+"]: "+event.message
+      else
+        syslog_msg = "<"+priority.to_s()+">1 "+timestamp+" "+sourcehost+" "+@appname+" "+@procid+" "+@msgid+" - "+event.message
+      end
+
+      begin
+        connect unless @client_socket
+        @client_socket.write(syslog_msg)
+        @client_socket.write("\n")
+      rescue => e
+        @logger.warn(@protocol+" output exception", :host => @host, :port => @port,
+          :exception => e, :backtrace => e.backtrace)
+        @client_socket = nil
+      end
+  end
+end

From 73e85132ad98d5bd6317216e07eda9b9ab30001a Mon Sep 17 00:00:00 2001
From: ruckalvnet
Date: Thu, 16 Aug 2012 13:13:41 +0200
Subject: [PATCH 002/105] Update lib/logstash/outputs/syslog.rb

changes to the timestamp format in the syslog message when using rfc3164.
---
 lib/logstash/outputs/syslog.rb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/logstash/outputs/syslog.rb b/lib/logstash/outputs/syslog.rb
index c099cc970..cb97f0f5a 100644
--- a/lib/logstash/outputs/syslog.rb
+++ b/lib/logstash/outputs/syslog.rb
@@ -114,8 +114,6 @@ class LogStash::Outputs::Syslog < LogStash::Outputs::Base
 
      sourcehost = event.sprintf(@sourcehost)
 
-     timestamp = event.sprintf(@timestamp)
-
      facility_code = FACILITY_LABELS.index(@facility)
 
      severity_code = SEVERITY_LABELS.index(@severity)
@@ -123,8 +121,10 @@ class LogStash::Outputs::Syslog < LogStash::Outputs::Base
      priority = (facility_code * 8) + severity_code
 
      if rfc3164?
+       timestamp = DateTime.iso8601(event.sprintf(@timestamp)).strftime("%b %e %H:%M:%S")
        syslog_msg = "<"+priority.to_s()+">"+timestamp+" "+sourcehost+" "+@appname+"["+@procid+"]: "+event.message
      else
+       timestamp = DateTime.iso8601(event.sprintf(@timestamp)).rfc3339()
        syslog_msg = "<"+priority.to_s()+">1 "+timestamp+" "+sourcehost+" "+@appname+" "+@procid+" "+@msgid+" - "+event.message
      end

From 82ae95233879bf7a919a6f46b1422594d8f59de2 Mon Sep 17 00:00:00 2001
From: Nuno Valente
Date: Wed, 24 Oct 2012 23:29:36 +0100
Subject: [PATCH 003/105] Fixed Stomp input plugin subscription method. The
 plugin was ending prematurely.

---
 lib/logstash/inputs/stomp.rb | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/lib/logstash/inputs/stomp.rb b/lib/logstash/inputs/stomp.rb
index a15626399..6c13dbef5 100644
--- a/lib/logstash/inputs/stomp.rb
+++ b/lib/logstash/inputs/stomp.rb
@@ -58,6 +58,13 @@ class LogStash::Inputs::Stomp < LogStash::Inputs::Base
         e = to_event(msg.body, @stomp_url)
         @output_queue << e if e
       end
+      # In the event that there are only Stomp input plugin instances,
+      # the process ends prematurely. The above code runs and returns
+      # control flow to the 'run' method below. After that, the
+      # method "run_input" from agent.rb marks 'done' as 'true' and calls
+      # 'finish' on the Stomp plugin instance.
+      # 'Sleeping' the plugin keeps the instance alive.
+      sleep
     end
 
   public

From fe1657a23c2b1eddc49302b8fdceb65ff4dd98e4 Mon Sep 17 00:00:00 2001
From: Tomas Doran
Date: Fri, 26 Oct 2012 15:55:00 +0100
Subject: [PATCH 004/105] Fix @source_host when it comes from json_event

This is due to the change 1fc568e07d957ab9b629eab7550a1ae190db3c8a, which
tries to get the @source value set sanely for json_event events that don't
have one.

However, Logstash::Event unilaterally resets @source_host when @source is
set, which is not what we want at all.
I have changed this so that @source_host is only set from source in the case where @source_host was previously unset --- lib/logstash/event.rb | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/logstash/event.rb b/lib/logstash/event.rb index 09b314981..f5a60648d 100644 --- a/lib/logstash/event.rb +++ b/lib/logstash/event.rb @@ -15,6 +15,7 @@ class LogStash::Event @cancelled = false @data = { + "@source_host" => false, "@source" => "unknown", "@tags" => [], "@fields" => {}, @@ -78,16 +79,19 @@ class LogStash::Event public def source; @data["@source"]; end # def source - def source=(val) + def source=(val) uri = URI.parse(val) rescue nil val = uri if uri if val.is_a?(URI) @data["@source"] = val.to_s - @data["@source_host"] = val.host + maybe_new_source_host = val.host @data["@source_path"] = val.path else @data["@source"] = val - @data["@source_host"] = val + maybe_new_source_host = val + end + if !@data["@source_host"] + @data["@source_host"] = maybe_new_source_host end end # def source= From 5faee13fdc59f562c9fc7c6df8369dade898d093 Mon Sep 17 00:00:00 2001 From: theduke Date: Sun, 25 Nov 2012 03:12:10 +0100 Subject: [PATCH 005/105] Added a new drupal_dblog input (experimental). --- lib/logstash/inputs/drupal_dblog.rb | 200 ++++++++++++++++++++++++++++ logstash.gemspec | 2 + 2 files changed, 202 insertions(+) create mode 100644 lib/logstash/inputs/drupal_dblog.rb diff --git a/lib/logstash/inputs/drupal_dblog.rb b/lib/logstash/inputs/drupal_dblog.rb new file mode 100644 index 000000000..03d7e54ff --- /dev/null +++ b/lib/logstash/inputs/drupal_dblog.rb @@ -0,0 +1,200 @@ +require "date" +require "logstash/inputs/base" +require "logstash/namespace" +require "logstash/time" # should really use the filters/date.rb bits + +require "mysql2" +require "php_serialize" + +# Read messages as events over the network via udp. +# +class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base + config_name "drupal_dblog" + plugin_status "experimental" + + + + # The database host + config :host, :validate => :string, :required => true + + # Mysql database port + config :port, :validate => :number, :default => 3306 + + # Database name + config :database, :validate => :string, :required => true + + # Database password + config :user, :validate => :string, :required => true + + # Database password + config :password, :validate => :string, :required => true + + # Name of the Drupal site, used for event source + config :sitename, :validate => :string, :default => "" + + # Add the username in addition to the user id to the event + config :add_usernames, :validate => :boolean, :default => false + + # Time between checks in minutes + config :interval, :validate => :number, :default => 10 + + public + def initialize(params) + super + @format = "json_event" + @debug = true + end # def initialize + + public + def register + + end # def register + + public + def run(output_queue) + @logger.info("Initializing drupal_dblog", :database => @database) + + loop do + @logger.debug("Starting to fetch new watchdog entries") + start = Time.now.to_i + check_database(output_queue) + + timeTaken = Time.now.to_i - start + sleepTime = @interval * 1 - timeTaken + @logger.debug("Fetched all new watchdog entries. 
Sleeping for " + sleepTime.to_s + " seconds") + sleep(sleepTime) + end # loop + end # def run + + private + def check_database(output_queue) + + begin + # connect to the MySQL server + @client = Mysql2::Client.new( + :host => @host, + :port => @port, + :username => @user, + :password => @password, + :database => @database + ) + + # If no source is set, try to retrieve site name. + update_sitename + + @usermap = @add_usernames ? get_usermap : nil + + # Retrieve last pulled watchdog entry id + initialLastWid = get_last_wid + lastWid = initialLastWid ? initialLastWid : "0" + + # Fetch new entries, and create the event + results = @client.query('SELECT * from watchdog WHERE wid > ' + initialLastWid + " ORDER BY wid asc") + results.each do |row| + event = build_event(row) + if event + output_queue << to_event(JSON.dump(event), "") + lastWid = row['wid'].to_s + end + end + + set_last_wid(lastWid, initialLastWid == false) + rescue Mysql2::Error => e + @logger.info("Mysql error: ", :error => e.error) + end # begin + + # Close connection + @client.close + end # def get_net_entries + + private + def update_sitename + if @sitename == "" + result = @client.query('SELECT value FROM variable WHERE name="site_name"') + if result.first() + @sitename = PHP.unserialize(result.first()['value']) + end + end + end + + private + def get_last_wid + result = @client.query('SELECT value FROM variable WHERE name="logstash_last_wid"') + lastWid = false + + if result.count() > 0 + tmp = result.first()["value"].gsub("i:", "").gsub(";", "") + lastWid = tmp.to_i.to_s == tmp ? tmp : "0" + end + + return lastWid + end + + private + def set_last_wid(wid, insert) + # Update last import wid variable + if insert + # Does not exist yet, so insert + @client.query('INSERT INTO variable (name, value) VALUES("logstash_last_wid", "' + wid + '")') + else + @client.query('UPDATE variable SET value="' + wid + '" WHERE name="logstash_last_wid"') + end + end + + private + def get_usermap + map = {} + + @client.query("SELECT uid, name FROM users").each do |row| + map[row["uid"]] = row["name"] + end + + map[0] = "guest" + return map + end + + private + def build_event(row) + # Convert unix timestamp + timestamp = Time.at(row["timestamp"]).to_datetime.iso8601 + + msg = row["message"] + vars = {} + + # Unserialize the variables, and construct the message + if row['variables'] != 'N;' + vars = PHP.unserialize(row["variables"]) + + if vars.is_a?(Hash) + vars.each_pair do |k, v| + if msg.scan(k).length() > 0 + msg = msg.gsub(k.to_s, v.to_s) + else + # If not inside the message, add var as an additional field + row["variable_" + k] = v + end + end + end + end + + row.delete("message") + row.delete("variables") + row.delete("timestamp") + + if @add_usernames and @usermap.has_key?(row["uid"]) + row["user"] = @usermap[row["uid"]] + end + + entry = { + "@timestamp" => timestamp, + "@tags" => [], + "@type" => "watchdog", + "@source" => @sitename, + "@fields" => row, + "@message" => msg + } + + return entry + end # def build_event + +end # class LogStash::Inputs::DrupalDblog diff --git a/logstash.gemspec b/logstash.gemspec index f7a93fe32..05610aaa8 100644 --- a/logstash.gemspec +++ b/logstash.gemspec @@ -56,6 +56,8 @@ Gem::Specification.new do |gem| gem.add_runtime_dependency "jls-lumberjack", ["0.0.4"] gem.add_runtime_dependency "geoip", [">= 1.1.0"] gem.add_runtime_dependency "beefcake", "0.3.7" + gem.add_runtime_dependency "mysql2", "0.3.11" + gem.add_runtime_dependency "php-serialize", "1.1.0" if RUBY_PLATFORM == 'java' gem.platform = 
RUBY_PLATFORM From 422776b76b96dab67c7511376b7bae2601d61e12 Mon Sep 17 00:00:00 2001 From: theduke Date: Sun, 25 Nov 2012 03:15:56 +0100 Subject: [PATCH 006/105] Formatting and comments. --- lib/logstash/inputs/drupal_dblog.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/logstash/inputs/drupal_dblog.rb b/lib/logstash/inputs/drupal_dblog.rb index 03d7e54ff..693fa980a 100644 --- a/lib/logstash/inputs/drupal_dblog.rb +++ b/lib/logstash/inputs/drupal_dblog.rb @@ -2,18 +2,18 @@ require "date" require "logstash/inputs/base" require "logstash/namespace" require "logstash/time" # should really use the filters/date.rb bits - require "mysql2" require "php_serialize" -# Read messages as events over the network via udp. +# Retrieve events from a Drupal installation with DBlog enabled. +# +# To avoid pulling the same watchdog entry twice, the last pulled wid +# is saved as a variable in the Drupal database. # class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base config_name "drupal_dblog" plugin_status "experimental" - - # The database host config :host, :validate => :string, :required => true From 0df75f00846631079c71c490ec66dd64ba7254a7 Mon Sep 17 00:00:00 2001 From: theduke Date: Sun, 25 Nov 2012 03:31:39 +0100 Subject: [PATCH 007/105] Fixed sleep time and sitename config --- lib/logstash/inputs/drupal_dblog.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/logstash/inputs/drupal_dblog.rb b/lib/logstash/inputs/drupal_dblog.rb index 693fa980a..132fb8d1d 100644 --- a/lib/logstash/inputs/drupal_dblog.rb +++ b/lib/logstash/inputs/drupal_dblog.rb @@ -60,7 +60,7 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base check_database(output_queue) timeTaken = Time.now.to_i - start - sleepTime = @interval * 1 - timeTaken + sleepTime = @interval * 60 - timeTaken @logger.debug("Fetched all new watchdog entries. Sleeping for " + sleepTime.to_s + " seconds") sleep(sleepTime) end # loop @@ -93,7 +93,7 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base results.each do |row| event = build_event(row) if event - output_queue << to_event(JSON.dump(event), "") + output_queue << to_event(JSON.dump(event), @sitename) lastWid = row['wid'].to_s end end From 9048aba3292e872bc5bafb3ee262035a1ff0dbe8 Mon Sep 17 00:00:00 2001 From: theduke Date: Mon, 26 Nov 2012 00:28:05 +0100 Subject: [PATCH 008/105] Added jRuby support. 
--- lib/logstash/inputs/drupal_dblog.rb | 78 ++++++++++++++++++++++++++--- logstash.gemspec | 3 +- 2 files changed, 72 insertions(+), 9 deletions(-) diff --git a/lib/logstash/inputs/drupal_dblog.rb b/lib/logstash/inputs/drupal_dblog.rb index 132fb8d1d..e4daee98c 100644 --- a/lib/logstash/inputs/drupal_dblog.rb +++ b/lib/logstash/inputs/drupal_dblog.rb @@ -2,9 +2,55 @@ require "date" require "logstash/inputs/base" require "logstash/namespace" require "logstash/time" # should really use the filters/date.rb bits -require "mysql2" require "php_serialize" +if RUBY_PLATFORM != 'java' + require "mysql2" +else + require "java" + require "rubygems" + require "jdbc/mysql" + + + include_class "com.mysql.jdbc.Driver" + + class LogStash::Inputs::DrupalDblog::JdbcMysql + def initialize(host, username, password, database, port = nil) + port ||= 3306 + + address = "jdbc:mysql://#{host}:#{port}/#{database}" + @connection = java.sql.DriverManager.getConnection(address, username, password) + end + + def query sql + resultSet = @connection.createStatement.executeQuery sql + + meta = resultSet.getMetaData + column_count = meta.getColumnCount + + rows = [] + + while resultSet.next + res = {} + + (1..column_count).each do |i| + name = meta.getColumnName i + case meta.getColumnType i + when java.sql.Types::INTEGER + res[name] = resultSet.getInt name + else + res[name] = resultSet.getString name + end + end + + rows << res + end + + return rows + end + end +end + # Retrieve events from a Drupal installation with DBlog enabled. # # To avoid pulling the same watchdog entry twice, the last pulled wid @@ -66,18 +112,34 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base end # loop end # def run + private + def get_client + + if RUBY_PLATFORM == 'java' + @client = LogStash::Inputs::DrupalDblog::JdbcMysql.new( + :host => @host, + :port => @port, + :username => @user, + :password => @password, + :database => @database + ) + else + @client = Mysql2::Client.new( + :host => @host, + :port => @port, + :username => @user, + :password => @password, + :database => @database + ) + end + end + private def check_database(output_queue) begin # connect to the MySQL server - @client = Mysql2::Client.new( - :host => @host, - :port => @port, - :username => @user, - :password => @password, - :database => @database - ) + get_client # If no source is set, try to retrieve site name. update_sitename diff --git a/logstash.gemspec b/logstash.gemspec index 05610aaa8..73a5a20c0 100644 --- a/logstash.gemspec +++ b/logstash.gemspec @@ -56,7 +56,6 @@ Gem::Specification.new do |gem| gem.add_runtime_dependency "jls-lumberjack", ["0.0.4"] gem.add_runtime_dependency "geoip", [">= 1.1.0"] gem.add_runtime_dependency "beefcake", "0.3.7" - gem.add_runtime_dependency "mysql2", "0.3.11" gem.add_runtime_dependency "php-serialize", "1.1.0" if RUBY_PLATFORM == 'java' @@ -65,8 +64,10 @@ Gem::Specification.new do |gem| gem.add_runtime_dependency "jruby-httpclient" gem.add_runtime_dependency "jruby-openssl" gem.add_runtime_dependency "jruby-win32ole" + gem.add_runtime_dependency "jdbc-mysql" else gem.add_runtime_dependency "excon" + gem.add_runtime_dependency "mysql2", "0.3.11" end if RUBY_VERSION >= '1.9.1' From 22a86938cf31090f67d5898e10b610cdb6b21484 Mon Sep 17 00:00:00 2001 From: theduke Date: Mon, 26 Nov 2012 01:03:41 +0100 Subject: [PATCH 009/105] Finishing jruby support. 
--- lib/logstash/inputs/drupal_dblog.rb | 40 +++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/lib/logstash/inputs/drupal_dblog.rb b/lib/logstash/inputs/drupal_dblog.rb index e4daee98c..69fb2e607 100644 --- a/lib/logstash/inputs/drupal_dblog.rb +++ b/lib/logstash/inputs/drupal_dblog.rb @@ -14,7 +14,7 @@ else include_class "com.mysql.jdbc.Driver" - class LogStash::Inputs::DrupalDblog::JdbcMysql + class JdbcMysql def initialize(host, username, password, database, port = nil) port ||= 3306 @@ -23,7 +23,16 @@ else end def query sql - resultSet = @connection.createStatement.executeQuery sql + if sql.downcase.scan('select').length > 0 + return select(sql) + else + return update(sql) + end + end + + def select sql + stmt = @connection.createStatement + resultSet = stmt.executeQuery sql meta = resultSet.getMetaData column_count = meta.getColumnCount @@ -46,8 +55,19 @@ else rows << res end + stmt.close return rows end + + def update sql + stmt = @connection.createStatement + stmt.execute_update sql + stmt.close + end + + def close + @connection.close + end end end @@ -116,12 +136,12 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base def get_client if RUBY_PLATFORM == 'java' - @client = LogStash::Inputs::DrupalDblog::JdbcMysql.new( - :host => @host, - :port => @port, - :username => @user, - :password => @password, - :database => @database + @client = JdbcMysql.new( + @host, + @user, + @password, + @database, + @port ) else @client = Mysql2::Client.new( @@ -161,8 +181,8 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base end set_last_wid(lastWid, initialLastWid == false) - rescue Mysql2::Error => e - @logger.info("Mysql error: ", :error => e.error) + rescue Exception => e + @logger.info("Mysql error: ", :error => e.message) end # begin # Close connection From 5cb06701255af4eed0185f2aa3e7830bec74b584 Mon Sep 17 00:00:00 2001 From: theduke Date: Mon, 26 Nov 2012 01:20:10 +0100 Subject: [PATCH 010/105] Removed unneccessary version numbers from gems. 
--- logstash.gemspec | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/logstash.gemspec b/logstash.gemspec index 73a5a20c0..3d8c1a2a8 100644 --- a/logstash.gemspec +++ b/logstash.gemspec @@ -56,7 +56,7 @@ Gem::Specification.new do |gem| gem.add_runtime_dependency "jls-lumberjack", ["0.0.4"] gem.add_runtime_dependency "geoip", [">= 1.1.0"] gem.add_runtime_dependency "beefcake", "0.3.7" - gem.add_runtime_dependency "php-serialize", "1.1.0" + gem.add_runtime_dependency "php-serialize" if RUBY_PLATFORM == 'java' gem.platform = RUBY_PLATFORM @@ -67,7 +67,7 @@ Gem::Specification.new do |gem| gem.add_runtime_dependency "jdbc-mysql" else gem.add_runtime_dependency "excon" - gem.add_runtime_dependency "mysql2", "0.3.11" + gem.add_runtime_dependency "mysql2" end if RUBY_VERSION >= '1.9.1' From 6cf83e9e7c28a8e88959bff73ca2a5d619f182e7 Mon Sep 17 00:00:00 2001 From: ruckalvnet Date: Mon, 26 Nov 2012 23:43:19 +0000 Subject: [PATCH 011/105] Update lib/logstash/outputs/syslog.rb --- lib/logstash/outputs/syslog.rb | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/logstash/outputs/syslog.rb b/lib/logstash/outputs/syslog.rb index cb97f0f5a..5e2781c1e 100644 --- a/lib/logstash/outputs/syslog.rb +++ b/lib/logstash/outputs/syslog.rb @@ -130,12 +130,11 @@ class LogStash::Outputs::Syslog < LogStash::Outputs::Base begin connect unless @client_socket - @client_socket.write(syslog_msg) - @client_socket.write("\n") + @client_socket.write(syslog_msg + "\n") rescue => e @logger.warn(@protocol+" output exception", :host => @host, :port => @port, :exception => e, :backtrace => e.backtrace) - @client_socket = nil + @client_socket.close end end end From ed02bbd4eafe17fb084642f7f9fefd8f26ec046e Mon Sep 17 00:00:00 2001 From: theduke Date: Tue, 27 Nov 2012 23:56:32 +0100 Subject: [PATCH 012/105] Rewrote drupal_dblog to support multiple databases --- lib/logstash/inputs/drupal_dblog.rb | 130 +++++++++++++++++++--------- 1 file changed, 88 insertions(+), 42 deletions(-) diff --git a/lib/logstash/inputs/drupal_dblog.rb b/lib/logstash/inputs/drupal_dblog.rb index 69fb2e607..6f655b706 100644 --- a/lib/logstash/inputs/drupal_dblog.rb +++ b/lib/logstash/inputs/drupal_dblog.rb @@ -15,6 +15,7 @@ else include_class "com.mysql.jdbc.Driver" class JdbcMysql + def initialize(host, username, password, database, port = nil) port ||= 3306 @@ -80,23 +81,9 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base config_name "drupal_dblog" plugin_status "experimental" - # The database host - config :host, :validate => :string, :required => true + config :databases, :validate => :hash - # Mysql database port - config :port, :validate => :number, :default => 3306 - - # Database name - config :database, :validate => :string, :required => true - - # Database password - config :user, :validate => :string, :required => true - - # Database password - config :password, :validate => :string, :required => true - - # Name of the Drupal site, used for event source - config :sitename, :validate => :string, :default => "" + config :type, :validate => :string, :default => 'watchdog' # Add the username in addition to the user id to the event config :add_usernames, :validate => :boolean, :default => false @@ -113,53 +100,112 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base public def register - end # def register + public + def config_init(params) + super + + dbs = {} + valid = true + + @databases.each do |name, rawUri| + uri = URI(rawUri) + + dbs[name] = { + "scheme" => uri.scheme, + "host" => 
uri.host, + "user" => uri.user, + "password" => uri.password, + "database" => uri.path.sub('/', ''), + "port" => uri.port.to_i + } + + if not ( + uri.scheme and not uri.scheme.empty?\ + and uri.host and not uri.host.empty?\ + and uri.user and not uri.user.empty?\ + and uri.password\ + and uri.path and not uri.path.sub('/', '').empty? + ) + @logger.error("Drupal DBLog: Invalid database URI for #{name} : #{rawUri}") + valid = false + end + if not uri.scheme == 'mysql' + @logger.error("Drupal DBLog: Only mysql databases are supported.") + valid = false + end + end + + if not valid + @logger.error("Config validation failed.") + exit 1 + end + + @databases = dbs + end #def config_init + public def run(output_queue) - @logger.info("Initializing drupal_dblog", :database => @database) + print "LALALLA" + @logger.info("Initializing drupal_dblog") loop do - @logger.debug("Starting to fetch new watchdog entries") + @logger.debug("Drupal DBLog: Starting to fetch new watchdog entries") start = Time.now.to_i - check_database(output_queue) + + @databases.each do |name, db| + @logger.debug("Drupal DBLog: Checking database #{name}") + check_database(output_queue, db) + end timeTaken = Time.now.to_i - start + @logger.debug("Drupal DBLog: Fetched all new watchdog entries in #{timeTaken} seconds") + + # If fetching of all databases took less time than the interval, + # sleep a bit. sleepTime = @interval * 60 - timeTaken - @logger.debug("Fetched all new watchdog entries. Sleeping for " + sleepTime.to_s + " seconds") - sleep(sleepTime) + if sleepTime > 0 + @logger.debug("Drupal DBLog: Sleeping for #{sleepTime} seconds") + sleep(sleepTime) + end end # loop end # def run private - def get_client + def initialize_client(db) + if db["scheme"] == 'mysql' - if RUBY_PLATFORM == 'java' - @client = JdbcMysql.new( - @host, - @user, - @password, - @database, - @port - ) - else - @client = Mysql2::Client.new( - :host => @host, - :port => @port, - :username => @user, - :password => @password, - :database => @database - ) + if not db["port"] > 0 + db["port"] = 3306 + end + + if RUBY_PLATFORM == 'java' + @client = JdbcMysql.new( + db["host"], + db["user"], + db["password"], + db["database"], + db["port"] + ) + else + @client = Mysql2::Client.new( + :host => db["host"], + :port => db["port"], + :username => db["user"], + :password => db["password"], + :database => db["database"] + ) + end end - end + end #def get_client private - def check_database(output_queue) + def check_database(output_queue, db) begin # connect to the MySQL server - get_client + initialize_client(db) # If no source is set, try to retrieve site name. 
update_sitename From cb0a99ff7370d15b04ad9397f780aa9bba64a3aa Mon Sep 17 00:00:00 2001 From: theduke Date: Tue, 27 Nov 2012 23:58:48 +0100 Subject: [PATCH 013/105] Removed debug msg and replaced include_class (deprecated) with java_import --- lib/logstash/inputs/drupal_dblog.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/logstash/inputs/drupal_dblog.rb b/lib/logstash/inputs/drupal_dblog.rb index 6f655b706..e05ad195c 100644 --- a/lib/logstash/inputs/drupal_dblog.rb +++ b/lib/logstash/inputs/drupal_dblog.rb @@ -12,7 +12,7 @@ else require "jdbc/mysql" - include_class "com.mysql.jdbc.Driver" + java_import "com.mysql.jdbc.Driver" class JdbcMysql @@ -147,7 +147,6 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base public def run(output_queue) - print "LALALLA" @logger.info("Initializing drupal_dblog") loop do From deb507b506c5c50265c23f140772d828afe3352c Mon Sep 17 00:00:00 2001 From: theduke Date: Wed, 28 Nov 2012 00:02:13 +0100 Subject: [PATCH 014/105] Added gem comments. --- logstash.gemspec | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/logstash.gemspec b/logstash.gemspec index 3d8c1a2a8..bdd2e7457 100644 --- a/logstash.gemspec +++ b/logstash.gemspec @@ -56,7 +56,7 @@ Gem::Specification.new do |gem| gem.add_runtime_dependency "jls-lumberjack", ["0.0.4"] gem.add_runtime_dependency "geoip", [">= 1.1.0"] gem.add_runtime_dependency "beefcake", "0.3.7" - gem.add_runtime_dependency "php-serialize" + gem.add_runtime_dependency "php-serialize" # For input drupal_dblog if RUBY_PLATFORM == 'java' gem.platform = RUBY_PLATFORM @@ -64,10 +64,10 @@ Gem::Specification.new do |gem| gem.add_runtime_dependency "jruby-httpclient" gem.add_runtime_dependency "jruby-openssl" gem.add_runtime_dependency "jruby-win32ole" - gem.add_runtime_dependency "jdbc-mysql" + gem.add_runtime_dependency "jdbc-mysql" # For input drupal_dblog else gem.add_runtime_dependency "excon" - gem.add_runtime_dependency "mysql2" + gem.add_runtime_dependency "mysql2" # For input drupal_dblog end if RUBY_VERSION >= '1.9.1' From 27240f97cd94ec072eb6f3713a2eeb25d4d4bcfa Mon Sep 17 00:00:00 2001 From: theduke Date: Wed, 28 Nov 2012 00:48:55 +0100 Subject: [PATCH 015/105] Removed debug flag and renamed a class. 
--- lib/logstash/inputs/drupal_dblog.rb | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/logstash/inputs/drupal_dblog.rb b/lib/logstash/inputs/drupal_dblog.rb index e05ad195c..6e41e5e27 100644 --- a/lib/logstash/inputs/drupal_dblog.rb +++ b/lib/logstash/inputs/drupal_dblog.rb @@ -14,7 +14,7 @@ else java_import "com.mysql.jdbc.Driver" - class JdbcMysql + class LogStash::DrupalDblogJavaMysqlConnection def initialize(host, username, password, database, port = nil) port ||= 3306 @@ -95,7 +95,6 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base def initialize(params) super @format = "json_event" - @debug = true end # def initialize public @@ -180,7 +179,7 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base end if RUBY_PLATFORM == 'java' - @client = JdbcMysql.new( + @client = LogStash::DrupalDblogJavaMysqlConnection.new( db["host"], db["user"], db["password"], From cdfabe4f2efcb42ab18ce6d5842767ccafa27cab Mon Sep 17 00:00:00 2001 From: theduke Date: Wed, 28 Nov 2012 00:55:41 +0100 Subject: [PATCH 016/105] Drupal Dblog: Fixing sitename for new multi-db structure --- lib/logstash/inputs/drupal_dblog.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/logstash/inputs/drupal_dblog.rb b/lib/logstash/inputs/drupal_dblog.rb index 6e41e5e27..02325e269 100644 --- a/lib/logstash/inputs/drupal_dblog.rb +++ b/lib/logstash/inputs/drupal_dblog.rb @@ -112,6 +112,7 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base uri = URI(rawUri) dbs[name] = { + "site" => name, "scheme" => uri.scheme, "host" => uri.host, "user" => uri.user, @@ -205,8 +206,7 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base # connect to the MySQL server initialize_client(db) - # If no source is set, try to retrieve site name. - update_sitename + @sitename = db["site"] @usermap = @add_usernames ? get_usermap : nil From e31c518100fa0ff3e5678396cae8361c7cbbe166 Mon Sep 17 00:00:00 2001 From: theduke Date: Wed, 28 Nov 2012 01:01:57 +0100 Subject: [PATCH 017/105] Added some comments. --- lib/logstash/inputs/drupal_dblog.rb | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/logstash/inputs/drupal_dblog.rb b/lib/logstash/inputs/drupal_dblog.rb index 02325e269..e8b0258ed 100644 --- a/lib/logstash/inputs/drupal_dblog.rb +++ b/lib/logstash/inputs/drupal_dblog.rb @@ -11,9 +11,9 @@ else require "rubygems" require "jdbc/mysql" - java_import "com.mysql.jdbc.Driver" + # For JRuby, we need to supply a Connection class with an API like mysql2 class LogStash::DrupalDblogJavaMysqlConnection def initialize(host, username, password, database, port = nil) @@ -69,7 +69,7 @@ else def close @connection.close end - end + end # class LogStash::DrupalDblogJavaMysqlConnection end # Retrieve events from a Drupal installation with DBlog enabled. 
@@ -231,7 +231,7 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base # Close connection @client.close - end # def get_net_entries + end # def check_database private def update_sitename @@ -241,7 +241,7 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base @sitename = PHP.unserialize(result.first()['value']) end end - end + end # def update_sitename private def get_last_wid @@ -254,7 +254,7 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base end return lastWid - end + end # def get_last_wid private def set_last_wid(wid, insert) @@ -265,7 +265,7 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base else @client.query('UPDATE variable SET value="' + wid + '" WHERE name="logstash_last_wid"') end - end + end # def set_last_wid private def get_usermap @@ -277,7 +277,7 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base map[0] = "guest" return map - end + end # def get_usermap private def build_event(row) From dbb5b3430f8c107f9596066330cccf80a3b65041 Mon Sep 17 00:00:00 2001 From: Stephon Striplin Date: Wed, 28 Nov 2012 15:30:35 -0800 Subject: [PATCH 018/105] add dependency --- spec/test_utils.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/spec/test_utils.rb b/spec/test_utils.rb index 6f3380b33..a9bafffaa 100644 --- a/spec/test_utils.rb +++ b/spec/test_utils.rb @@ -1,4 +1,5 @@ require "insist" +require "logstash/agent" require "logstash/event" require "insist" require "stud/try" From ee78bfcdf4efaed69dcb8d2feea5c14e547b5077 Mon Sep 17 00:00:00 2001 From: theduke Date: Thu, 29 Nov 2012 20:32:15 +0100 Subject: [PATCH 019/105] Drupal DBLog: fixed some issues with select query. --- lib/logstash/inputs/drupal_dblog.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/logstash/inputs/drupal_dblog.rb b/lib/logstash/inputs/drupal_dblog.rb index e8b0258ed..6b5247640 100644 --- a/lib/logstash/inputs/drupal_dblog.rb +++ b/lib/logstash/inputs/drupal_dblog.rb @@ -212,10 +212,10 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base # Retrieve last pulled watchdog entry id initialLastWid = get_last_wid - lastWid = initialLastWid ? initialLastWid : "0" + lastWid = initialLastWid ? initialLastWid : 0 # Fetch new entries, and create the event - results = @client.query('SELECT * from watchdog WHERE wid > ' + initialLastWid + " ORDER BY wid asc") + results = @client.query('SELECT * from watchdog WHERE wid > ' + lastWid.to_s + " ORDER BY wid asc") results.each do |row| event = build_event(row) if event From 89c2ee7060a4dc1b4083e55ff7dee16c868ddf63 Mon Sep 17 00:00:00 2001 From: Stephon Striplin Date: Wed, 28 Nov 2012 10:02:46 -0800 Subject: [PATCH 020/105] add yaml config support Make headway into no longer using custom configuration file. Pass --yaml to logstash agent to make it treat configuration files as YAML. 
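As a sketch, a filter section expressed in YAML (mirroring the structure
used by the spec example added with this change) looks like:

    filter:
      - grok:
          pattern: "%{COMBINEDAPACHELOG}"
          singles: true
      - date:
          timestamp: "dd/MMM/yyyy:HH:mm:ss Z"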
--- lib/logstash/agent.rb | 20 +++++++++++++++++--- lib/logstash/config/file.rb | 24 +++++++++++++++--------- lib/logstash/config/file/yaml.rb | 8 ++++++++ lib/logstash/config/mixin.rb | 15 +++++++++++---- 4 files changed, 51 insertions(+), 16 deletions(-) create mode 100755 lib/logstash/config/file/yaml.rb diff --git a/lib/logstash/agent.rb b/lib/logstash/agent.rb index 6083a68db..ef898fb7b 100644 --- a/lib/logstash/agent.rb +++ b/lib/logstash/agent.rb @@ -1,4 +1,5 @@ require "logstash/config/file" +require "logstash/config/file/yaml" require "logstash/filterworker" require "logstash/logging" require "logstash/sized_queue" @@ -34,6 +35,7 @@ class LogStash::Agent log_to(STDERR) @config_path = nil @config_string = nil + @is_yaml = false @logfile = nil # flag/config defaults @@ -252,13 +254,25 @@ class LogStash::Agent concatconfig = [] paths.each do |path| - concatconfig << File.new(path).read + file = File.new(path) + if File.extname(file) == '.yaml' + # assume always YAML if even one file is + @is_yaml = true + end + concatconfig << file.read end - config = LogStash::Config::File.new(nil, concatconfig.join("\n")) + config_data = concatconfig.join("\n") else # @config_string # Given a config string by the user (via the '-e' flag) - config = LogStash::Config::File.new(nil, @config_string) + config_data = @config_string end + + if @is_yaml + config = LogStash::Config::File::Yaml.new(nil, config_data) + else + config = LogStash::Config::File.new(nil, config_data) + end + config.logger = @logger config end diff --git a/lib/logstash/config/file.rb b/lib/logstash/config/file.rb index 4a94e1fd3..3a3c0f983 100644 --- a/lib/logstash/config/file.rb +++ b/lib/logstash/config/file.rb @@ -18,18 +18,24 @@ class LogStash::Config::File end end # def initialize + def _get_config_data + if @string.nil? + File.new(@path).read + else + @string + end + end + + def _get_config(data) + grammar = LogStash::Config::Grammar.new + grammar.parse(data) + grammar.config + end + public def parse - grammar = LogStash::Config::Grammar.new + @config = _get_config(_get_config_data); - if @string.nil? 
- grammar.parse(File.new(@path).read) - else - grammar.parse(@string) - end - - @config = grammar.config - registry = LogStash::Config::Registry::registry each do |o| # Load the base class for the type given (like inputs/base, or filters/base) diff --git a/lib/logstash/config/file/yaml.rb b/lib/logstash/config/file/yaml.rb new file mode 100755 index 000000000..2786f779e --- /dev/null +++ b/lib/logstash/config/file/yaml.rb @@ -0,0 +1,8 @@ +require "logstash/config/file" +require "yaml" + +class LogStash::Config::File::Yaml < LogStash::Config::File + def _get_config(data) + return YAML.load(data) + end +end diff --git a/lib/logstash/config/mixin.rb b/lib/logstash/config/mixin.rb index 2233506e1..22702d43c 100644 --- a/lib/logstash/config/mixin.rb +++ b/lib/logstash/config/mixin.rb @@ -342,11 +342,18 @@ module LogStash::Config::Mixin return false, "Expected boolean, got #{value.inspect}" end - if value.first !~ /^(true|false)$/ - return false, "Expected boolean 'true' or 'false', got #{value.first.inspect}" - end + bool_value = value.first + if !!bool_value == bool_value + # is_a does not work for booleans + # we have Boolean and not a string + result = bool_value + else + if bool_value !~ /^(true|false)$/ + return false, "Expected boolean 'true' or 'false', got #{bool_value.inspect}" + end - result = (value.first == "true") + result = (bool_value == "true") + end when :ipaddr if value.size > 1 # only one value wanted return false, "Expected IPaddr, got #{value.inspect}" From 7c74bcce15261dbd1e22d0bef00ae48a0d53585d Mon Sep 17 00:00:00 2001 From: Stephon Striplin Date: Wed, 28 Nov 2012 11:40:44 -0800 Subject: [PATCH 021/105] coerce scalars into single-valued arrays This makes YAML config files look more sane stdout: debug: true Instead of stdout: debug: - true --- lib/logstash/config/mixin.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/logstash/config/mixin.rb b/lib/logstash/config/mixin.rb index 22702d43c..9cf478382 100644 --- a/lib/logstash/config/mixin.rb +++ b/lib/logstash/config/mixin.rb @@ -302,6 +302,8 @@ module LogStash::Config::Mixin elsif validator.is_a?(Symbol) # TODO(sissel): Factor this out into a coersion method? # TODO(sissel): Document this stuff. + value = [*value] # coerce scalar to array if necessary + case validator when :hash if value.size % 2 == 1 From 6e6f68ff62d9bec17ccd8818374702b76525fce2 Mon Sep 17 00:00:00 2001 From: Stephon Striplin Date: Wed, 28 Nov 2012 12:16:54 -0800 Subject: [PATCH 022/105] accept hashes for the :hash validator The configuration system coerced arrays into hashes, but YAML passes hashes directly. We should attempt no coercion on them. --- lib/logstash/config/mixin.rb | 39 +++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/lib/logstash/config/mixin.rb b/lib/logstash/config/mixin.rb index 9cf478382..94465fb84 100644 --- a/lib/logstash/config/mixin.rb +++ b/lib/logstash/config/mixin.rb @@ -302,25 +302,29 @@ module LogStash::Config::Mixin elsif validator.is_a?(Symbol) # TODO(sissel): Factor this out into a coersion method? # TODO(sissel): Document this stuff. 
- value = [*value] # coerce scalar to array if necessary + value = hash_or_array(value) case validator when :hash - if value.size % 2 == 1 - return false, "This field must contain an even number of items, got #{value.size}" - end + if value.is_a?(Hash) + result = value + else + if value.size % 2 == 1 + return false, "This field must contain an even number of items, got #{value.size}" + end - # Convert the array the config parser produces into a hash. - result = {} - value.each_slice(2) do |key, value| - entry = result[key] - if entry.nil? - result[key] = value - else - if entry.is_a?(Array) - entry << value + # Convert the array the config parser produces into a hash. + result = {} + value.each_slice(2) do |key, value| + entry = result[key] + if entry.nil? + result[key] = value else - result[key] = [entry, value] + if entry.is_a?(Array) + entry << value + else + result[key] = [entry, value] + end end end end @@ -385,5 +389,12 @@ module LogStash::Config::Mixin # Return the validator for later use, like with type coercion. return true, result end # def validate_value + + def hash_or_array(value) + if !value.is_a?(Hash) + value = [*value] # coerce scalar to array if necessary + end + return value + end end # module LogStash::Config::DSL end # module LogStash::Config From a061b234a57bfc3fcf7d8060a0f651b748943e78 Mon Sep 17 00:00:00 2001 From: Stephon Striplin Date: Wed, 28 Nov 2012 16:58:32 -0800 Subject: [PATCH 023/105] add YAML rspec example --- spec/examples/parse-apache-logs-yaml.rb | 63 +++++++++++++++++++++++++ spec/test_utils.rb | 21 +++++++-- 2 files changed, 80 insertions(+), 4 deletions(-) create mode 100644 spec/examples/parse-apache-logs-yaml.rb diff --git a/spec/examples/parse-apache-logs-yaml.rb b/spec/examples/parse-apache-logs-yaml.rb new file mode 100644 index 000000000..876d260cd --- /dev/null +++ b/spec/examples/parse-apache-logs-yaml.rb @@ -0,0 +1,63 @@ +require "test_utils" + +describe "apache common log format" do + extend LogStash::RSpec + + # The logstash config goes here. + # At this time, only filters are supported. + config_yaml <<-CONFIG + filter: + - grok: + pattern: "%{COMBINEDAPACHELOG}" + singles: true + - date: + timestamp: "dd/MMM/yyyy:HH:mm:ss Z" + CONFIG + + # Here we provide a sample log event for the testing suite. + # + # Any filters you define above will be applied the same way the logstash + # agent performs. Inside the 'sample ... ' block the 'subject' will be + # a LogStash::Event object for you to inspect and verify for correctness. + sample '198.151.8.4 - - [29/Aug/2012:20:17:38 -0400] "GET /favicon.ico HTTP/1.1" 200 3638 "-" "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:14.0) Gecko/20100101 Firefox/14.0.1"' do + + # These 'insist' and 'reject' calls use my 'insist' rubygem. + # See http://rubydoc.info/gems/insist for more info. + + # Require that grok does not fail to parse this event. + reject { subject["@tags"] }.include?("_grokparsefailure") + + # Ensure that grok captures certain expected fields. + insist { subject }.include?("agent") + insist { subject }.include?("bytes") + insist { subject }.include?("clientip") + insist { subject }.include?("httpversion") + insist { subject }.include?("timestamp") + insist { subject }.include?("verb") + insist { subject }.include?("response") + insist { subject }.include?("request") + + # Ensure that those fields match expected values from the event. 
+ insist { subject["clientip"] } == "198.151.8.4" + insist { subject["timestamp"] } == "29/Aug/2012:20:17:38 -0400" + insist { subject["verb"] } == "GET" + insist { subject["request"] } == "/favicon.ico" + insist { subject["httpversion"] } == "1.1" + insist { subject["response"] } == "200" + insist { subject["bytes"] } == "3638" + insist { subject["referrer"] } == '"-"' + insist { subject["agent"] } == "\"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:14.0) Gecko/20100101 Firefox/14.0.1\"" + + # Verify date parsing + insist { subject.timestamp } == "2012-08-30T00:17:38.000Z" + end + + sample '61.135.248.195 - - [26/Sep/2012:11:49:20 -0400] "GET /projects/keynav/ HTTP/1.1" 200 18985 "" "Mozilla/5.0 (compatible; YodaoBot/1.0; http://www.yodao.com/help/webmaster/spider/; )"' do + reject { subject["@tags"] }.include?("_grokparsefailure") + insist { subject["clientip"] } == "61.135.248.195" + end + + sample '72.14.164.185 - - [25/Sep/2012:12:05:02 -0400] "GET /robots.txt HTTP/1.1" 200 - "www.brandimensions.com" "BDFetch"' do + reject { subject["@tags"] }.include?("_grokparsefailure") + end +end diff --git a/spec/test_utils.rb b/spec/test_utils.rb index a9bafffaa..ec908b9a3 100644 --- a/spec/test_utils.rb +++ b/spec/test_utils.rb @@ -19,6 +19,11 @@ module LogStash @config_str = configstr end # def config + def config_yaml(configstr) + @config_str = configstr + @is_yaml = true + end + def type(default_type) @default_type = default_type end @@ -31,8 +36,7 @@ module LogStash def sample(event, &block) default_type = @default_type || "default" default_tags = @default_tags || [] - require "logstash/config/file" - config = LogStash::Config::File.new(nil, @config_str) + config = get_config agent = LogStash::Agent.new @inputs, @filters, @outputs = agent.instance_eval { parse_config(config) } [@inputs, @filters, @outputs].flatten.each do |plugin| @@ -96,8 +100,7 @@ module LogStash end # def sample def input(&block) - require "logstash/config/file" - config = LogStash::Config::File.new(nil, @config_str) + config = get_config agent = LogStash::Agent.new it "looks good" do inputs, filters, outputs = agent.instance_eval { parse_config(config) } @@ -105,6 +108,16 @@ module LogStash end end # def input + def get_config + if @is_yaml + require "logstash/config/file/yaml" + config = LogStash::Config::File::Yaml.new(nil, @config_str) + else + require "logstash/config/file" + config = LogStash::Config::File.new(nil, @config_str) + end + end # def get_config + def agent(&block) @agent_count ||= 0 require "logstash/agent" From b5617272e768e957724360ded1529e0287c42224 Mon Sep 17 00:00:00 2001 From: Avishai Ish-Shalom Date: Sun, 2 Dec 2012 00:42:32 +0200 Subject: [PATCH 024/105] Added extra ops for mutate filter --- lib/logstash/filters/mutate.rb | 62 ++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/lib/logstash/filters/mutate.rb b/lib/logstash/filters/mutate.rb index 8f8ac4c97..c10cb11d7 100644 --- a/lib/logstash/filters/mutate.rb +++ b/lib/logstash/filters/mutate.rb @@ -100,6 +100,36 @@ class LogStash::Filters::Mutate < LogStash::Filters::Base # config :lowercase, :validate => :array + # Split a field to an array using a separator character. 
Only works on string fields + # + # Example: + # + # mutate { + # split => ["fieldname", ","] + # } + # + config :split, :validate => :hash + + # Join an array with a separator character, does nothing on non-array fields + # + # Example: + # + # mutate { + # join => ["fieldname", ","] + # } + # + config :join, :validate => :hash + + # Strip whitespaces + # + # Example: + # + # mutate { + # strip => ["field1", "field2"] + # } + # + config :strip, :validate => :array + public def register valid_conversions = %w(string integer float) @@ -139,6 +169,8 @@ class LogStash::Filters::Mutate < LogStash::Filters::Base uppercase(event) if @uppercase lowercase(event) if @lowercase remove(event) if @remove + split(event) if @split + join(event) if @join filter_matched(event) end # def filter @@ -254,4 +286,34 @@ class LogStash::Filters::Mutate < LogStash::Filters::Base end end end # def lowercase + + private + def split(event) + @split.each do |field, separator| + if event[field].is_a?(String) + event[field] = event[field].split(separator) + end + end + end + + private + def join(event) + @join.each do |field, separator| + if event[field].is_a?(Array) + event[field] = event[field].join(separator) + end + end + end + + private + def strip(event) + @strip.each do |field| + if event[field].is_a?(Array) + event[field] = event[field].map{|s| s.strip } + elsif event[field].is_a?(String) + event[field] = event[field].strip + end + end + end + end # class LogStash::Filters::Mutate From 10b8a6d5ce07453da19a104ec9a01a6c2ade973f Mon Sep 17 00:00:00 2001 From: theduke Date: Mon, 3 Dec 2012 00:29:55 +0100 Subject: [PATCH 025/105] Drupal DBLog refactoring: Moved JDBC connection class into own file. Implemented formatting and code style suggestions by Jordan. --- lib/logstash/inputs/drupal_dblog.rb | 107 ++++++------------ .../inputs/drupal_dblog/jdbcconnection.rb | 65 +++++++++++ 2 files changed, 100 insertions(+), 72 deletions(-) create mode 100644 lib/logstash/inputs/drupal_dblog/jdbcconnection.rb diff --git a/lib/logstash/inputs/drupal_dblog.rb b/lib/logstash/inputs/drupal_dblog.rb index 6b5247640..0c84e44a4 100644 --- a/lib/logstash/inputs/drupal_dblog.rb +++ b/lib/logstash/inputs/drupal_dblog.rb @@ -1,75 +1,12 @@ require "date" require "logstash/inputs/base" require "logstash/namespace" -require "logstash/time" # should really use the filters/date.rb bits require "php_serialize" -if RUBY_PLATFORM != 'java' - require "mysql2" +if RUBY_PLATFORM == 'java' + require "logstash/inputs/drupal_dblog/jdbcconnection" else - require "java" - require "rubygems" - require "jdbc/mysql" - - java_import "com.mysql.jdbc.Driver" - - # For JRuby, we need to supply a Connection class with an API like mysql2 - class LogStash::DrupalDblogJavaMysqlConnection - - def initialize(host, username, password, database, port = nil) - port ||= 3306 - - address = "jdbc:mysql://#{host}:#{port}/#{database}" - @connection = java.sql.DriverManager.getConnection(address, username, password) - end - - def query sql - if sql.downcase.scan('select').length > 0 - return select(sql) - else - return update(sql) - end - end - - def select sql - stmt = @connection.createStatement - resultSet = stmt.executeQuery sql - - meta = resultSet.getMetaData - column_count = meta.getColumnCount - - rows = [] - - while resultSet.next - res = {} - - (1..column_count).each do |i| - name = meta.getColumnName i - case meta.getColumnType i - when java.sql.Types::INTEGER - res[name] = resultSet.getInt name - else - res[name] = resultSet.getString name - end - end - - rows << 
res - end - - stmt.close - return rows - end - - def update sql - stmt = @connection.createStatement - stmt.execute_update sql - stmt.close - end - - def close - @connection.close - end - end # class LogStash::DrupalDblogJavaMysqlConnection + require "mysql2" end # Retrieve events from a Drupal installation with DBlog enabled. @@ -81,16 +18,37 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base config_name "drupal_dblog" plugin_status "experimental" + # Specify all drupal databases that you whish to import from. + # This can be as many as you whish. + # The format is a hash, with a unique site name as the key, and a databse + # url as the value. + # + # Example: + # [ + # "site1", "mysql://user1:password@host1.com/databasename", + # "other_site", "mysql://user2:password@otherhost.com/databasename", + # ... + # ] config :databases, :validate => :hash - config :type, :validate => :string, :default => 'watchdog' - - # Add the username in addition to the user id to the event + # By default, the event only contains the current user id as a field. + # If you whish to add the username as an additional field, set this to true. config :add_usernames, :validate => :boolean, :default => false - # Time between checks in minutes + # Time between checks in minutes. config :interval, :validate => :number, :default => 10 + # Label this input with a type. + # Types are used mainly for filter activation. + # + # + # If you create an input with type "foobar", then only filters + # which also have type "foobar" will act on them. + # + # The type is also stored as part of the event itself, so you + # can also use the type to search for in the web interface. + config :type, :validate => :string, :default => 'watchdog' + public def initialize(params) super @@ -219,7 +177,7 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base results.each do |row| event = build_event(row) if event - output_queue << to_event(JSON.dump(event), @sitename) + output_queue << event lastWid = row['wid'].to_s end end @@ -227,6 +185,7 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base set_last_wid(lastWid, initialLastWid == false) rescue Exception => e @logger.info("Mysql error: ", :error => e.message) + throw e end # begin # Close connection @@ -307,6 +266,8 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base row.delete("variables") row.delete("timestamp") + row["severity"] = row["severity"].to_i + if @add_usernames and @usermap.has_key?(row["uid"]) row["user"] = @usermap[row["uid"]] end @@ -320,7 +281,9 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base "@message" => msg } - return entry + event = to_event(JSON.dump(entry), @sitename) + + return event end # def build_event end # class LogStash::Inputs::DrupalDblog diff --git a/lib/logstash/inputs/drupal_dblog/jdbcconnection.rb b/lib/logstash/inputs/drupal_dblog/jdbcconnection.rb new file mode 100644 index 000000000..85aea3d79 --- /dev/null +++ b/lib/logstash/inputs/drupal_dblog/jdbcconnection.rb @@ -0,0 +1,65 @@ +require "java" +require "rubygems" +require "jdbc/mysql" + +java_import "com.mysql.jdbc.Driver" + +# A JDBC mysql connection class. +# The interface is compatible with the mysql2 API. 
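+#
+# Usage sketch (connection parameters are placeholders):
+#
+#   conn = LogStash::DrupalDblogJavaMysqlConnection.new("localhost", "user", "secret", "drupal")
+#   rows = conn.query("SELECT wid, message FROM watchdog")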
+class LogStash::DrupalDblogJavaMysqlConnection + + def initialize(host, username, password, database, port = nil) + port ||= 3306 + + address = "jdbc:mysql://#{host}:#{port}/#{database}" + @connection = java.sql.DriverManager.getConnection(address, username, password) + end # def initialize + + def query(sql) + if sql =~ /select/i + return select(sql) + else + return update(sql) + end + end # def query + + def select(sql) + stmt = @connection.createStatement + resultSet = stmt.executeQuery(sql) + + meta = resultSet.getMetaData + column_count = meta.getColumnCount + + rows = [] + + while resultSet.next + res = {} + + (1..column_count).each do |i| + name = meta.getColumnName(i) + case meta.getColumnType(i) + when java.sql.Types::INTEGER + res[name] = resultSet.getInt(name) + else + res[name] = resultSet.getString(name) + end + end + + rows << res + end + + stmt.close + return rows + end # def select + + def update(sql) + stmt = @connection.createStatement + stmt.execute_update(sql) + stmt.close + end # def update + + def close + @connection.close + end # def close + +end # class LogStash::DrupalDblogJavaMysqlConnection From 5e0b525f4a7c21a1d5b049fa67434027cc0c32fc Mon Sep 17 00:00:00 2001 From: theduke Date: Mon, 3 Dec 2012 00:40:29 +0100 Subject: [PATCH 026/105] Drupal DBLog: more detailed Plugin comments/documentation. --- lib/logstash/inputs/drupal_dblog.rb | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/lib/logstash/inputs/drupal_dblog.rb b/lib/logstash/inputs/drupal_dblog.rb index 0c84e44a4..0305b01e1 100644 --- a/lib/logstash/inputs/drupal_dblog.rb +++ b/lib/logstash/inputs/drupal_dblog.rb @@ -9,10 +9,16 @@ else require "mysql2" end -# Retrieve events from a Drupal installation with DBlog enabled. +# Retrieve watchdog log events from a Drupal installation with DBLog enabled. +# The events are pulled out directly from the database. +# The original events are not deleted, and on every consecutive run only new +# events are pulled. # -# To avoid pulling the same watchdog entry twice, the last pulled wid -# is saved as a variable in the Drupal database. +# The last watchdog event id that was processed is stored in the Drupal +# variable table with the name "logstash_last_wid". Delete this variable or +# set it to 0 if you want to re-import all events. +# +# More info on DBLog: http://drupal.org/documentation/modules/dblog # class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base config_name "drupal_dblog" From fac41402629e73f06759e98fad19c857f294e8e7 Mon Sep 17 00:00:00 2001 From: Rene Lengwinat Date: Wed, 5 Dec 2012 14:09:09 +0100 Subject: [PATCH 027/105] Added debug flag to graphite output plugin --- lib/logstash/outputs/graphite.rb | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/logstash/outputs/graphite.rb b/lib/logstash/outputs/graphite.rb index b0efee006..cc73aea49 100644 --- a/lib/logstash/outputs/graphite.rb +++ b/lib/logstash/outputs/graphite.rb @@ -28,6 +28,9 @@ class LogStash::Outputs::Graphite < LogStash::Outputs::Base # coerced will zero (0) config :metrics, :validate => :hash, :required => true + # Enable debug output (default: false) + config :debug, :validate => :boolean, :default => false + def register connect end # def register @@ -52,8 +55,13 @@ class LogStash::Outputs::Graphite < LogStash::Outputs::Base # Catch exceptions like ECONNRESET and friends, reconnect on failure. 
@metrics.each do |metric, value|
+        @logger.debug("processing metric=#{metric.inspect} value=#{value.inspect}")
+
       message = [event.sprintf(metric), event.sprintf(value).to_f, event.sprintf("%{+%s}")].join(" ")
+
+      @logger.debug("sending #{message.inspect} to #{@host}:#{@port}")
+
       # TODO(sissel): Test error cases. Catch exceptions. Find fortune and glory.
       begin
         @socket.puts(message)
From 88e7de2fca7e77a60436e51a6a3545463fe385d5 Mon Sep 17 00:00:00 2001
From: Rene Lengwinat
Date: Wed, 5 Dec 2012 14:19:57 +0100
Subject: [PATCH 028/105] Fixed timestamp parsing for +%s format

When using the graphite output plugin we noticed that the Date#parse +
to_i invocation wasn't working as expected.
- timestamp got parsed by Date#parse and converted to a unix timestamp,
  which will result in a timestamp pointing to 00:00:00 of that day
- the classloader got confused by having time.rb twice in its path
---
 lib/logstash/event.rb                   | 4 ++--
 lib/logstash/{time.rb => time_addon.rb} | 0
 2 files changed, 2 insertions(+), 2 deletions(-)
 rename lib/logstash/{time.rb => time_addon.rb} (100%)

diff --git a/lib/logstash/event.rb b/lib/logstash/event.rb
index c77845468..71330e5fb 100644
--- a/lib/logstash/event.rb
+++ b/lib/logstash/event.rb
@@ -1,7 +1,7 @@
 require "json"
 require "time"
 require "date"
-require "logstash/time"
+require "logstash/time_addon"
 require "logstash/namespace"
 require "uri"

@@ -230,7 +230,7 @@ class LogStash::Event
       # Got %{+%s}, support for unix epoch time
       if RUBY_ENGINE != "jruby"
         # This is really slow. See LOGSTASH-217
-        Date.parse(self.timestamp).to_i
+        Time.parse(self.timestamp).to_i
       else
         datetime = @@date_parser.parseDateTime(self.timestamp)
         (datetime.getMillis / 1000).to_i
diff --git a/lib/logstash/time.rb b/lib/logstash/time_addon.rb
similarity index 100%
rename from lib/logstash/time.rb
rename to lib/logstash/time_addon.rb
From 90ba4068231ed8dd8876ad5535caf1635187c10d Mon Sep 17 00:00:00 2001
From: Rene Lengwinat
Date: Wed, 5 Dec 2012 23:50:37 +0100
Subject: [PATCH 029/105] Applied feedback from jordansissel

---
 lib/logstash/outputs/graphite.rb | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/logstash/outputs/graphite.rb b/lib/logstash/outputs/graphite.rb
index cc73aea49..207427639 100644
--- a/lib/logstash/outputs/graphite.rb
+++ b/lib/logstash/outputs/graphite.rb
@@ -28,7 +28,7 @@ class LogStash::Outputs::Graphite < LogStash::Outputs::Base
   # coerced will zero (0)
   config :metrics, :validate => :hash, :required => true

-  # Enable debug output (default: false)
+  # Enable debug output
   config :debug, :validate => :boolean, :default => false

   def register
@@ -55,12 +55,12 @@ class LogStash::Outputs::Graphite < LogStash::Outputs::Base
     # Catch exceptions like ECONNRESET and friends, reconnect on failure.

     @metrics.each do |metric, value|
-      @logger.debug("processing metric=#{metric.inspect} value=#{value.inspect}")
+      @logger.debug("processing", :metric => metric, :value => value)

       message = [event.sprintf(metric), event.sprintf(value).to_f, event.sprintf("%{+%s}")].join(" ")

-      @logger.debug("sending #{message.inspect} to #{@host}:#{@port}")
+      @logger.debug("Sending carbon message", :message => message, :host => @host, :port => @port)

       # TODO(sissel): Test error cases. Catch exceptions. Find fortune and glory.
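       # One plausible shape for that TODO, sketched only as a comment here
       # (the rescue list is an assumption, not part of this patch):
       #   begin
       #     @socket.puts(message)
       #   rescue Errno::EPIPE, Errno::ECONNRESET => e
       #     @logger.warn("Connection to carbon lost, reconnecting", :exception => e)
       #     connect
       #     retry
       #   end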
begin From 32d3860a8c9dce097d539be4d7df7cfcb9b85822 Mon Sep 17 00:00:00 2001 From: Wiibaa Date: Mon, 10 Dec 2012 09:09:50 +0100 Subject: [PATCH 030/105] allow teardown in tcp input --- lib/logstash/inputs/tcp.rb | 39 ++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/lib/logstash/inputs/tcp.rb b/lib/logstash/inputs/tcp.rb index 7f63b0f8a..faecf1e91 100644 --- a/lib/logstash/inputs/tcp.rb +++ b/lib/logstash/inputs/tcp.rb @@ -67,10 +67,10 @@ class LogStash::Inputs::Tcp < LogStash::Inputs::Base end # loop do rescue => e @logger.debug("Closing connection", :client => socket.peer, - :exception => e, :backtrace => e.backtrace) + :exception => e, :backtrace => e.backtrace) rescue Timeout::Error @logger.debug("Closing connection after read timeout", - :client => socket.peer) + :client => socket.peer) end # begin begin @@ -95,15 +95,26 @@ class LogStash::Inputs::Tcp < LogStash::Inputs::Base if server? loop do # Start a new thread for each connection. - Thread.start(@server_socket.accept) do |s| - # TODO(sissel): put this block in its own method. + begin + Thread.start(@server_socket.accept) do |s| + # TODO(sissel): put this block in its own method. - # monkeypatch a 'peer' method onto the socket. - s.instance_eval { class << self; include ::LogStash::Util::SocketPeer end } - @logger.debug("Accepted connection", :client => s.peer, - :server => "#{@host}:#{@port}") - handle_socket(s, output_queue, "tcp://#{@host}:#{@port}/client/#{s.peer}") - end # Thread.start + # monkeypatch a 'peer' method onto the socket. + s.instance_eval { class << self; include ::LogStash::Util::SocketPeer end } + @logger.debug("Accepted connection", :client => s.peer, + :server => "#{@host}:#{@port}") + handle_socket(s, output_queue, "tcp://#{@host}:#{@port}/client/#{s.peer}") + + end # Thread.start + rescue IOError + if @interrupted + #Intended shutdown, get out of the loop + break + else + # Else it was a genuine IOError caused by something else, so propagate it up.. + raise + end + end end # loop else loop do @@ -114,4 +125,12 @@ class LogStash::Inputs::Tcp < LogStash::Inputs::Base end # loop end end # def run + + public + def teardown + if server? 
+ @interrupted = true + @server_socket.close + end + end # def teardown end # class LogStash::Inputs::Tcp From 1bad64e4b1e8cca48e8140433ae5bc700f917885 Mon Sep 17 00:00:00 2001 From: Wiibaa Date: Mon, 10 Dec 2012 09:11:11 +0100 Subject: [PATCH 031/105] initial test on tcp input with charsets --- spec/inputs/tcp.rb | 210 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 210 insertions(+) create mode 100644 spec/inputs/tcp.rb diff --git a/spec/inputs/tcp.rb b/spec/inputs/tcp.rb new file mode 100644 index 000000000..044d53810 --- /dev/null +++ b/spec/inputs/tcp.rb @@ -0,0 +1,210 @@ +# coding: utf-8 +require "test_utils" +require "socket" + +# Not sure why but each test need a different port +# TODO: timeout around the thread.join +describe "inputs/tcp" do + extend LogStash::RSpec + + describe "read json_event" do + + event_count = 10 + port = 5511 + config <<-CONFIG + input { + tcp { + type => "blah" + port => #{port} + format => "json_event" + } + } + CONFIG + + th = Thread.current + input do |plugins| + sequence = 0 + tcp = plugins.first + output = Shiftback.new do |event| + sequence += 1 + tcp.teardown if sequence == event_count + begin + insist { event["sequence"] } == sequence -1 + insist { event["message"]} == "Hello ü Û" + insist { event["message"].encoding } == Encoding.find("UTF-8") + rescue Exception => failure + # Get out of the threads nets + th.raise failure + end + end + #Prepare input + tcp.register + #Run input in a separate thread + thread = Thread.new(tcp, output) do |*args| + tcp.run(output) + end + #Send events from clients sockets + event_count.times do |value| + client_socket = TCPSocket.new("0.0.0.0", port) + event = LogStash::Event.new("@fields" => { "message" => "Hello ü Û", "sequence" => value }) + client_socket.puts event.to_json + client_socket.close + # micro sleep to ensure sequencing + sleep(0.1) + end + #wait for input termination + thread.join + end # input + end + + describe "read plain events with system defaults, should works on UTF-8 system" do + event_count = 10 + port = 5512 + config <<-CONFIG + input { + tcp { + type => "blah" + port => #{port} + } + } + CONFIG + + th = Thread.current + input do |plugins| + sequence = 0 + tcp = plugins.first + output = Shiftback.new do |event| + sequence += 1 + tcp.teardown if sequence == event_count + begin + insist { event.message } == "Hello ü Û" + insist { event.message.encoding } == Encoding.find("UTF-8") + rescue Exception => failure + # Get out of the threads nets + th.raise failure + end + end + + tcp.register + #Run input in a separate thread + thread = Thread.new(tcp, output) do |*args| + tcp.run(output) + end + #Send events from clients sockets + event_count.times do |value| + client_socket = TCPSocket.new("0.0.0.0", port) + client_socket.write "Hello ü Û" + client_socket.close + # micro sleep to ensure sequencing + sleep(0.1) + end + #wait for input termination + thread.join + end # input + end + + describe "read plain events with UTF-8 like charset, to prove that something is wrong with previous failing test" do + event_count = 10 + port = 5514 + config <<-CONFIG + input { + tcp { + type => "blah" + port => #{port} + charset => "CP65001" #that's just an alias of UTF-8 + } + } + CONFIG + + th = Thread.current + # Catch aborting reception threads + input do |plugins| + sequence = 0 + tcp = plugins.first + output = Shiftback.new do |event| + sequence += 1 + tcp.teardown if sequence == event_count + begin + insist { event.message } == "Hello ü Û" + insist { event.message.encoding } == 
Encoding.find("UTF-8") + rescue Exception => failure + # Get out of the threads nets + th.raise failure + end + end + + tcp.register + #Run input in a separate thread + + thread = Thread.new(tcp, output) do |*args| + tcp.run(output) + end + #Send events from clients sockets + event_count.times do |value| + client_socket = TCPSocket.new("0.0.0.0", port) + # puts "Encoding of client", client_socket.external_encoding, client_socket.internal_encoding + client_socket.write "Hello ü Û" + client_socket.close + # micro sleep to ensure sequencing, TODO must think of a cleaner solution + sleep(0.1) + end + #wait for input termination + #TODO: timeout + thread.join + end # input + end + + describe "read plain events with ISO-8859-1 charset" do + event_count = 10 + port = 5513 + charset = "ISO-8859-1" + config <<-CONFIG + input { + tcp { + type => "blah" + port => #{port} + charset => "#{charset}" + } + } + CONFIG + + th = Thread.current + input do |plugins| + sequence = 0 + tcp = plugins.first + output = Shiftback.new do |event| + sequence += 1 + tcp.teardown if sequence == event_count + begin + insist { event.message } == "Hello ü Û" + insist { event.message.encoding } == Encoding.find("UTF-8") + rescue Exception => failure + # Get out of the threads nets + th.raise failure + end + end + + tcp.register + #Run input in a separate thread + + thread = Thread.new(tcp, output) do |*args| + tcp.run(output) + end + #Send events from clients sockets + event_count.times do |value| + client_socket = TCPSocket.new("0.0.0.0", port) + #Force client encoding + client_socket.set_encoding(charset) + client_socket.write "Hello ü Û" + client_socket.close + # micro sleep to ensure sequencing + sleep(0.1) + end + #wait for input termination + thread.join + end # input + end +end + + + From a52e8d44ba9cfd017bb7bcd2aad994fd3ef05b9b Mon Sep 17 00:00:00 2001 From: Wiibaa Date: Mon, 10 Dec 2012 13:46:48 +0100 Subject: [PATCH 032/105] jruby 1.7 already bundle joda 2.1 --- Makefile | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index 9aa4c0e35..3040bfc3c 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,6 @@ # JRUBY_VERSION=1.7.0 ELASTICSEARCH_VERSION=0.19.10 -JODA_VERSION=2.1 #VERSION=$(shell ruby -r./lib/logstash/version -e 'puts LOGSTASH_VERSION') VERSION=$(shell awk -F\" '/LOGSTASH_VERSION/ {print $$2}' lib/logstash/version.rb ) @@ -16,7 +15,6 @@ JRUBY_CMD=java -jar $(JRUBY) JRUBYC=$(WITH_JRUBY) jrubyc ELASTICSEARCH_URL=http://github.com/downloads/elasticsearch/elasticsearch ELASTICSEARCH=vendor/jar/elasticsearch-$(ELASTICSEARCH_VERSION) -JODA=vendor/jar/joda-time-$(JODA_VERSION)/joda-time-$(JODA_VERSION).jar GEOIP=vendor/geoip/GeoLiteCity.dat GEOIP_URL=http://logstash.objects.dreamhost.com/maxmind/GeoLiteCity-2012-11-09.dat.gz PLUGIN_FILES=$(shell git ls-files | egrep '^lib/logstash/(inputs|outputs|filters)/' | egrep -v '/(base|threadable).rb$$|/inputs/ganglia/') @@ -96,7 +94,7 @@ $(JRUBY): | vendor/jar vendor/jar/elasticsearch-$(ELASTICSEARCH_VERSION).tar.gz: | wget-or-curl vendor/jar @echo "=> Fetching elasticsearch" $(QUIET)$(DOWNLOAD_COMMAND) $@ $(ELASTICSEARCH_URL)/elasticsearch-$(ELASTICSEARCH_VERSION).tar.gz - + vendor/jar/graphtastic-rmiclient.jar: | wget-or-curl vendor/jar @echo "=> Fetching graphtastic rmi client jar" $(QUIET)$(DOWNLOAD_COMMAND) $@ http://cloud.github.com/downloads/NickPadilla/GraphTastic/graphtastic-rmiclient.jar @@ -108,12 +106,6 @@ $(ELASTICSEARCH): $(ELASTICSEARCH).tar.gz | vendor/jar $(QUIET)tar -C $(shell dirname $@) -xf $< 
$(TAR_OPTS) --exclude '*sigar*' \ 'elasticsearch-$(ELASTICSEARCH_VERSION)/lib/*.jar' -vendor/jar/joda-time-$(JODA_VERSION)-dist.tar.gz: | wget-or-curl vendor/jar - $(DOWNLOAD_COMMAND) $@ "http://downloads.sourceforge.net/project/joda-time/joda-time/$(JODA_VERSION)/joda-time-$(JODA_VERSION)-dist.tar.gz" - -vendor/jar/joda-time-$(JODA_VERSION)/joda-time-$(JODA_VERSION).jar: vendor/jar/joda-time-$(JODA_VERSION)-dist.tar.gz | vendor/jar - tar -C vendor/jar -zxf $< joda-time-$(JODA_VERSION)/joda-time-$(JODA_VERSION).jar - vendor/geoip: | vendor $(QUIET)mkdir $@ @@ -132,7 +124,7 @@ vendor-gems: | vendor/bundle vendor/bundle: | vendor $(JRUBY) @echo "=> Installing gems to $@..." #$(QUIET)GEM_HOME=$(GEM_HOME) $(JRUBY_CMD) --1.9 $(GEM_HOME)/bin/bundle install --deployment - $(QUIET)GEM_HOME=./vendor/bundle/jruby/1.9/ GEM_PATH= $(JRUBY_CMD) --1.9 ./gembag.rb logstash.gemspec + $(QUIET)GEM_HOME=./vendor/bundle/jruby/1.9/ GEM_PATH= $(JRUBY_CMD) --1.9 ./gembag.rb logstash.gemspec @# Purge any junk that fattens our jar without need! @# The riak gem includes previous gems in the 'pkg' dir. :( -rm -rf $@/jruby/1.9/gems/riak-client-1.0.3/pkg @@ -152,7 +144,7 @@ build/ruby: | build # TODO(sissel): Skip sigar? # Run this one always? Hmm.. .PHONY: build/monolith -build/monolith: $(ELASTICSEARCH) $(JRUBY) $(JODA) $(GEOIP) vendor-gems | build +build/monolith: $(ELASTICSEARCH) $(JRUBY) $(GEOIP) vendor-gems | build build/monolith: compile copy-ruby-files vendor/jar/graphtastic-rmiclient.jar -$(QUIET)mkdir -p $@ @# Unpack all the 3rdparty jars and any jars in gems @@ -164,10 +156,6 @@ build/monolith: compile copy-ruby-files vendor/jar/graphtastic-rmiclient.jar $(QUIET)cp -r $$PWD/vendor/bundle/jruby/1.9/gems/jruby-openss*/lib/shared/openssl/* $@/openssl $(QUIET)cp -r $$PWD/vendor/bundle/jruby/1.9/gems/jruby-openss*/lib/shared/jopenssl/* $@/jopenssl $(QUIET)cp -r $$PWD/vendor/bundle/jruby/1.9/gems/jruby-openss*/lib/shared/openssl.rb $@/openssl.rb - @# Make sure joda-time gets unpacked last, so it overwrites the joda jruby - @# ships with. - $(QUIET)find $$PWD/vendor/jar/joda-time-$(JODA_VERSION) -name '*.jar' \ - | (cd $@; xargs -tn1 jar xf) @# Purge any extra files we don't need in META-INF (like manifests and @# signature files) -$(QUIET)rm -f $@/META-INF/*.LIST From 23e4305b55b1edb794daa6b1ef10ac3c1acd6a70 Mon Sep 17 00:00:00 2001 From: Wiibaa Date: Mon, 10 Dec 2012 21:50:47 +0100 Subject: [PATCH 033/105] fix indent --- lib/logstash/inputs/tcp.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/logstash/inputs/tcp.rb b/lib/logstash/inputs/tcp.rb index faecf1e91..a94f84fcd 100644 --- a/lib/logstash/inputs/tcp.rb +++ b/lib/logstash/inputs/tcp.rb @@ -111,7 +111,7 @@ class LogStash::Inputs::Tcp < LogStash::Inputs::Base #Intended shutdown, get out of the loop break else - # Else it was a genuine IOError caused by something else, so propagate it up.. + # Else it was a genuine IOError caused by something else, so propagate it up.. raise end end From 0b4bcdd4ecbd087cb696e7c6cde51b3a33160ffd Mon Sep 17 00:00:00 2001 From: James Turnbull Date: Mon, 10 Dec 2012 17:20:05 -0500 Subject: [PATCH 034/105] Updated elasticsearch output to dynamically identify ElasticSearch version. 
--- lib/logstash/outputs/elasticsearch.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/logstash/outputs/elasticsearch.rb b/lib/logstash/outputs/elasticsearch.rb index e01afbd7d..b1dd3c0a4 100644 --- a/lib/logstash/outputs/elasticsearch.rb +++ b/lib/logstash/outputs/elasticsearch.rb @@ -5,7 +5,7 @@ require "logstash/outputs/base" # output for logstash. If you plan on using the logstash web interface, you'll # need to use this output. # -# *NOTE*: The elasticsearch client is version 0.19.8. Your elasticsearch +# *NOTE*: The elasticsearch client is version %ELASTICSEARCH_VERSION%. Your elasticsearch # cluster must be running 0.19.x for API compatibility. # # If you want to set other elasticsearch options that are not exposed directly From 63ef8b4f362f1708110716f78e6dada2868869a0 Mon Sep 17 00:00:00 2001 From: Nick Ethier Date: Mon, 10 Dec 2012 22:40:33 -0700 Subject: [PATCH 035/105] Add source_host to metrics filter event --- lib/logstash/filters/metrics.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/logstash/filters/metrics.rb b/lib/logstash/filters/metrics.rb index 7623eb1a3..6e8c5db89 100644 --- a/lib/logstash/filters/metrics.rb +++ b/lib/logstash/filters/metrics.rb @@ -14,7 +14,8 @@ class LogStash::Filters::Metrics < LogStash::Filters::Base def register require "metriks" - + require "socket" + @metric_meters = Hash.new { |h,k| h[k] = Metriks.meter(k) } @metric_timers = Hash.new { |h,k| h[k] = Metriks.timer(k) } end # def register @@ -33,6 +34,7 @@ class LogStash::Filters::Metrics < LogStash::Filters::Base def flush event = LogStash::Event.new + event.source_host = Socket.gethostname @metric_meters.each do |name, metric| event["#{name}.count"] = metric.count event["#{name}.rate_1m"] = metric.one_minute_rate From aa4c29a86606936324064202ad7989a41c0b099a Mon Sep 17 00:00:00 2001 From: Wiibaa Date: Tue, 11 Dec 2012 09:47:14 +0100 Subject: [PATCH 036/105] allow custom separator in csv filter --- lib/logstash/filters/csv.rb | 10 ++++++++-- spec/filters/csv.rb | 15 +++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/lib/logstash/filters/csv.rb b/lib/logstash/filters/csv.rb index bc878bc09..663f6303e 100644 --- a/lib/logstash/filters/csv.rb +++ b/lib/logstash/filters/csv.rb @@ -23,12 +23,17 @@ class LogStash::Filters::CSV < LogStash::Filters::Base # Optional. config :fields, :validate => :array, :default => [] + # Define the column separator value. If this is not specified the default + # is a comma ',' + # Optional. 
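+  #
+  # A minimal usage sketch (the separator and sample line are illustrative):
+  #
+  #   filter {
+  #     csv {
+  #       col_sep => ";"
+  #     }
+  #   }
+  #
+  # would split "big,bird;sesame street" into "big,bird" and "sesame street".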
+ config :col_sep, :validate => :string, :default => "," + public def register @csv = {} @config.each do |field, dest| - next if (RESERVED + ["fields"]).member?(field) + next if (RESERVED + ["fields", "col_sep"]).member?(field) @csv[field] = dest end @@ -60,7 +65,7 @@ class LogStash::Filters::CSV < LogStash::Filters::Base raw = event[key].first begin - values = CSV.parse_line(raw) + values = CSV.parse_line(raw, {:col_sep => @col_sep}) data = {} values.each_index do |i| field_name = @fields[i] || "field#{i+1}" @@ -82,3 +87,4 @@ class LogStash::Filters::CSV < LogStash::Filters::Base @logger.debug("Event after csv filter", :event => event) end # def filter end # class LogStash::Filters::Csv + diff --git a/spec/filters/csv.rb b/spec/filters/csv.rb index 238d7f533..f772bb315 100644 --- a/spec/filters/csv.rb +++ b/spec/filters/csv.rb @@ -38,6 +38,21 @@ describe LogStash::Filters::CSV do end end + describe "custom separator" do + config <<-CONFIG + filter { + csv { + col_sep => ";" + } + } + CONFIG + + sample "big,bird;sesame street" do + insist { subject["field1"] } == "big,bird" + insist { subject["field2"] } == "sesame street" + end + end + describe "parse csv with more data than defined field names" do config <<-CONFIG filter { From a52677946b1935591e848f30e380a5540777c38a Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Thu, 6 Dec 2012 02:02:32 -0800 Subject: [PATCH 037/105] read response body --- lib/logstash/outputs/http.rb | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/logstash/outputs/http.rb b/lib/logstash/outputs/http.rb index 94efd02dc..480cf4b23 100644 --- a/lib/logstash/outputs/http.rb +++ b/lib/logstash/outputs/http.rb @@ -101,12 +101,16 @@ class LogStash::Outputs::Http < LogStash::Outputs::Base else request.body = encode(evt) end - puts request - puts - puts request.body + #puts "#{request.port} / #{request.protocol}" + #puts request + #puts + #puts request.body response = @agent.execute(request) - puts response - response.read_body { |c| puts c } + + # Consume body to let this connection be reused + rbody = "" + response.read_body { |c| rbody << c } + #puts rbody rescue Exception => e @logger.warn("Unhandled exception", :request => request, :response => response, :exception => e, :stacktrace => e.backtrace) end From 34c6185239a1e7328945257a4a053d2ac00bfa05 Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Sat, 8 Dec 2012 22:31:06 -0800 Subject: [PATCH 038/105] - use the correct attribute for format --- docs/tutorials/getting-started-centralized.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/tutorials/getting-started-centralized.md b/docs/tutorials/getting-started-centralized.md index 0be15a1c3..530653819 100644 --- a/docs/tutorials/getting-started-centralized.md +++ b/docs/tutorials/getting-started-centralized.md @@ -148,8 +148,9 @@ sample config based on the previous section. 
Save this as `indexer.conf` # these settings should match the output of the agent data_type => "list" key => "logstash" + # We use json_event here since the sender is a logstash agent - message_format => "json_event" + format => "json_event" } } From d7869d5e891774032eb466798d08fc65dbcc7427 Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Sat, 8 Dec 2012 22:38:53 -0800 Subject: [PATCH 039/105] - fix typo --- lib/logstash/filters/kv.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/logstash/filters/kv.rb b/lib/logstash/filters/kv.rb index d38093691..972d36501 100644 --- a/lib/logstash/filters/kv.rb +++ b/lib/logstash/filters/kv.rb @@ -77,7 +77,7 @@ class LogStash::Filters::KV < LogStash::Filters::Base when Array; value.each { |v| kv_keys = parse(v, event, kv_keys) } else @logger.warn("kv filter has no support for this type of data", - :type => value.type, :value => value) + :type => value.class, :value => value) end # case value end # If we have any keys, create/append the hash From c46a8627fbcd2d3962bbf885556ee8cfad412f40 Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Tue, 11 Dec 2012 01:22:51 -0800 Subject: [PATCH 040/105] - Add test to reproduce LOGSTASH-757 --- spec/filters/mutate.rb | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/spec/filters/mutate.rb b/spec/filters/mutate.rb index c94b556e4..5f31b6a62 100644 --- a/spec/filters/mutate.rb +++ b/spec/filters/mutate.rb @@ -133,7 +133,7 @@ describe LogStash::Filters::Mutate do end end - describe "regression - check grok+mutate" do + describe "regression - mutate should lowercase a field created by grok" do config <<-CONFIG filter { grok { @@ -149,4 +149,20 @@ describe LogStash::Filters::Mutate do insist { subject["foo"] } == ['hello'] end end + + describe "LOGSTASH-757: rename should do nothing with a missing field" do + config <<-CONFIG + filter { + mutate { + rename => [ "nosuchfield", "hello" ] + } + } + CONFIG + + sample "whatever" do + reject { subject.fields }.include?("nosuchfield") + reject { subject.fields }.include?("hello") + end + end end + From 5b401aedadb2562d864815b60bab9d384d842457 Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Tue, 11 Dec 2012 01:25:06 -0800 Subject: [PATCH 041/105] - be explicit that Event#remove returns the value removed. --- lib/logstash/event.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/logstash/event.rb b/lib/logstash/event.rb index c77845468..b92573963 100644 --- a/lib/logstash/event.rb +++ b/lib/logstash/event.rb @@ -190,13 +190,13 @@ class LogStash::Event end # event.fields.each end # def append - # Remove a field + # Remove a field. Returns the value of that field when deleted public def remove(field) if @data.has_key?(field) - @data.delete(field) + return @data.delete(field) else - @data["@fields"].delete(field) + return @data["@fields"].delete(field) end end # def remove From a0cfc1d110f643c3eef36bca1ab0727b89d042e0 Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Tue, 11 Dec 2012 01:25:18 -0800 Subject: [PATCH 042/105] - Don't do any rename action if the field doesn't exist (LOGSTASH-757) The test previously written to verify this bug now passes. 
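
With this change, a config like the one in the new spec,

    filter {
      mutate {
        rename => [ "nosuchfield", "hello" ]
      }
    }

now leaves an event without a "nosuchfield" field completely untouched,
instead of creating a "hello" field holding nil.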
---
 lib/logstash/filters/mutate.rb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/logstash/filters/mutate.rb b/lib/logstash/filters/mutate.rb
index 33ff575be..518d50c15 100644
--- a/lib/logstash/filters/mutate.rb
+++ b/lib/logstash/filters/mutate.rb
@@ -155,8 +155,8 @@ class LogStash::Filters::Mutate < LogStash::Filters::Base
   def rename(event)
     # TODO(sissel): use event.sprintf on the field names?
     @rename.each do |old, new|
-      event[new] = event[old]
-      event.remove(old)
+      next unless event.include?(old)
+      event[new] = event.remove(old)
     end
   end # def rename

From 732b522b3224976231d11f48d352ec7ec0802e3c Mon Sep 17 00:00:00 2001
From: Jordan Sissel
Date: Tue, 11 Dec 2012 01:26:32 -0800
Subject: [PATCH 043/105] - update for LOGSTASH-757

---
 CHANGELOG | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG b/CHANGELOG
index dd2e3f82c..10b068f0d 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -31,6 +31,8 @@
     matched. (LOGSTASH-705)
   - improvement: kv: Adds field_split, value_split, prefix, and container
     settings. (#225, patch by Alex Wheeler)
+  - mutate: rename on a nonexistent field now does nothing as expected.
+    (LOGSTASH-757)

 ## outputs
   - bugfix: zeromq: 'topology' is now a required setting
From d246d28bbde2986c2538631c7326b0d774b9cc12 Mon Sep 17 00:00:00 2001
From: Wiibaa
Date: Tue, 11 Dec 2012 11:23:12 +0100
Subject: [PATCH 044/105] because english is kinda expressive enough language

---
 lib/logstash/filters/csv.rb | 6 +++---
 spec/filters/csv.rb         | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/logstash/filters/csv.rb b/lib/logstash/filters/csv.rb
index 663f6303e..d3a5da9cd 100644
--- a/lib/logstash/filters/csv.rb
+++ b/lib/logstash/filters/csv.rb
@@ -26,14 +26,14 @@ class LogStash::Filters::CSV < LogStash::Filters::Base
   # Define the column separator value. If this is not specified the default
   # is a comma ','
   # Optional.
-  config :col_sep, :validate => :string, :default => ","
+  config :separator, :validate => :string, :default => ","

   public
   def register
     @csv = {}

     @config.each do |field, dest|
-      next if (RESERVED + ["fields", "col_sep"]).member?(field)
+      next if (RESERVED + ["fields", "separator"]).member?(field)
       @csv[field] = dest
     end
@@ -65,7 +65,7 @@ class LogStash::Filters::CSV < LogStash::Filters::Base
     raw = event[key].first

     begin
-      values = CSV.parse_line(raw, {:col_sep => @col_sep})
+      values = CSV.parse_line(raw, {:col_sep => @separator})
       data = {}
       values.each_index do |i|
         field_name = @fields[i] || "field#{i+1}"
diff --git a/spec/filters/csv.rb b/spec/filters/csv.rb
index f772bb315..3de9e6be2 100644
--- a/spec/filters/csv.rb
+++ b/spec/filters/csv.rb
@@ -42,7 +42,7 @@ describe LogStash::Filters::CSV do
     config <<-CONFIG
       filter {
         csv {
-          col_sep => ";"
+          separator => ";"
         }
       }
     CONFIG
From b9c94ec6562d740ae0de8e6fdfd22846fadf86b8 Mon Sep 17 00:00:00 2001
From: James Turnbull
Date: Tue, 11 Dec 2012 09:48:03 -0500
Subject: [PATCH 045/105] Fixed link to Lucene query docs

---
 lib/logstash/web/views/search/results.haml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/logstash/web/views/search/results.haml b/lib/logstash/web/views/search/results.haml
index 95157b3a8..606d3bb21 100644
--- a/lib/logstash/web/views/search/results.haml
+++ b/lib/logstash/web/views/search/results.haml
@@ -12,7 +12,7 @@
       %i
         You can click on any search result to see what kind of fields we
         know about for that event. You can also click on the graph to zoom
         to that time period.
-        The query language is that of Lucene's string query (docs).
+ The query language is that of Lucene's string query (docs). #visual From 5a4f654bda4a50d8045967e2fb8d0306ba880e7d Mon Sep 17 00:00:00 2001 From: Louis Zuckerman Date: Tue, 11 Dec 2012 09:55:27 -0500 Subject: [PATCH 046/105] Added AWS CloudWatch output plugin - LOGSTASH-461 --- lib/logstash/outputs/cloudwatch.rb | 255 +++++++++++++++++++++++++++++ logstash.gemspec | 1 + 2 files changed, 256 insertions(+) create mode 100644 lib/logstash/outputs/cloudwatch.rb diff --git a/lib/logstash/outputs/cloudwatch.rb b/lib/logstash/outputs/cloudwatch.rb new file mode 100644 index 000000000..8d42d9624 --- /dev/null +++ b/lib/logstash/outputs/cloudwatch.rb @@ -0,0 +1,255 @@ +require "logstash/outputs/base" +require "logstash/namespace" + +require "thread" +require "rufus/scheduler" +require "aws" + +# This output lets you aggregate and send metric data to AWS CloudWatch +# +# Configuration is done partly in this output and partly using fields added +# to your events by other input & filter plugins. +# +# Events which do not have a "CW_metric" field will be ignored, so to send +# events to CloudWatch you must at least add the "CW_metric" field to the +# desired events (using grep for example) +# +# Other fields which can be added to events to modify the behavior of this +# plugin are, "CW_namespace", "CW_unit", "CW_value", and the pair of +# "CW_dimensionName" & "CW_dimensionValue". All of these field names are +# configurable in this output. See below for details. +# +# You can read more about AWS CloudWatch here: http://aws.amazon.com/cloudwatch/ +class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base + config_name "cloudwatch" + plugin_status "experimental" + + # The AWS Region to send logs to. + config :region, :validate => :string, :default => "us-east-1" + + # The AWS Access Key ID + config :access_key, :validate => :string, :required => true + + # The AWS Secret Access Key + config :secret_key, :validate => :string, :required => true + + # How often to send data to CloudWatch + # This does not affect the event timestamps, events will always have their + # actual timestamp (to-the-minute) sent to CloudWatch. + # + # Increasing this may reduce the number of CloudWatch API calls, which would + # reduce costs in heavy usage. 
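+  # For example (the values here are illustrative rufus-scheduler strings):
+  # "30s" batches twice a minute, "5m" every five minutes, "1h" hourly.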
+ # + # See here for allowed values: https://github.com/jmettraux/rufus-scheduler#the-time-strings-understood-by-rufus-scheduler + config :timeframe, :validate => :string, :default => "1m" + + # The default namespace to use for events which do not have a "CW_namespace" field + config :namespace, :validate => :string, :default => "Logstash" + + # The name of the field used to set the metric name on an event + config :field_metric, :validate => :string, :default => "CW_metric" + + # The name of the field used to set a different namespace per event + config :field_namespace, :validate => :string, :default => "CW_namespace" + + # The name of the field used to set the units on an event metric + config :field_unit, :validate => :string, :default => "CW_unit" + + # The name of the field used to set the value (float) on an event metric + config :field_value, :validate => :string, :default => "CW_value" + + # The name of the field used to set the dimension name on an event metric + config :field_dimensionname, :validate => :string, :default => "CW_dimensionName" + + # The name of the field used to set the dimension value on an event metric + config :field_dimensionvalue, :validate => :string, :default => "CW_dimensionValue" + + # aggregate_key members + DIM_NAME = "dimensionName" + DIM_VALUE = "dimensionValue" + TIMESTAMP = "timestamp" + METRIC = "metric" + COUNT = "count" + UNIT = "unit" + SUM = "sum" + MIN = "min" + MAX = "max" + + # Units + COUNT_UNIT = "Count" + NONE = "None" + + public + def register + AWS.config( + :access_key_id => @access_key, + :secret_access_key => @secret_key, + :cloud_watch_endpoint => "monitoring.#{@region}.amazonaws.com" + ) + @cw = AWS::CloudWatch.new + + @valid_units = ["Seconds", "Microseconds", "Milliseconds", "Bytes", "Kilobytes", "Megabytes", "Gigabytes", "Terabytes", "Bits", "Kilobits", "Megabits", "Gigabits", "Terabits", "Percent", COUNT_UNIT, "Bytes/Second", "Kilobytes/Second", "Megabytes/Second", "Gigabytes/Second", "Terabytes/Second", "Bits/Second", "Kilobits/Second", "Megabits/Second", "Gigabits/Second", "Terabits/Second", "Count/Second", NONE] + + @event_queue = Queue.new + @scheduler = Rufus::Scheduler.start_new + @job = @scheduler.every @timeframe do + @logger.info("Scheduler Activated") + send(aggregate({})) + end + end + + public + def receive(event) + return unless output?(event) + + if event == LogStash::SHUTDOWN + job.trigger() + job.unschedule() + @logger.info("CloudWatch aggregator thread shutdown.") + finished + return + end + + return unless event.fields.member?(@field_metric) + + @logger.info("Queueing event", :event => event) + @event_queue << event + end # def receive + + private + def send(aggregates) + aggregates.each { |namespace, data| + @logger.info("Namespace, data: ", :namespace => namespace, :data => data) + metric_data = [] + data.each { |aggregate_key, stats| + new_data = { + :metric_name => aggregate_key[METRIC], + :timestamp => aggregate_key[TIMESTAMP], + :unit => aggregate_key[UNIT], + :statistic_values => { + :sample_count => stats[COUNT], + :sum => stats[SUM], + :minimum => stats[MIN], + :maximum => stats[MAX], + } + } + if (aggregate_key[DIM_NAME] != nil && aggregate_key[DIM_VALUE] != nil) + new_data[:dimensions] = [{ + :name => aggregate_key[DIM_NAME], + :value => aggregate_key[DIM_VALUE] + }] + end + metric_data << new_data + } # data.each + + begin + response = @cw.put_metric_data( + :namespace => namespace, + :metric_data => metric_data + ) + @logger.info("Sent data to AWS CloudWatch OK") + rescue Exception => e + 
@logger.warn("Failed to send to AWS CloudWatch", :exception => e, :namespace => namespace, :metric_data => metric_data) + break + end + } # aggregates.each + return aggregates + end + + # def send + + private + def aggregate(aggregates) + + @logger.info("QUEUE SIZE ", :queuesize => @event_queue.size) + until @event_queue.empty? do + begin + count(aggregates, @event_queue.pop(true)) + rescue Exception => e + @logger.warn("Exception! Breaking count loop", :exception => e) + break + end + end + return aggregates + end + + private + def count(aggregates, event) + + # If the event doesnt declare a namespace, use the default + ns = field(event, @field_namespace) + namespace = (!ns) ? @namespace : ns + + unit = field(event, @field_unit) + value = field(event, @field_value) + + # If neither Units nor Value is set, then we simply count the event + if (!unit && !value) + unit = COUNT + value = "1" + end + + # If Units is still not set (or is invalid), then we know Value must BE set, so set Units to "None" + # And warn about misconfiguration + if (!unit || !@valid_units.include?(unit)) + unit = NONE + @logger.warn("Possible config error: CloudWatch Value found with invalid or missing Units") + end + + + if (!aggregates[namespace]) + aggregates[namespace] = {} + @logger.info("INITIALIZING NAMESPACE DATA") + end + + aggregate_key = { + METRIC => field(event, @field_metric), + DIM_NAME => field(event, @field_dimensionname), + DIM_VALUE => field(event, @field_dimensionvalue), + UNIT => unit, + TIMESTAMP => normalizeTimestamp(event.timestamp) + } + + if (!aggregates[namespace][aggregate_key]) + aggregates[namespace][aggregate_key] = {} + end + + # We may get to this point with valid Units but missing value. Send zeros. + val = (!value) ? 0.0 : value.to_f + + if (!aggregates[namespace][aggregate_key][MAX] || val > aggregates[namespace][aggregate_key][MAX]) + aggregates[namespace][aggregate_key][MAX] = val + end + + if (!aggregates[namespace][aggregate_key][MIN] || val < aggregates[namespace][aggregate_key][MIN]) + aggregates[namespace][aggregate_key][MIN] = val + end + + if (!aggregates[namespace][aggregate_key][COUNT]) + aggregates[namespace][aggregate_key][COUNT] = 1 + else + aggregates[namespace][aggregate_key][COUNT] += 1 + end + + if (!aggregates[namespace][aggregate_key][SUM]) + aggregates[namespace][aggregate_key][SUM] = val + else + aggregates[namespace][aggregate_key][SUM] += val + end + end + + # Zeros out the seconds in a ISO8601 timestamp like event.timestamp + public + def normalizeTimestamp(time) + tz = (time[-1, 1] == "Z") ? "Z" : time[-5, 5] + totheminute = time[0..16] + normal = totheminute + "00.000" + tz + return normal + end + + private + def field(event, fieldname) + return event.fields.member?(fieldname) ? 
event.fields[fieldname][0] : nil + end + +end # class LogStash::Outputs::CloudWatch diff --git a/logstash.gemspec b/logstash.gemspec index 5c2954831..6e61d4415 100644 --- a/logstash.gemspec +++ b/logstash.gemspec @@ -58,6 +58,7 @@ Gem::Specification.new do |gem| gem.add_runtime_dependency "geoip", [">= 1.1.0"] gem.add_runtime_dependency "beefcake", "0.3.7" gem.add_runtime_dependency "php-serialize" # For input drupal_dblog + gem.add_runtime_dependency "rufus-scheduler" if RUBY_PLATFORM == 'java' gem.platform = RUBY_PLATFORM From dfb405d901b97256c8e7cdb9d5c10f19a99b7585 Mon Sep 17 00:00:00 2001 From: Louis Zuckerman Date: Tue, 11 Dec 2012 23:27:48 -0500 Subject: [PATCH 047/105] Fixing most of the issues jordan identified(CloudWatch/LOGSTASH-461) - moving 3rd party requires into register() - Using do...end instead of {...} for large blocks - using SizedQueue instead of Queue - using event.sprintf to normalize the timestamp - renaming send to publish - using while instead of until - improving how event fields are read --- lib/logstash/outputs/cloudwatch.rb | 72 +++++++++++++++++------------- 1 file changed, 41 insertions(+), 31 deletions(-) diff --git a/lib/logstash/outputs/cloudwatch.rb b/lib/logstash/outputs/cloudwatch.rb index 8d42d9624..83e592612 100644 --- a/lib/logstash/outputs/cloudwatch.rb +++ b/lib/logstash/outputs/cloudwatch.rb @@ -1,10 +1,6 @@ require "logstash/outputs/base" require "logstash/namespace" -require "thread" -require "rufus/scheduler" -require "aws" - # This output lets you aggregate and send metric data to AWS CloudWatch # # Configuration is done partly in this output and partly using fields added @@ -43,6 +39,10 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base # See here for allowed values: https://github.com/jmettraux/rufus-scheduler#the-time-strings-understood-by-rufus-scheduler config :timeframe, :validate => :string, :default => "1m" + # How many events to queue before forcing a call to the CloudWatch API ahead of "timeframe" schedule + # Set this to the number of events-per-timeframe you will be sending to CloudWatch to avoid extra API calls + config :queue_size, :validate => :number, :default => 10000 + # The default namespace to use for events which do not have a "CW_namespace" field config :namespace, :validate => :string, :default => "Logstash" @@ -81,6 +81,10 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base public def register + require "thread" + require "rufus/scheduler" + require "aws" + AWS.config( :access_key_id => @access_key, :secret_access_key => @secret_key, @@ -90,11 +94,11 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base @valid_units = ["Seconds", "Microseconds", "Milliseconds", "Bytes", "Kilobytes", "Megabytes", "Gigabytes", "Terabytes", "Bits", "Kilobits", "Megabits", "Gigabits", "Terabits", "Percent", COUNT_UNIT, "Bytes/Second", "Kilobytes/Second", "Megabytes/Second", "Gigabytes/Second", "Terabytes/Second", "Bits/Second", "Kilobits/Second", "Megabits/Second", "Gigabits/Second", "Terabits/Second", "Count/Second", NONE] - @event_queue = Queue.new + @event_queue = SizedQueue.new(@queue_size) @scheduler = Rufus::Scheduler.start_new @job = @scheduler.every @timeframe do @logger.info("Scheduler Activated") - send(aggregate({})) + publish(aggregate({})) end end @@ -110,18 +114,25 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base return end - return unless event.fields.member?(@field_metric) + return unless event[@field_metric] + + if (@event_queue.length >= @event_queue.max) + 
@job.trigger + @logger.warn("Posted to AWS CloudWatch ahead of schedule. If you see this often, consider increasing the cloudwatch queue_size option.") + end @logger.info("Queueing event", :event => event) @event_queue << event - end # def receive + end + + # def receive private - def send(aggregates) - aggregates.each { |namespace, data| + def publish(aggregates) + aggregates.each do |namespace, data| @logger.info("Namespace, data: ", :namespace => namespace, :data => data) metric_data = [] - data.each { |aggregate_key, stats| + data.each do |aggregate_key, stats| new_data = { :metric_name => aggregate_key[METRIC], :timestamp => aggregate_key[TIMESTAMP], @@ -140,7 +151,7 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base }] end metric_data << new_data - } # data.each + end # data.each begin response = @cw.put_metric_data( @@ -152,17 +163,17 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base @logger.warn("Failed to send to AWS CloudWatch", :exception => e, :namespace => namespace, :metric_data => metric_data) break end - } # aggregates.each + end # aggregates.each return aggregates end - # def send + # def publish private def aggregate(aggregates) @logger.info("QUEUE SIZE ", :queuesize => @event_queue.size) - until @event_queue.empty? do + while !@event_queue.empty? do begin count(aggregates, @event_queue.pop(true)) rescue Exception => e @@ -180,8 +191,8 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base ns = field(event, @field_namespace) namespace = (!ns) ? @namespace : ns - unit = field(event, @field_unit) - value = field(event, @field_value) + unit = field(event, @field_unit) # .to_s happens below + value = field(event, @field_value) # .to_f happens below # If neither Units nor Value is set, then we simply count the event if (!unit && !value) @@ -189,6 +200,9 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base value = "1" end + # We may get to this point with valid Units but missing value. Send zeros. + val = (!value) ? 0.0 : value.to_f + # If Units is still not set (or is invalid), then we know Value must BE set, so set Units to "None" # And warn about misconfiguration if (!unit || !@valid_units.include?(unit)) @@ -207,16 +221,13 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base DIM_NAME => field(event, @field_dimensionname), DIM_VALUE => field(event, @field_dimensionvalue), UNIT => unit, - TIMESTAMP => normalizeTimestamp(event.timestamp) + TIMESTAMP => event.sprintf("%{+YYYY-MM-dd'T'HH:mm:00Z}") } if (!aggregates[namespace][aggregate_key]) aggregates[namespace][aggregate_key] = {} end - # We may get to this point with valid Units but missing value. Send zeros. - val = (!value) ? 0.0 : value.to_f - if (!aggregates[namespace][aggregate_key][MAX] || val > aggregates[namespace][aggregate_key][MAX]) aggregates[namespace][aggregate_key][MAX] = val end @@ -238,18 +249,17 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base end end - # Zeros out the seconds in a ISO8601 timestamp like event.timestamp - public - def normalizeTimestamp(time) - tz = (time[-1, 1] == "Z") ? "Z" : time[-5, 5] - totheminute = time[0..16] - normal = totheminute + "00.000" + tz - return normal - end - private def field(event, fieldname) - return event.fields.member?(fieldname) ? 
event.fields[fieldname][0] : nil + if !event[fieldname] + return nil + else + if event[fieldname].is_a?(Array) + return event[fieldname][0] + else + return event[fieldname] + end + end end end # class LogStash::Outputs::CloudWatch From e806099c28f4eb5cf962ff90ed5d6b9eaef5da07 Mon Sep 17 00:00:00 2001 From: Louis Zuckerman Date: Tue, 11 Dec 2012 23:32:43 -0500 Subject: [PATCH 048/105] cleaning up minor comments (CloudWatch/LOGSTASH-461) --- lib/logstash/outputs/cloudwatch.rb | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/lib/logstash/outputs/cloudwatch.rb b/lib/logstash/outputs/cloudwatch.rb index 83e592612..37f19120a 100644 --- a/lib/logstash/outputs/cloudwatch.rb +++ b/lib/logstash/outputs/cloudwatch.rb @@ -123,9 +123,7 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base @logger.info("Queueing event", :event => event) @event_queue << event - end - - # def receive + end # def receive private def publish(aggregates) @@ -165,9 +163,7 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base end end # aggregates.each return aggregates - end - - # def publish + end # def publish private def aggregate(aggregates) @@ -187,12 +183,12 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base private def count(aggregates, event) - # If the event doesnt declare a namespace, use the default + # If the event doesn't declare a namespace, use the default ns = field(event, @field_namespace) namespace = (!ns) ? @namespace : ns - unit = field(event, @field_unit) # .to_s happens below - value = field(event, @field_value) # .to_f happens below + unit = field(event, @field_unit) + value = field(event, @field_value) # If neither Units nor Value is set, then we simply count the event if (!unit && !value) From 96cfa49be6c75abf52519d5a38130431476a0259 Mon Sep 17 00:00:00 2001 From: Eugen Dinca Date: Wed, 12 Dec 2012 18:22:50 -0500 Subject: [PATCH 049/105] Update patterns/ruby MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit  - Corrected missing % for POSINT  - Made progname optional  - Made message greedy  - Made all fields named (except the first) --- patterns/ruby | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/patterns/ruby b/patterns/ruby index f8cbb990a..638274217 100644 --- a/patterns/ruby +++ b/patterns/ruby @@ -1,2 +1,2 @@ RUBY_LOGLEVEL (?:DEBUG|FATAL|ERROR|WARN|INFO) -RUBY_LOGGER [DFEWI], \[%{TIMESTAMP_ISO8601} #{POSINT:pid}\] *%{RUBY_LOGLEVEL} -- %{DATA:progname}: %{DATA:message} +RUBY_LOGGER [DFEWI], \[%{TIMESTAMP_ISO8601:timestamp} #%{POSINT:pid}\] *%{RUBY_LOGLEVEL:loglevel} -- *%{DATA:progname}: %{GREEDYDATA:message} From 37c27b231ddbd83b44ad1309a259a2986c300d02 Mon Sep 17 00:00:00 2001 From: Louis Zuckerman Date: Wed, 12 Dec 2012 23:43:16 -0500 Subject: [PATCH 050/105] Cleaned up dimension handling, now supporting multiple dimensions (CloudWatch/LOGSTASH-461) --- lib/logstash/outputs/cloudwatch.rb | 50 ++++++++++++++++++------------ 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/lib/logstash/outputs/cloudwatch.rb b/lib/logstash/outputs/cloudwatch.rb index 37f19120a..18547ce1d 100644 --- a/lib/logstash/outputs/cloudwatch.rb +++ b/lib/logstash/outputs/cloudwatch.rb @@ -58,15 +58,17 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base # The name of the field used to set the value (float) on an event metric config :field_value, :validate => :string, :default => "CW_value" - # The name of the field used to set the dimension name on an event metric - config 
:field_dimensionname, :validate => :string, :default => "CW_dimensionName" - - # The name of the field used to set the dimension value on an event metric - config :field_dimensionvalue, :validate => :string, :default => "CW_dimensionValue" + # The name of the field used to set the dimensions on an event metric + # this field named here, if present in an event, must have an array of + # one or more key & value pairs, for example... + # add_field => [ "CW_dimensions", "Environment", "CW_dimensions", "prod" ] + # or, equivalently... + # add_field => [ "CW_dimensions", "Environment" ] + # add_field => [ "CW_dimensions", "prod" ] + config :field_dimensions, :validate => :string, :default => "CW_dimensions" # aggregate_key members - DIM_NAME = "dimensionName" - DIM_VALUE = "dimensionValue" + DIMENSIONS = "dimensions" TIMESTAMP = "timestamp" METRIC = "metric" COUNT = "count" @@ -123,7 +125,9 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base @logger.info("Queueing event", :event => event) @event_queue << event - end # def receive + end + + # def receive private def publish(aggregates) @@ -142,11 +146,14 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base :maximum => stats[MAX], } } - if (aggregate_key[DIM_NAME] != nil && aggregate_key[DIM_VALUE] != nil) - new_data[:dimensions] = [{ - :name => aggregate_key[DIM_NAME], - :value => aggregate_key[DIM_VALUE] - }] + dims = aggregate_key[DIMENSIONS] + if (dims.is_a?(Array) && dims.length > 0 && (dims.length % 2) == 0) + new_data[:dimensions] = Array.new + i = 0 + while (i < dims.length) + new_data[:dimensions] << {:name => dims[i], :value => dims[i+1]} + i += 2 + end end metric_data << new_data end # data.each @@ -163,7 +170,9 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base end end # aggregates.each return aggregates - end # def publish + end + + # def publish private def aggregate(aggregates) @@ -203,19 +212,22 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base # And warn about misconfiguration if (!unit || !@valid_units.include?(unit)) unit = NONE - @logger.warn("Possible config error: CloudWatch Value found with invalid or missing Units") + @logger.warn("Likely config error: CloudWatch Value found (#{val}) with invalid or missing Units (#{unit.to_s}", :event => event) end - if (!aggregates[namespace]) aggregates[namespace] = {} - @logger.info("INITIALIZING NAMESPACE DATA") + end + + dims = event[@field_dimensions] + if ( dims && (!dims.is_a?(Array) || dims.length == 0 || (dims.length % 2) != 0) ) + @logger.warn("Likely config error: CloudWatch dimensions field (#{dims.to_s}) found which is not a positive- & even-length array. Ignoring it.", :event => event) + dims = nil end aggregate_key = { METRIC => field(event, @field_metric), - DIM_NAME => field(event, @field_dimensionname), - DIM_VALUE => field(event, @field_dimensionvalue), + DIMENSIONS => dims, UNIT => unit, TIMESTAMP => event.sprintf("%{+YYYY-MM-dd'T'HH:mm:00Z}") } From d1b42a83dd8caf5cffe867fdd995d1278eb6b084 Mon Sep 17 00:00:00 2001 From: Louis Zuckerman Date: Thu, 13 Dec 2012 01:14:42 -0500 Subject: [PATCH 051/105] Now supporting "defaults" for all cloudwatch parameters in the output plugin (including ability to use field replacements a la sprintf) (CloudWatch/LOGSTASH-461) This removes any requirement for specific fields in events, however that is still an option. CloudWatch output can now be used with either configuration provided in event fields or by configuration provided in output plugin or almost any combination of the two. 
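
As an example, a minimal sketch of that mix (the keys and metric name here
are placeholders, not recommendations):

    output {
      cloudwatch {
        access_key => "..."
        secret_key => "..."
        metricname => "Events"
      }
    }

This would count every event passing through the output (Unit: Count,
Value: 1) under the default namespace, while any event carrying a
CW_metricname field would still override the default metric name per event.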
Helpful warnings are issued for likely configuration errors too. --- lib/logstash/outputs/cloudwatch.rb | 135 ++++++++++++++++++----------- 1 file changed, 86 insertions(+), 49 deletions(-) diff --git a/lib/logstash/outputs/cloudwatch.rb b/lib/logstash/outputs/cloudwatch.rb index 18547ce1d..45fc87164 100644 --- a/lib/logstash/outputs/cloudwatch.rb +++ b/lib/logstash/outputs/cloudwatch.rb @@ -20,6 +20,20 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base config_name "cloudwatch" plugin_status "experimental" + # Constants + # aggregate_key members + DIMENSIONS = "dimensions" + TIMESTAMP = "timestamp" + METRIC = "metric" + COUNT = "count" + UNIT = "unit" + SUM = "sum" + MIN = "min" + MAX = "max" + # Units + COUNT_UNIT = "Count" + NONE = "None" + # The AWS Region to send logs to. config :region, :validate => :string, :default => "us-east-1" @@ -46,18 +60,42 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base # The default namespace to use for events which do not have a "CW_namespace" field config :namespace, :validate => :string, :default => "Logstash" - # The name of the field used to set the metric name on an event - config :field_metric, :validate => :string, :default => "CW_metric" - # The name of the field used to set a different namespace per event config :field_namespace, :validate => :string, :default => "CW_namespace" - # The name of the field used to set the units on an event metric + # The default metric name to use for events which do not have a "CW_metricname" field. + # If this is provided then all events which pass through this output will be aggregated and + # sent to CloudWatch, so use this carefully. Furthermore, when providing this option, you + # will probably want to also restrict events from passing through this output using event + # type, tag, and field matching + # + # At a minimum events must have a "metric name" to be sent to CloudWatch. This can be achieved + # either by providing a default here, as described above, OR by adding a "CW_metricname" field + # to the events themselves, as described below. By default, if no other configuration is + # provided besides a metric name, then events will be counted (Unit: Count, Value: 1) + # by their metric name (either this default or from their CW_metricname field) + config :metricname, :validate => :string + + # The name of the field used to set the metric name on an event + config :field_metricname, :validate => :string, :default => "CW_metricname" + + # The default unit to use for events which do not have a "CW_unit" field + config :unit, :validate => :string, :default => COUNT_UNIT + + # The name of the field used to set the unit on an event metric config :field_unit, :validate => :string, :default => "CW_unit" + # The default value to use for events which do not have a "CW_value" field + # If provided, this must be a string which can be converted to a fload, for example... + # "1", "2.34", ".5", and "0.67" + config :value, :validate => :string, :default => "1" + # The name of the field used to set the value (float) on an event metric config :field_value, :validate => :string, :default => "CW_value" + # The default dimensions [ name, value, ... ] to use for events which do not have a "CW_dimensions" field + config :dimensions, :validate => :hash + # The name of the field used to set the dimensions on an event metric # this field named here, if present in an event, must have an array of # one or more key & value pairs, for example... 
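   # add_field => [ "CW_dimensions", "Environment", "CW_dimensions", "prod" ]
   # or, equivalently...
   # add_field => [ "CW_dimensions", "Environment" ]
   # add_field => [ "CW_dimensions", "prod" ]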
@@ -67,20 +105,6 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base # add_field => [ "CW_dimensions", "prod" ] config :field_dimensions, :validate => :string, :default => "CW_dimensions" - # aggregate_key members - DIMENSIONS = "dimensions" - TIMESTAMP = "timestamp" - METRIC = "metric" - COUNT = "count" - UNIT = "unit" - SUM = "sum" - MIN = "min" - MAX = "max" - - # Units - COUNT_UNIT = "Count" - NONE = "None" - public def register require "thread" @@ -94,7 +118,13 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base ) @cw = AWS::CloudWatch.new - @valid_units = ["Seconds", "Microseconds", "Milliseconds", "Bytes", "Kilobytes", "Megabytes", "Gigabytes", "Terabytes", "Bits", "Kilobits", "Megabits", "Gigabits", "Terabits", "Percent", COUNT_UNIT, "Bytes/Second", "Kilobytes/Second", "Megabytes/Second", "Gigabytes/Second", "Terabytes/Second", "Bits/Second", "Kilobits/Second", "Megabits/Second", "Gigabits/Second", "Terabits/Second", "Count/Second", NONE] + @valid_units = ["Seconds", "Microseconds", "Milliseconds", "Bytes", + "Kilobytes", "Megabytes", "Gigabytes", "Terabytes", + "Bits", "Kilobits", "Megabits", "Gigabits", "Terabits", + "Percent", COUNT_UNIT, "Bytes/Second", "Kilobytes/Second", + "Megabytes/Second", "Gigabytes/Second", "Terabytes/Second", + "Bits/Second", "Kilobits/Second", "Megabits/Second", + "Gigabits/Second", "Terabits/Second", "Count/Second", NONE] @event_queue = SizedQueue.new(@queue_size) @scheduler = Rufus::Scheduler.start_new @@ -102,7 +132,7 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base @logger.info("Scheduler Activated") publish(aggregate({})) end - end + end # def register public def receive(event) @@ -116,7 +146,7 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base return end - return unless event[@field_metric] + return unless (event[@field_metricname] || @metricname) if (@event_queue.length >= @event_queue.max) @job.trigger @@ -125,9 +155,7 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base @logger.info("Queueing event", :event => event) @event_queue << event - end - - # def receive + end # def receive private def publish(aggregates) @@ -163,20 +191,17 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base :namespace => namespace, :metric_data => metric_data ) - @logger.info("Sent data to AWS CloudWatch OK") + @logger.info("Sent data to AWS CloudWatch OK", :namespace => namespace, :metric_data => metric_data) rescue Exception => e @logger.warn("Failed to send to AWS CloudWatch", :exception => e, :namespace => namespace, :metric_data => metric_data) break end end # aggregates.each return aggregates - end - - # def publish + end# def publish private def aggregate(aggregates) - @logger.info("QUEUE SIZE ", :queuesize => @event_queue.size) while !@event_queue.empty? do begin @@ -187,32 +212,32 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base end end return aggregates - end + end # def aggregate private def count(aggregates, event) - # If the event doesn't declare a namespace, use the default - ns = field(event, @field_namespace) - namespace = (!ns) ? @namespace : ns + fnamespace = field(event, @field_namespace) + namespace = (fnamespace ? fnamespace : event.sprintf(@namespace)) - unit = field(event, @field_unit) - value = field(event, @field_value) + funit = field(event, @field_unit) + unit = (funit ? 
funit : event.sprintf(@unit)) - # If neither Units nor Value is set, then we simply count the event - if (!unit && !value) - unit = COUNT - value = "1" - end + fvalue = field(event, @field_value) + value = (fvalue ? fvalue : event.sprintf(@value)) # We may get to this point with valid Units but missing value. Send zeros. val = (!value) ? 0.0 : value.to_f - # If Units is still not set (or is invalid), then we know Value must BE set, so set Units to "None" - # And warn about misconfiguration - if (!unit || !@valid_units.include?(unit)) + # Event provides exactly one (but not both) of value or unit + if ( (fvalue == nil) ^ (funit == nil) ) + @logger.warn("Likely config error: event has one of #{@field_value} or #{@field_unit} fields but not both.", :event => event) + end + + # If Unit is still not set or is invalid warn about misconfiguration & use NONE + if (!@valid_units.include?(unit)) unit = NONE - @logger.warn("Likely config error: CloudWatch Value found (#{val}) with invalid or missing Units (#{unit.to_s}", :event => event) + @logger.warn("Likely config error: invalid or missing Units (#{unit.to_s}), using '#{NONE}' instead", :event => event) end if (!aggregates[namespace]) @@ -220,13 +245,25 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base end dims = event[@field_dimensions] - if ( dims && (!dims.is_a?(Array) || dims.length == 0 || (dims.length % 2) != 0) ) - @logger.warn("Likely config error: CloudWatch dimensions field (#{dims.to_s}) found which is not a positive- & even-length array. Ignoring it.", :event => event) + if (dims) # event provides dimensions + # validate the structure + if (!dims.is_a?(Array) || dims.length == 0 || (dims.length % 2) != 0) + @logger.warn("Likely config error: CloudWatch dimensions field (#{dims.to_s}) found which is not a positive- & even-length array. Ignoring it.", :event => event) + dims = nil + end + # Best case, we get here and exit the conditional because dims... + # - is an array + # - with positive length + # - and an even number of elements + elsif (@dimensions.is_a?(Hash)) # event did not provide dimensions, but the output has been configured with a default + dims = @dimensions.flatten.map{|d| event.sprintf(d)} # into the kind of array described just above + else dims = nil end + fmetric = field(event, @field_metricname) aggregate_key = { - METRIC => field(event, @field_metric), + METRIC => (fmetric ? 
fmetric : event.sprintf(@metricname)), DIMENSIONS => dims, UNIT => unit, TIMESTAMP => event.sprintf("%{+YYYY-MM-dd'T'HH:mm:00Z}") @@ -255,7 +292,7 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base else aggregates[namespace][aggregate_key][SUM] += val end - end + end # def count private def field(event, fieldname) @@ -268,6 +305,6 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base return event[fieldname] end end - end + end # def field end # class LogStash::Outputs::CloudWatch From d2ae438d2850baba471281a415e607965045da37 Mon Sep 17 00:00:00 2001 From: Louis Zuckerman Date: Thu, 13 Dec 2012 11:46:57 -0500 Subject: [PATCH 052/105] Improved validation of unit config option per jordan's suggestion (CloudWatch/LOGSTASH-461) Also fixed a typo s/fload/float/ --- lib/logstash/outputs/cloudwatch.rb | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/logstash/outputs/cloudwatch.rb b/lib/logstash/outputs/cloudwatch.rb index 45fc87164..7e55b46e6 100644 --- a/lib/logstash/outputs/cloudwatch.rb +++ b/lib/logstash/outputs/cloudwatch.rb @@ -79,14 +79,22 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base # The name of the field used to set the metric name on an event config :field_metricname, :validate => :string, :default => "CW_metricname" + @valid_units = ["Seconds", "Microseconds", "Milliseconds", "Bytes", + "Kilobytes", "Megabytes", "Gigabytes", "Terabytes", + "Bits", "Kilobits", "Megabits", "Gigabits", "Terabits", + "Percent", COUNT_UNIT, "Bytes/Second", "Kilobytes/Second", + "Megabytes/Second", "Gigabytes/Second", "Terabytes/Second", + "Bits/Second", "Kilobits/Second", "Megabits/Second", + "Gigabits/Second", "Terabits/Second", "Count/Second", NONE] + # The default unit to use for events which do not have a "CW_unit" field - config :unit, :validate => :string, :default => COUNT_UNIT + config :unit, :validate => @valid_units, :default => COUNT_UNIT # The name of the field used to set the unit on an event metric config :field_unit, :validate => :string, :default => "CW_unit" # The default value to use for events which do not have a "CW_value" field - # If provided, this must be a string which can be converted to a fload, for example... + # If provided, this must be a string which can be converted to a float, for example... 
# "1", "2.34", ".5", and "0.67" config :value, :validate => :string, :default => "1" @@ -118,14 +126,6 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base ) @cw = AWS::CloudWatch.new - @valid_units = ["Seconds", "Microseconds", "Milliseconds", "Bytes", - "Kilobytes", "Megabytes", "Gigabytes", "Terabytes", - "Bits", "Kilobits", "Megabits", "Gigabits", "Terabits", - "Percent", COUNT_UNIT, "Bytes/Second", "Kilobytes/Second", - "Megabytes/Second", "Gigabytes/Second", "Terabytes/Second", - "Bits/Second", "Kilobits/Second", "Megabits/Second", - "Gigabits/Second", "Terabits/Second", "Count/Second", NONE] - @event_queue = SizedQueue.new(@queue_size) @scheduler = Rufus::Scheduler.start_new @job = @scheduler.every @timeframe do From e614a39338fc9d4284a42ee40ae3c5c56831870a Mon Sep 17 00:00:00 2001 From: Louis Zuckerman Date: Thu, 13 Dec 2012 23:35:31 -0500 Subject: [PATCH 053/105] Changed @valid_units to constant VALID_UNITS to fix bug introduced in last commit (CloudWatch/LOGSTASH-461) --- lib/logstash/outputs/cloudwatch.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/logstash/outputs/cloudwatch.rb b/lib/logstash/outputs/cloudwatch.rb index 7e55b46e6..fe5b03dc7 100644 --- a/lib/logstash/outputs/cloudwatch.rb +++ b/lib/logstash/outputs/cloudwatch.rb @@ -79,7 +79,7 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base # The name of the field used to set the metric name on an event config :field_metricname, :validate => :string, :default => "CW_metricname" - @valid_units = ["Seconds", "Microseconds", "Milliseconds", "Bytes", + VALID_UNITS = ["Seconds", "Microseconds", "Milliseconds", "Bytes", "Kilobytes", "Megabytes", "Gigabytes", "Terabytes", "Bits", "Kilobits", "Megabits", "Gigabits", "Terabits", "Percent", COUNT_UNIT, "Bytes/Second", "Kilobytes/Second", @@ -88,7 +88,7 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base "Gigabits/Second", "Terabits/Second", "Count/Second", NONE] # The default unit to use for events which do not have a "CW_unit" field - config :unit, :validate => @valid_units, :default => COUNT_UNIT + config :unit, :validate => VALID_UNITS, :default => COUNT_UNIT # The name of the field used to set the unit on an event metric config :field_unit, :validate => :string, :default => "CW_unit" @@ -235,7 +235,7 @@ class LogStash::Outputs::CloudWatch < LogStash::Outputs::Base end # If Unit is still not set or is invalid warn about misconfiguration & use NONE - if (!@valid_units.include?(unit)) + if (!VALID_UNITS.include?(unit)) unit = NONE @logger.warn("Likely config error: invalid or missing Units (#{unit.to_s}), using '#{NONE}' instead", :event => event) end From 927660258b76e2b55e6746d1d1749f521d70fef4 Mon Sep 17 00:00:00 2001 From: Jonny Schulz Date: Fri, 14 Dec 2012 13:54:35 +0100 Subject: [PATCH 054/105] A tag named '_jsonparsefailure' is added if json events couldn't be parsed --- lib/logstash/inputs/base.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/logstash/inputs/base.rb b/lib/logstash/inputs/base.rb index 9ab98b500..79b7dbed8 100644 --- a/lib/logstash/inputs/base.rb +++ b/lib/logstash/inputs/base.rb @@ -107,6 +107,7 @@ class LogStash::Inputs::Base < LogStash::Plugin :source => source, :exception => e, :backtrace => e.backtrace) event.message = raw + event.tags << "_jsonparsefailure" end when "json_event" begin @@ -124,6 +125,7 @@ class LogStash::Inputs::Base < LogStash::Plugin :input => raw, :source => source, :exception => e, :backtrace => e.backtrace) event.message = raw + event.tags << 
"_jsonparsefailure" end if event.source == "unknown" From fd560e3b19706fbbdf306f622c8878fcfaf46249 Mon Sep 17 00:00:00 2001 From: theduke Date: Fri, 14 Dec 2012 16:36:55 +0100 Subject: [PATCH 055/105] Drupal DBLog: enabled bulk fetching of database rows to prevent big memory spikes with big databases. Bulk size ins configurable. Also improved error detection and logging a bit. --- lib/logstash/inputs/drupal_dblog.rb | 57 ++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 13 deletions(-) diff --git a/lib/logstash/inputs/drupal_dblog.rb b/lib/logstash/inputs/drupal_dblog.rb index 0305b01e1..3b07de4ce 100644 --- a/lib/logstash/inputs/drupal_dblog.rb +++ b/lib/logstash/inputs/drupal_dblog.rb @@ -44,6 +44,11 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base # Time between checks in minutes. config :interval, :validate => :number, :default => 10 + # The amount of log messages that should be fetched with each query. + # Bulk fetching is done to prevent querying huge data sets when lots of + # messages are in the database. + config :bulksize, :validate => :number, :default => 5000 + # Label this input with a type. # Types are used mainly for filter activation. # @@ -120,10 +125,11 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base @databases.each do |name, db| @logger.debug("Drupal DBLog: Checking database #{name}") check_database(output_queue, db) + @logger.info("Drupal DBLog: Retrieved all new watchdog messages from #{name}") end timeTaken = Time.now.to_i - start - @logger.debug("Drupal DBLog: Fetched all new watchdog entries in #{timeTaken} seconds") + @logger.info("Drupal DBLog: Fetched all new watchdog entries in #{timeTaken} seconds") # If fetching of all databases took less time than the interval, # sleep a bit. @@ -169,35 +175,60 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base begin # connect to the MySQL server initialize_client(db) + rescue Exception => e + @logger.error("Could not connect to database: " + e.message) + return + end #begin + begin @sitename = db["site"] @usermap = @add_usernames ? get_usermap : nil # Retrieve last pulled watchdog entry id initialLastWid = get_last_wid - lastWid = initialLastWid ? 
initialLastWid : 0 + lastWid = nil - # Fetch new entries, and create the event - results = @client.query('SELECT * from watchdog WHERE wid > ' + lastWid.to_s + " ORDER BY wid asc") - results.each do |row| - event = build_event(row) - if event - output_queue << event - lastWid = row['wid'].to_s - end + + if initialLastWid == false + lastWid = 0 + set_last_wid(0, true) + else + lastWid = initialLastWid end - set_last_wid(lastWid, initialLastWid == false) + # Fetch new entries, and create the event + while true + results = get_db_rows(lastWid) + if results.length() < 1 + break + end + + @logger.debug("Fetched " + results.length().to_s + " database rows") + + results.each do |row| + event = build_event(row) + if event + output_queue << event + lastWid = row['wid'].to_s + end + end + + set_last_wid(lastWid, false) + end rescue Exception => e - @logger.info("Mysql error: ", :error => e.message) - throw e + @logger.error("Error while fetching messages: ", :error => e.message) end # begin # Close connection @client.close end # def check_database + def get_db_rows(lastWid) + query = 'SELECT * from watchdog WHERE wid > ' + lastWid.to_s + " ORDER BY wid asc LIMIT " + @bulksize.to_s + return @client.query(query) + end # def get_db_rows + private def update_sitename if @sitename == "" From 5df0f2851cfe12892874bec323f3259a8b04fc00 Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Fri, 14 Dec 2012 13:30:15 -0800 Subject: [PATCH 056/105] Only include plugins, not support paths --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3040bfc3c..9381230ec 100644 --- a/Makefile +++ b/Makefile @@ -17,7 +17,7 @@ ELASTICSEARCH_URL=http://github.com/downloads/elasticsearch/elasticsearch ELASTICSEARCH=vendor/jar/elasticsearch-$(ELASTICSEARCH_VERSION) GEOIP=vendor/geoip/GeoLiteCity.dat GEOIP_URL=http://logstash.objects.dreamhost.com/maxmind/GeoLiteCity-2012-11-09.dat.gz -PLUGIN_FILES=$(shell git ls-files | egrep '^lib/logstash/(inputs|outputs|filters)/' | egrep -v '/(base|threadable).rb$$|/inputs/ganglia/') +PLUGIN_FILES=$(shell git ls-files | egrep '^lib/logstash/(inputs|outputs|filters)/[^/]+$' | egrep -v '/(base|threadable).rb$$|/inputs/ganglia/') QUIET=@ WGET=$(shell which wget 2>/dev/null) From d2b595cb8b69d946daf8786e9fb630f8e15e0e64 Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Fri, 14 Dec 2012 13:30:31 -0800 Subject: [PATCH 057/105] Move external requires to the register method --- lib/logstash/inputs/drupal_dblog.rb | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/logstash/inputs/drupal_dblog.rb b/lib/logstash/inputs/drupal_dblog.rb index 3b07de4ce..cfea05bfc 100644 --- a/lib/logstash/inputs/drupal_dblog.rb +++ b/lib/logstash/inputs/drupal_dblog.rb @@ -1,13 +1,6 @@ require "date" require "logstash/inputs/base" require "logstash/namespace" -require "php_serialize" - -if RUBY_PLATFORM == 'java' - require "logstash/inputs/drupal_dblog/jdbcconnection" -else - require "mysql2" -end # Retrieve watchdog log events from a Drupal installation with DBLog enabled. # The events are pulled out directly from the database. 
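The bulk-fetch loop added in PATCH 055 above is keyset pagination: select rows whose primary key is strictly greater than the last key seen, capped by LIMIT, and stop when a page comes back empty. A standalone sketch of the same idea, assuming a mysql2 client and the plugin's watchdog table (each_watchdog_row is illustrative, not part of the plugin):

    require "mysql2"  # the driver the plugin uses outside JRuby

    # Page through watchdog rows after last_wid, bulksize rows at a time.
    def each_watchdog_row(client, last_wid, bulksize = 5000)
      loop do
        rows = client.query("SELECT * FROM watchdog WHERE wid > #{last_wid.to_i} " \
                            "ORDER BY wid ASC LIMIT #{bulksize.to_i}")
        break if rows.count == 0          # empty page: caught up
        rows.each do |row|
          yield row
          last_wid = row["wid"]           # advance the cursor
        end
      end
      last_wid  # caller persists this, as set_last_wid does in the plugin
    end

Because the cursor only ever moves forward, a crash mid-run means at most one page is re-read on the next interval instead of the whole backlog.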
@@ -68,6 +61,13 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base public def register + require "php_serialize" + + if RUBY_PLATFORM == 'java' + require "logstash/inputs/drupal_dblog/jdbcconnection" + else + require "mysql2" + end end # def register public From 6fde43ccf0c5ad7b3f652922b72682403b8cd229 Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Fri, 14 Dec 2012 13:30:38 -0800 Subject: [PATCH 058/105] - include spec and testutils --- lib/logstash/runner.rb | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/logstash/runner.rb b/lib/logstash/runner.rb index 6ab8bf335..7fa04f317 100644 --- a/lib/logstash/runner.rb +++ b/lib/logstash/runner.rb @@ -129,6 +129,9 @@ class LogStash::Runner return @result end end + + $: << File.expand_path("#{File.dirname(__FILE__)}/../../spec") + require "test_utils" rspec = runner.new(fixedargs) rspec.run @runners << rspec From 67de34c0aed6b58c13f0a32942819fc6cc1a6d26 Mon Sep 17 00:00:00 2001 From: Richard Pijnenburg Date: Sat, 15 Dec 2012 01:08:02 +0100 Subject: [PATCH 059/105] Deprecating 'name' variable For ticket LOGSTASH-755 - input/amqp to 'queue' - input/eventlog to 'logfile' - output/elasticsearch_river to 'queue' - output/amqp to 'exchange' --- lib/logstash/inputs/amqp.rb | 32 ++++++++++++++------- lib/logstash/inputs/eventlog.rb | 22 ++++++++++---- lib/logstash/outputs/amqp.rb | 29 +++++++++++++------ lib/logstash/outputs/elasticsearch_river.rb | 18 ++++++++++-- 4 files changed, 74 insertions(+), 27 deletions(-) diff --git a/lib/logstash/inputs/amqp.rb b/lib/logstash/inputs/amqp.rb index cd7db790d..346227e9d 100644 --- a/lib/logstash/inputs/amqp.rb +++ b/lib/logstash/inputs/amqp.rb @@ -30,8 +30,12 @@ class LogStash::Inputs::Amqp < LogStash::Inputs::Threadable # Your amqp password config :password, :validate => :password, :default => "guest" - # The name of the queue. - config :name, :validate => :string, :default => "" + # The name of the queue. Depricated due to conflicts with puppet naming convention. + # Replaced by 'queue' variable. See LOGSTASH-755 + config :name, :validate => :string, :deprecated => true + + # The name of the queue. + config :queue, :validate => :string, :default => "" # The name of the exchange to bind the queue. This is analogous to the 'amqp # output' [config 'name'](../outputs/amqp) @@ -86,6 +90,14 @@ class LogStash::Inputs::Amqp < LogStash::Inputs::Threadable public def register + + if @name + if @queue + @logger.error("'name' and 'queue' are the same setting, but 'name' is deprecated. Please use only 'queue'") + end + @queue = @name + end + @logger.info("Registering input #{@url}") require "bunny" # rubygem 'bunny' @vhost ||= "/" @@ -106,12 +118,12 @@ class LogStash::Inputs::Amqp < LogStash::Inputs::Threadable amqp_credentials << @user if @user amqp_credentials << ":#{@password}" if @password @amqpurl += amqp_credentials unless amqp_credentials.nil? - @amqpurl += "#{@host}:#{@port}#{@vhost}/#{@name}" + @amqpurl += "#{@host}:#{@port}#{@vhost}/#{@queue}" end # def register def run(queue) begin - @logger.debug("Connecting with AMQP settings #{@amqpsettings.inspect} to set up queue #{@name.inspect}") + @logger.debug("Connecting with AMQP settings #{@amqpsettings.inspect} to set up queue #{@queue.inspect}") @bunny = Bunny.new(@amqpsettings) return if terminating? 
@bunny.start @@ -119,15 +131,15 @@ class LogStash::Inputs::Amqp < LogStash::Inputs::Threadable @arguments_hash = Hash[*@arguments] - @queue = @bunny.queue(@name, {:durable => @durable, :auto_delete => @auto_delete, :exclusive => @exclusive, :arguments => @arguments_hash }) - @queue.bind(@exchange, :key => @key) + @bunnyqueue = @bunny.queue(@queue, {:durable => @durable, :auto_delete => @auto_delete, :exclusive => @exclusive, :arguments => @arguments_hash }) + @bunnyqueue.bind(@exchange, :key => @key) - @queue.subscribe({:ack => @ack}) do |data| + @bunnyqueue.subscribe({:ack => @ack}) do |data| e = to_event(data[:payload], @amqpurl) if e queue << e end - end # @queue.subscribe + end # @bunnyqueue.subscribe rescue *[Bunny::ConnectionError, Bunny::ServerDownError] => e @logger.error("AMQP connection error, will reconnect: #{e}") @@ -139,8 +151,8 @@ class LogStash::Inputs::Amqp < LogStash::Inputs::Threadable end # def run def teardown - @queue.unsubscribe unless @durable == true - @queue.delete unless @durable == true + @bunnyqueue.unsubscribe unless @durable == true + @bunnyqueue.delete unless @durable == true @bunny.close if @bunny finished end # def teardown diff --git a/lib/logstash/inputs/eventlog.rb b/lib/logstash/inputs/eventlog.rb index cc930f56f..0fa3eb01a 100644 --- a/lib/logstash/inputs/eventlog.rb +++ b/lib/logstash/inputs/eventlog.rb @@ -16,8 +16,12 @@ class LogStash::Inputs::EventLog < LogStash::Inputs::Base config_name "eventlog" plugin_status "beta" + # Event Log Name. Depricated due to conflicts with puppet naming convention. + # Replaced by 'logfile' variable. See LOGSTASH-755 + config :name, :validate => :string, :deprecated => true + # Event Log Name - config :name, :validate => :string, :required => true, :default => "System" + config :logfile, :validate => :string, :required => true, :default => "System" public def initialize(params) @@ -27,8 +31,16 @@ class LogStash::Inputs::EventLog < LogStash::Inputs::Base public def register + + if @name + if @logfile + @logger.error("'name' and 'logfile' are the same setting, but 'name' is deprecated. 
Please use only 'logfile'") + end + @logfile = @name + end + @hostname = Socket.gethostname - @logger.info("Registering input eventlog://#{@hostname}/#{@name}") + @logger.info("Registering input eventlog://#{@hostname}/#{@logfile}") require "win32ole" # rubygem 'win32ole' ('jruby-win32ole' on JRuby) end # def register @@ -43,7 +55,7 @@ class LogStash::Inputs::EventLog < LogStash::Inputs::Base newest_shipped_event = latest_record_number next_newest_shipped_event = newest_shipped_event begin - @logger.debug("Tailing Windows Event Log '#{@name}'") + @logger.debug("Tailing Windows Event Log '#{@logfile}'") loop do event_index = 0 latest_events.each do |event| @@ -51,7 +63,7 @@ class LogStash::Inputs::EventLog < LogStash::Inputs::Base timestamp = DateTime.strptime(event.TimeGenerated, "%Y%m%d%H%M%S").iso8601 timestamp[19..-1] = DateTime.now.iso8601[19..-1] # Copy over the correct TZ offset e = LogStash::Event.new({ - "@source" => "eventlog://#{@hostname}/#{@name}", + "@source" => "eventlog://#{@hostname}/#{@logfile}", "@type" => @type, "@timestamp" => timestamp }) @@ -81,7 +93,7 @@ class LogStash::Inputs::EventLog < LogStash::Inputs::Base private def latest_events - wmi_query = "select * from Win32_NTLogEvent where Logfile = '#{@name}'" + wmi_query = "select * from Win32_NTLogEvent where Logfile = '#{@logfile}'" events = @wmi.ExecQuery(wmi_query) end # def latest_events diff --git a/lib/logstash/outputs/amqp.rb b/lib/logstash/outputs/amqp.rb index bd1ff2f11..d8fdf2b53 100644 --- a/lib/logstash/outputs/amqp.rb +++ b/lib/logstash/outputs/amqp.rb @@ -28,8 +28,12 @@ class LogStash::Outputs::Amqp < LogStash::Outputs::Base # The exchange type (fanout, topic, direct) config :exchange_type, :validate => [ "fanout", "direct", "topic"], :required => true + # The name of the exchange. Depricated due to conflicts with puppet naming convention. + # Replaced by 'exchange' variable. See LOGSTASH-755 + config :name, :validate => :string, :deprecated => true + # The name of the exchange - config :name, :validate => :string, :required => true + config :exchange, :validate => :string, :required => true # Key to route to by default. Defaults to 'logstash' # @@ -59,6 +63,13 @@ class LogStash::Outputs::Amqp < LogStash::Outputs::Base def register require "bunny" # rubygem 'bunny' + if @name + if @exchange + @logger.error("'name' and 'exchange' are the same setting, but 'name' is deprecated. 
Please use only 'exchange'") + end + @exchange = @name + end + @logger.info("Registering output", :plugin => self) connect end # def register @@ -78,7 +89,7 @@ class LogStash::Outputs::Amqp < LogStash::Outputs::Base begin @logger.debug("Connecting to AMQP", :settings => amqpsettings, - :exchange_type => @exchange_type, :name => @name) + :exchange_type => @exchange_type, :name => @exchange) @bunny = Bunny.new(amqpsettings) @bunny.start rescue => e @@ -92,11 +103,11 @@ class LogStash::Outputs::Amqp < LogStash::Outputs::Base end end - @logger.debug("Declaring exchange", :name => @name, :type => @exchange_type, + @logger.debug("Declaring exchange", :name => @exchange, :type => @exchange_type, :durable => @durable) - @exchange = @bunny.exchange(@name, :type => @exchange_type.to_sym, :durable => @durable) + @bunnyexchange = @bunny.exchange(@exchange, :type => @exchange_type.to_sym, :durable => @durable) - @logger.debug("Binding exchange", :name => @name, :key => @key) + @logger.debug("Binding exchange", :name => @exchange, :key => @key) end # def connect public @@ -118,9 +129,9 @@ class LogStash::Outputs::Amqp < LogStash::Outputs::Base public def receive_raw(message, key=@key) begin - if @exchange + if @bunnyexchange @logger.debug(["Publishing message", { :destination => to_s, :message => message, :key => key }]) - @exchange.publish(message, :persistent => @persistent, :key => key) + @bunnyexchange.publish(message, :persistent => @persistent, :key => key) else @logger.warn("Tried to send message, but not connected to amqp yet.") end @@ -133,14 +144,14 @@ class LogStash::Outputs::Amqp < LogStash::Outputs::Base public def to_s - return "amqp://#{@user}@#{@host}:#{@port}#{@vhost}/#{@exchange_type}/#{@name}\##{@key}" + return "amqp://#{@user}@#{@host}:#{@port}#{@vhost}/#{@exchange_type}/#{@exchange}\##{@key}" end public def teardown @bunny.close rescue nil @bunny = nil - @exchange = nil + @bunnyexchange = nil finished end # def teardown end # class LogStash::Outputs::Amqp diff --git a/lib/logstash/outputs/elasticsearch_river.rb b/lib/logstash/outputs/elasticsearch_river.rb index 605bae493..18075c7f5 100644 --- a/lib/logstash/outputs/elasticsearch_river.rb +++ b/lib/logstash/outputs/elasticsearch_river.rb @@ -57,9 +57,13 @@ class LogStash::Outputs::ElasticSearchRiver < LogStash::Outputs::Base # AMQP vhost config :vhost, :validate => :string, :default => "/" - # AMQP queue name - config :name, :validate => :string, :default => "elasticsearch" + # AMQP queue name. Depricated due to conflicts with puppet naming convention. + # Replaced by 'queue' variable. See LOGSTASH-755 + config :name, :validate => :string, :deprecated => true + # AMQP queue name + config :queue, :validate => :string, :default => "elasticsearch" + # AMQP exchange name config :exchange, :validate => :string, :default => "elasticsearch" @@ -78,6 +82,14 @@ class LogStash::Outputs::ElasticSearchRiver < LogStash::Outputs::Base public def register + + if @name + if @queue + @logger.error("'name' and 'queue' are the same setting, but 'name' is deprecated. Please use only 'queue'") + end + @queue = @name + end + # TODO(sissel): find a better way of declaring where the elasticsearch # libraries are # TODO(sissel): can skip this step if we're running from a jar. 
@@ -126,7 +138,7 @@ class LogStash::Outputs::ElasticSearchRiver < LogStash::Outputs::Base "user" => @user, "pass" => @password, "vhost" => @vhost, - "queue" => @name, + "queue" => @queue, "exchange" => @exchange, "routing_key" => @key, "exchange_type" => @exchange_type, From 951be631ffd70a850829451a7e220fe6683a2660 Mon Sep 17 00:00:00 2001 From: Richard Pijnenburg Date: Sat, 15 Dec 2012 16:46:32 +0100 Subject: [PATCH 060/105] LOGSTASH-408 Adding output format option --- lib/logstash/outputs/tcp.rb | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/lib/logstash/outputs/tcp.rb b/lib/logstash/outputs/tcp.rb index f08aea807..b6e168807 100644 --- a/lib/logstash/outputs/tcp.rb +++ b/lib/logstash/outputs/tcp.rb @@ -26,6 +26,14 @@ class LogStash::Outputs::Tcp < LogStash::Outputs::Base # `client` connects to a server. config :mode, :validate => ["server", "client"], :default => "client" + # The format to use when writing events to the file. This value + # supports any string and can include %{name} and other dynamic + # strings. + # + # If this setting is omitted, the full json representation of the + # event will be written as a single line. + config :message_format, :validate => :string + class Client public def initialize(socket, logger) @@ -89,19 +97,22 @@ class LogStash::Outputs::Tcp < LogStash::Outputs::Base def receive(event) return unless output?(event) - wire_event = event.to_hash.to_json + "\n" + if @message_format + output = event.sprintf(@message_format) + "\n" + else + output = event.to_hash.to_json + "\n" + end if server? @client_threads.each do |client_thread| - client_thread[:client].write(wire_event) + client_thread[:client].write(output) end @client_threads.reject! {|t| !t.alive? } else begin connect unless @client_socket - @client_socket.write(event.to_hash.to_json) - @client_socket.write("\n") + @client_socket.write(output) rescue => e @logger.warn("tcp output exception", :host => @host, :port => @port, :exception => e, :backtrace => e.backtrace) From d8a37f49b5b188212f91276d81ae0a47b5e496e1 Mon Sep 17 00:00:00 2001 From: David Butler Date: Sun, 16 Dec 2012 23:56:13 -0800 Subject: [PATCH 061/105] Ruby 1.8.7 compatibility for LogStash::Event --- lib/logstash-event.rb | 4 ++-- lib/logstash/event.rb | 6 +++--- lib/logstash/time.rb | 4 ++-- logstash-event.gemspec | 4 ++++ 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/lib/logstash-event.rb b/lib/logstash-event.rb index 4419865e1..e46b61bc1 100644 --- a/lib/logstash-event.rb +++ b/lib/logstash-event.rb @@ -1,3 +1,3 @@ -require "logstash/event" -require "logstash/version" +require File.join(File.dirname(__FILE__), "logstash/event") +require File.join(File.dirname(__FILE__), "logstash/version") diff --git a/lib/logstash/event.rb b/lib/logstash/event.rb index b92573963..37bfb4262 100644 --- a/lib/logstash/event.rb +++ b/lib/logstash/event.rb @@ -1,8 +1,8 @@ require "json" require "time" require "date" -require "logstash/time" -require "logstash/namespace" +require File.join(File.dirname(__FILE__), "time") +require File.join(File.dirname(__FILE__), "namespace") require "uri" # General event type. 
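PATCH 061 rewrites the bare require "logstash/..." calls as paths derived from __FILE__, presumably so the logstash-event gem can load on Ruby 1.8.7 setups without relying on the gem's lib directory already being on the load path:

    # Before: depends on lib/ being in $LOAD_PATH
    require "logstash/time"

    # After: resolve the sibling file relative to this source file
    require File.join(File.dirname(__FILE__), "time")

PATCH 062 below reverts exactly that part and keeps only the defined?(RUBY_ENGINE) guards: plain requires work once the gem is installed normally, and RUBY_ENGINE is the real 1.8.7 incompatibility, since that constant does not exist there and an unguarded reference raises NameError.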
@@ -24,7 +24,7 @@ class LogStash::Event @data["@timestamp"] ||= LogStash::Time.now end # def initialize - if RUBY_ENGINE == "jruby" + if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby" @@date_parser = Java::org.joda.time.format.ISODateTimeFormat.dateTimeParser.withOffsetParsed else # TODO(sissel): LOGSTASH-217 diff --git a/lib/logstash/time.rb b/lib/logstash/time.rb index 7017e0272..377eff936 100644 --- a/lib/logstash/time.rb +++ b/lib/logstash/time.rb @@ -1,4 +1,4 @@ -require "logstash/namespace" +require File.join(File.dirname(__FILE__), "namespace") # Provide our own Time wrapper for ISO8601 support # Example: @@ -8,7 +8,7 @@ require "logstash/namespace" # >> LogStash::Time.now.utc.to_iso8601 # => "2010-10-17 07:25:26.788704Z" module LogStash::Time - if RUBY_ENGINE == "jruby" + if defined?(RUBY_ENGINE) && (RUBY_ENGINE == "jruby") require "java" DateTime = org.joda.time.DateTime DateTimeZone = org.joda.time.DateTimeZone diff --git a/logstash-event.gemspec b/logstash-event.gemspec index 13e9306dd..c801473de 100644 --- a/logstash-event.gemspec +++ b/logstash-event.gemspec @@ -15,6 +15,7 @@ Gem::Specification.new do |gem| lib/logstash/namespace.rb lib/logstash/time.rb lib/logstash/version.rb + lib/spec/event.rb LICENSE } @@ -22,4 +23,7 @@ Gem::Specification.new do |gem| gem.name = "logstash-event" gem.require_paths = ["lib"] gem.version = LOGSTASH_VERSION + + gem.add_development_dependency "rspec" + gem.add_development_dependency "insist", "0.0.8" end From d8b21158c6a740ecf499d822a53cdeed5fe227e2 Mon Sep 17 00:00:00 2001 From: David Butler Date: Mon, 17 Dec 2012 02:28:25 -0800 Subject: [PATCH 062/105] Removed FILE relative requires --- lib/logstash-event.rb | 4 ++-- lib/logstash/event.rb | 4 ++-- lib/logstash/time.rb | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/logstash-event.rb b/lib/logstash-event.rb index e46b61bc1..4419865e1 100644 --- a/lib/logstash-event.rb +++ b/lib/logstash-event.rb @@ -1,3 +1,3 @@ -require File.join(File.dirname(__FILE__), "logstash/event") -require File.join(File.dirname(__FILE__), "logstash/version") +require "logstash/event" +require "logstash/version" diff --git a/lib/logstash/event.rb b/lib/logstash/event.rb index 37bfb4262..ed057bed6 100644 --- a/lib/logstash/event.rb +++ b/lib/logstash/event.rb @@ -1,8 +1,8 @@ require "json" require "time" require "date" -require File.join(File.dirname(__FILE__), "time") -require File.join(File.dirname(__FILE__), "namespace") +require "logstash/time" +require "logstash/namespace" require "uri" # General event type. diff --git a/lib/logstash/time.rb b/lib/logstash/time.rb index 377eff936..8034f5dde 100644 --- a/lib/logstash/time.rb +++ b/lib/logstash/time.rb @@ -1,4 +1,4 @@ -require File.join(File.dirname(__FILE__), "namespace") +require "logstash/namespace" # Provide our own Time wrapper for ISO8601 support # Example: @@ -8,7 +8,7 @@ require File.join(File.dirname(__FILE__), "namespace") # >> LogStash::Time.now.utc.to_iso8601 # => "2010-10-17 07:25:26.788704Z" module LogStash::Time - if defined?(RUBY_ENGINE) && (RUBY_ENGINE == "jruby") + if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby" require "java" DateTime = org.joda.time.DateTime DateTimeZone = org.joda.time.DateTimeZone From f7b19154f9a966ecac64e5972b6961509122a0e7 Mon Sep 17 00:00:00 2001 From: David Butler Date: Mon, 17 Dec 2012 11:50:13 -0800 Subject: [PATCH 063/105] Whoops! 
--- logstash-event.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/logstash-event.gemspec b/logstash-event.gemspec index c801473de..9e162f04b 100644 --- a/logstash-event.gemspec +++ b/logstash-event.gemspec @@ -15,7 +15,7 @@ Gem::Specification.new do |gem| lib/logstash/namespace.rb lib/logstash/time.rb lib/logstash/version.rb - lib/spec/event.rb + spec/event.rb LICENSE } From e8772f64cf69a3e282933936d6830daf6fae12d7 Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Tue, 18 Dec 2012 21:26:03 +0000 Subject: [PATCH 064/105] Fix syntax --- lib/logstash/filters/metrics.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/logstash/filters/metrics.rb b/lib/logstash/filters/metrics.rb index 6e8c5db89..d77557397 100644 --- a/lib/logstash/filters/metrics.rb +++ b/lib/logstash/filters/metrics.rb @@ -44,7 +44,7 @@ class LogStash::Filters::Metrics < LogStash::Filters::Base @metric_timers.each do |name, metric| event["#{name}.count"] = metric.count - event["#{name}.rate_1m"] = metric.one_mintute_rate + event["#{name}.rate_1m"] = metric.one_minute_rate event["#{name}.rate_5m"] = metric.five_minute_rate event["#{name}.rate_15m"] = metric.fifteen_minute_rate event["#{name}.min"] = metric.min From a169739c7d372ff671b87f31698629c8769a7111 Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Tue, 18 Dec 2012 21:26:09 +0000 Subject: [PATCH 065/105] - escape $ --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 9381230ec..0307396c9 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ JRUBY_VERSION=1.7.0 ELASTICSEARCH_VERSION=0.19.10 #VERSION=$(shell ruby -r./lib/logstash/version -e 'puts LOGSTASH_VERSION') -VERSION=$(shell awk -F\" '/LOGSTASH_VERSION/ {print $$2}' lib/logstash/version.rb ) +VERSION=$(shell awk -F\" '/LOGSTASH_VERSION/ {print $$2}' lib/logstash/version.rb) WITH_JRUBY=java -jar $(shell pwd)/$(JRUBY) -S JRUBY=vendor/jar/jruby-complete-$(JRUBY_VERSION).jar @@ -17,7 +17,7 @@ ELASTICSEARCH_URL=http://github.com/downloads/elasticsearch/elasticsearch ELASTICSEARCH=vendor/jar/elasticsearch-$(ELASTICSEARCH_VERSION) GEOIP=vendor/geoip/GeoLiteCity.dat GEOIP_URL=http://logstash.objects.dreamhost.com/maxmind/GeoLiteCity-2012-11-09.dat.gz -PLUGIN_FILES=$(shell git ls-files | egrep '^lib/logstash/(inputs|outputs|filters)/' | egrep -v '/(base|threadable).rb$$|/inputs/ganglia/') +PLUGIN_FILES=$(shell git ls-files | egrep '^lib/logstash/(inputs|outputs|filters)/[^/]+$$' | egrep -v '/(base|threadable).rb$$|/inputs/ganglia/') QUIET=@ WGET=$(shell which wget 2>/dev/null) From 0a9caeac3e3a0417a11a5ff367acb30acbb999b7 Mon Sep 17 00:00:00 2001 From: theduke Date: Wed, 19 Dec 2012 13:55:26 +0100 Subject: [PATCH 066/105] Drupal DBLog: store logstash_last_wid serialized. This is to prevent a warning "can not unserialize" in Drupal 7, since it requires all variables to be stored serialized.
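Drupal 7 stores every row of its variable table PHP-serialized and runs unserialize() on read, so the bare integer the plugin previously wrote is what triggered the warning. The fix leans on the php_serialize gem the plugin already requires; the round-trip looks like this (values are illustrative):

    require "php_serialize"  # gem: php-serialize

    PHP.serialize(12345)          # => "i:12345;"  (the form Drupal expects to find)
    PHP.unserialize("i:12345;")   # => 12345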
--- lib/logstash/inputs/drupal_dblog.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/logstash/inputs/drupal_dblog.rb b/lib/logstash/inputs/drupal_dblog.rb index 3b07de4ce..22c182226 100644 --- a/lib/logstash/inputs/drupal_dblog.rb +++ b/lib/logstash/inputs/drupal_dblog.rb @@ -254,6 +254,8 @@ class LogStash::Inputs::DrupalDblog < LogStash::Inputs::Base private def set_last_wid(wid, insert) + wid = PHP.serialize(wid.to_i) + # Update last import wid variable if insert # Does not exist yet, so insert From 141f6fd17bdc888dd03b572c5cc44ff9c56577be Mon Sep 17 00:00:00 2001 From: Richard Pijnenburg Date: Thu, 20 Dec 2012 02:05:25 +0100 Subject: [PATCH 067/105] Initial commit on anonymizing filter Allows for selecting different algorithms --- lib/logstash/filters/anonymize.rb | 63 +++++++++++++ spec/filters/anonymize.rb | 151 ++++++++++++++++++++++++++++++ 2 files changed, 214 insertions(+) create mode 100644 lib/logstash/filters/anonymize.rb create mode 100644 spec/filters/anonymize.rb diff --git a/lib/logstash/filters/anonymize.rb b/lib/logstash/filters/anonymize.rb new file mode 100644 index 000000000..31fcde8d5 --- /dev/null +++ b/lib/logstash/filters/anonymize.rb @@ -0,0 +1,63 @@ +require "logstash/filters/base" +require "logstash/namespace" + +# Anonymize fields using by replacing values with a consistent hash. +class LogStash::Filters::Anonymize < LogStash::Filters::Base + config_name "anonymize" + plugin_status "experimental" + + # The fields to be anonymized + config :fields, :validate => :array, :required => true + + # Hashing key + config :key, :validate => :string, :required => true + + # digest type + config :algorithm, :validate => ['SHA', 'SHA1', 'SHA224', 'SHA256', 'SHA384', 'SHA512', 'MD4', 'MD5'], :required => true, :default => 'SHA1' + + public + def register + # require any library + require 'openssl' + end # def register + + public + def filter(event) + return unless filter?(event) + @fields.each do |field| + event[field] = anonymize(event[field]) + end + end # def filter + + private + def anonymize(data) + digest = algorithm() + OpenSSL::HMAC.hexdigest(digest, @key, data) + end + + private + def algorithm + + case @algorithm + when 'SHA' + return OpenSSL::Digest::SHA.new + when 'SHA1' + return OpenSSL::Digest::SHA1.new + when 'SHA224' + return OpenSSL::Digest::SHA224.new + when 'SHA256' + return OpenSSL::Digest::SHA256.new + when 'SHA384' + return OpenSSL::Digest::SHA384.new + when 'SHA512' + return OpenSSL::Digest::SHA512.new + when 'MD4' + return OpenSSL::Digest::MD4.new + when 'MD5' + return OpenSSL::Digest::MD5.new + else + @logger.error("Unknown algorithm") + end + end + +end # class LogStash::Filters::Anonymize diff --git a/spec/filters/anonymize.rb b/spec/filters/anonymize.rb new file mode 100644 index 000000000..1adfd9235 --- /dev/null +++ b/spec/filters/anonymize.rb @@ -0,0 +1,151 @@ +require "test_utils" +require "logstash/filters/anonymize" + +describe LogStash::Filters::Anonymize do + extend LogStash::RSpec + + describe "anonymize string with SHA alogrithm" do + # The logstash config goes here. + # At this time, only filters are supported. + config <<-CONFIG + filter { + anonymize { + fields => ["clientip"] + key => "longencryptionkey" + algorithm => 'SHA' + } + } + CONFIG + + sample "@fields" => {"clientip" => "123.123.123.123"} do + insist { subject["clientip"] } == "0d01b2191194d261fa1a2e7c18a38d44953ab4e2" + end + end + + describe "anonymize string with SHA1 alogrithm" do + # The logstash config goes here. 
+ # At this time, only filters are supported. + config <<-CONFIG + filter { + anonymize { + fields => ["clientip"] + key => "longencryptionkey" + algorithm => 'SHA1' + } + } + CONFIG + + sample "@fields" => {"clientip" => "123.123.123.123"} do + insist { subject["clientip"] } == "fdc60acc4773dc5ac569ffb78fcb93c9630797f4" + end + end + + describe "anonymize string with SHA224 alogrithm" do + # The logstash config goes here. + # At this time, only filters are supported. + config <<-CONFIG + filter { + anonymize { + fields => ["clientip"] + key => "longencryptionkey" + algorithm => 'SHA224' + } + } + CONFIG + + sample "@fields" => {"clientip" => "123.123.123.123"} do + insist { subject["clientip"] } == "5744bbcc4f64acb6a805b7fee3013a8958cc8782d3fb0fb318cec915" + end + end + + describe "anonymize string with SHA256 alogrithm" do + # The logstash config goes here. + # At this time, only filters are supported. + config <<-CONFIG + filter { + anonymize { + fields => ["clientip"] + key => "longencryptionkey" + algorithm => 'SHA256' + } + } + CONFIG + + sample "@fields" => {"clientip" => "123.123.123.123"} do + insist { subject["clientip"] } == "345bec3eff242d53b568916c2610b3e393d885d6b96d643f38494fd74bf4a9ca" + end + end + + describe "anonymize string with SHA384 alogrithm" do + # The logstash config goes here. + # At this time, only filters are supported. + config <<-CONFIG + filter { + anonymize { + fields => ["clientip"] + key => "longencryptionkey" + algorithm => 'SHA384' + } + } + CONFIG + + sample "@fields" => {"clientip" => "123.123.123.123"} do + insist { subject["clientip"] } == "22d4c0e8c4fbcdc4887d2038fca7650f0e2e0e2457ff41c06eb2a980dded6749561c814fe182aff93e2538d18593947a" + end + end + + describe "anonymize string with SHA512 alogrithm" do + # The logstash config goes here. + # At this time, only filters are supported. + config <<-CONFIG + filter { + anonymize { + fields => ["clientip"] + key => "longencryptionkey" + algorithm => 'SHA512' + } + } + CONFIG + + sample "@fields" => {"clientip" => "123.123.123.123"} do + insist { subject["clientip"] } == "11c19b326936c08d6c50a3c847d883e5a1362e6a64dd55201a25f2c1ac1b673f7d8bf15b8f112a4978276d573275e3b14166e17246f670c2a539401c5bfdace8" + end + end + + describe "anonymize string with MD4 alogrithm" do + # The logstash config goes here. + # At this time, only filters are supported. + config <<-CONFIG + filter { + anonymize { + fields => ["clientip"] + key => "longencryptionkey" + algorithm => 'MD4' + } + } + CONFIG + + sample "@fields" => {"clientip" => "123.123.123.123"} do + insist { subject["clientip"] } == "0845cb571ab3646e51a07bcabf05e33d" + end + end + + describe "anonymize string with MD5 alogrithm" do + # The logstash config goes here. + # At this time, only filters are supported. 
+ config <<-CONFIG + filter { + anonymize { + fields => ["clientip"] + key => "longencryptionkey" + algorithm => 'MD5' + } + } + CONFIG + + sample "@fields" => {"clientip" => "123.123.123.123"} do + insist { subject["clientip"] } == "9336c879e305c9604a3843fc3e75948f" + end + end + +end From 4fe0a95abf3077d562ccde57fb87e66a4a335224 Mon Sep 17 00:00:00 2001 From: James Turnbull Date: Thu, 20 Dec 2012 01:28:13 -0500 Subject: [PATCH 068/105] Fixed formatting of EventLog docs --- lib/logstash/inputs/eventlog.rb | 13 +++++++------ lib/logstash/inputs/heroku.rb | 8 ++++---- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/lib/logstash/inputs/eventlog.rb b/lib/logstash/inputs/eventlog.rb index cc930f56f..65b924a9c 100644 --- a/lib/logstash/inputs/eventlog.rb +++ b/lib/logstash/inputs/eventlog.rb @@ -5,12 +5,13 @@ require "socket" # Pull events from a Windows Event Log # # To collect Events from the System Event Log, use a config like: -# input { -# eventlog { -# type => 'Win32-EventLog' -# name => 'System' -# } -# } +# +# input { +# eventlog { +# type => 'Win32-EventLog' +# name => 'System' +# } +# } class LogStash::Inputs::EventLog < LogStash::Inputs::Base config_name "eventlog" diff --git a/lib/logstash/inputs/heroku.rb b/lib/logstash/inputs/heroku.rb index 802a3c629..2da8571e9 100644 --- a/lib/logstash/inputs/heroku.rb +++ b/lib/logstash/inputs/heroku.rb @@ -8,10 +8,10 @@ require "logstash/namespace" # # Recommended filters: # -# filter { -# grok { -# pattern => "^%{TIMESTAMP_ISO8601:timestamp} %{WORD:component}\[%{WORD:process}(?:\.%{INT:instance:int})?\]: %{DATA:message}$" -# } +# filter { +# grok { +# pattern => "^%{TIMESTAMP_ISO8601:timestamp} %{WORD:component}\[%{WORD:process}(?:\.%{INT:instance:int})?\]: %{DATA:message}$" +# } # date { timestamp => ISO8601 } # } class LogStash::Inputs::Heroku < LogStash::Inputs::Base From 21859c5944b0a54075f25f6858fa46c282d0c978 Mon Sep 17 00:00:00 2001 From: theduke Date: Thu, 20 Dec 2012 07:40:40 +0100 Subject: [PATCH 069/105] Fixes LOGSTASH-782: Gelf output level option Level option now works correctly: picks the first valid entry in the array. --- lib/logstash/outputs/gelf.rb | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/logstash/outputs/gelf.rb b/lib/logstash/outputs/gelf.rb index 300115a03..884a8c6fd 100644 --- a/lib/logstash/outputs/gelf.rb +++ b/lib/logstash/outputs/gelf.rb @@ -29,7 +29,7 @@ class LogStash::Outputs::Gelf < LogStash::Outputs::Base # useful if you want to parse the 'log level' from an event and use that # as the gelf level/severity. # - # Values here can be integers [0..7] inclusive or any of + # Values here can be integers [0..7] inclusive or any of # "debug", "info", "warn", "error", "fatal", "unknown" (case insensitive). # Single-character versions of these are also valid, "d", "i", "w", "e", "f", # "u" @@ -88,9 +88,9 @@ class LogStash::Outputs::Gelf < LogStash::Outputs::Base # If we leave that set, the gelf gem will extract the file and line number # of the source file that logged the message (i.e. logstash/gelf.rb:138). - # With that set to false, it can use the actual event's filename (i.e. + # With that set to false, it can use the actual event's filename (i.e. 
# /var/log/syslog), which is much more useful - @gelf.collect_file_and_line = false + @gelf.collect_file_and_line = false # these are syslog words and abbreviations mapped to RFC 5424 integers @level_map = { @@ -162,10 +162,10 @@ class LogStash::Outputs::Gelf < LogStash::Outputs::Base if @level.is_a?(Array) @level.each do |value| parsed_value = event.sprintf(value) - if parsed_value - level = parsed_value - break - end + next if value.count('%{') > 0 and parsed_value == value + + level = parsed_value + break end else level = event.sprintf(@level.to_s) From 71335f8fb3eb560481f285a0ac8925a96740691a Mon Sep 17 00:00:00 2001 From: theduke Date: Thu, 20 Dec 2012 10:24:04 +0100 Subject: [PATCH 070/105] Add password option for irc output. --- lib/logstash/outputs/irc.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/logstash/outputs/irc.rb b/lib/logstash/outputs/irc.rb index 1db3d821c..0449db37f 100644 --- a/lib/logstash/outputs/irc.rb +++ b/lib/logstash/outputs/irc.rb @@ -24,6 +24,9 @@ class LogStash::Outputs::Irc < LogStash::Outputs::Base # IRC Real name config :real, :validate => :string, :default => "logstash" + # IRC server password + config :password, :validate => :string + # Channels to broadcast to config :channels, :validate => :array, :required => true @@ -45,8 +48,6 @@ class LogStash::Outputs::Irc < LogStash::Outputs::Base c.user = @user c.realname = @real c.channels = @channels - c.channels = @channels - c.channels = @channels c.password = @password end Thread.new(@bot) do |bot| From 9ed81319bcc325f9658ec1e003bf5196948b338b Mon Sep 17 00:00:00 2001 From: theduke Date: Thu, 20 Dec 2012 18:09:35 +0100 Subject: [PATCH 071/105] Changed password validator, and setting default to nil --- lib/logstash/outputs/irc.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/logstash/outputs/irc.rb b/lib/logstash/outputs/irc.rb index 0449db37f..3ca195a18 100644 --- a/lib/logstash/outputs/irc.rb +++ b/lib/logstash/outputs/irc.rb @@ -25,7 +25,7 @@ class LogStash::Outputs::Irc < LogStash::Outputs::Base config :real, :validate => :string, :default => "logstash" # IRC server password - config :password, :validate => :string + config :password, :validate => :password, :default => nil # Channels to broadcast to config :channels, :validate => :array, :required => true From 6d6a328ca0b36753ef5bda8d64132ae6189fde20 Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Thu, 20 Dec 2012 16:18:38 -0800 Subject: [PATCH 072/105] - log warnings if logger methods are called outside of the current log level. Enable it with PROFILE_BAD_LOG_CALLS=1 in env. 
Supporting research here: https://github.com/jordansissel/experiments/tree/master/ruby/logger-string-vs-block --- lib/logstash/runner.rb | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/lib/logstash/runner.rb b/lib/logstash/runner.rb index 7fa04f317..d8608f6f0 100644 --- a/lib/logstash/runner.rb +++ b/lib/logstash/runner.rb @@ -16,6 +16,41 @@ require "logstash/namespace" require "logstash/program" require "logstash/util" +if ENV["PROFILE_BAD_LOG_CALLS"] + # Set PROFILE_BAD_LOG_CALLS=1 in your environment if you want + # to track down logger calls that cause performance problems + # + # Related research here: + # https://github.com/jordansissel/experiments/tree/master/ruby/logger-string-vs-block + # + # Basically, the following is wastes tons of effort creating objects that are + # never used if the log level hides the log: + # + # logger.debug("something happend", :what => Happened) + # + # This is shown to be 4x faster: + # + # logger.debug(...) if logger.debug? + # + # I originally intended to use RubyParser and SexpProcessor to + # process all the logstash ruby code offline, but it was much + # faster to write this monkeypatch to warn as things are called. + require "cabin/mixins/logger" + module Cabin::Mixins::Logger + LEVELS.keys.each do |level| + m = "original_#{level}".to_sym + predicate = "#{level}?".to_sym + alias_method m, level + define_method(level) do |*args| + if !send(predicate) + warn("Unconditional log call", :location => caller[0]) + end + send(m, *args) + end + end + end +end + class LogStash::Runner include LogStash::Program From 49cc3307e1fe7ef6edeea6159dd2180923df66d7 Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Thu, 20 Dec 2012 16:25:39 -0800 Subject: [PATCH 073/105] - github doesn't have downloads anymore --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0307396c9..75d105bb8 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ JRUBY=vendor/jar/jruby-complete-$(JRUBY_VERSION).jar JRUBY_URL=http://repository.codehaus.org/org/jruby/jruby-complete/$(JRUBY_VERSION) JRUBY_CMD=java -jar $(JRUBY) JRUBYC=$(WITH_JRUBY) jrubyc -ELASTICSEARCH_URL=http://github.com/downloads/elasticsearch/elasticsearch +ELASTICSEARCH_URL=http://download.elasticsearch.org/elasticsearch/elasticsearch ELASTICSEARCH=vendor/jar/elasticsearch-$(ELASTICSEARCH_VERSION) GEOIP=vendor/geoip/GeoLiteCity.dat GEOIP_URL=http://logstash.objects.dreamhost.com/maxmind/GeoLiteCity-2012-11-09.dat.gz From 879827da577e479539ff328d5925b39ba4635324 Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Thu, 20 Dec 2012 17:13:42 -0800 Subject: [PATCH 074/105] - rebuild grammar --- lib/logstash/config/grammar.rb | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) mode change 100755 => 100644 lib/logstash/config/grammar.rb diff --git a/lib/logstash/config/grammar.rb b/lib/logstash/config/grammar.rb old mode 100755 new mode 100644 index 333af8248..3e7181d5a --- a/lib/logstash/config/grammar.rb +++ b/lib/logstash/config/grammar.rb @@ -3,7 +3,7 @@ require "logstash/namespace" -# line 147 "grammar.rl" +# line 150 "grammar.rl" class LogStash::Config::Grammar @@ -248,7 +248,7 @@ end self.logstash_config_en_main = 55; -# line 156 "grammar.rl" +# line 159 "grammar.rl" # END RAGEL DATA @tokenstack = Array.new @@ -275,7 +275,7 @@ begin cs = logstash_config_start end -# line 175 "grammar.rl" +# line 178 "grammar.rl" # END RAGEL INIT begin @@ -469,7 +469,7 @@ when 10 then #puts "Config component: #{name}" end when 12 then 
-# line 142 "grammar.rl" +# line 145 "grammar.rl" begin # Compute line and column of the cursor (p) @@ -521,11 +521,11 @@ when 10 then #puts "Config component: #{name}" end when 11 then -# line 141 "grammar.rl" +# line 144 "grammar.rl" begin puts "END" end when 12 then -# line 142 "grammar.rl" +# line 145 "grammar.rl" begin # Compute line and column of the cursor (p) @@ -546,7 +546,7 @@ end end end -# line 180 "grammar.rl" +# line 183 "grammar.rl" # END RAGEL EXEC rescue => e # Compute line and column of the cursor (p) From b6c39a615da68ca1361a42d2e0f7a5077abf5edd Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Thu, 20 Dec 2012 17:14:31 -0800 Subject: [PATCH 075/105] include mean --- lib/logstash/filters/metrics.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/logstash/filters/metrics.rb b/lib/logstash/filters/metrics.rb index d77557397..7d924d307 100644 --- a/lib/logstash/filters/metrics.rb +++ b/lib/logstash/filters/metrics.rb @@ -50,6 +50,7 @@ class LogStash::Filters::Metrics < LogStash::Filters::Base event["#{name}.min"] = metric.min event["#{name}.max"] = metric.max event["#{name}.stddev"] = metric.stddev + event["#{name}.mean"] = metric.mean end filter_matched(event) From 0efce45ed417285aee1dc9e69c508c08ea09c75c Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Thu, 20 Dec 2012 17:14:49 -0800 Subject: [PATCH 076/105] - Fix a bunch of logger calls to be 'logger.info? && logger.info(...)' as identified with PROFILE_BAD_LOG_CALLS=1 This nets 15% throughput improvement with this config: http://semicomplete.com/images/logger-conditional-performance.png input { generator { type => foo } } filter { grok { pattern => "foo" } metrics { meter => "events" add_tag => "metric" } } output { stdout { tags => metric debug => true } http { tags => metric url => "..." http_method => "post" mapping => [ "entry.0.single", "%{events.rate_1m}", "submit", "Submit" ] format => form } }' --- lib/logstash/agent.rb | 31 ++++++++++++++++--------------- lib/logstash/filters/base.rb | 11 +++++++---- lib/logstash/filters/grok.rb | 35 +++++++++++++++++------------------ lib/logstash/filterworker.rb | 10 +++++----- lib/logstash/inputs/base.rb | 2 +- lib/logstash/outputs/base.rb | 8 ++++---- 6 files changed, 50 insertions(+), 47 deletions(-) diff --git a/lib/logstash/agent.rb b/lib/logstash/agent.rb index 6083a68db..ecec293d2 100644 --- a/lib/logstash/agent.rb +++ b/lib/logstash/agent.rb @@ -140,7 +140,7 @@ class LogStash::Agent # These are 'unknown' flags that begin ---flag # Put any plugin paths into the ruby library path for requiring later. @plugin_paths.each do |p| - @logger.debug("Adding to ruby load path", :path => p) + @logger.debug? and @logger.debug("Adding to ruby load path", :path => p) $:.unshift p end @@ -163,8 +163,8 @@ class LogStash::Agent %w{inputs outputs filters}.each do |component| @plugin_paths.each do |path| plugin = File.join(path, component, name) + ".rb" - @logger.debug("Plugin flag found; trying to load it", - :flag => arg, :plugin => plugin) + @logger.debug? and @logger.debug("Plugin flag found; trying to load it", + :flag => arg, :plugin => plugin) if File.file?(plugin) @logger.info("Loading plugin", :plugin => plugin) require plugin @@ -173,7 +173,7 @@ class LogStash::Agent # and add any options to our option parser. klass_name = name.capitalize if c.const_defined?(klass_name) - @logger.debug("Found plugin class", :class => "#{c}::#{klass_name})") + @logger.debug? 
and @logger.debug("Found plugin class", :class => "#{c}::#{klass_name})") klass = c.const_get(klass_name) # See LogStash::Config::Mixin::DSL#options klass.options(@opts) @@ -241,8 +241,8 @@ class LogStash::Agent # Support directory of config files. # https://logstash.jira.com/browse/LOGSTASH-106 if File.directory?(@config_path) - @logger.debug("Config path is a directory, scanning files", - :path => @config_path) + @logger.debug? and @logger.debug("Config path is a directory, scanning files", + :path => @config_path) paths = Dir.glob(File.join(@config_path, "*")).sort else # Get a list of files matching a glob. If the user specified a single @@ -332,23 +332,23 @@ class LogStash::Agent private def start_input(input) - @logger.debug("Starting input", :plugin => input) + @logger.debug? and @logger.debug("Starting input", :plugin => input) t = 0 # inputs should write directly to output queue if there are no filters. input_target = @filters.length > 0 ? @filter_queue : @output_queue # check to see if input supports multiple threads if input.threadable - @logger.debug("Threadable input", :plugin => input) + @logger.debug? and @logger.debug("Threadable input", :plugin => input) # start up extra threads if need be (input.threads-1).times do input_thread = input.clone - @logger.debug("Starting thread", :plugin => input, :thread => (t+=1)) + @logger.debug? and @logger.debug("Starting thread", :plugin => input, :thread => (t+=1)) @plugins[input_thread] = Thread.new(input_thread, input_target) do |*args| run_input(*args) end end end - @logger.debug("Starting thread", :plugin => input, :thread => (t+=1)) + @logger.debug? and @logger.debug("Starting thread", :plugin => input, :thread => (t+=1)) @plugins[input] = Thread.new(input, input_target) do |*args| run_input(*args) end @@ -356,7 +356,7 @@ class LogStash::Agent private def start_output(output) - @logger.debug("Starting output", :plugin => output) + @logger.debug? and @logger.debug("Starting output", :plugin => output) queue = LogStash::SizedQueue.new(10 * @filterworker_count) queue.logger = @logger @output_queue.add_queue(queue) @@ -474,7 +474,7 @@ class LogStash::Agent shutdown break end - @logger.debug("heartbeat") + @logger.debug? and @logger.debug("heartbeat") end end # def run_with_config @@ -740,7 +740,7 @@ class LogStash::Agent begin while event = queue.pop do - @logger.debug("Sending event", :target => output) + @logger.debug? and @logger.debug("Sending event", :target => output) output.handle(event) break if output.finished? end @@ -768,8 +768,9 @@ class LogStash::Agent remaining = @plugins.count do |plugin, thread| plugin.is_a?(pluginclass) and plugin.running? and thread.alive? end - @logger.debug("Plugins still running", :type => pluginclass, - :remaining => remaining) + @logger.debug? and @logger.debug("Plugins still running", + :type => pluginclass, + :remaining => remaining) if remaining == 0 @logger.warn("All #{pluginclass} finished. Shutting down.") diff --git a/lib/logstash/filters/base.rb b/lib/logstash/filters/base.rb index 524830b7f..f9ff123bf 100644 --- a/lib/logstash/filters/base.rb +++ b/lib/logstash/filters/base.rb @@ -105,12 +105,14 @@ class LogStash::Filters::Base < LogStash::Plugin event[field] = [event[field]] if !event[field].is_a?(Array) event[field] << event.sprintf(value) end - @logger.debug("filters/#{self.class.name}: adding value to field", - :field => field, :value => value) + @logger.debug? 
and @logger.debug("filters/#{self.class.name}: adding " \ + "value to field", :field => field, + :value => value) end (@add_tag or []).each do |tag| - @logger.debug("filters/#{self.class.name}: adding tag", :tag => tag) + @logger.debug? and @logger.debug("filters/#{self.class.name}: adding tag", + :tag => tag) event.tags << event.sprintf(tag) #event.tags |= [ event.sprintf(tag) ] end @@ -119,7 +121,8 @@ class LogStash::Filters::Base < LogStash::Plugin remove_tags = @remove_tag.map do |tag| event.sprintf(tag) end - @logger.debug("filters/#{self.class.name}: removing tags", :tags => (event.tags & remove_tags)) + @logger.debug? and @logger.debug("filters/#{self.class.name}: removing tags", + :tags => (event.tags & remove_tags)) event.tags -= remove_tags end end # def filter_matched diff --git a/lib/logstash/filters/grok.rb b/lib/logstash/filters/grok.rb index 04995b74a..2c358c8fb 100644 --- a/lib/logstash/filters/grok.rb +++ b/lib/logstash/filters/grok.rb @@ -104,13 +104,13 @@ class LogStash::Filters::Grok < LogStash::Filters::Base # Have @@patterns_path show first. Last-in pattern definitions win; this # will let folks redefine built-in patterns at runtime. @patterns_dir = @@patterns_path.to_a + @patterns_dir - @logger.info("Grok patterns path", :patterns_dir => @patterns_dir) + @logger.info? and @logger.info("Grok patterns path", :patterns_dir => @patterns_dir) @patterns_dir.each do |path| # Can't read relative paths from jars, try to normalize away '../' while path =~ /file:\/.*\.jar!.*\/\.\.\// # replace /foo/bar/../baz => /foo/baz path = path.gsub(/[^\/]+\/\.\.\//, "") - @logger.debug("In-jar path to read", :path => path) + @logger.debug? and @logger.debug("In-jar path to read", :path => path) end if File.directory?(path) @@ -118,14 +118,14 @@ class LogStash::Filters::Grok < LogStash::Filters::Base end Dir.glob(path).each do |file| - @logger.info("Grok loading patterns from file", :path => file) + @logger.info? and @logger.info("Grok loading patterns from file", :path => file) @patternfiles << file end end @patterns = Hash.new { |h,k| h[k] = [] } - @logger.info("Match data", :match => @match) + @logger.info? and @logger.info("Match data", :match => @match) # TODO(sissel): Hash.merge actually overrides, not merges arrays. # Work around it by implementing our own? @@ -143,9 +143,9 @@ class LogStash::Filters::Grok < LogStash::Filters::Base add_patterns_from_files(@patternfiles, @patterns[field]) end - @logger.info("Grok compile", :field => field, :patterns => patterns) + @logger.info? and @logger.info("Grok compile", :field => field, :patterns => patterns) patterns.each do |pattern| - @logger.debug("regexp: #{@type}/#{field}", :pattern => pattern) + @logger.debug? and @logger.debug("regexp: #{@type}/#{field}", :pattern => pattern) @patterns[field].compile(pattern) end end # @config.each @@ -158,17 +158,17 @@ class LogStash::Filters::Grok < LogStash::Filters::Base # parse it with grok matched = false - @logger.debug("Running grok filter", :event => event); + @logger.debug? and @logger.debug("Running grok filter", :event => event); done = false @patterns.each do |field, pile| break if done if !event[field] - @logger.debug("Skipping match object, field not present", - :field => field, :event => event) + @logger.debug? and @logger.debug("Skipping match object, field not present", + :field => field, :event => event) next end - @logger.debug("Trying pattern", :pile => pile, :field => field) + @logger.debug? 
and @logger.debug("Trying pattern", :pile => pile, :field => field) (event[field].is_a?(Array) ? event[field] : [event[field]]).each do |fieldvalue| begin # Coerce all field values to string. This turns arrays, hashes, numbers, etc @@ -197,8 +197,8 @@ class LogStash::Filters::Grok < LogStash::Filters::Base # Permit typing of captures by giving an additional colon and a type, # like: %{FOO:name:int} for int coercion. if type_coerce - @logger.info("Match type coerce: #{type_coerce}") - @logger.info("Patt: #{grok.pattern}") + @logger.info? and @logger.info("Match type coerce: #{type_coerce}") + @logger.info? and @logger.info("Patt: #{grok.pattern}") end case type_coerce @@ -211,13 +211,12 @@ class LogStash::Filters::Grok < LogStash::Filters::Base # Special casing to skip captures that represent the entire log message. if fieldvalue == value and field == "@message" # Skip patterns that match the entire message - @logger.debug("Skipping capture since it matches the whole line.", :field => key) + @logger.debug? and @logger.debug("Skipping capture since it matches the whole line.", :field => key) next end if @named_captures_only && !is_named - @logger.debug("Skipping capture since it is not a named " \ - "capture and named_captures_only is true.", :field => key) + @logger.debug? and @logger.debug("Skipping capture since it is not a named " "capture and named_captures_only is true.", :field => key) next end @@ -253,7 +252,7 @@ class LogStash::Filters::Grok < LogStash::Filters::Base event.tags << "_grokparsefailure" end - @logger.debug("Event now: ", :event => event) + @logger.debug? and @logger.debug("Event now: ", :event => event) end # def filter private @@ -272,8 +271,8 @@ class LogStash::Filters::Grok < LogStash::Filters::Base # the end. I don't know if this is a bug or intentional, but we need # to chomp it. name, pattern = line.chomp.split(/\s+/, 2) - @logger.debug("Adding pattern from file", :name => name, - :pattern => pattern, :path => path) + @logger.debug? and @logger.debug("Adding pattern from file", :name => name, + :pattern => pattern, :path => path) pile.add_pattern(name, pattern) end else diff --git a/lib/logstash/filterworker.rb b/lib/logstash/filterworker.rb index ed9b00ca7..b1f47e9a9 100644 --- a/lib/logstash/filterworker.rb +++ b/lib/logstash/filterworker.rb @@ -58,7 +58,7 @@ class LogStash::FilterWorker < LogStash::Plugin end events.each do |event| - @logger.debug("Pushing flushed events", :event => event) + @logger.debug? and @logger.debug("Pushing flushed events", :event => event) @output_queue.push(event) unless event.cancelled? end end # def flusher @@ -95,14 +95,14 @@ class LogStash::FilterWorker < LogStash::Plugin clear_watchdog end if event.cancelled? - @logger.debug("Event cancelled", :event => event, - :filter => filter.class) + @logger.debug? and @logger.debug("Event cancelled", :event => event, + :filter => filter.class) break end end # @filters.each - @logger.debug("Event finished filtering", :event => event, - :thread => Thread.current[:name]) + @logger.debug? and @logger.debug("Event finished filtering", :event => event, + :thread => Thread.current[:name]) @output_queue.push(event) unless event.cancelled? 
end # events.each end # def filter diff --git a/lib/logstash/inputs/base.rb b/lib/logstash/inputs/base.rb index 79b7dbed8..37e306822 100644 --- a/lib/logstash/inputs/base.rb +++ b/lib/logstash/inputs/base.rb @@ -143,7 +143,7 @@ class LogStash::Inputs::Base < LogStash::Plugin event[field] << event.sprintf(value) end - logger.debug(["Received new event", {:source => source, :event => event}]) + @logger.debug? and @logger.debug("Received new event", :source => source, :event => event) return event end # def to_event end # class LogStash::Inputs::Base diff --git a/lib/logstash/outputs/base.rb b/lib/logstash/outputs/base.rb index 37bab342b..f993036c4 100644 --- a/lib/logstash/outputs/base.rb +++ b/lib/logstash/outputs/base.rb @@ -59,28 +59,28 @@ class LogStash::Outputs::Base < LogStash::Plugin def output?(event) if !@type.empty? if event.type != @type - @logger.debug(["Dropping event because type doesn't match #{@type}", event]) + @logger.debug? and @logger.debug(["Dropping event because type doesn't match #{@type}", event]) return false end end if !@tags.empty? if (event.tags & @tags).size != @tags.size - @logger.debug(["Dropping event because tags don't match #{@tags.inspect}", event]) + @logger.debug? and @logger.debug(["Dropping event because tags don't match #{@tags.inspect}", event]) return false end end if !@exclude_tags.empty? if (diff_tags = (event.tags & @exclude_tags)).size != 0 - @logger.debug(["Dropping event because tags contains excluded tags: #{diff_tags.inspect}", event]) + @logger.debug? and @logger.debug(["Dropping event because tags contains excluded tags: #{diff_tags.inspect}", event]) return false end end if !@fields.empty? if (event.fields.keys & @fields).size != @fields.size - @logger.debug(["Dropping event because type doesn't match #{@fields.inspect}", event]) + @logger.debug? and @logger.debug(["Dropping event because type doesn't match #{@fields.inspect}", event]) return false end end From e7e9a708400382c5aea88120bd72b07afe4d5e57 Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Fri, 21 Dec 2012 00:06:07 -0800 Subject: [PATCH 077/105] - clarify versions --- lib/logstash/outputs/elasticsearch.rb | 5 +++-- lib/logstash/outputs/elasticsearch_http.rb | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/logstash/outputs/elasticsearch.rb b/lib/logstash/outputs/elasticsearch.rb index b1dd3c0a4..2942e52b8 100644 --- a/lib/logstash/outputs/elasticsearch.rb +++ b/lib/logstash/outputs/elasticsearch.rb @@ -5,11 +5,12 @@ require "logstash/outputs/base" # output for logstash. If you plan on using the logstash web interface, you'll # need to use this output. # -# *NOTE*: The elasticsearch client is version %ELASTICSEARCH_VERSION%. Your elasticsearch -# cluster must be running 0.19.x for API compatibility. +# *NOTE*: Your elasticsearch cluster must be running elasticsearch +# %ELASTICSEARCH_VERSION%. Any other version may not work. # # If you want to set other elasticsearch options that are not exposed directly # as config options, there are two options: +# # * create an elasticsearch.yml file in the $PWD of the logstash process # * pass in es.* java properties (java -Des.node.foo= or ruby -J-Des.node.foo=) # diff --git a/lib/logstash/outputs/elasticsearch_http.rb b/lib/logstash/outputs/elasticsearch_http.rb index 03f54fc15..8a5e9ce8d 100644 --- a/lib/logstash/outputs/elasticsearch_http.rb +++ b/lib/logstash/outputs/elasticsearch_http.rb @@ -3,8 +3,9 @@ require "logstash/outputs/base" # This output lets you store logs in elasticsearch. 
# -# This output differs from the 'elasticsearch' output by using the HTTP -# interface for indexing data with elasticsearch. +# This plugin uses the HTTP/REST interface to ElasticSearch, which usually +# lets you use any version of elasticsearch server. It is known to work +# with elasticsearch %ELASTICSEARCH_VERSION% # # You can learn more about elasticsearch at class LogStash::Outputs::ElasticSearchHTTP < LogStash::Outputs::Base From 124a14461f025dd8e3d52ad2eba9657095f42f9e Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Fri, 21 Dec 2012 01:34:09 -0800 Subject: [PATCH 078/105] Add '.' as a valid date separator for EU dates (requested by rarruda in irc) --- patterns/grok-patterns | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/patterns/grok-patterns b/patterns/grok-patterns index 4f9d4229e..2ed0045d1 100755 --- a/patterns/grok-patterns +++ b/patterns/grok-patterns @@ -68,7 +68,7 @@ SECOND (?:(?:[0-5][0-9]|60)(?:[.,][0-9]+)?) TIME (?!<[0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9]) # datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it) DATE_US %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR} -DATE_EU %{YEAR}[/-]%{MONTHNUM}[/-]%{MONTHDAY} +DATE_EU %{YEAR}[./-]%{MONTHNUM}[./-]%{MONTHDAY} ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE})) ISO8601_SECOND (?:%{SECOND}|60) TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}? From e0da16cbf7dd0c660b21c97548b024b8a69258de Mon Sep 17 00:00:00 2001 From: Wiibaa Date: Fri, 21 Dec 2012 11:18:26 +0100 Subject: [PATCH 079/105] Fix for LOGSTASH-733: allow whitespace in value when using field pipe-delimiter --- lib/logstash/filters/kv.rb | 7 ++++--- spec/filters/kv.rb | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/lib/logstash/filters/kv.rb b/lib/logstash/filters/kv.rb index 972d36501..3af66034c 100644 --- a/lib/logstash/filters/kv.rb +++ b/lib/logstash/filters/kv.rb @@ -33,9 +33,10 @@ class LogStash::Filters::KV < LogStash::Filters::Base # # Example, to split out the args from a string such as # '?pin=12345~0&d=123&e=foo@bar.com&oq=bobo&ss=12345': - # + # + # Default to space character for backward compatibility # filter { kv { field_split => "&?" } } - config :field_split, :validate => :string, :default => '' + config :field_split, :validate => :string, :default => ' ' # A string of characters to use as delimiters for identifying key-value relations. @@ -95,7 +96,7 @@ class LogStash::Filters::KV < LogStash::Filters::Base if !event =~ /[@field_split]/ return kv_keys end - scan_re = Regexp.new("([^ "+@field_split+@value_split+"]+)["+@value_split+"](?:\"([^\""+@field_split+"]+)\"|'([^'"+@field_split+"]+)'|([^ "+@field_split+"]+))") + scan_re = Regexp.new("([^"+@field_split+@value_split+"]+)["+@value_split+"](?:\"([^\"]+)\"|'([^']+)'|([^"+@field_split+"]+))") text.scan(scan_re) do |key, v1, v2, v3| value = v1 || v2 || v3 if !@trim.nil? 
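The net effect of the relaxed pattern is easiest to see in isolation. Here is a
minimal standalone sketch in plain Ruby (not part of the patch; the sample
string and the separators are illustrative):

    # Same shape as the new scan regexp, with field_split "|" and the
    # default value_split "=". Quoted values may now contain field_split
    # characters, and unquoted values may contain whitespace.
    field_split = "|"
    value_split = "="
    scan_re = Regexp.new("([^#{field_split}#{value_split}]+)[#{value_split}](?:\"([^\"]+)\"|'([^']+)'|([^#{field_split}]+))")

    "field1=test|field2=another test|field3=test3".scan(scan_re) do |key, v1, v2, v3|
      puts "#{key} => #{v1 || v2 || v3}"
    end
    # Prints:
    #   field1 => test
    #   field2 => another test
    #   field3 => test3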
diff --git a/spec/filters/kv.rb b/spec/filters/kv.rb index 23214c8d6..c3f83206e 100644 --- a/spec/filters/kv.rb +++ b/spec/filters/kv.rb @@ -61,6 +61,20 @@ describe LogStash::Filters::KV do end + describe "delimited fields should override space default (reported by LOGSTASH-733)" do + config <<-CONFIG + filter { + kv { field_split => "|" } + } + CONFIG + + sample "field1=test|field2=another test|field3=test3" do + insist { subject["field1"] } == "test" + insist { subject["field2"] } == "another test" + insist { subject["field3"] } == "test3" + end + end + describe "test prefix" do config <<-CONFIG filter { From e0b09f075e97e144c50dd50a9fa44f9eae1413c9 Mon Sep 17 00:00:00 2001 From: Wiibaa Date: Fri, 21 Dec 2012 13:15:54 +0100 Subject: [PATCH 080/105] Fix for LOGSTASH-624: allow escaped spaces in key or value --- lib/logstash/filters/kv.rb | 2 +- spec/filters/kv.rb | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/lib/logstash/filters/kv.rb b/lib/logstash/filters/kv.rb index 3af66034c..bb9daa11a 100644 --- a/lib/logstash/filters/kv.rb +++ b/lib/logstash/filters/kv.rb @@ -96,7 +96,7 @@ class LogStash::Filters::KV < LogStash::Filters::Base if !event =~ /[@field_split]/ return kv_keys end - scan_re = Regexp.new("([^"+@field_split+@value_split+"]+)["+@value_split+"](?:\"([^\"]+)\"|'([^']+)'|([^"+@field_split+"]+))") + scan_re = Regexp.new("((?:\\\\ |[^"+@field_split+@value_split+"])+)["+@value_split+"](?:\"([^\"]+)\"|'([^']+)'|((?:\\\\ |[^"+@field_split+"])+))") text.scan(scan_re) do |key, v1, v2, v3| value = v1 || v2 || v3 if !@trim.nil? diff --git a/spec/filters/kv.rb b/spec/filters/kv.rb index c3f83206e..7a3b27bbc 100644 --- a/spec/filters/kv.rb +++ b/spec/filters/kv.rb @@ -24,6 +24,19 @@ describe LogStash::Filters::KV do end + describe "LOGSTASH-624: allow escaped space in key or value " do + config <<-CONFIG + filter { + kv { value_split => ':' } + } + CONFIG + + sample 'IKE:=Quick\ Mode\ completion IKE\ IDs:=subnet:\ x.x.x.x\ (mask=\ 255.255.255.254)\ and\ host:\ y.y.y.y' do + insist { subject["IKE"] } == '=Quick\ Mode\ completion' + insist { subject['IKE\ IDs'] } == '=subnet:\ x.x.x.x\ (mask=\ 255.255.255.254)\ and\ host:\ y.y.y.y' + end + end + describe "test value_split" do config <<-CONFIG filter { From d20d8a67376808c2a8d5e8aa5a569de46e1effee Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Fri, 21 Dec 2012 09:47:47 -0800 Subject: [PATCH 081/105] - add missing redis flag for extra logging (Pointed out by Georgi Pachov on the mailing list) --- docs/tutorials/getting-started-centralized.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/tutorials/getting-started-centralized.md b/docs/tutorials/getting-started-centralized.md index 530653819..c54371ed4 100644 --- a/docs/tutorials/getting-started-centralized.md +++ b/docs/tutorials/getting-started-centralized.md @@ -63,7 +63,7 @@ Building and installing Redis is fairly straightforward. While normally this wou - Download Redis from http://redis.io/download (The latest stable release is like what you want) - Extract the source, change to the directory and run `make` -- Run Redis with `src/redis-server` +- Run Redis with `src/redis-server --loglevel verbose` That's it. @@ -104,6 +104,7 @@ Put this in a file and call it 'shipper.conf' (or anything, really), and run: This will take anything you type into this console and display it on the console. Additionally it will save events to Redis in a `list` named after the `key` value you provided. 
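If you prefer to inspect the queue programmatically, a quick check from irb
works as well. A small sketch, assuming the 'redis' rubygem is installed and
that your shipper config used key => "logstash" (adjust to your own config):

    require "redis"

    redis = Redis.new(:host => "127.0.0.1", :port => 6379)
    puts redis.llen("logstash")          # number of events waiting in the list
    puts redis.lrange("logstash", 0, 0)  # peek at the oldest queued event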
### Testing the Redis output + To verify that the message made it into Redis, check your Redis window. You should see something like the following: [83019] 02 Jul 12:51:02 - Accepted 127.0.0.1:58312 From 675f419cd0d48bd46d98d2e996b61f48ab87f7d6 Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Fri, 21 Dec 2012 14:52:12 -0800 Subject: [PATCH 082/105] - style fixes --- lib/logstash/inputs/irc.rb | 2 +- lib/logstash/outputs/irc.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/logstash/inputs/irc.rb b/lib/logstash/inputs/irc.rb index 431d1da1e..9cdaeac75 100644 --- a/lib/logstash/inputs/irc.rb +++ b/lib/logstash/inputs/irc.rb @@ -25,7 +25,7 @@ class LogStash::Inputs::Irc < LogStash::Inputs::Base config :real, :validate => :string, :default => "logstash" # IRC Server password - config :password, :validate => :password, :default => nil + config :password, :validate => :password # Channels to listen to config :channels, :validate => :array, :required => true diff --git a/lib/logstash/outputs/irc.rb b/lib/logstash/outputs/irc.rb index 3ca195a18..00cdfaddf 100644 --- a/lib/logstash/outputs/irc.rb +++ b/lib/logstash/outputs/irc.rb @@ -25,7 +25,7 @@ class LogStash::Outputs::Irc < LogStash::Outputs::Base config :real, :validate => :string, :default => "logstash" # IRC server password - config :password, :validate => :password, :default => nil + config :password, :validate => :password # Channels to broadcast to config :channels, :validate => :array, :required => true From 8ed837e0eface844db3233fe021d89fa0f53e2b8 Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Fri, 21 Dec 2012 17:18:17 -0800 Subject: [PATCH 083/105] - make TAI64N date pattern support '@'-prefixed times. Now both are supported: @4000000050d506482dbdf024 4000000050d506482dbdf024 - Add TAI64N date specs which pass. --- lib/logstash/filters/date.rb | 7 ++++++- spec/filters/date.rb | 21 +++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/lib/logstash/filters/date.rb b/lib/logstash/filters/date.rb index 0f59667c7..af04224b0 100644 --- a/lib/logstash/filters/date.rb +++ b/lib/logstash/filters/date.rb @@ -119,7 +119,12 @@ class LogStash::Filters::Date < LogStash::Filters::Base when "UNIX_MS" # unix epoch in ms parser = lambda { |date| org.joda.time.Instant.new(date.to_i).toDateTime } when "TAI64N" # TAI64 with nanoseconds, -10000 accounts for leap seconds - parser = lambda { |date| org.joda.time.Instant.new((date[1..15].hex * 1000 - 10000)+(date[16..23].hex/1000000)).toDateTime } + parser = lambda do |date| + # Skip leading "@" if it is present (common in tai64n times) + date = date[1..-1] if date[0, 1] == "@" + + org.joda.time.Instant.new((date[1..15].hex * 1000 - 10000)+(date[16..23].hex/1000000)).toDateTime + end else joda_parser = org.joda.time.format.DateTimeFormat.forPattern(format).withOffsetParsed if(locale != nil) diff --git a/spec/filters/date.rb b/spec/filters/date.rb index 76ce8d391..1a9d691ff 100644 --- a/spec/filters/date.rb +++ b/spec/filters/date.rb @@ -160,4 +160,25 @@ describe LogStash::Filters::Date do # nothing to do, if this crashes it's an error.. 
end end + + describe "TAI64N support" do + config <<-'CONFIG' + filter { + date { + t => TAI64N + } + } + CONFIG + + # Try without leading "@" + sample({ "@fields" => { "t" => "4000000050d506482dbdf024" } }) do + insist { subject.timestamp } == "2012-12-22T01:00:46.767Z" + end + + # Should still parse successfully if it's a full tai64n time (with leading + # '@') + sample({ "@fields" => { "t" => "@4000000050d506482dbdf024" } }) do + insist { subject.timestamp } == "2012-12-22T01:00:46.767Z" + end + end end From a2269a77fcb19e4834bee8979647460d24a7f02b Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Fri, 21 Dec 2012 17:40:50 -0800 Subject: [PATCH 084/105] - fix formatting/style on examples --- lib/logstash/filters/mutate.rb | 71 ++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 30 deletions(-) diff --git a/lib/logstash/filters/mutate.rb b/lib/logstash/filters/mutate.rb index b57c168b7..3c53551a7 100644 --- a/lib/logstash/filters/mutate.rb +++ b/lib/logstash/filters/mutate.rb @@ -63,20 +63,25 @@ class LogStash::Filters::Mutate < LogStash::Filters::Base # Convert a string field by applying a regular expression and a replacement # if the field is not a string, no action will be taken # - # this configuration takes an array consisting of 3 elements per field/substitution + # This configuration takes an array consisting of 3 elements per + # field/substitution. # # be aware of escaping any backslash in the config file # # for example: # - # mutate { - # gsub => [ - # # replace all forward slashes with underscore - # "fieldname", "\\/", "_", - # # replace backslashes, question marks, hashes and minuses with underscore - # "fieldname", "[\\?#-]", "_" - # ] - # } + # filter { + # mutate { + # gsub => [ + # # replace all forward slashes with underscore + # "fieldname", "\\/", "_", + # + # # replace backslashes, question marks, hashes and minuses with + # # underscore + # "fieldname", "[\\?#-]", "_" + # ] + # } + # } # config :gsub, :validate => :array @@ -84,50 +89,56 @@ class LogStash::Filters::Mutate < LogStash::Filters::Base # # Example: # - # mutate { - # uppercase => [ "fieldname" ] - # } - # + # filter { + # mutate { + # uppercase => [ "fieldname" ] + # } + # } config :uppercase, :validate => :array # Convert a string to its lowercase equivalent # # Example: # - # mutate { - # lowercase => [ "fieldname" ] - # } - # + # filter { + # mutate { + # lowercase => [ "fieldname" ] + # } + # } config :lowercase, :validate => :array - # Split a field to an array using a separator character. Only works on string fields + # Split a field to an array using a separator character. Only works on string + # fields. # # Example: # - # mutate { - # split => ["fieldname", ","] - # } - # + # filter { + # mutate { + # split => ["fieldname", ","] + # } + # } config :split, :validate => :hash # Join an array with a separator character, does nothing on non-array fields # # Example: # - # mutate { - # join => ["fieldname", ","] - # } - # + # filter { + # mutate { + # join => ["fieldname", ","] + # } + # } config :join, :validate => :hash # Strip whitespaces # # Example: # - # mutate { - # strip => ["field1", "field2"] - # } - # + # filter { + # mutate { + # strip => ["field1", "field2"] + # } + # } config :strip, :validate => :array public From 74d960d996c940c4ea7c1228912938b2cf6da9dd Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Fri, 21 Dec 2012 22:38:23 -0800 Subject: [PATCH 085/105] - Don't make 'logfile' required until 'name' is finally removed. 
--- lib/logstash/inputs/eventlog.rb | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/logstash/inputs/eventlog.rb b/lib/logstash/inputs/eventlog.rb index 9a87172bb..9d0c8a426 100644 --- a/lib/logstash/inputs/eventlog.rb +++ b/lib/logstash/inputs/eventlog.rb @@ -22,7 +22,9 @@ class LogStash::Inputs::EventLog < LogStash::Inputs::Base config :name, :validate => :string, :deprecated => true # Event Log Name - config :logfile, :validate => :string, :required => true, :default => "System" + config :logfile, :validate => :string, :default => "System" + + # TODO(sissel): Make 'logfile' required after :name is gone. public def initialize(params) @@ -40,6 +42,10 @@ class LogStash::Inputs::EventLog < LogStash::Inputs::Base @logfile = @name end + if @logfile.nil? + raise ArgumentError, "Missing required parameter 'logfile' for input/eventlog" + end + @hostname = Socket.gethostname @logger.info("Registering input eventlog://#{@hostname}/#{@logfile}") require "win32ole" # rubygem 'win32ole' ('jruby-win32ole' on JRuby) From e1a9d5eb0cca81eaceb1e488650040f5ee4bc2ed Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Fri, 21 Dec 2012 22:42:21 -0800 Subject: [PATCH 086/105] - don't set a default either --- lib/logstash/inputs/eventlog.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/logstash/inputs/eventlog.rb b/lib/logstash/inputs/eventlog.rb index 9d0c8a426..f231a6345 100644 --- a/lib/logstash/inputs/eventlog.rb +++ b/lib/logstash/inputs/eventlog.rb @@ -22,7 +22,8 @@ class LogStash::Inputs::EventLog < LogStash::Inputs::Base config :name, :validate => :string, :deprecated => true # Event Log Name - config :logfile, :validate => :string, :default => "System" + config :logfile, :validate => :string + #:required => true, :default => "System" # TODO(sissel): Make 'logfile' required after :name is gone. @@ -36,6 +37,7 @@ class LogStash::Inputs::EventLog < LogStash::Inputs::Base def register if @name + @logger.warn("Please use 'logfile' instead of the 'name' setting") if @logfile @logger.error("'name' and 'logfile' are the same setting, but 'name' is deprecated. Please use only 'logfile'") end From b116d2b7669edc0a8357a4928faf6f58e06cce94 Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Fri, 21 Dec 2012 22:48:44 -0800 Subject: [PATCH 087/105] - don't make 'exchange' required until 'name' is fully removed --- lib/logstash/outputs/amqp.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/logstash/outputs/amqp.rb b/lib/logstash/outputs/amqp.rb index d8fdf2b53..676d62ea8 100644 --- a/lib/logstash/outputs/amqp.rb +++ b/lib/logstash/outputs/amqp.rb @@ -33,7 +33,7 @@ class LogStash::Outputs::Amqp < LogStash::Outputs::Base config :name, :validate => :string, :deprecated => true # The name of the exchange - config :exchange, :validate => :string, :required => true + config :exchange, :validate => :string # TODO(sissel): Make it required when 'name' is gone # Key to route to by default. 
Defaults to 'logstash' # From c9e670d703b92106a031da8ac81b2a92caecdc98 Mon Sep 17 00:00:00 2001 From: Avishai Ish-Shalom Date: Sat, 8 Dec 2012 12:14:39 +0200 Subject: [PATCH 088/105] Added ipv4_network and murmur3 algorithms to the anonymize filter --- lib/logstash/filters/anonymize.rb | 39 ++++++++++++++++++++++++++----- logstash.gemspec | 2 ++ spec/filters/anonymize.rb | 34 +++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 6 deletions(-) diff --git a/lib/logstash/filters/anonymize.rb b/lib/logstash/filters/anonymize.rb index 31fcde8d5..55b6e655d 100644 --- a/lib/logstash/filters/anonymize.rb +++ b/lib/logstash/filters/anonymize.rb @@ -13,12 +13,22 @@ class LogStash::Filters::Anonymize < LogStash::Filters::Base config :key, :validate => :string, :required => true # digest type - config :algorithm, :validate => ['SHA', 'SHA1', 'SHA224', 'SHA256', 'SHA384', 'SHA512', 'MD4', 'MD5'], :required => true, :default => 'SHA1' + config :algorithm, :validate => ['SHA', 'SHA1', 'SHA224', 'SHA256', 'SHA384', 'SHA512', 'MD4', 'MD5', "MURMUR3", "IPV4_NETWORK"], :required => true, :default => 'SHA1' public def register - # require any library - require 'openssl' + # require any library and set the anonymize function + case @algorithm + when "IPV4_NETWORK" + require "ipaddress" + class << self; alias_method :anonymize, :anonymize_ipv4_network; end + when "MURMUR3" + require "murmurhash3" + class << self; alias_method :anonymize, :anonymize_murmur3; end + else + require 'openssl' + class << self; alias_method :anonymize, :anonymize_openssl; end + end end # def register public @@ -30,12 +40,29 @@ class LogStash::Filters::Anonymize < LogStash::Filters::Base end # def filter private - def anonymize(data) + def anonymize_ipv4_network(ip_string) + warn "ipv4" + ip = IPAddress::IPv4.new(ip_string) + ip.prefix = @key + ip.network.to_s + end + + def anonymize_openssl(data) + warn "openssl" digest = algorithm() OpenSSL::HMAC.hexdigest(digest, @key, data) end - private + def anonymize_murmur3(value) + warn "murmur3" + case value + when Fixnum + MurmurHash3::V32.int_hash(value) + when String + MurmurHash3::V32.str_hash(value) + end + end + def algorithm case @algorithm @@ -59,5 +86,5 @@ class LogStash::Filters::Anonymize < LogStash::Filters::Base @logger.error("Unknown algorithm") end end - + end # class LogStash::Filters::Anonymize diff --git a/logstash.gemspec b/logstash.gemspec index 5c2954831..ac4c5e9bc 100644 --- a/logstash.gemspec +++ b/logstash.gemspec @@ -58,6 +58,8 @@ Gem::Specification.new do |gem| gem.add_runtime_dependency "geoip", [">= 1.1.0"] gem.add_runtime_dependency "beefcake", "0.3.7" gem.add_runtime_dependency "php-serialize" # For input drupal_dblog + gem.add_runtime_dependency "ipaddress" + gem.add_runtime_dependency "murmurhash3" if RUBY_PLATFORM == 'java' gem.platform = RUBY_PLATFORM diff --git a/spec/filters/anonymize.rb b/spec/filters/anonymize.rb index 1adfd9235..94fb06958 100644 --- a/spec/filters/anonymize.rb +++ b/spec/filters/anonymize.rb @@ -21,6 +21,40 @@ describe LogStash::Filters::Anonymize do insist { subject["clientip"] } == "0d01b2191194d261fa1a2e7c18a38d44953ab4e2" end end + + describe "anonymize ipaddress with IPV4_NETWORK algorithm" do + # The logstash config goes here. + # At this time, only filters are supported. 
+ config <<-CONFIG + filter { + anonymize { + fields => ["clientip"] + algorithm => "IPV4_NETWORK" + key => 24 + } + } + CONFIG + + sample "@fields" => {"clientip" => "233.255.13.44"} do + insist { subject["clientip"] } == "233.255.13.0" + end + end + + describe "anonymize string with MURMUR3 algorithm" do + config <<-CONFIG + filter { + anonymize { + fields => ["clientip"] + algorithm => "MURMUR3" + key => "" + } + } + CONFIG + + sample "@fields" => {"clientip" => "123.52.122.33"} do + insist { subject["clientip"] } == 1541804874 + end + end describe "anonymize string with SHA1 alogrithm" do # The logstash config goes here. From c444f4811260fa43e56ba9d71a0120fb41bd566d Mon Sep 17 00:00:00 2001 From: Avishai Ish-Shalom Date: Sat, 22 Dec 2012 12:08:10 +0200 Subject: [PATCH 089/105] added specific doc string --- lib/logstash/filters/anonymize.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/logstash/filters/anonymize.rb b/lib/logstash/filters/anonymize.rb index 55b6e655d..b5a156c38 100644 --- a/lib/logstash/filters/anonymize.rb +++ b/lib/logstash/filters/anonymize.rb @@ -10,9 +10,11 @@ class LogStash::Filters::Anonymize < LogStash::Filters::Base config :fields, :validate => :array, :required => true # Hashing key + # When using MURMUR3 the key is ignored but must still be set. + # When using IPV4_NETWORK key is the subnet prefix lentgh config :key, :validate => :string, :required => true - # digest type + # digest/hash type config :algorithm, :validate => ['SHA', 'SHA1', 'SHA224', 'SHA256', 'SHA384', 'SHA512', 'MD4', 'MD5', "MURMUR3", "IPV4_NETWORK"], :required => true, :default => 'SHA1' public From d026e5c24c8507b612a5472f5d6c638e069f7583 Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Sat, 22 Dec 2012 11:42:54 -0800 Subject: [PATCH 090/105] - add suggestion to use elasticsearch_http if you cannot align versions; suggested by semiosis --- lib/logstash/outputs/elasticsearch.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/logstash/outputs/elasticsearch.rb b/lib/logstash/outputs/elasticsearch.rb index 2942e52b8..12883faba 100644 --- a/lib/logstash/outputs/elasticsearch.rb +++ b/lib/logstash/outputs/elasticsearch.rb @@ -6,7 +6,9 @@ require "logstash/outputs/base" # need to use this output. # # *NOTE*: Your elasticsearch cluster must be running elasticsearch -# %ELASTICSEARCH_VERSION%. Any other version may not work. +# %ELASTICSEARCH_VERSION%. If you use any other version of elasticsearch, +# you should consider using the [elasticsearch_http](elasticsearch_http) +# output instead. # # If you want to set other elasticsearch options that are not exposed directly # as config options, there are two options: From f218cf96485ab18c580fb1a70f5affa43a5ad7ba Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Sat, 22 Dec 2012 11:48:30 -0800 Subject: [PATCH 091/105] - more attempts to improve the ES notes --- lib/logstash/outputs/elasticsearch.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/logstash/outputs/elasticsearch.rb b/lib/logstash/outputs/elasticsearch.rb index 12883faba..787ce6ed5 100644 --- a/lib/logstash/outputs/elasticsearch.rb +++ b/lib/logstash/outputs/elasticsearch.rb @@ -5,7 +5,7 @@ require "logstash/outputs/base" # output for logstash. If you plan on using the logstash web interface, you'll # need to use this output. # -# *NOTE*: Your elasticsearch cluster must be running elasticsearch +# *VERSION NOTE*: Your elasticsearch cluster must be running elasticsearch # %ELASTICSEARCH_VERSION%. 
If you use any other version of elasticsearch, # you should consider using the [elasticsearch_http](elasticsearch_http) # output instead. @@ -16,6 +16,9 @@ require "logstash/outputs/base" # * create an elasticsearch.yml file in the $PWD of the logstash process # * pass in es.* java properties (java -Des.node.foo= or ruby -J-Des.node.foo=) # +# This plugin will join your elasticsearch cluster, so it will show up in +# elasticsearch's cluster health status. +# # You can learn more about elasticsearch at class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base From f92f1826f15ecf51d4da0e1f480c22eae7981a65 Mon Sep 17 00:00:00 2001 From: James Turnbull Date: Sun, 23 Dec 2012 11:02:35 -0500 Subject: [PATCH 092/105] Fixed indent on example code in kv filter --- lib/logstash/filters/kv.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/logstash/filters/kv.rb b/lib/logstash/filters/kv.rb index bb9daa11a..3fbe0486c 100644 --- a/lib/logstash/filters/kv.rb +++ b/lib/logstash/filters/kv.rb @@ -7,9 +7,9 @@ require "logstash/namespace" # For example, if you have a log message which contains 'ip=1.2.3.4 # error=REFUSED', you can parse those automatically by doing: # -# filter { -# kv { } -# } +# filter { +# kv { } +# } # # And you will get field 'ip' == "1.2.3.4" etc. class LogStash::Filters::KV < LogStash::Filters::Base From c785f1342858295009ca704244f78598e492b80e Mon Sep 17 00:00:00 2001 From: "michael.gibson" Date: Mon, 24 Dec 2012 09:29:16 -0700 Subject: [PATCH 093/105] adding ability to define the event.id in a custom filter for the elasticsearch output. LOGSTASH-256 --- lib/logstash/event.rb | 4 ++++ lib/logstash/outputs/elasticsearch.rb | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/logstash/event.rb b/lib/logstash/event.rb index ed057bed6..31e153182 100644 --- a/lib/logstash/event.rb +++ b/lib/logstash/event.rb @@ -124,6 +124,10 @@ class LogStash::Event def tags; @data["@tags"]; end # def tags def tags=(val); @data["@tags"] = val; end # def tags= + public + def id; @data["@id"]; end # def id + def id=(val); @data["@id"] = val; end # def id= + # field-related access public def [](key) diff --git a/lib/logstash/outputs/elasticsearch.rb b/lib/logstash/outputs/elasticsearch.rb index 787ce6ed5..51578e567 100644 --- a/lib/logstash/outputs/elasticsearch.rb +++ b/lib/logstash/outputs/elasticsearch.rb @@ -37,6 +37,9 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base # similar events to the same 'type'. String expansion '%{foo}' works here. config :index_type, :validate => :string, :default => "%{@type}" + # The document ID for the index. Overwrites any existing entry in elasticsearch with the same ID. + config :id, :validate => :string, :default => "%{@id}" + # The name of your cluster if you set it on the ElasticSearch side. Useful # for discovery. config :cluster, :validate => :string @@ -140,6 +143,7 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base index = event.sprintf(@index) type = event.sprintf(@index_type) + id = event.sprintf(@id) # TODO(sissel): allow specifying the ID? # The document ID is how elasticsearch determines sharding hash, so it can # help performance if we allow folks to specify a specific ID. 
@@ -160,7 +164,7 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base end end - req = @client.index(index, type, event.to_hash) + req = @client.index(index, type, id, event.to_hash) increment_inflight_request_count #timer = @logger.time("elasticsearch write") req.on(:success) do |response| From 9bde79c62daf5b7a91610b57de0cf7252f4d7127 Mon Sep 17 00:00:00 2001 From: James Turnbull Date: Mon, 24 Dec 2012 12:47:22 -0500 Subject: [PATCH 094/105] Updated flags list to include full list. --- docs/flags.md | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/docs/flags.md b/docs/flags.md index ee14036c1..fc9113426 100644 --- a/docs/flags.md +++ b/docs/flags.md @@ -11,7 +11,16 @@ The logstash agent has the following flags (also try using the '--help' flag)
 -f, --config CONFIGFILE
 Load the logstash config from a specific file, directory, or a wildcard. If given a directory or wildcard, config files will be read in order lexicographically.
---log FILE
+-e CONFIGSTRING
+Use the given string as the configuration data. Same syntax as the
+config file. If no input is specified, 'stdin { type => stdin }' is the
+default. If no output is specified, 'stdout { debug => true }' is the
+default.
+-w, --filterworkers COUNT
+Run COUNT filter workers (default: 1)
+--watchdog-timeout TIMEOUT
+Set watchdog timeout value.
+-l, --log FILE
 Log to a given path. Default is to log to stdout
 -v
 Increase verbosity. There are multiple levels of verbosity available with
@@ -26,6 +35,9 @@ name, like --grok-foo.
 
 ## Web UI
 
+The logstash web interface has the following flags (also try using the '--help'
+flag)
+
 --log FILE
 Log to a given path. Default is stdout.
 
 --address ADDRESS
 Address on which to start webserver. Default is 0.0.0.0.
 --port PORT
 Port on which to start webserver. Default is 9292.
---backend URL
+-B, --elasticsearch-bind-host ADDRESS
+Address on which to bind the elasticsearch node.
+-b, --backend URL
From 3066ac7f9fcd04103211a6af5cc3e1d9459bd42e Mon Sep 17 00:00:00 2001 From: "michael.gibson" Date: Mon, 24 Dec 2012 18:16:35 -0700 Subject: [PATCH 095/105] bug-fix for when id is not specified for elasticsearch. making it not required. LOGSTASH-256 --- lib/logstash/outputs/elasticsearch.rb | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/logstash/outputs/elasticsearch.rb b/lib/logstash/outputs/elasticsearch.rb index 51578e567..bb6341841 100644 --- a/lib/logstash/outputs/elasticsearch.rb +++ b/lib/logstash/outputs/elasticsearch.rb @@ -38,7 +38,7 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base config :index_type, :validate => :string, :default => "%{@type}" # The document ID for the index. Overwrites any existing entry in elasticsearch with the same ID. - config :id, :validate => :string, :default => "%{@id}" + config :id, :validate => :string, :default => "" # The name of your cluster if you set it on the ElasticSearch side. Useful # for discovery. @@ -164,7 +164,12 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base end end - req = @client.index(index, type, id, event.to_hash) + if id == "%{@id}" + req = @client.index(index, type, event.to_hash) + else + req = @client.index(index, type, id, event.to_hash) + end + increment_inflight_request_count #timer = @logger.time("elasticsearch write") req.on(:success) do |response| From 5055d43f2bed3681496f05d80f6dfc56d4f701fb Mon Sep 17 00:00:00 2001 From: Richard Pijnenburg Date: Thu, 27 Dec 2012 16:37:39 +0100 Subject: [PATCH 096/105] Deprecating 'name' variable For ticket LOGSTASH-755 Part 2 gemfire input and output 'name' to 'cache_name' --- lib/logstash/inputs/gemfire.rb | 16 +++++++++++++--- lib/logstash/outputs/gemfire.rb | 19 +++++++++++++++---- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/lib/logstash/inputs/gemfire.rb b/lib/logstash/inputs/gemfire.rb index b2230b055..2b38c4040 100644 --- a/lib/logstash/inputs/gemfire.rb +++ b/lib/logstash/inputs/gemfire.rb @@ -17,7 +17,10 @@ class LogStash::Inputs::Gemfire < LogStash::Inputs::Threadable plugin_status "experimental" # Your client cache name - config :name, :validate => :string, :default => "logstash" + config :name, :validate => :string, :deprecated => true + + # Your client cache name + config :cache_name, :validate => :string, :default => "logstash" # The path to a GemFire client cache XML file. # @@ -51,6 +54,13 @@ class LogStash::Inputs::Gemfire < LogStash::Inputs::Threadable # How the message is serialized in the cache. Can be one of "json" or "plain"; default is plain config :serialization, :validate => :string, :default => nil + if @name + if @cache_name + @logger.error("'name' and 'cache_name' are the same setting, but 'name' is deprecated. Please use only 'cache_name'") + end + @cache_name = @name + end + public def initialize(params) super @@ -97,10 +107,10 @@ class LogStash::Inputs::Gemfire < LogStash::Inputs::Threadable protected def connect begin - @logger.debug("Connecting to GemFire #{@name}") + @logger.debug("Connecting to GemFire #{@cache_name}") @cache = ClientCacheFactory.new. - set("name", @name). + set("name", @cache_name). 
set("cache-xml-file", @cache_xml_file).create @logger.debug("Created cache #{@cache.inspect}") diff --git a/lib/logstash/outputs/gemfire.rb b/lib/logstash/outputs/gemfire.rb index 2bf51248d..708bd5d66 100644 --- a/lib/logstash/outputs/gemfire.rb +++ b/lib/logstash/outputs/gemfire.rb @@ -16,7 +16,10 @@ class LogStash::Outputs::Gemfire < LogStash::Outputs::Base plugin_status "experimental" # Your client cache name - config :name, :validate => :string, :default => "logstash" + config :name, :validate => :string, :deprecated => true + + # Your client cache name + config :cache_name, :validate => :string, :default => "logstash" # The path to a GemFire client cache XML file. # @@ -40,6 +43,14 @@ class LogStash::Outputs::Gemfire < LogStash::Outputs::Base # A sprintf format to use when building keys config :key_format, :validate => :string, :default => "%{@source}-%{@timestamp}" + if @name + if @cache_name + @logger.error("'name' and 'cache_name' are the same setting, but 'name' is deprecated. Please use only 'cache_name'") + end + @cache_name = @name + end + + public def register import com.gemstone.gemfire.cache.client.ClientCacheFactory @@ -52,10 +63,10 @@ class LogStash::Outputs::Gemfire < LogStash::Outputs::Base public def connect begin - @logger.debug("Connecting to GemFire #{@name}") + @logger.debug("Connecting to GemFire #{@cache_name}") @cache = ClientCacheFactory.new. - set("name", @name). + set("name", @cache_name). set("cache-xml-file", @cache_xml_file).create @logger.debug("Created cache #{@cache.inspect}") @@ -90,7 +101,7 @@ class LogStash::Outputs::Gemfire < LogStash::Outputs::Base public def to_s - return "gemfire://#{name}" + return "gemfire://#{cache_name}" end public From d7505972ae746fb080a0b33650588fd92e2f6108 Mon Sep 17 00:00:00 2001 From: "michael.gibson" Date: Thu, 27 Dec 2012 09:05:32 -0700 Subject: [PATCH 097/105] updating changes to pull 286 --- lib/logstash/event.rb | 1 - lib/logstash/outputs/elasticsearch.rb | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/logstash/event.rb b/lib/logstash/event.rb index 31e153182..e08f69a69 100644 --- a/lib/logstash/event.rb +++ b/lib/logstash/event.rb @@ -124,7 +124,6 @@ class LogStash::Event def tags; @data["@tags"]; end # def tags def tags=(val); @data["@tags"] = val; end # def tags= - public def id; @data["@id"]; end # def id def id=(val); @data["@id"] = val; end # def id= diff --git a/lib/logstash/outputs/elasticsearch.rb b/lib/logstash/outputs/elasticsearch.rb index bb6341841..f4d7deaf3 100644 --- a/lib/logstash/outputs/elasticsearch.rb +++ b/lib/logstash/outputs/elasticsearch.rb @@ -38,7 +38,7 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base config :index_type, :validate => :string, :default => "%{@type}" # The document ID for the index. Overwrites any existing entry in elasticsearch with the same ID. - config :id, :validate => :string, :default => "" + config :id, :validate => :string, :default => nil # The name of your cluster if you set it on the ElasticSearch side. Useful # for discovery. @@ -143,7 +143,6 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base index = event.sprintf(@index) type = event.sprintf(@index_type) - id = event.sprintf(@id) # TODO(sissel): allow specifying the ID? # The document ID is how elasticsearch determines sharding hash, so it can # help performance if we allow folks to specify a specific ID. @@ -164,9 +163,10 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base end end - if id == "%{@id}" + if id.nil? 
req = @client.index(index, type, event.to_hash) else + id = event.sprintf(@id) req = @client.index(index, type, id, event.to_hash) end From 8a04ac387b59b8369fbef3bd83a07b5341bbc22c Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Thu, 27 Dec 2012 14:45:28 -0800 Subject: [PATCH 098/105] - style fix - all new plugins should start life as 'experimental' --- lib/logstash/outputs/syslog.rb | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/logstash/outputs/syslog.rb b/lib/logstash/outputs/syslog.rb index 5e2781c1e..81e80534d 100644 --- a/lib/logstash/outputs/syslog.rb +++ b/lib/logstash/outputs/syslog.rb @@ -2,14 +2,13 @@ require "logstash/outputs/base" require "logstash/namespace" require "date" -# Send events to syslog server +# Send events to a syslog server. +# # You can send messages compliant with RFC3164 or RFC5424 # UDP or TCP syslog transport is supported - class LogStash::Outputs::Syslog < LogStash::Outputs::Base - config_name "syslog" - plugin_status "beta" + plugin_status "experimental" FACILITY_LABELS = [ "kernel", From 635dc91c1e1f6fa4bebe5226202d87e94afbf907 Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Thu, 27 Dec 2012 14:46:14 -0800 Subject: [PATCH 099/105] update changes --- CHANGELOG | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 10b068f0d..0944695ad 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,9 +2,9 @@ ## Overview of this release: - grok now captures (?...) regexp into 'somename' field - new 'charset' feature for inputs (for improved UTF-8 conversion support) - - TODO TODO TODO new faster start-time release jars are available, see the 'flatjar' download - option. This flatjar thing may have bugs, so both flatjar and monolithic are - available. + - TODO TODO TODO new faster start-time release jars are available, see the + 'flatjar' download option. This flatjar thing may have bugs, so both flatjar + and monolithic are available. ## general - fixed internal dependency versioning on 'addressable' gem (LOGSTASH-694) @@ -19,6 +19,9 @@ - lumberjack: jls-lumberjack gem updated to 0.0.7 ## filters + - new: anonymize: supports many hash mechanisms (murmur3, sha, md5, etc) as + well as IP address anonymization (#280, #261; patches by Richard Pijnenburg + and Avishai Ish-Shalom) - improvement: grok: now accepts (?...) named captures. This lets you compose a pattern in the grok config without needing to define it in a patterns file. 
Example: (?%{HOST}:%{POSINT}) to capture 'hostport' @@ -41,6 +44,8 @@ - improvement: gelf: Allow full_message gelf property to be overridden (#245, patch by Sébastien Masset) - lumberjack: jls-lumberjack gem updated to 0.0.6 + - new: syslog output supporting both RFC3164 and RFC5424 (#180, patch by + ruckalvnet) 1.1.5 (November 10, 2012) ## Overview of this release: From 3a8e5bdb602de94bdce46cbe0fb280d877c99108 Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Thu, 27 Dec 2012 14:51:01 -0800 Subject: [PATCH 100/105] - clarify spec description --- spec/filters/date.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/filters/date.rb b/spec/filters/date.rb index 85999fbea..a4dd2a827 100644 --- a/spec/filters/date.rb +++ b/spec/filters/date.rb @@ -182,7 +182,7 @@ describe LogStash::Filters::Date do end end - describe "accept match config option with hash value like grep (LOGSTASH-735)" do + describe "accept match config option with hash value (LOGSTASH-735)" do config <<-CONFIG filter { date { From 16cd88f060bcff0ecbc1b9154d41ea8bd4ed5ac5 Mon Sep 17 00:00:00 2001 From: Avishai Ish-Shalom Date: Fri, 28 Dec 2012 01:12:21 +0200 Subject: [PATCH 101/105] Removed warn statements, replaced ipaddress with ipaddr --- lib/logstash/filters/anonymize.rb | 9 ++------- logstash.gemspec | 1 - 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/lib/logstash/filters/anonymize.rb b/lib/logstash/filters/anonymize.rb index b5a156c38..6393f6cf8 100644 --- a/lib/logstash/filters/anonymize.rb +++ b/lib/logstash/filters/anonymize.rb @@ -22,7 +22,7 @@ class LogStash::Filters::Anonymize < LogStash::Filters::Base # require any library and set the anonymize function case @algorithm when "IPV4_NETWORK" - require "ipaddress" + require 'ipaddr' class << self; alias_method :anonymize, :anonymize_ipv4_network; end when "MURMUR3" require "murmurhash3" @@ -43,20 +43,15 @@ class LogStash::Filters::Anonymize < LogStash::Filters::Base private def anonymize_ipv4_network(ip_string) - warn "ipv4" - ip = IPAddress::IPv4.new(ip_string) - ip.prefix = @key - ip.network.to_s + IPAddr.new(ip_string).mask(@key.to_i).to_s end def anonymize_openssl(data) - warn "openssl" digest = algorithm() OpenSSL::HMAC.hexdigest(digest, @key, data) end def anonymize_murmur3(value) - warn "murmur3" case value when Fixnum MurmurHash3::V32.int_hash(value) diff --git a/logstash.gemspec b/logstash.gemspec index ac4c5e9bc..cf482a8aa 100644 --- a/logstash.gemspec +++ b/logstash.gemspec @@ -58,7 +58,6 @@ Gem::Specification.new do |gem| gem.add_runtime_dependency "geoip", [">= 1.1.0"] gem.add_runtime_dependency "beefcake", "0.3.7" gem.add_runtime_dependency "php-serialize" # For input drupal_dblog - gem.add_runtime_dependency "ipaddress" gem.add_runtime_dependency "murmurhash3" if RUBY_PLATFORM == 'java' From ff40032de2f999f3126ce7cc1db99b6c94eb9f5c Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Thu, 27 Dec 2012 15:13:12 -0800 Subject: [PATCH 102/105] - copy docs for match --- lib/logstash/filters/date.rb | 38 ++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/lib/logstash/filters/date.rb b/lib/logstash/filters/date.rb index e2eba9cce..1e39194cc 100644 --- a/lib/logstash/filters/date.rb +++ b/lib/logstash/filters/date.rb @@ -47,17 +47,47 @@ class LogStash::Filters::Date < LogStash::Filters::Base # 2011-04-19T03:44:01.103Z # * "UNIX" - will parse unix time in seconds since epoch # * "UNIX_MS" - will parse unix time in milliseconds since epoch + # * "TAI64N" - will parse tai64n time 
values # - # For example, if you have a field 'logdate' and with a value that looks like 'Aug 13 2010 00:03:44' + # For example, if you have a field 'logdate' and with a value that looks like + # 'Aug 13 2010 00:03:44' # you would use this configuration: # - # logdate => "MMM dd yyyy HH:mm:ss" + # logdate => "MMM dd YYYY HH:mm:ss" # # [dateformats]: http://download.oracle.com/javase/1.4.2/docs/api/java/text/SimpleDateFormat.html config /[A-Za-z0-9_-]+/, :validate => :array - # An array with field name first, and format patterns following, [ field, formats... ] - # Using this more than once will have unpredictable results, so only use it once per date filter. + # The date formats allowed are anything allowed by Joda-Time (java time + # library), generally: [java.text.SimpleDateFormat][dateformats] + # + # An array with field name first, and format patterns following, [ field, + # formats... ] + # + # If your time field has multiple possible formats, you can do this: + # + # match => [ "logdate", "MMM dd YYY HH:mm:ss", + # "MMM d YYY HH:mm:ss", "ISO8601" ] + # + # The above will match a syslog (rfc3164) or iso8601 timestamp. + # + # There are a few special exceptions, the following format literals exist + # to help you save time and ensure correctness of date parsing. + # + # * "ISO8601" - should parse any valid ISO8601 timestamp, such as + # 2011-04-19T03:44:01.103Z + # * "UNIX" - will parse unix time in seconds since epoch + # * "UNIX_MS" - will parse unix time in milliseconds since epoch + # * "TAI64N" - will parse tai64n time values + # + # For example, if you have a field 'logdate' and with a value that looks like + # 'Aug 13 2010 00:03:44', you would use this configuration: + # + # filter { + # date { + # match => [ "logdate", "MMM dd YYYY HH:mm:ss" ] + # } + # } config :match, :validate => :array, :default => [] # LOGSTASH-34 From c00126f6b53f7eda71efc874bc9cada3d0e3830e Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Thu, 27 Dec 2012 15:13:26 -0800 Subject: [PATCH 103/105] update changelog --- CHANGELOG | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index 0944695ad..d2518c6d1 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -22,6 +22,8 @@ - new: anonymize: supports many hash mechanisms (murmur3, sha, md5, etc) as well as IP address anonymization (#280, #261; patches by Richard Pijnenburg and Avishai Ish-Shalom) + - filter: date: now accepts 'match' as a setting. Use of this is preferable + to the old syntax. - improvement: grok: now accepts (?...) named captures. This lets you compose a pattern in the grok config without needing to define it in a patterns file. Example: (?%{HOST}:%{POSINT}) to capture 'hostport' From 44e9bb54b765ee262850acd00c245b18af0d9e8a Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Thu, 27 Dec 2012 15:59:30 -0800 Subject: [PATCH 104/105] - style --- lib/logstash/event.rb | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/lib/logstash/event.rb b/lib/logstash/event.rb index ce84778cb..1d9736821 100644 --- a/lib/logstash/event.rb +++ b/lib/logstash/event.rb @@ -97,14 +97,11 @@ class LogStash::Event val = uri if uri if val.is_a?(URI) @data["@source"] = val.to_s - maybe_new_source_host = val.host + @data["@source_host"] = val.host if @data["@source_host"].nil? @data["@source_path"] = val.path else @data["@source"] = val - maybe_new_source_host = val - end - if !@data["@source_host"] - @data["@source_host"] = maybe_new_source_host + @data["@source_host"] = val.host if @data["@source_host"].nil? 
end end # def source= From 27afc97d2231860ff93570a417fc6778b695d76c Mon Sep 17 00:00:00 2001 From: Jordan Sissel Date: Thu, 27 Dec 2012 16:35:26 -0800 Subject: [PATCH 105/105] more changelog maintenance --- CHANGELOG | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index d2518c6d1..55ac0dba3 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -12,11 +12,12 @@ ## inputs - All inputs now have a 'charset' setting to help you inform logstash of the - text encoding of the input. This is useful if you have Shift_JIS or CP1252 + text encoding of the input. This is useful if you have Shift_JIS or CP1251 encoded log files. This should help resolve the many UTF-8 bugs that were reported recently. - bugfix: zeromq: 'topology' is now a required setting - - lumberjack: jls-lumberjack gem updated to 0.0.7 + - misc: lumberjack: jls-lumberjack gem updated to 0.0.7 + - bugfix: stomp: fix startup problems causing early termination (#226 ## filters - new: anonymize: supports many hash mechanisms (murmur3, sha, md5, etc) as @@ -40,14 +41,15 @@ (LOGSTASH-757) ## outputs + - new: syslog output supporting both RFC3164 and RFC5424 (#180, patch by + ruckalvnet) + - new: cloudwatch output to emit metrics and other events to Amazon CloudWatch. - bugfix: zeromq: 'topology' is now a required setting - improvement: mongodb: new setting 'isodate', when true, stores the @timestamp field as a mongodb date instead of a string. (#224, patch by Kevin Amorin) - improvement: gelf: Allow full_message gelf property to be overridden (#245, patch by Sébastien Masset) - lumberjack: jls-lumberjack gem updated to 0.0.6 - - new: syslog output supporting both RFC3164 and RFC5424 (#180, patch by - ruckalvnet) 1.1.5 (November 10, 2012) ## Overview of this release: