Merge branch 'master' into cloudwatch-docs

Louis Zuckerman 2012-12-29 09:34:23 -05:00
commit 9a4a96c302
14 changed files with 50 additions and 33 deletions

View file

@@ -23,8 +23,8 @@
- new: anonymize: supports many hash mechanisms (murmur3, sha, md5, etc) as
well as IP address anonymization (#280, #261; patches by Richard Pijnenburg
and Avishai Ish-Shalom)
- filter: date: now accepts 'match' as a setting. Use of this is preferable
to the old syntax.
- feature: date: now accepts 'match' as a setting. Use of this is preferable
to the old syntax. (#248, LOGSTASH-734, Patch by Louis Zuckerman)
- improvement: grok: now accepts (?<foo>...) named captures. This lets you
compose a pattern in the grok config without needing to define it in a
patterns file. Example: (?<hostport>%{HOST}:%{POSINT}) to capture 'hostport'
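For illustration, the 'match' setting pairs a field name with one or more expected time formats, and an inline named capture lets grok define a one-off pattern without a patterns file. A minimal sketch, assuming the 1.1.x-era config syntax (the 'timestamp' field, the date format, and the 'hostport' name are hypothetical):

    filter {
      date {
        # parse the event's 'timestamp' field as an Apache-style date
        match => [ "timestamp", "dd/MMM/yyyy:HH:mm:ss Z" ]
      }
      grok {
        # capture e.g. "example.com:8080" into a 'hostport' field inline
        pattern => "(?<hostport>%{HOST}:%{POSINT})"
      }
    }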
@@ -39,6 +39,8 @@
settings. (#225, patch by Alex Wheeler)
- mutate: rename on a nonexistent field now does nothing as expected.
(LOGSTASH-757)
- grok: don't tag an event with _grokparsefailure if it's already so (#248,
patch by Greg Brockman)
## outputs
- new: syslog output supporting both RFC3164 and RFC5424 (#180, patch by

View file

@@ -3,8 +3,8 @@
# cpio
# wget or curl
#
JRUBY_VERSION=1.7.0
ELASTICSEARCH_VERSION=0.19.10
JRUBY_VERSION=1.7.1
ELASTICSEARCH_VERSION=0.20.2
#VERSION=$(shell ruby -r./lib/logstash/version -e 'puts LOGSTASH_VERSION')
VERSION=$(shell awk -F\" '/LOGSTASH_VERSION/ {print $$2}' lib/logstash/version.rb)
@@ -125,6 +125,8 @@ vendor/bundle: | vendor $(JRUBY)
@echo "=> Installing gems to $@..."
#$(QUIET)GEM_HOME=$(GEM_HOME) $(JRUBY_CMD) --1.9 $(GEM_HOME)/bin/bundle install --deployment
$(QUIET)GEM_HOME=./vendor/bundle/jruby/1.9/ GEM_PATH= $(JRUBY_CMD) --1.9 ./gembag.rb logstash.gemspec
@# Purge old version of json
#$(QUIET)GEM_HOME=./vendor/bundle/jruby/1.9/ GEM_PATH= $(JRUBY_CMD) --1.9 -S gem uninstall json -v 1.6.5
@# Purge any junk that fattens our jar without need!
@# The riak gem includes previous gems in the 'pkg' dir. :(
-rm -rf $@/jruby/1.9/gems/riak-client-1.0.3/pkg

View file

@@ -1,6 +1,6 @@
require "logstash/filters/base"
require "logstash/namespace"
require "logstash/time"
require "logstash/time_addon"
# The date filter is used for parsing dates from fields and using that
# date or timestamp as the timestamp for the event.

View file

@@ -7,7 +7,8 @@ require "set"
# data (like syslog or apache logs) into something structured and queryable.
#
# Grok allows you to match text without needing to be a regular expressions
# ninja. Logstash ships with about 120 patterns by default. You can add
# ninja. Logstash ships with about 120 patterns by default. You can find them here:
# <https://github.com/logstash/logstash/tree/v%VERSION%/patterns>. You can add
# your own trivially. (See the patterns_dir setting)
class LogStash::Filters::Grok < LogStash::Filters::Base
config_name "grok"
@@ -249,7 +250,7 @@ class LogStash::Filters::Grok < LogStash::Filters::Base
if !matched
# Tag this event if we can't parse it. We can use this later to
# reparse+reindex logs if we improve the patterns given.
event.tags << "_grokparsefailure"
event.tags << "_grokparsefailure" unless event.tags.include?("_grokparsefailure")
end
@logger.debug? and @logger.debug("Event now: ", :event => event)
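Events that fail to parse carry the single tag, which can then be used to route them for later reprocessing. A minimal sketch, assuming the 1.1.x-era 'tags' flag on outputs (the file path is hypothetical):

    output {
      file {
        # only events carrying the failure tag are written here
        tags => [ "_grokparsefailure" ]
        path => "/var/log/logstash/grok_failures.log"
      }
    }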

View file

@@ -1,6 +1,6 @@
require "logstash/filters/base"
require "logstash/namespace"
require "logstash/time"
require "logstash/time_addon"
# The mutate filter allows you to do general mutations to fields. You
# can rename, remove, replace, and modify fields in your events.
@@ -207,12 +207,14 @@ class LogStash::Filters::Mutate < LogStash::Filters::Base
def replace(event)
# TODO(sissel): use event.sprintf on the field names?
@replace.each do |field, newvalue|
next unless event.include?(field)
event[field] = event.sprintf(newvalue)
end
end # def replace
def convert(event)
@convert.each do |field, type|
next unless event.include?(field)
original = event[field]
# calls convert_{string,integer,float} depending on type requested.
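With the new guards, replace and convert silently skip fields that are absent from the event. A minimal usage sketch, assuming the array-style hash syntax of the era (field names are hypothetical):

    filter {
      mutate {
        # applied only when the 'status' field exists on the event
        replace => [ "status", "code-%{response}" ]
        # applied only when the 'bytes' field exists
        convert => [ "bytes", "integer" ]
      }
    }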

View file

@@ -1,6 +1,6 @@
require "logstash/filters/base"
require "logstash/namespace"
require "logstash/time"
require "logstash/time_addon"
# The split filter is for splitting multiline messages into separate events.
#

View file

@@ -1,7 +1,7 @@
require "date"
require "logstash/inputs/base"
require "logstash/namespace"
require "logstash/time" # should really use the filters/date.rb bits
require "logstash/time_addon" # should really use the filters/date.rb bits
require "socket"
# Read gelf messages as events over the network.

View file

@@ -1,7 +1,7 @@
require "date"
require "logstash/inputs/base"
require "logstash/namespace"
require "logstash/time" # should really use the filters/date.rb bits
require "logstash/time_addon" # should really use the filters/date.rb bits
require "socket"
# Read messages as events over the network via udp.

View file

@@ -37,8 +37,9 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
# similar events to the same 'type'. String expansion '%{foo}' works here.
config :index_type, :validate => :string, :default => "%{@type}"
# The document ID for the index. Overwrites any existing entry in elasticsearch with the same ID.
config :id, :validate => :string, :default => nil
# The document ID for the index. Useful for overwriting existing entries in
# elasticsearch with the same ID.
config :document_id, :validate => :string, :default => nil
# The name of your cluster if you set it on the ElasticSearch side. Useful
# for discovery.
@@ -163,11 +164,11 @@ class LogStash::Outputs::ElasticSearch < LogStash::Outputs::Base
end
end
if id.nil?
req = @client.index(index, type, event.to_hash)
if @document_id.nil?
req = @client.index(index, type, event.to_hash)
else
id = event.sprintf(@id)
req = @client.index(index, type, id, event.to_hash)
id = event.sprintf(@document_id)
req = @client.index(index, type, id, event.to_hash)
end
increment_inflight_request_count
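A hedged sketch of how the renamed document_id setting might be used to make writes idempotent (the 'request_id' field name is hypothetical):

    output {
      elasticsearch {
        host => "localhost"
        # re-sending an event with the same request_id overwrites the same document
        document_id => "%{request_id}"
      }
    }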

View file

@@ -40,6 +40,10 @@ class LogStash::Outputs::ElasticSearchHTTP < LogStash::Outputs::Base
# be used.
config :flush_size, :validate => :number, :default => 100
# The document ID for the index. Useful for overwriting existing entries in
# elasticsearch with the same ID.
config :document_id, :validate => :string, :default => nil
public
def register
require "ftw" # gem ftw
@@ -84,9 +88,12 @@ class LogStash::Outputs::ElasticSearchHTTP < LogStash::Outputs::Base
end # def receive_single
def receive_bulk(event, index, type)
header = { "index" => { "_index" => index, "_type" => type } }
if !@document_id.nil?
header["index"]["_id"] = event.sprintf(@document_id)
end
@queue << [
{ "index" => { "_index" => index, "_type" => type } }.to_json,
event.to_json
header.to_json, event.to_json
].join("\n")
# Keep trying to flush while the queue is full.
@@ -98,6 +105,10 @@ class LogStash::Outputs::ElasticSearchHTTP < LogStash::Outputs::Base
puts "Flushing #{@queue.count} events"
# If we don't tack a trailing newline at the end, elasticsearch
# doesn't seem to process the last event in this bulk index call.
#
# as documented here:
# http://www.elasticsearch.org/guide/reference/api/bulk.html
# "NOTE: the final line of data must end with a newline character \n."
response = @agent.post!("http://#{@host}:#{@port}/_bulk",
:body => @queue.join("\n") + "\n")
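The resulting bulk body alternates one action line and one source line per event and must end with a newline; roughly (values hypothetical, the "_id" key present only when document_id is set):

    {"index":{"_index":"logstash-2012.12.29","_type":"logs","_id":"abc123"}}
    {"@timestamp":"2012-12-29T14:34:23.000Z","@message":"hello world"}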

View file

@@ -80,6 +80,10 @@ class LogStash::Outputs::ElasticSearchRiver < LogStash::Outputs::Base
# AMQP persistence setting
config :persistent, :validate => :boolean, :default => true
# The document ID for the index. Useful for overwriting existing entries in
# elasticsearch with the same ID.
config :document_id, :validate => :string, :default => nil
public
def register
@@ -199,20 +203,14 @@ class LogStash::Outputs::ElasticSearchRiver < LogStash::Outputs::Base
public
def receive(event)
return unless output?(event)
# TODO(sissel): Refactor this to not use so much string concatenation.
# River events have a format of
# "action\ndata\n"
# where 'action' is index or delete, data is the data to index.
index_message = {
"index" => {
"_index" => event.sprintf(@index),
"_type" => event.sprintf(@index_type)
}
}.to_json + "\n"
header = { "index" => { "_index" => event.sprintf(@index), "_type" => event.sprintf(@index_type) } }
if !@document_id.nil?
header["index"]["_id"] = event.sprintf(@document_id)
end
index_message += event.to_json + "\n"
@mq.receive_raw(index_message)
@mq.receive_raw(header.to_json + "\n" + event.to_json + "\n")
end # def receive
end # LogStash::Outputs::ElasticSearchRiver

View file

@@ -9,7 +9,7 @@ class LogStash::Outputs::Email < LogStash::Outputs::Base
# the registered fields that we want to monitor
# A hash of matches of field => value
config :match, :validate => :hash
config :match, :validate => :hash, :required => true
# the To address setting - fully qualified email address to send to
config :to, :validate => :string, :required => true

View file

@@ -24,7 +24,7 @@ Gem::Specification.new do |gem|
gem.add_runtime_dependency "stud"
# Web dependencies
gem.add_runtime_dependency "ftw", ["~> 0.0.25"]
gem.add_runtime_dependency "ftw", ["~> 0.0.26"]
gem.add_runtime_dependency "haml"
gem.add_runtime_dependency "rack"
gem.add_runtime_dependency "sass"

View file

@@ -1,2 +1,2 @@
RUBY_LOGLEVEL (?:DEBUG|FATAL|ERROR|WARN|INFO)
RUBY_LOGGER [DFEWI], \[%{TIMESTAMP_ISO8601:timestamp} #%{POSINT:pid}\] *%{RUBY_LOGLEVEL:loglevel} -- *%{DATA:progname}: %{GREEDYDATA:message}
RUBY_LOGGER [DFEWI], \[%{TIMESTAMP_ISO8601:timestamp} #%{POSINT:pid}\] *%{RUBY_LOGLEVEL:loglevel} -- +%{DATA:progname}: %{GREEDYDATA:message}
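For reference, a line in Ruby's default Logger format that this pattern targets (content hypothetical); the '+' now requires at least one space between '--' and the program name:

    I, [2012-12-29T09:34:23.000000 #1234]  INFO -- myapp: hello world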