Merge branch 'master' into aws-iam-roles

This commit is contained in:
Louis Zuckerman 2013-01-02 10:47:41 -05:00
commit 22cbd426ed
9 changed files with 162 additions and 37 deletions

View file

@ -2,9 +2,6 @@
## Overview of this release:
- grok now captures (?<somename>...) regexp into 'somename' field
- new 'charset' feature for inputs (for improved UTF-8 conversion support)
- TODO TODO TODO new faster start-time release jars are available, see the
'flatjar' download option. This flatjar thing may have bugs, so both flatjar
and monolithic are available.
## general
- fixed internal dependency versioning on 'addressable' gem (LOGSTASH-694)
@ -14,7 +11,7 @@
- All inputs now have a 'charset' setting to help you inform logstash of the
text encoding of the input. This is useful if you have Shift_JIS or CP1251
encoded log files. This should help resolve the many UTF-8 bugs that were
reported recently.
reported recently. The default charset is UTF-8.
- bugfix: zeromq: 'topology' is now a required setting
- misc: lumberjack: jls-lumberjack gem updated to 0.0.7
- bugfix: stomp: fix startup problems causing early termination (#226

View file

@ -1,4 +1,4 @@
Copyright 2009-2011 Jordan Sissel, Pete Fritchman, and contributors.
Copyright 2009-2013 Jordan Sissel, Pete Fritchman, and contributors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View file

@ -194,19 +194,18 @@ build/flatgems: | build vendor/bundle
done
flatjar-test:
GEM_HOME= GEM_PATH= java -jar logstash-$(VERSION)-flatjar.jar rspec $(TESTS)
GEM_HOME= GEM_PATH= java -jar build/logstash-$(VERSION)-flatjar.jar rspec $(TESTS)
cd build && GEM_HOME= GEM_PATH= java -jar logstash-$(VERSION)-flatjar.jar rspec spec/jar.rb
jar-test:
#cd build && GEM_HOME= GEM_PATH= java -jar logstash-$(VERSION)-monolithic.jar rspec $(TESTS)
GEM_HOME= GEM_PATH= java -jar logstash-$(VERSION)-monolithic.jar rspec $(TESTS)
GEM_HOME= GEM_PATH= java -jar build/logstash-$(VERSION)-monolithic.jar rspec $(TESTS)
cd build && GEM_HOME= GEM_PATH= java -jar logstash-$(VERSION)-monolithic.jar rspec spec/jar.rb
flatjar-test-and-report:
cd build && GEM_HOME= GEM_PATH= java -jar logstash-$(VERSION)-monolithic.jar rspec $(TESTS) --format h > results.flatjar.html
GEM_HOME= GEM_PATH= java -jar build/logstash-$(VERSION)-flatjar.jar rspec $(TESTS) --format h --out build/results.flatjar.html
jar-test-and-report:
cd build && GEM_HOME= GEM_PATH= java -jar logstash-$(VERSION)-monolithic.jar rspec $(TESTS) --format h > results.monolithic.html
GEM_HOME= GEM_PATH= java -jar build/logstash-$(VERSION)-monolithic.jar rspec $(TESTS) --format h --out build/results.monolithic.html
flatjar: build/logstash-$(VERSION)-flatjar.jar
build/jar: | build build/flatgems build/monolith

View file

@ -2,21 +2,30 @@
[![Build Status](https://secure.travis-ci.org/logstash/logstash.png)](http://travis-ci.org/logstash/logstash)
logstash is a tool for managing events and logs. You can use it to collect logs, parse them, and store them for later use (like, for searching). Speaking of searching, logstash comes with a web interface for searching and drilling into all of your logs.
logstash is a tool for managing events and logs. You can use it to collect
logs, parse them, and store them for later use (like, for searching). Speaking
of searching, logstash comes with a web interface for searching and drilling
into all of your logs.
It is fully free and fully open source. The license is Apache 2.0, meaning you
are pretty much free to use it however you want in whatever way.
For more info, see <http://logstash.net/>
Need help? Try #logstash on freenode irc or the logstash-users@googlegroups.com mailing list.
## Need Help?
Need help? Try #logstash on freenode irc or the logstash-users@googlegroups.com
mailing list.
You can also find documentation on the <http://logstash.net> site.
## Developing
## Building
To work on the code without building a jar, install rvm and run the following:
# Install JRuby with rvm
rvm install jruby-1.7.0
rvm use jruby-1.7.0
rvm install jruby-1.7.1
rvm use jruby-1.7.1
# Install logstash dependencies
ruby gembag.rb logstash.gemspec
@ -24,7 +33,10 @@ To work on the code without building a jar, install rvm and run the following:
# Run logstash
bin/logstash agent [options]
jar releases are available here: <http://logstash.objects.dreamhost.com/>
## Building
Releases are available here: <http://logstash.objects.dreamhost.com/>
If you want to build the jar yourself, run:
make jar

View file

@ -29,6 +29,7 @@ class LogStashConfigDocGenerator
end
def parse(string)
clear_comments
buffer = ""
string.split(/\r\n|\n/).each do |line|
# Join long lines
@ -72,7 +73,7 @@ class LogStashConfigDocGenerator
name, opts = eval(code)
description = BlueCloth.new(@comments.join("\n")).to_html
@attributes[name][:description] = description
@attributes[name.to_s][:description] = description
clear_comments
end # def add_config
@ -135,12 +136,12 @@ class LogStashConfigDocGenerator
# Monkeypatch the 'config' method to capture
# Note, this monkeypatch requires us do the config processing
# one at a time.
LogStash::Config::Mixin::DSL.instance_eval do
define_method(:config) do |name, opts={}|
#LogStash::Config::Mixin::DSL.instance_eval do
#define_method(:config) do |name, opts={}|
#p name => opts
attributes[name].merge!(opts)
end
end
#attributes[name].merge!(opts)
#end
#end
# Loading the file will trigger the config dsl which should
# collect all the config settings.
@ -188,6 +189,9 @@ class LogStashConfigDocGenerator
# descriptions are assumed to be markdown
description = BlueCloth.new(@class_description).to_html
klass.get_config.each do |name, settings|
@attributes[name].merge!(settings)
end
sorted_attributes = @attributes.sort { |a,b| a.first.to_s <=> b.first.to_s }
klassname = LogStash::Config::Registry.registry[@name].to_s
name = @name

View file

@ -8,6 +8,7 @@ layout: content_right
<%= description %>
<% if !@flags.empty? -%>
<!-- Flags are deprecated
<h3> Flags </h3>
This plugin provides the following flags:
@ -20,6 +21,8 @@ This plugin provides the following flags:
<% end -%>
</dl>
... flags are deprecated -->
<% end -%>
<h3> Synopsis </h3>

View file

@ -3,13 +3,127 @@ require "logstash/namespace"
require "set"
# Parse arbitrary text and structure it.
# Grok is currently the best way in logstash to parse crappy unstructured log
# data (like syslog or apache logs) into something structured and queryable.
#
# Grok allows you to match text without needing to be a regular expressions
# ninja. Logstash ships with about 120 patterns by default. You can find them here:
# Grok is currently the best way in logstash to parse crappy unstructured log
# data into something structured and queryable.
#
# This tool is perfect for syslog logs, apache and other webserver logs, mysql
# logs, and in general, any log format that is generally written for humans
# and not computer consumption.
#
# Logstash ships with about 120 patterns by default. You can find them here:
# <https://github.com/logstash/logstash/tree/v%VERSION%/patterns>. You can add
# your own trivially. (See the patterns_dir setting)
#
# If you need help building patterns to match your logs, you will find the
# <http://grokdebug.herokuapp.com> tool quite useful!
#
# #### Grok Basics
#
# Grok works by combining text patterns into something that matches your
# logs.
#
# The syntax for a grok pattern is '%{SYNTAX:SEMANTIC}'
#
# The 'SYNTAX' is the name of the pattern that will match your text. For
# example, "3.44" will be matched by the NUMBER pattern and "55.3.244.1" will
# be matched by the IP pattern. The syntax is how you match.
#
# The 'SEMANTIC' is the identifier you give to the piece of text being matched.
# For example, "3.44" could be the duration of an event, so you could call it
# simply 'duration'. Further, a string "55.3.244.1" might identify the client
# making a request.
#
# #### Example
#
# With that idea of a syntax and semantic, we can pull out useful fields from a
# sample log like this fictional http request log:
#
# 55.3.244.1 GET /index.html 15824 0.043
#
# The pattern for this could be:
#
# %{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}
#
# A more realistic example, let's read these logs from a file:
#
# input {
# file {
# path => "/var/log/http.log"
# type => "examplehttp"
# }
# }
# filter {
# grok {
# type => "examplehttp"
# pattern => "%{IP:client} %{WORD:method} %{URIPATHPARAM:request} %{NUMBER:bytes} %{NUMBER:duration}"
# }
# }
#
# After the grok filter, the event will have a few extra fields in it:
#
# * client: 55.3.244.1
# * method: GET
# * request: /index.html
# * bytes: 15824
# * duration: 0.043
#
# #### Regular Expressions
#
# Grok sits on top of regular expressions, so any regular expressions are valid
# in grok as well. The regular expression library is Oniguruma, and you can see
# the full supported regexp syntax [on the Oniguruma
# site](http://www.geocities.jp/kosako3/oniguruma/doc/RE.txt)
#
# #### Custom Patterns
#
# Sometimes logstash doesn't have a pattern you need. For this, you have
# a few options.
#
# First, you can use the Oniguruma syntax for 'named capture' which will
# let you match a piece of text and save it as a field:
#
# (?<field_name>the pattern here)
#
# For example, postfix logs have a 'queue id' that is an 11-character
# hexadecimal value. I can capture that easily like this:
#
# (?<queue_id>[0-9A-F]{11})
#
# Alternately, you can create a custom patterns file.
#
# * Create a directory called 'patterns' with a file in it called 'extra'
# (the file name doesn't matter, but name it meaningfully for yourself)
# * In that file, write the pattern you need as the pattern name, a space, then
# the regexp for that pattern.
#
# For example, doing the postfix queue id example as above:
#
# # in ./patterns/postfix
# POSTFIX_QUEUEID [0-9A-F]{11}
#
# Then use the 'patterns_dir' setting in this plugin to tell logstash where
# your custom patterns directory is. Here's a full example with a sample log:
#
# Jan 1 06:25:43 mailserver14 postfix/cleanup[21403]: BEF25A72965: message-id=<20130101142543.5828399CCAF@mailserver14.example.com>
#
# filter {
# grok {
# patterns_dir => "./patterns"
# pattern => "%{SYSLOGBASE} %{POSTFIX_QUEUEID:queue_id}: %{GREEDYDATA:message}"
# }
# }
#
# The above will match and result in the following fields:
#
# * timestamp: Jan 1 06:25:43
# * logsource: mailserver14
# * program: postfix/cleanup
# * pid: 21403
# * queue_id: BEF25A72965
#
# The 'timestamp', 'logsource', 'program', and 'pid' fields come from the
# SYSLOGBASE pattern which itself is defined by other patterns.
class LogStash::Filters::Grok < LogStash::Filters::Base
config_name "grok"
plugin_status "stable"
@ -31,7 +145,10 @@ class LogStash::Filters::Grok < LogStash::Filters::Base
#
# # same as:
# match => [ "foo", "some pattern" ]
config /[A-Za-z0-9_-]+/, :validate => :string
#
# It is preferable to use the 'match' setting instead of this one, because
# this one is going to be removed.
config /[A-Za-z0-9_-]+/, :validate => :string, :deprecated => true
#
# logstash ships by default with a bunch of patterns, so you don't

View file

@ -2,7 +2,7 @@ require "logstash/inputs/base"
require "logstash/namespace"
require "socket" # for Socket.gethostname
# Run command line tools and cature output as an event.
# Run command line tools and capture the whole output as an event.
#
# Notes:
#
@ -10,8 +10,6 @@ require "socket" # for Socket.gethostname
# * The '@message' of this event will be the entire stdout of the command
# as one event.
#
# TODO(sissel): Implement a 'split' filter so you can split output of this
# and other messages by newline, etc.
class LogStash::Inputs::Exec < LogStash::Inputs::Base
config_name "exec"

View file

@ -140,16 +140,11 @@ class LogStash::Runner
# Add the 'spec' dir to the load path so specs can run
specpath = File.join(jar_root, "spec")
$: << specpath unless $:.include?(specpath)
newpath
else
arg
next newpath
end
else
arg
end
else
arg
end
next arg
end # args.collect
# Hack up a runner