- add line codec (line-delimited data). This complements the other major

common case of the "plain" codec, in which every decode call yields an event and no buffering is done (for generator, udp, zeromq, and other whole-event inputs)
2025-04-24 22:57:16 -04:00 · 2013-08-20 22:56:23 -07:00 · 2013-08-20 22:56:23 -07:00 · 4adb741cf3
commit 4adb741cf3
parent 6cde55106b
1 changed files with 56 additions and 0 deletions
--- a/lib/logstash/codecs/line.rb
+++ b/lib/logstash/codecs/line.rb
@@ -0,0 +1,56 @@
+require "logstash/codecs/base"
+
+# Line-oriented text data.
+#
+# Decoding behavior: Only whole line events will be emitted.
+#
+# Encoding behavior: Each event will be emitted with a trailing newline.
+class LogStash::Codecs::Line < LogStash::Codecs::Base
+  config_name "line"
+  milestone 3
+
+  # Set the desired text format for encoding.
+  config :format, :validate => :string
+
+  # The character encoding used in this input. Examples include "UTF-8"
+  # and "cp1252"
+  #
+  # This setting is useful if your log files are in Latin-1 (aka cp1252)
+  # or in another character set other than UTF-8.
+  #
+  # This only affects "plain" format logs since json is UTF-8 already.
+  config :charset, :validate => ::Encoding.name_list, :default => "UTF-8"
+
+  public
+  def register
+    require "logstash/util/buftok"
+    @buffer = FileWatch::BufferedTokenizer.new
+  end
+  
+  public
+  def decode(data)
+    @buffer.extract(data).each do |line|
+      line.force_encoding(@charset)
+      if @charset != "UTF-8"
+        # The user has declared the character encoding of this data is
+        # something other than UTF-8. Let's convert it (as cleanly as possible)
+        # into UTF-8 so we can use it with JSON, etc.
+
+        # To convert, we first tell ruby the string is *really* encoded as
+        # somethign else (@charset), then we convert it to UTF-8.
+        data = data.encode("UTF-8", :invalid => :replace, :undef => :replace)
+      end
+      yield LogStash::Event.new({"message" => line})
+    end
+  end # def decode
+
+  public
+  def encode(data)
+    if data.is_a? LogStash::Event and @format
+      @on_event.call(data.sprintf(@format))
+    else
+      @on_event.call(data.to_s)
+    end
+  end # def encode
+
+end # class LogStash::Codecs::Plain