From 03526d9a6f6336597f0fd4d9a0506fa3aaad5228 Mon Sep 17 00:00:00 2001 From: Colin Surprenant Date: Fri, 14 Mar 2014 21:34:57 -0400 Subject: [PATCH] main message codec support and headers transcoding --- lib/logstash/inputs/imap.rb | 72 +++++++++++++++++++++---------------- 1 file changed, 42 insertions(+), 30 deletions(-) diff --git a/lib/logstash/inputs/imap.rb b/lib/logstash/inputs/imap.rb index 5e5837660..c3fb794e6 100644 --- a/lib/logstash/inputs/imap.rb +++ b/lib/logstash/inputs/imap.rb @@ -13,6 +13,8 @@ class LogStash::Inputs::IMAP < LogStash::Inputs::Base milestone 1 ISO8601_STRFTIME = "%04d-%02d-%02dT%02d:%02d:%02d.%06d%+03d:00".freeze + default :codec, "plain" + config :host, :validate => :string, :required => true config :port, :validate => :number @@ -91,41 +93,42 @@ class LogStash::Inputs::IMAP < LogStash::Inputs::Base message = part.decoded end - event = LogStash::Event.new("message" => message) + @codec.decode(message) do |event| + # event = LogStash::Event.new("message" => message) - # Use the 'Date' field as the timestamp - event["@timestamp"] = mail.date.to_time.gmtime + # Use the 'Date' field as the timestamp + event["@timestamp"] = mail.date.to_time.gmtime - # Add fields: Add message.header_fields { |h| h.name=> h.value } - mail.header_fields.each do |header| - if @lowercase_headers - # 'header.name' can sometimes be a Mail::Multibyte::Chars, get it in - # String form - name = header.name.to_s.downcase - else - name = header.name.to_s - end - # Call .decoded on the header in case it's in encoded-word form. 
- # Details at: - # https://github.com/mikel/mail/blob/master/README.md#encodings - # http://tools.ietf.org/html/rfc2047#section-2 - value = header.decoded + # Add fields: Add message.header_fields { |h| h.name=> h.value } + mail.header_fields.each do |header| + if @lowercase_headers + # 'header.name' can sometimes be a Mail::Multibyte::Chars, get it in + # String form + name = header.name.to_s.downcase + else + name = header.name.to_s + end + # Call .decoded on the header in case it's in encoded-word form. + # Details at: + # https://github.com/mikel/mail/blob/master/README.md#encodings + # http://tools.ietf.org/html/rfc2047#section-2 + value = transcode_to_utf8(header.decoded) - # Assume we already processed the 'date' above. - next if name == "Date" + # Assume we already processed the 'date' above. + next if name == "Date" - case event[name] - # promote string to array if a header appears multiple times - # (like 'received') - when String; event[name] = [event[name], value] - when Array; event[name].is_a?(Array) - when nil; event[name] = value - end - end # mail.header_fields.each + case event[name] + # promote string to array if a header appears multiple times + # (like 'received') + when String; event[name] = [event[name], value] + when Array; event[name].is_a?(Array) + when nil; event[name] = value + end + end # mail.header_fields.each - decorate(event) - - return event + decorate(event) + event + end end # def handle public @@ -133,4 +136,13 @@ class LogStash::Inputs::IMAP < LogStash::Inputs::Base $stdin.close finished end # def teardown + + private + + # transcode_to_utf8 is meant for transcoding headers. + # The mail gem sets the correct encoding on header strings when decoding them, + # and we want to transcode them to UTF-8, replacing invalid/undefined characters. + def transcode_to_utf8(s) + s.encode(Encoding::UTF_8, :invalid => :replace, :undef => :replace) + end end # class LogStash::Inputs::IMAP