From a68dad27226683cd10b316eec8fef4c9a60834c0 Mon Sep 17 00:00:00 2001
From: Brad Fritz
Date: Wed, 20 Nov 2013 20:56:10 -0500
Subject: [PATCH] [imap] decode header values in RFC 2047 encoded-word format

RFC 2047 outlines an encoding for header fields. Without this change
the header fields (To, From, Subject, etc.) may have values like
`=?iso-8859-1?Q?abc?=` instead of `abc`.
---
 lib/logstash/inputs/imap.rb |  8 +++++---
 spec/inputs/imap.rb         | 13 +++++++++++++
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/lib/logstash/inputs/imap.rb b/lib/logstash/inputs/imap.rb
index caa9a42ce..5e5837660 100644
--- a/lib/logstash/inputs/imap.rb
+++ b/lib/logstash/inputs/imap.rb
@@ -105,9 +105,11 @@ class LogStash::Inputs::IMAP < LogStash::Inputs::Base
       else
         name = header.name.to_s
       end
-      # Call .to_s on the value just in case it's some weird Mail:: object
-      # thing.
-      value = header.value.to_s
+      # Call .decoded on the header in case it's in encoded-word form.
+      # Details at:
+      # https://github.com/mikel/mail/blob/master/README.md#encodings
+      # http://tools.ietf.org/html/rfc2047#section-2
+      value = header.decoded
 
       # Assume we already processed the 'date' above.
       next if name == "Date"
diff --git a/spec/inputs/imap.rb b/spec/inputs/imap.rb
index 476e8e111..ebe8ddff5 100644
--- a/spec/inputs/imap.rb
+++ b/spec/inputs/imap.rb
@@ -44,4 +44,17 @@ describe LogStash::Inputs::IMAP do
     end
   end
 
+  context "when subject is in RFC 2047 encoded-word format" do
+    it "should be decoded" do
+      msg.subject = "=?iso-8859-1?Q?foo_:_bar?="
+      config = {"type" => "imap", "host" => "localhost",
+                "user" => "#{user}", "password" => "#{password}"}
+
+      input = LogStash::Inputs::IMAP.new config
+      input.register
+      event = input.parse_mail(msg)
+      insist { event["subject"] } == "foo : bar"
+    end
+  end
+
 end