[imap] decode header values in RFC 2047 encoded-word format

RFC 2047 defines the "encoded-word" format for representing non-ASCII
text in header fields.  Without this change, header fields (To, From,
Subject, etc.) may have values like `=?iso-8859-1?Q?abc?=` instead of `abc`.
This commit is contained in:
Brad Fritz 2013-11-20 20:56:10 -05:00
parent fd3ab9166d
commit a68dad2722
2 changed files with 18 additions and 3 deletions

View file

@ -105,9 +105,11 @@ class LogStash::Inputs::IMAP < LogStash::Inputs::Base
else
name = header.name.to_s
end
# Call .to_s on the value just in case it's some weird Mail:: object
# thing.
value = header.value.to_s
# Call .decoded on the header in case it's in encoded-word form.
# Details at:
# https://github.com/mikel/mail/blob/master/README.md#encodings
# http://tools.ietf.org/html/rfc2047#section-2
value = header.decoded
# Assume we already processed the 'date' above.
next if name == "Date"

View file

@ -44,4 +44,17 @@ describe LogStash::Inputs::IMAP do
end
end
context "when subject is in RFC 2047 encoded-word format" do
  it "should be decoded" do
    # Subject is supplied as a Q-encoded ISO-8859-1 encoded-word; the
    # parsed event is expected to carry the decoded text, with each "_"
    # turned into a space.
    msg.subject = "=?iso-8859-1?Q?foo_:_bar?="
    settings = {
      "type" => "imap",
      "host" => "localhost",
      "user" => "#{user}",
      "password" => "#{password}"
    }
    plugin = LogStash::Inputs::IMAP.new(settings)
    plugin.register
    parsed = plugin.parse_mail(msg)
    insist { parsed["subject"] } == "foo : bar"
  end
end
end