-
-
Save Azimkhan/549619bca0bdc12a1695f9d4a94935ee to your computer and use it in GitHub Desktop.
Decoding emails in Python e.g. for GMail and imapclient lib
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import email | |
def get_decoded_email_body(message_body):
    """Decode the readable body of a raw email message.

    For multipart messages every leaf part is inspected (nested containers
    included). The first ``text/plain`` part that is not an attachment is
    returned; if there is none, the first ``text/html`` part is used instead.
    For single-part messages the payload is decoded whatever its content type.

    Each payload is transfer-decoded (quoted-printable / base64) and then
    decoded with the charset declared in its ``Content-Type`` header. When
    the charset is missing or unknown, UTF-8 is tried first and Latin-1 is
    used as a fallback that cannot fail.

    :param message_body: Raw message source, e.g. as fetched with imaplib or
        imapclient. Either a native string or (on Python 3) ``bytes``.
    :return: The stripped message body as a native ``str``: text on
        Python 3, a UTF-8 encoded byte string on Python 2. An empty string is
        returned when no textual part is found.
    """
    # Imported locally so the function does not depend on module-level state.
    import email

    # Python 3 has separate parsers for text and bytes; on Python 2
    # ``bytes is str`` and message_from_string handles both.
    if isinstance(message_body, bytes) and bytes is not str:
        msg = email.message_from_bytes(message_body)
    else:
        msg = email.message_from_string(message_body)

    if msg.is_multipart():
        plain = None
        html = None
        # walk() descends into nested multiparts (e.g. multipart/mixed that
        # wraps multipart/alternative), unlike iterating get_payload().
        for part in msg.walk():
            if part.is_multipart():
                # Containers carry no payload of their own.
                continue
            disposition = str(part.get('Content-Disposition', '')).strip().lower()
            if disposition.startswith('attachment'):
                # An attached .txt/.html file is not the message body.
                continue
            content_type = part.get_content_type()
            if content_type == 'text/plain' and plain is None:
                plain = _decode_part_payload(part)
            elif content_type == 'text/html' and html is None:
                html = _decode_part_payload(part)

        if plain is not None:
            body = plain
        elif html is not None:
            body = html
        else:
            body = u''
    else:
        body = _decode_part_payload(msg)

    body = body.strip()
    if str is bytes:
        # Python 2: keep returning a UTF-8 encoded byte string as before.
        body = body.encode('utf-8', 'replace')
    return body


def _decode_part_payload(part):
    """Return the transfer-decoded payload of a non-multipart part as text."""
    raw = part.get_payload(decode=True)
    if raw is None:
        return u''

    charset = part.get_content_charset()
    if charset:
        try:
            # 'ignore' drops undecodable bytes instead of failing on them.
            return raw.decode(charset, 'ignore')
        except LookupError:
            # The declared charset is not a codec Python knows; guess below.
            pass

    # No usable charset: UTF-8 is the most likely candidate, and Latin-1
    # maps every byte to a code point so it never raises.
    try:
        return raw.decode('utf-8')
    except UnicodeDecodeError:
        return raw.decode('latin-1')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment