#!/usr/bin/env python3
"""Download every message of a Gmail mailbox into numbered .txt files.

Prompts for Gmail credentials, opens the mailbox read-only over IMAP/SSL,
and writes each message that does not already have a local file to
``<zero-padded id>.txt`` in the current working directory.
"""
import email
import getpass
import imaplib
import os

IMAP_HOST = "imap.gmail.com"

# This could be "/INBOX" or whatever instead. Use m.list() on the
# connection to see all available mailboxes.
MAILBOX = "Spam"


def parse_message_count(select_data):
    """Return the message count from the data part of a SELECT response.

    imaplib hands the count back as a list holding one ASCII bytestring,
    e.g. ``[b'1234']``, so it is decoded and converted to an int here.
    """
    return int(select_data[0].decode("utf-8"))


def split_message_ids(search_data):
    """Return the message ids of a SEARCH response as a list of bytestrings.

    The data part is a list with a single element: a bytestring of
    space-separated decimal integers, e.g. ``[b'1 2 3 4 5 ...']``.
    imaplib yields ``[None]`` when the server sent no SEARCH data at all;
    that case is treated as "no messages" rather than crashing.
    """
    if not search_data or search_data[0] is None:
        return []
    return search_data[0].split()


def message_filename(emailid, width):
    """Return the local file name for *emailid*, zero-padded to *width*."""
    return emailid.decode("utf-8").zfill(width) + ".txt"


def extract_message_bytes(fetch_data):
    """Return the raw message bytes from a FETCH response, or None.

    A successful "(RFC822)" fetch looks like
    ``[(b'1 (RFC822 {size}', b'<raw message>'), b')']``; the payload is the
    second element of the first tuple. When the message is not available
    the list holds no tuple (e.g. ``[None]``) and None is returned.
    """
    for part in fetch_data or ():
        if isinstance(part, tuple) and len(part) >= 2:
            return part[1]
    return None


def download_mailbox(conn, mailbox):
    """Save every not-yet-downloaded message of *mailbox* to the current dir.

    Args:
        conn: a logged-in ``imaplib.IMAP4`` connection.
        mailbox: mailbox name below the "[Gmail]/" prefix, e.g. "Spam".

    Raises:
        imaplib.IMAP4.error: if the mailbox cannot be selected or searched.
    """
    status, select_data = conn.select(f"[Gmail]/{mailbox}", readonly=True)
    if status != "OK":
        # On failure the data is the server's error text, not a count.
        raise imaplib.IMAP4.error(
            f"cannot select mailbox '{mailbox}': {select_data!r}"
        )

    try:
        num_msgs = parse_message_count(select_data)
        print(f"{num_msgs} messages to retrieve from mailbox '{mailbox}'")
        # Pad file names to a common width so they sort in message order.
        width = len(str(num_msgs))

        # One could filter with IMAP rules here instead of "ALL"; see
        # http://www.example-code.com/csharp/imap-search-critera.asp.
        status, search_data = conn.search(None, "ALL")
        if status != "OK":
            raise imaplib.IMAP4.error(
                f"cannot search mailbox '{mailbox}': {search_data!r}"
            )

        # NOTE(review): search() (as opposed to uid()) yields message
        # sequence numbers, which can shift between runs when messages are
        # removed -- confirm that reusing files by this id is acceptable.
        for emailid in split_message_ids(search_data):
            fpath = message_filename(emailid, width)
            print(f"CHECKING {fpath}")
            if os.path.isfile(fpath):
                continue

            # "(RFC822)" fetches the entire raw message: headers plus the
            # full body, which includes any MIME attachments. The ids are
            # passed on as bytestrings exactly as the server returned them.
            status, fetch_data = conn.fetch(emailid, "(RFC822)")
            headers_and_body = (
                extract_message_bytes(fetch_data) if status == "OK" else None
            )
            if headers_and_body is None:
                print(f"SKIPPING {fpath} (fetch returned {status}, no data)")
                continue

            print(f"SAVING {fpath}")
            with open(fpath, "wb") as f:
                f.write(headers_and_body)
    finally:
        # Leave the selected state even if a fetch or a write failed.
        conn.close()


def main():
    """Prompt for credentials and download the configured mailbox."""
    username = input("Gmail username: ")
    seekrit = getpass.getpass("Gmail password: ")

    # The context manager logs out on exit, including on errors.
    with imaplib.IMAP4_SSL(IMAP_HOST) as m:
        m.login(username, seekrit)
        download_mailbox(m, MAILBOX)


if __name__ == "__main__":
    main()