Skip to content

Instantly share code, notes, and snippets.

@kliu04
Created November 24, 2025 00:23
Show Gist options
  • Select an option

  • Save kliu04/71f083ea08663a3974dc9b73b8cb89d0 to your computer and use it in GitHub Desktop.

Select an option

Save kliu04/71f083ea08663a3974dc9b73b8cb89d0 to your computer and use it in GitHub Desktop.
Count rejection emails!
from functools import partial
import mailbox
import email.utils
from datetime import datetime
from multiprocessing import Pool
import os
from dotenv import load_dotenv
from openai import OpenAI
from pydantic import BaseModel
class Result(BaseModel):
    """Structured-output schema the model's answer is parsed into."""

    # The model's verdict for one email: True when it is a job rejection.
    is_rejection: bool
def parse(mbox: mailbox.mbox, cutoff: datetime):
    """Collect the messages in *mbox* dated on or after *cutoff*.

    A message is skipped when it has no ``Date`` header, when the header
    cannot be parsed, or when the parsed fields do not form a valid calendar
    date/time.

    Args:
        mbox: Iterable of messages that expose ``get("Date")``.
        cutoff: Naive ``datetime``. The message date is built from the header's
            wall-clock fields only (``email.utils.parsedate`` discards the UTC
            offset), so the comparison is between naive values.

    Returns:
        A set of the messages whose date is not earlier than *cutoff*.
    """
    filtered_emails: set[mailbox.mboxMessage] = set()
    for msg in mbox:
        date_header = msg.get("Date")
        if not date_header:
            continue
        # The header may be a Header object rather than a plain string.
        raw_date = email.utils.parsedate(str(date_header))
        if not raw_date:
            continue
        try:
            msg_dt = datetime(*raw_date[:6])
        except ValueError:
            # parsedate does not range-check its fields, so a header such as
            # "31 Feb 2025" parses but is not a real date. Skip that message
            # instead of aborting the whole scan.
            continue
        if msg_dt >= cutoff:
            filtered_emails.add(msg)
    return filtered_emails
def _decode_text_part(part):
    """Return the decoded text of one message part, or None if it has no payload.

    The part's declared charset is tried first; if Python does not know that
    charset (or cannot use it), the bytes are decoded as UTF-8 instead.
    Undecodable bytes are replaced rather than raising.
    """
    payload = part.get_payload(decode=True)
    if payload is None:
        return None
    charset = part.get_content_charset() or "utf-8"
    try:
        return payload.decode(charset, errors="replace")
    except (LookupError, UnicodeError):
        return payload.decode("utf-8", errors="replace")


def get_text_body(msg: mailbox.mboxMessage) -> str:
    """Extract the plain-text body of an email message, ignoring attachments.

    For a multipart message every ``text/plain`` part whose
    ``Content-Disposition`` does not mention "attachment" is decoded. For a
    single-part message the payload is decoded only if the message itself is
    ``text/plain``.

    Note: the original version of this function was written by ChatGPT.

    Args:
        msg: The message to read.

    Returns:
        The decoded parts joined with newlines, or an empty string when the
        message has no usable plain-text content.
    """
    body_parts = []
    if msg.is_multipart():
        for part in msg.walk():
            if part.get_content_type() != "text/plain":
                continue
            # str(): the header value may be a Header object, not a str.
            disposition = str(part.get("Content-Disposition", ""))
            if "attachment" in disposition.lower():
                continue
            text = _decode_text_part(part)
            if text is not None:
                body_parts.append(text)
    elif msg.get_content_type() == "text/plain":
        # Single-part message: same decoding (and charset fallback) as above.
        text = _decode_text_part(msg)
        if text is not None:
            body_parts.append(text)
    return "\n".join(body_parts)
def process_email(msg: mailbox.mboxMessage) -> bool:
    """Classify one email as a job rejection (True) or not (False).

    The plain-text body is sent to the model together with a fixed system
    prompt, and the reply is parsed into ``Result``. Uses the module-level
    ``client``.

    Args:
        msg: The message to classify.

    Returns:
        The model's ``is_rejection`` verdict. False is returned when the
        message has no plain-text body, when the reply could not be parsed,
        or when the API call fails (the error is printed, not raised).
    """
    prompt = (
        "Return True if this email if and only if the email is a job rejection email.\n"
        "Only return True if the email explicitly states that the applicant was not "
        "selected for the position they applied for, and not for any other reason."
    )
    text = get_text_body(msg)
    if not text.strip():
        # Nothing to classify (e.g. an HTML-only email): an empty body cannot
        # state a rejection, so skip the API request entirely.
        return False
    try:
        response = client.responses.parse(
            model="gpt-5-mini",
            input=[
                {"role": "system", "content": prompt},
                {"role": "user", "content": text},
            ],
            text_format=Result,
        )
        event = response.output_parsed
        if not event:
            return False
        return event.is_rejection
    except Exception as e:
        # Best effort: one failed request must not abort the whole count.
        print("API error:", e)
        return False
# Only messages dated on or after this (naive) timestamp are classified.
cutoff_date = datetime(2025, 9, 1)
# Path of the mbox export to scan.
filename = "all_mail.mbox"

# The client is created at import time, outside the __main__ guard, on purpose:
# worker processes started with the "spawn" method re-import this module and
# need their own `client` global for process_email.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise ValueError("OPENAI_API_KEY not set in environment variables")
client = OpenAI(api_key=api_key)


def main() -> None:
    """Count the rejection emails in the mailbox and print the totals."""
    # create=False: a wrong path should fail loudly instead of silently
    # creating an empty mailbox and reporting zero emails.
    mbox = mailbox.mbox(filename, create=False)
    try:
        filtered_emails = parse(mbox, cutoff_date)
    finally:
        mbox.close()

    with Pool() as pool:
        # True/False mapped to 1/0
        result = sum(pool.imap_unordered(process_email, filtered_emails))

    print(f"Total rejection emails: {result}")
    print(f"Total emails past cutoff: {len(filtered_emails)}")


# Without this guard every spawned worker would re-run the whole script
# (re-parsing the mailbox and trying to start its own pool).
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment