Created
November 24, 2025 00:23
-
-
Save kliu04/71f083ea08663a3974dc9b73b8cb89d0 to your computer and use it in GitHub Desktop.
Count rejection emails!
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from functools import partial | |
| import mailbox | |
| import email.utils | |
| from datetime import datetime | |
| from multiprocessing import Pool | |
| import os | |
| from dotenv import load_dotenv | |
| from openai import OpenAI | |
| from pydantic import BaseModel | |
class Result(BaseModel):
    """Structured-output schema for the classifier's verdict on one email."""

    # True when the model judged the email to be a job rejection.
    is_rejection: bool
def parse(mbox: mailbox.mbox, cutoff: datetime) -> set[mailbox.mboxMessage]:
    """Collect the messages in *mbox* that are dated on or after *cutoff*.

    The ``Date`` header is parsed with ``email.utils.parsedate``, which
    discards the UTC offset, so the comparison uses the wall-clock time
    written in the header as a naive ``datetime``. *cutoff* is therefore
    expected to be naive as well.

    A message is skipped (never raised on) when its ``Date`` header is
    missing, cannot be parsed, or names an impossible calendar date such as
    31 February.

    Args:
        mbox: Any iterable of messages; normally an open ``mailbox.mbox``.
        cutoff: Naive lower bound (inclusive) for the message date.

    Returns:
        The set of messages whose date is not earlier than *cutoff*.
    """
    filtered_emails: set[mailbox.mboxMessage] = set()
    for msg in mbox:
        date_header = msg.get("Date")
        if not date_header:
            continue
        # A header containing raw non-ASCII bytes can come back as an
        # email.header.Header object rather than str; parsedate needs str.
        raw_date = email.utils.parsedate(str(date_header))
        if not raw_date:
            continue
        try:
            msg_dt = datetime(*raw_date[:6])
        except (ValueError, OverflowError):
            # parsedate does not range-check its fields, so a syntactically
            # valid header can still describe a date that does not exist.
            continue
        if msg_dt >= cutoff:
            filtered_emails.add(msg)
    return filtered_emails
def _decode_text_part(part: "email.message.Message") -> str:
    """Decode one non-multipart part to text without raising on a bad charset."""
    payload = part.get_payload(decode=True)
    if not isinstance(payload, bytes):
        # get_payload(decode=True) yields None when the part has no payload.
        return ""
    charset = part.get_content_charset() or "utf-8"
    try:
        return payload.decode(charset, errors="replace")
    except (LookupError, ValueError):
        # Unknown or non-text charset label: fall back to UTF-8, replacing
        # undecodable bytes rather than failing.
        return payload.decode("utf-8", errors="replace")


def get_text_body(msg: mailbox.mboxMessage) -> str:
    """Extract the plain-text body of an email message, ignoring attachments.

    For a multipart message every ``text/plain`` part whose
    ``Content-Disposition`` does not mention ``attachment`` is decoded and
    the pieces are joined with newlines. For a single-part message the
    payload is returned when its content type is ``text/plain``.

    Both paths decode with the declared charset (UTF-8 when none is
    declared) and fall back to UTF-8 if the declared charset is unknown, so
    a malformed message never raises.

    Args:
        msg: The message to read.

    Returns:
        The decoded text, or an empty string when there is no plain-text
        content.
    """
    if not msg.is_multipart():
        # single-part message
        if msg.get_content_type() != "text/plain":
            return ""
        return _decode_text_part(msg)

    body_parts = [
        _decode_text_part(part)
        for part in msg.walk()
        if part.get_content_type() == "text/plain"
        and "attachment" not in str(part.get("Content-Disposition", "")).lower()
    ]
    return "\n".join(body_parts)
def process_email(msg: mailbox.mboxMessage) -> bool:
    """Ask the model whether *msg* is a job rejection email.

    The plain-text body is sent to the module-level ``client`` together with
    a classification prompt, and the reply is parsed into ``Result``.

    This is deliberately best-effort: any API failure is printed and counted
    as "not a rejection" so that one bad request does not abort the whole
    run.

    Args:
        msg: The email to classify.

    Returns:
        True only when the model reports a rejection. False when the email
        has no plain-text body, the model returns nothing parseable, or the
        API call fails.
    """
    prompt = """Return True if this email if and only if the email is a job rejection email.
Only return True if the email explicitly states that the applicant was not selected for the position they applied for, and not for any other reason."""
    text = get_text_body(msg)
    if not text.strip():
        # Nothing to classify; skip the API call instead of sending an
        # empty user message.
        return False
    try:
        response = client.responses.parse(
            model="gpt-5-mini",
            input=[
                {"role": "system", "content": prompt},
                {"role": "user", "content": text},
            ],
            text_format=Result,
        )
        event = response.output_parsed
        if not event:
            return False
        return event.is_rejection
    except Exception as e:
        print("API error:", e)
        return False
# Only emails dated on or after this (naive) timestamp are classified.
cutoff_date = datetime(2025, 9, 1)
filename = "all_mail.mbox"

# The client is created at module level on purpose: process_email runs in
# worker processes and looks it up as a global, so it must exist whenever
# this module is imported, not only when it is run as a script.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise ValueError("OPENAI_API_KEY not set in environment variables")
client = OpenAI(api_key=api_key)


def main() -> None:
    """Count the rejection emails in the mbox file and print the totals."""
    mbox = mailbox.mbox(filename)
    try:
        filtered_emails = parse(mbox, cutoff_date)
    finally:
        # The parsed messages are standalone objects, so the file can be
        # released before the classification starts.
        mbox.close()

    with Pool() as pool:
        # True/False mapped to 1/0
        result = sum(pool.imap_unordered(process_email, filtered_emails))

    print(f"Total rejection emails: {result}")
    print(f"Total emails past cutoff: {len(filtered_emails)}")


# Without this guard, platforms that start workers with "spawn" re-run the
# whole script in every worker process.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment