@kleinlennart
Created January 21, 2025 17:36
Web Scraping Trump II Presidential Actions / Executive Orders from Whitehouse.gov
import requests
from bs4 import BeautifulSoup
import pandas as pd

actions = []

# Collect title, date, and link for each action from the paginated index.
# FIXME: update page limit
for page in range(1, 6):
    url = f"https://www.whitehouse.gov/presidential-actions/page/{page}/"
    response = requests.get(url)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    list_items = soup.find_all("li", class_="wp-block-post")
    print(f"Number of list items found on page {page}: {len(list_items)}")

    for item in list_items:
        title_tag = item.find("h2", class_="wp-block-post-title")
        date_tag = item.find("time")
        if title_tag and date_tag:
            title = title_tag.get_text(strip=True)
            date = date_tag.get_text(strip=True)
            link = title_tag.find("a")["href"]
            actions.append({"title": title, "date": date, "link": link})

# Fetch the full text of each action from its detail page.
for action in actions:
    link = action["link"]
    response = requests.get(link)
    response.raise_for_status()

    action_soup = BeautifulSoup(response.text, "html.parser")
    content_div = action_soup.find("div", class_="entry-content")
    if content_div:
        action["content"] = content_div.get_text(strip=True, separator=" ")

# Save the results to CSV.
df = pd.DataFrame(actions)
print(df)
df.to_csv("presidential_actions.csv", index=False)
print(f"Number of actions scraped: {len(df)}")