Skip to content

Instantly share code, notes, and snippets.

@almet
Last active September 26, 2024 09:36
Show Gist options
  • Save almet/607bbcc32887535c570d2ed20acd5d72 to your computer and use it in GitHub Desktop.
Save almet/607bbcc32887535c570d2ed20acd5d72 to your computer and use it in GitHub Desktop.
Scrap mairies emails

Comment installer ça sur ma machine ?

Prérequis:

  • Je pars du principe que brew est déjà installé (sinon suivre leurs instructions)
  • Avoir copié le fichier quelque part sur sa machine

Ouvrir un terminal, puis:

brew install uv
chmod +x scrap-mairies.py
./scrap-mairies.py
#!/usr/bin/env -S uv run
# Needs "uv" to be run. chmod +x and then ./scrap-mairies.py
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "requests", "bs4"
# ]
# ///
import csv
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
def get_email_from_page(url):
    """Fetch a mairie's detail page and extract its contact email.

    Parameters:
        url: Absolute URL of the mairie's page on lannuaire.service-public.fr.

    Returns:
        The email address found in the ``a.send-mail`` link (with the
        ``mailto:`` prefix stripped), or ``None`` when the page has no
        such link.

    Raises:
        requests.HTTPError: if the page responds with an error status.
    """
    print(f"Getting email from {url}...")
    # Timeout so one unresponsive page cannot hang the whole scrape;
    # raise_for_status so we fail loudly instead of parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    email_link = soup.select_one("a.send-mail")
    return email_link["href"].replace("mailto:", "") if email_link else None
def scrape_mairies():
    """Scrape every mairie of Ille-et-Vilaine (35) from the public directory.

    Walks the paginated search results on lannuaire.service-public.fr,
    visiting each mairie's page to extract its email address.

    Returns:
        A list of dicts with keys ``name``, ``url`` and ``email``
        (``email`` may be ``None`` when the page exposes no mailto link).

    Raises:
        requests.HTTPError: if a listing page responds with an error status.
    """
    base_url = "https://lannuaire.service-public.fr/navigation/mairie"
    params = {"where": "Ille Et Vilaine 35", "page": 1}
    results = []
    while True:
        print(f"Scraping page {params['page']}...")
        # Timeout + status check: fail fast rather than hanging or
        # silently scraping an error page.
        response = requests.get(base_url, params=params, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        # Extract mairie links from the results list
        mairie_links = soup.select("div.sp-link--label a.fr-link")
        for link in mairie_links:
            mairie_name = link.text.strip()
            # hrefs may be site-relative; urljoin resolves them against the
            # listing URL and leaves already-absolute URLs unchanged.
            mairie_url = urljoin(base_url, link["href"])
            email = get_email_from_page(mairie_url)
            results.append({"name": mairie_name, "url": mairie_url, "email": email})
            time.sleep(1)  # Be polite, wait a second between requests
        # Check for next page
        next_link = soup.select_one("a.fr-pagination__link--next")
        if next_link:
            params["page"] += 1
        else:
            break
    return results
def save_to_csv(data, filename="mairies_emails.csv"):
    """Write scraped mairie records to a CSV file.

    Parameters:
        data: Iterable of dicts with keys ``name``, ``url`` and ``email``.
        filename: Destination path; defaults to ``mairies_emails.csv``.

    The file is written with a UTF-8 encoding and a header row; an empty
    ``data`` iterable produces a header-only file.
    """
    fieldnames = ["name", "url", "email"]
    # newline="" is required by the csv module to avoid blank lines on Windows.
    with open(filename, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        # writerows replaces the manual per-row loop (same output, idiomatic).
        writer.writerows(data)
if __name__ == "__main__":
mairies_data = scrape_mairies()
save_to_csv(mairies_data)
print(f"Scraping completed. Data saved to mairies_emails.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment