Skip to content

Instantly share code, notes, and snippets.

@aflyhorse
Created June 3, 2025 01:56
Show Gist options
  • Save aflyhorse/2728a68a971d040bb72c7cd7a55bef09 to your computer and use it in GitHub Desktop.
Save aflyhorse/2728a68a971d040bb72c7cd7a55bef09 to your computer and use it in GitHub Desktop.
Weather data extractor for temperature analysis. Extracts lowest and highest temperatures from mirror-earth.com weather history.
[weather]
# Shanghai city code
city_code = 310000
#!/usr/bin/env python3
"""
Weather data extractor for temperature analysis.
Extracts lowest and highest temperatures from mirror-earth.com weather history.
Usage:
python getweather.py [YYYY-MM]
# If no month is provided, it defaults to the last month.
requirements.in:
beautifulsoup4
configparser
requests
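Requires a config.ini file in the working directory with the following format:
[weather]
city_code = <your_city_code>
Example (keep comments on their own line; configparser does not strip
inline comments unless it is configured to do so):
[weather]
# Shanghai city code
city_code = 310000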
"""
import requests
import re
import sys
import configparser
from datetime import datetime, timedelta
from bs4 import BeautifulSoup
def load_config():
    """Load the city code from the ``[weather]`` section of ``config.ini``.

    The file is read from the current working directory. Inline comments
    such as ``city_code = 310000 # Shanghai city code`` are stripped so that
    only the value itself is returned.

    Returns:
        str: The configured ``city_code`` value without surrounding whitespace.

    Raises:
        KeyError: If ``config.ini`` cannot be read, or the ``[weather]``
            section or its ``city_code`` option is missing.
    """
    # ConfigParser keeps inline comments as part of the value unless
    # inline_comment_prefixes is set; without this, the "#" would end up in
    # the request URL and be treated as a fragment separator.
    config = configparser.ConfigParser(inline_comment_prefixes=("#", ";"))
    config.read("config.ini")
    return config["weather"]["city_code"].strip()
def get_last_month():
    """Return the month before the current one as a ``YYYY-MM`` string."""
    # Stepping back one day from the 1st of this month always lands in the
    # previous month, regardless of month length or year boundaries.
    month_start = datetime.now().replace(day=1)
    return (month_start - timedelta(days=1)).strftime("%Y-%m")
def parse_temperature(temp_str):
    """Parse the first number in a temperature string.

    Args:
        temp_str: Text such as ``"29.5℃"`` or ``"-3℃"``.

    Returns:
        float | None: The first numeric value found (sign included), or
        ``None`` when the string contains no digits.
    """
    # Treat the Unicode minus sign (U+2212) like ASCII "-" so sub-zero
    # readings keep their sign; the pattern without "-?" would turn
    # "-3.5℃" into 3.5.
    normalized = temp_str.replace("\u2212", "-")
    match = re.search(r"-?\d+\.?\d*", normalized)
    if match:
        return float(match.group(0))
    return None
def fetch_weather_data(city_code, month):
    """Download the weather-history page for one city and month.

    Args:
        city_code: City identifier placed in the URL path.
        month: Month string placed in the URL path after the city code.

    Returns:
        str: The response body as text.

    If the request fails with a ``requests.RequestException`` (including an
    error status reported by ``raise_for_status``), the error is printed to
    stderr and the process exits with status 1.
    """
    target = f"https://mirror-earth.com/wea_history/{city_code}/{month}"
    # A browser-like User-Agent is sent to avoid 403 Forbidden errors.
    browser_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    )
    try:
        reply = requests.get(
            target,
            headers={"User-Agent": browser_agent},
            timeout=30,
        )
        reply.raise_for_status()
        return reply.text
    except requests.RequestException as exc:
        print(f"Error fetching data: {exc}", file=sys.stderr)
        sys.exit(1)
def extract_temperatures(html_content):
    """Extract daily high and low temperatures from the first HTML table.

    Columns are located by header text: a ``<th>`` containing "最高温" marks
    the high column, "最低温" the low column, and "日期" or "Date" the
    optional date column.

    Args:
        html_content: HTML text of the weather-history page.

    Returns:
        list[dict]: One ``{"date": str, "high": float, "low": float}`` entry
        per parsable row, in table order. Empty when there is no table or
        the high/low headers cannot be found.
    """
    temperatures = []
    soup = BeautifulSoup(html_content, "html.parser")
    # Assume the first table on the page is the weather table.
    table = soup.find("table")
    if not table:
        return temperatures

    # Determine column positions from the header cells. The checks are
    # independent, and a later matching header overrides an earlier one.
    date_idx = None
    high_idx = None
    low_idx = None
    for i, th in enumerate(table.find_all("th")):
        label = th.get_text(strip=True)
        if "日期" in label or "Date" in label:
            date_idx = i
        if "最高温" in label:
            high_idx = i
        if "最低温" in label:
            low_idx = i
    if high_idx is None or low_idx is None:
        return temperatures

    required_cells = max(high_idx, low_idx) + 1
    # Skip the first row, which is the header.
    for row in table.find_all("tr")[1:]:
        cols = [cell.get_text(strip=True) for cell in row.find_all(["td", "th"])]
        if len(cols) < required_cells:
            continue
        # The date column is optional and may sit beyond the cells present
        # in a short row; fall back to "" instead of raising IndexError.
        if date_idx is not None and date_idx < len(cols):
            date = cols[date_idx]
        else:
            date = ""
        try:
            high = float(cols[high_idx].replace("℃", ""))
            low = float(cols[low_idx].replace("℃", ""))
        except ValueError:
            # Rows whose temperature cells are not numeric are skipped.
            continue
        temperatures.append({"date": date, "high": high, "low": low})
    return temperatures
def find_extremes(temperatures):
    """Return ``(lowest, highest)`` over every daily high and low reading.

    Args:
        temperatures: Iterable of dicts with ``"high"`` and ``"low"`` keys.

    Returns:
        tuple: ``(min, max)`` across all readings, or ``(None, None)`` when
        there is no data.
    """
    if temperatures:
        # Flatten each day's two readings into a single list.
        readings = [
            value
            for day in temperatures
            for value in (day["high"], day["low"])
        ]
        if readings:
            return min(readings), max(readings)
    return None, None
def main():
    """Fetch one month of weather history and print the daily temperatures.

    The month is taken from the first command-line argument (``YYYY-MM``) or
    defaults to last month. Each day with parsable data is printed to stdout
    as ``low,high`` on its own line.

    Exits with status 1 when the month argument is invalid or no temperature
    data could be extracted.
    """
    # Load city code from config.
    city_code = load_config()

    # Determine the month to fetch.
    if len(sys.argv) > 1:
        try:
            requested = datetime.strptime(sys.argv[1], "%Y-%m")
        except ValueError:
            print("Error: Month must be in YYYY-MM format", file=sys.stderr)
            sys.exit(1)
        # strptime also accepts non-padded input such as "2025-6"; re-format
        # so the URL always receives the same zero-padded YYYY-MM form that
        # the default path produces.
        month = requested.strftime("%Y-%m")
    else:
        month = get_last_month()

    html_content = fetch_weather_data(city_code, month)
    temperatures = extract_temperatures(html_content)
    if not temperatures:
        print("Error: No temperature data found", file=sys.stderr)
        sys.exit(1)

    # Print daily temperatures in CSV format: low first, then high.
    for day in temperatures:
        print(f"{day['low']},{day['high']}")
# Run the extractor only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
beautifulsoup4
configparser
requests
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment