Created
June 3, 2025 01:56
-
-
Save aflyhorse/2728a68a971d040bb72c7cd7a55bef09 to your computer and use it in GitHub Desktop.
Weather data extractor for temperature analysis. Extracts lowest and highest temperatures from mirror-earth.com weather history.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[weather]
city_code = 310000 # Shanghai city code
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
Weather data extractor for temperature analysis. | |
Extracts lowest and highest temperatures from mirror-earth.com weather history. | |
Usage: | |
python getweather.py [YYYY-MM] | |
# If no month is provided, it defaults to the last month. | |
requirements.in: | |
beautifulsoup4 | |
configparser | |
requests | |
requires a config.ini file with the following format: | |
[weather] | |
city_code = <your_city_code> | |
Example: | |
[weather] | |
city_code = 310000 # Shanghai city code | |
""" | |
import requests | |
import re | |
import sys | |
import configparser | |
from datetime import datetime, timedelta | |
from bs4 import BeautifulSoup | |
def load_config():
    """Load the city code from the ``[weather]`` section of ``config.ini``.

    The file is looked up relative to the current working directory.
    Inline comments are stripped, so the documented example line
    ``city_code = 310000 # Shanghai city code`` yields ``"310000"`` rather
    than the whole remainder of the line.

    Returns:
        The configured city code as a string.

    Raises:
        KeyError: If ``config.ini`` is missing (``ConfigParser.read``
            silently skips files it cannot open), or if it has no
            ``[weather]`` section or no ``city_code`` option.
    """
    # By default ConfigParser treats "# ..." after a value as part of the
    # value; the prefixes must be enabled explicitly. A prefix only starts a
    # comment when it is preceded by whitespace.
    config = configparser.ConfigParser(inline_comment_prefixes=("#", ";"))
    config.read("config.ini")
    return config["weather"]["city_code"]
def get_last_month(today=None):
    """Return the month preceding *today* in ``YYYY-MM`` format.

    Args:
        today: Optional ``date``/``datetime`` used as the reference point.
            Defaults to ``datetime.now()`` when omitted, which is the
            original behavior.

    Returns:
        The previous calendar month as a ``YYYY-MM`` string, e.g. a
        reference date in January 2025 gives ``"2024-12"``.
    """
    if today is None:
        today = datetime.now()
    # Stepping back one day from the first of this month always lands on the
    # last day of the previous month, regardless of month length or year
    # rollover.
    last_day_of_previous = today.replace(day=1) - timedelta(days=1)
    return last_day_of_previous.strftime("%Y-%m")
def parse_temperature(temp_str):
    """Parse the first number in a temperature string.

    Args:
        temp_str: Text such as ``"29.5℃"`` or ``"-3℃"``.

    Returns:
        The first numeric value found as a ``float``, keeping its sign
        (``"-3.5℃"`` -> ``-3.5``), or ``None`` when the text contains no
        digits.
    """
    # float() rejects U+2212 MINUS SIGN, so map it to the ASCII hyphen before
    # matching; otherwise the sign would be dropped or the parse would fail.
    normalized = temp_str.replace("\u2212", "-")
    match = re.search(r"[-+]?\d+\.?\d*", normalized)
    if match is None:
        return None
    return float(match.group())
def fetch_weather_data(city_code, month):
    """Download the mirror-earth.com weather-history page for a city and month.

    Args:
        city_code: City identifier placed in the URL path.
        month: Month placed in the URL path after the city code.

    Returns:
        The response body as text.

    On any ``requests.RequestException`` (including a non-2xx status raised
    by ``raise_for_status``) the error is written to stderr and the process
    exits with status 1.
    """
    # A browser-like User-Agent is sent to avoid 403 Forbidden responses.
    browser_agent = " ".join(
        [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
            "AppleWebKit/537.36 (KHTML, like Gecko)",
            "Chrome/91.0.4472.124 Safari/537.36",
        ]
    )
    request_headers = {"User-Agent": browser_agent}
    page_url = f"https://mirror-earth.com/wea_history/{city_code}/{month}"

    try:
        reply = requests.get(page_url, headers=request_headers, timeout=30)
        reply.raise_for_status()
        return reply.text
    except requests.RequestException as exc:
        print(f"Error fetching data: {exc}", file=sys.stderr)
        sys.exit(1)
def extract_temperatures(html_content):
    """Extract daily high and low temperatures from the first HTML table.

    Column positions are taken from the table's ``<th>`` cells: a header
    containing "最高温" marks the high column, "最低温" the low column, and
    "日期" or "Date" the (optional) date column.

    Args:
        html_content: HTML text of the weather-history page.

    Returns:
        A list of ``{"date": str, "high": float, "low": float}`` dicts, one
        per parsable row. The list is empty when there is no table or the
        high/low headers are not found. ``date`` is ``""`` when no date
        column exists or the row is too short to contain it.
    """
    temperatures = []
    soup = BeautifulSoup(html_content, "html.parser")

    # Assume the first table on the page is the weather table.
    table = soup.find("table")
    if not table:
        return temperatures

    # Map header captions to column indices.
    date_idx = high_idx = low_idx = None
    for i, th in enumerate(table.find_all("th")):
        caption = th.get_text(strip=True)
        if "日期" in caption or "Date" in caption:
            date_idx = i
        if "最高温" in caption:
            high_idx = i
        if "最低温" in caption:
            low_idx = i

    if high_idx is None or low_idx is None:
        return temperatures

    # Loop-invariant: a row needs at least this many cells to hold both
    # temperature columns.
    min_cols = max(high_idx, low_idx) + 1

    # Skip the first row, which is the header.
    for row in table.find_all("tr")[1:]:
        cols = [cell.get_text(strip=True) for cell in row.find_all(["td", "th"])]
        if len(cols) < min_cols:
            continue

        # The date column may lie beyond the temperature columns; guard the
        # lookup so a short row does not raise IndexError.
        if date_idx is not None and date_idx < len(cols):
            date = cols[date_idx]
        else:
            date = ""

        try:
            high = float(cols[high_idx].replace("℃", ""))
            low = float(cols[low_idx].replace("℃", ""))
        except ValueError:
            # Non-numeric cells (e.g. repeated header rows) are skipped.
            continue

        temperatures.append({"date": date, "high": high, "low": low})

    return temperatures
def find_extremes(temperatures):
    """Return the overall ``(lowest, highest)`` temperature across all days.

    Both the ``"high"`` and ``"low"`` value of every day take part in the
    comparison. ``(None, None)`` is returned when there is no data.
    """
    # Flatten every day's two readings into one list; a falsy input
    # contributes nothing.
    readings = [
        value
        for day in (temperatures or ())
        for value in (day["high"], day["low"])
    ]
    if not readings:
        return None, None
    return min(readings), max(readings)
def main():
    """Print each day's low and high temperature for one month as CSV.

    The city code comes from ``load_config()``. The month is taken from the
    first command-line argument (``YYYY-MM``) or defaults to last month.
    One ``low,high`` line is written to stdout per day.

    Exits with status 1 (message on stderr) when the month argument is not
    valid or when no temperature data could be extracted.
    """
    city_code = load_config()

    if len(sys.argv) > 1:
        try:
            requested = datetime.strptime(sys.argv[1], "%Y-%m")
        except ValueError:
            print("Error: Month must be in YYYY-MM format", file=sys.stderr)
            sys.exit(1)
        # strptime also accepts an unpadded month such as "2025-6"; re-format
        # so the URL always carries the same zero-padded form that
        # get_last_month() produces.
        month = requested.strftime("%Y-%m")
    else:
        month = get_last_month()

    html_content = fetch_weather_data(city_code, month)

    temperatures = extract_temperatures(html_content)
    if not temperatures:
        print("Error: No temperature data found", file=sys.stderr)
        sys.exit(1)

    # Daily temperatures in CSV format: low first, then high.
    for day in temperatures:
        print(f"{day['low']},{day['high']}")


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
beautifulsoup4 | |
configparser | |
requests |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment