Created
April 28, 2026 09:22
-
-
Save iqiancheng/708203fc00ac9b3d4fe7bc8bf078f2bc to your computer and use it in GitHub Desktop.
把微信账单Excel 转为CSV
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # filename: wechat_bill_to_csv.py | |
| import argparse | |
| import os | |
| import sys | |
| from typing import List, Optional | |
| import pandas as pd | |
| import re | |
def clean_filename_suffix(name: str) -> str:
    """
    Return ``name`` with the WeChat "password hint" marker removed.

    The marker is the fixed text that WeChat appends to exported bill
    archives (an em-dash pair followed by a bracketed note saying the
    extraction password can be viewed in the WeChat Pay official account).
    Every occurrence is dropped; names without it come back unchanged.
    """
    marker = '——【解压密码可在微信支付公众号查看】'
    # The marker is plain literal text, so a substring replace is sufficient.
    return name.replace(marker, '')
def sanitize_filename(name: str) -> str:
    """
    Turn an arbitrary label into a string that is safe inside a file name.

    Surrounding whitespace is stripped first. Alphanumeric characters and
    the punctuation ``-``, ``_``, space and ``.`` are kept; every other
    character becomes ``_``. If nothing is left, ``"sheet"`` is returned.
    """
    allowed_punctuation = ("-", "_", " ", ".")
    pieces = []
    for ch in name.strip():
        if ch.isalnum() or ch in allowed_punctuation:
            pieces.append(ch)
        else:
            pieces.append("_")
    cleaned = "".join(pieces)
    if cleaned:
        return cleaned
    # An empty (or whitespace-only) label falls back to a generic name.
    return "sheet"
def read_excel_all_sheets(path: str) -> List[pd.DataFrame]:
    """
    Read every sheet of an Excel workbook into a list of DataFrames.

    The workbook is opened with the ``openpyxl`` engine first (.xlsx) and,
    if that fails for any reason, with ``xlrd`` (legacy .xls). Each sheet
    is loaded with ``dtype=object`` and missing cells are replaced by "".

    Args:
        path: Path of the workbook to read.

    Returns:
        One DataFrame per sheet, in workbook order.

    Raises:
        RuntimeError: If neither engine can read the file. The message
            carries both engine errors and the xlrd error is chained as
            the cause.
    """
    errors = {}
    for engine in ("openpyxl", "xlrd"):
        try:
            # Open the workbook once and reuse the handle for every sheet;
            # the context manager guarantees the file is closed afterwards.
            with pd.ExcelFile(path, engine=engine) as workbook:
                # Build a fresh list per engine so a half-finished attempt
                # can never leak partial results into the fallback.
                return [
                    pd.read_excel(workbook, sheet_name=sheet, dtype=object).fillna("")
                    for sheet in workbook.sheet_names
                ]
        except Exception as exc:
            # Best effort: remember the failure and try the next engine.
            errors[engine] = exc

    raise RuntimeError(
        f"Failed to read Excel with openpyxl ({errors['openpyxl']}) and xlrd ({errors['xlrd']})."
    ) from errors["xlrd"]
def detect_and_load(path: str) -> List[tuple]:
    """
    Load a bill export and return its tables as ``(name, DataFrame)`` pairs.

    Strategies are tried in this order:
      1. ``.csv`` extension: read with ``pd.read_csv``; the single table is
         named "sheet". Errors from this read propagate unchanged.
      2. ``.xlsx`` / ``.xls`` extension: read all sheets with
         ``read_excel_all_sheets`` and pair them with the sheet names.
      3. Any extension (including Excel files that failed step 2): parse
         HTML tables with ``pd.read_html``; tables are named ``table_1``,
         ``table_2``, ...
      4. Last resort: read the file as comma-separated text.

    Values are loaded as objects and missing cells are replaced by "".

    Args:
        path: Path of the input file.

    Returns:
        A list of ``(name, DataFrame)`` tuples.

    Raises:
        FileNotFoundError: If ``path`` is not an existing file.
        RuntimeError: If no strategy can read the file; the most recent
            underlying error is chained as the cause.
    """
    if not os.path.isfile(path):
        raise FileNotFoundError(f"Input file not found: {path}")

    ext = os.path.splitext(path)[1].lower()

    # CSV
    if ext == ".csv":
        df = pd.read_csv(path, dtype=object).fillna("")
        return [("sheet", df)]

    # Most recent failure, kept so the final error can explain itself.
    last_error = None

    # Excel
    if ext in (".xlsx", ".xls"):
        try:
            dfs = read_excel_all_sheets(path)
        except Exception as exc:
            # Fall through to the HTML attempt: many "File is not a zip file"
            # cases are actually HTML renamed to .xlsx.
            last_error = exc
        else:
            try:
                # Only the sheet names are needed; close the handle right away.
                with pd.ExcelFile(path) as workbook:
                    names = list(workbook.sheet_names)
            except Exception:
                names = []
            if len(names) != len(dfs):
                # The data was read successfully, so do not discard it just
                # because the names could not be recovered.
                names = [f"sheet_{i}" for i in range(1, len(dfs) + 1)]
            return list(zip(names, dfs))

    # Try HTML parse regardless of extension.
    try:
        tables = pd.read_html(path)  # returns a list of DataFrames
        # If the export holds several tables, keep them all.
        named = [
            (f"table_{i}", table.astype(object).fillna(""))
            for i, table in enumerate(tables, start=1)
        ]
        if named:
            return named
    except Exception as exc:
        last_error = exc

    # Last resort: plain text CSV. pd.read_csv defaults apply (comma
    # separator); no delimiter auto-detection is performed.
    try:
        df_try = pd.read_csv(path, dtype=object)
        return [("sheet", df_try.fillna(""))]
    except Exception as exc:
        last_error = exc

    raise RuntimeError(
        "Unsupported or corrupted file. If this is a WeChat export, try re-exporting as CSV, or save the web page as HTML and use that file."
    ) from last_error
def write_csvs(named_dfs: List[tuple], base_name: str, out_dir: str, encoding: str = "utf-8-sig") -> List[str]:
    """
    Write each ``(name, DataFrame)`` pair to a CSV file in ``out_dir``.

    A single table is written to ``<base_name>.csv``. Several tables are
    written to ``<base_name>__<sanitized name>.csv``, one file each. All
    values are converted to ``str`` before writing and the index is omitted.

    Args:
        named_dfs: ``(name, DataFrame)`` tuples to export.
        base_name: File name stem shared by all outputs.
        out_dir: Target directory; created if it does not exist.
        encoding: Text encoding for the CSV files.

    Returns:
        The paths of the files written, in input order (empty if
        ``named_dfs`` is empty).
    """
    os.makedirs(out_dir, exist_ok=True)

    if len(named_dfs) == 1:
        out_path = os.path.join(out_dir, f"{base_name}.csv")
        named_dfs[0][1].astype(str).to_csv(out_path, index=False, encoding=encoding)
        return [out_path]

    paths = []
    # Different sheet names can sanitize to the same text (e.g. "a/b" and
    # "a:b"). Track the suffixes already used, case-insensitively so the
    # check also holds on case-insensitive file systems, and number any
    # repeats instead of silently overwriting an earlier file.
    used = set()
    for name, df in named_dfs:
        safe = sanitize_filename(name)
        candidate = safe
        counter = 2
        while candidate.lower() in used:
            candidate = f"{safe}_{counter}"
            counter += 1
        used.add(candidate.lower())

        out_path = os.path.join(out_dir, f"{base_name}__{candidate}.csv")
        df.astype(str).to_csv(out_path, index=False, encoding=encoding)
        paths.append(out_path)
    return paths
def main(argv: Optional[List[str]] = None):
    """
    Command-line entry point: convert one bill export to CSV file(s).

    Parses ``argv`` (or the process arguments when it is ``None``), loads
    the input with ``detect_and_load`` and writes the result with
    ``write_csvs``. The output file stem is the input file name with the
    password-hint marker and the extension removed. The written path(s)
    are printed to stdout. On any error during loading or writing, the
    message is printed to stderr and the process exits with status 1.
    """
    cli = argparse.ArgumentParser(description="Convert WeChat bill export (xlsx/xls/csv/html) to CSV.")
    cli.add_argument("input_path", help="Path to the WeChat bill file (.xlsx/.xls/.csv/.html)")
    cli.add_argument("-o", "--out-dir", default=None, help="Output directory (default: same as input)")
    cli.add_argument("-e", "--encoding", default="utf-8-sig", help="CSV encoding (default: utf-8-sig)")
    options = cli.parse_args(argv)

    source = options.input_path
    # Default to the input file's own directory, then the working directory.
    target_dir = options.out_dir or os.path.dirname(os.path.abspath(source)) or os.getcwd()

    # Drop the password-hint marker first, then the extension, to get the stem.
    stem, _extension = os.path.splitext(clean_filename_suffix(os.path.basename(source)))

    try:
        tables = detect_and_load(source)
        written = write_csvs(tables, stem, target_dir, options.encoding)
        if len(written) != 1:
            print("CSV files saved:")
            for item in written:
                print(f"- {item}")
        else:
            print(f"CSV saved: {written[0]}")
    except Exception as err:
        print(f"Error: {err}", file=sys.stderr)
        sys.exit(1)
# Run the converter only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment