Skip to content

Instantly share code, notes, and snippets.

@iqiancheng
Created April 28, 2026 09:22
Show Gist options
  • Select an option

  • Save iqiancheng/708203fc00ac9b3d4fe7bc8bf078f2bc to your computer and use it in GitHub Desktop.

Select an option

Save iqiancheng/708203fc00ac9b3d4fe7bc8bf078f2bc to your computer and use it in GitHub Desktop.
把微信账单Excel 转为CSV
# filename: wechat_bill_to_csv.py
import argparse
import os
import sys
from typing import List, Optional
import pandas as pd
import re
def clean_filename_suffix(name: str) -> str:
    """Remove the WeChat password-hint marker from a file name.

    Every occurrence of the literal text
    '——【解压密码可在微信支付公众号查看】' is deleted; the rest of the string
    is returned unchanged.

    Args:
        name: File name (or any string) that may contain the marker.

    Returns:
        ``name`` with all occurrences of the marker removed.
    """
    # The marker is a fixed literal, so plain substring replacement is used
    # instead of a regular expression: no character in it can ever be
    # misread as a regex metacharacter, and no escaping is required.
    marker = '——【解压密码可在微信支付公众号查看】'
    return name.replace(marker, '')
def sanitize_filename(name: str) -> str:
    """Return a version of ``name`` that is safe to embed in a file name.

    Surrounding whitespace is trimmed first. Each remaining character is
    kept if it is alphanumeric (per ``str.isalnum``) or one of ``-``, ``_``,
    space or ``.``; any other character is replaced with ``_``. If nothing
    is left, the fallback ``"sheet"`` is returned.
    """
    allowed_punctuation = ("-", "_", " ", ".")
    pieces = []
    for ch in name.strip():
        if ch.isalnum() or ch in allowed_punctuation:
            pieces.append(ch)
        else:
            pieces.append("_")
    cleaned = "".join(pieces)
    if not cleaned:
        return "sheet"
    return cleaned
def _read_sheets_with_engine(path: str, engine: str) -> List[pd.DataFrame]:
    """Read every sheet of ``path`` with one specific pandas Excel engine."""
    # Open the workbook once, reuse the handle for every sheet, and close it
    # deterministically even when parsing one of the sheets fails.
    with pd.ExcelFile(path, engine=engine) as workbook:
        return [
            pd.read_excel(workbook, sheet_name=sheet, dtype=object).fillna("")
            for sheet in workbook.sheet_names
        ]


def read_excel_all_sheets(path: str) -> List[pd.DataFrame]:
    """Read an .xlsx or .xls file and return one DataFrame per sheet.

    The ``openpyxl`` engine is tried first; if it fails for any reason the
    ``xlrd`` engine is tried. Cells are read with ``dtype=object`` and
    missing values are replaced with empty strings.

    Args:
        path: Path of the workbook to read.

    Returns:
        The sheets' DataFrames, in workbook order. The list always comes
        from a single engine; a partially successful first attempt never
        leaks frames into the result of the second one.

    Raises:
        RuntimeError: If neither engine can read the file. The message
            contains both underlying errors and the xlrd error is chained
            as the cause.
    """
    try:
        return _read_sheets_with_engine(path, "openpyxl")
    except Exception as e_openpyxl:
        # Deliberately broad: the engines raise unrelated exception types
        # (missing optional dependency, bad zip container, format errors),
        # and any of them should trigger the fallback.
        try:
            return _read_sheets_with_engine(path, "xlrd")
        except Exception as e_xlrd:
            raise RuntimeError(
                f"Failed to read Excel with openpyxl ({e_openpyxl}) and xlrd ({e_xlrd})."
            ) from e_xlrd
def detect_and_load(path: str) -> List[tuple]:
    """Detect the file type of ``path`` and load it as named tables.

    Loading strategy, in order:

    1. ``.csv`` extension: parsed with ``pd.read_csv``; errors propagate.
    2. ``.xlsx`` / ``.xls`` extension: every sheet is read via
       ``read_excel_all_sheets`` and paired with its sheet name. On failure
       the later strategies are still attempted.
    3. Any remaining file: parsed as HTML tables with ``pd.read_html``.
    4. Last resort: parsed as comma-separated text with ``pd.read_csv``.

    All cells are loaded as objects and missing values become ``""``.

    Args:
        path: Path of the input file.

    Returns:
        A list of ``(name, DataFrame)`` pairs. CSV input yields a single
        pair named ``"sheet"``; Excel input uses the sheet names; HTML
        input uses ``"table_1"``, ``"table_2"``, ...

    Raises:
        FileNotFoundError: If ``path`` is not an existing file.
        RuntimeError: If no strategy could read the file. The most recent
            underlying error is chained as the cause.
    """
    if not os.path.isfile(path):
        raise FileNotFoundError(f"Input file not found: {path}")

    ext = os.path.splitext(path)[1].lower()
    # Remember the latest failure so the final error is not context-free.
    last_error: Optional[Exception] = None

    # CSV
    if ext == ".csv":
        df = pd.read_csv(path, dtype=object).fillna("")
        return [("sheet", df)]

    # Excel
    if ext in (".xlsx", ".xls"):
        try:
            dfs = read_excel_all_sheets(path)
            # Re-open only to fetch the sheet names used for output files;
            # the context manager guarantees the handle is released.
            with pd.ExcelFile(path) as xl:
                names = list(xl.sheet_names)
            return list(zip(names if names else ["sheet"], dfs))
        except Exception as exc:
            # Fall through to the HTML attempt: many "File is not a zip
            # file" cases are actually HTML renamed to .xlsx.
            last_error = exc

    # Try HTML parse regardless of extension
    try:
        tables = pd.read_html(path)  # returns list of DataFrames
        # If the export has multiple tables, keep them all.
        named = [
            (f"table_{index}", table.astype(object).fillna(""))
            for index, table in enumerate(tables, start=1)
        ]
        if named:
            return named
    except Exception as exc:
        last_error = exc

    # Last resort: treat the file as comma-separated text. No delimiter
    # sniffing is performed; pandas' default separator (",") is used.
    try:
        df_try = pd.read_csv(path, dtype=object)
        return [("sheet", df_try.fillna(""))]
    except Exception as exc:
        last_error = exc

    raise RuntimeError(
        "Unsupported or corrupted file. If this is a WeChat export, try re-exporting as CSV, or save the web page as HTML and use that file."
    ) from last_error
def write_csvs(named_dfs: List[tuple], base_name: str, out_dir: str, encoding: str = "utf-8-sig") -> List[str]:
    """Write each named DataFrame to a CSV file inside ``out_dir``.

    ``out_dir`` is created if it does not exist. A single table is written
    to ``<base_name>.csv``. Several tables are written to
    ``<base_name>__<sanitized table name>.csv``; when two table names
    sanitize to the same file name (compared case-insensitively), later
    ones receive a numeric suffix (``_2``, ``_3``, ...) so that no table
    overwrites another.

    Args:
        named_dfs: ``(name, DataFrame)`` pairs to write.
        base_name: File-name stem shared by all output files.
        out_dir: Destination directory.
        encoding: Text encoding passed to ``DataFrame.to_csv``.

    Returns:
        The paths of the files written, in input order (empty for empty
        input).
    """
    os.makedirs(out_dir, exist_ok=True)

    if len(named_dfs) == 1:
        out_path = os.path.join(out_dir, f"{base_name}.csv")
        named_dfs[0][1].astype(str).to_csv(out_path, index=False, encoding=encoding)
        return [out_path]

    paths: List[str] = []
    # Case-folded stems already used in this call; case-folding also guards
    # against clashes on case-insensitive filesystems.
    used_stems = set()
    for name, df in named_dfs:
        stem = f"{base_name}__{sanitize_filename(name)}"
        candidate = stem
        counter = 2
        while candidate.casefold() in used_stems:
            candidate = f"{stem}_{counter}"
            counter += 1
        used_stems.add(candidate.casefold())

        out_path = os.path.join(out_dir, f"{candidate}.csv")
        df.astype(str).to_csv(out_path, index=False, encoding=encoding)
        paths.append(out_path)
    return paths
def main(argv: Optional[List[str]] = None):
    """Command-line entry point: convert one bill file to CSV.

    Parses ``argv`` (or ``sys.argv`` when ``None``), loads the input with
    ``detect_and_load`` and writes the result with ``write_csvs``. The
    output file stem is the input's base name with the WeChat
    password-hint marker and the extension removed. Written paths are
    printed to stdout; on any failure the error is printed to stderr and
    the process exits with status 1.
    """
    cli = argparse.ArgumentParser(description="Convert WeChat bill export (xlsx/xls/csv/html) to CSV.")
    cli.add_argument("input_path", help="Path to the WeChat bill file (.xlsx/.xls/.csv/.html)")
    cli.add_argument("-o", "--out-dir", default=None, help="Output directory (default: same as input)")
    cli.add_argument("-e", "--encoding", default="utf-8-sig", help="CSV encoding (default: utf-8-sig)")
    options = cli.parse_args(argv)

    source_path = options.input_path

    # Default to the directory that holds the input file.
    if options.out_dir:
        target_dir = options.out_dir
    else:
        target_dir = os.path.dirname(os.path.abspath(source_path)) or os.getcwd()

    # Strip the marker before dropping the extension to build the stem.
    stem, _extension = os.path.splitext(clean_filename_suffix(os.path.basename(source_path)))

    try:
        tables = detect_and_load(source_path)
        written = write_csvs(tables, stem, target_dir, options.encoding)
        if len(written) != 1:
            print("CSV files saved:")
            for saved_path in written:
                print(f"- {saved_path}")
        else:
            print(f"CSV saved: {written[0]}")
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment