Last active
May 22, 2025 10:07
-
-
Save erbanku/ab9481facbd6b8f393706b99bb18fd3c to your computer and use it in GitHub Desktop.
Download the online images referenced in a Mathpix Notes archive to a local folder, and update the image links in each file to point to the local copies.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import re | |
import requests | |
from urllib.parse import urlparse | |
def download_file(url, folder):
    """Download ``url`` into ``folder`` and return the path of the saved file.

    The file name is the last segment of the URL path (the query string is
    ignored). If a file with that name already exists in ``folder``, a numeric
    suffix (``_1``, ``_2``, ...) is inserted before the extension so existing
    files are never overwritten.

    Args:
        url: HTTP(S) address of the file to fetch.
        folder: Directory that receives the download.

    Returns:
        The local path of the saved file, or ``None`` when the download
        failed. Failures are reported on stdout rather than raised.
    """
    filename = os.path.basename(urlparse(url).path)
    if not filename:
        # A URL path ending in "/" has no final segment; fall back to a
        # neutral name instead of producing a bare "_1" file.
        filename = "image"
    base, ext = os.path.splitext(filename)
    local_path = os.path.join(folder, filename)
    i = 1
    while os.path.exists(local_path):
        filename = f"{base}_{i}{ext}"
        local_path = os.path.join(folder, filename)
        i += 1

    try:
        # The context manager releases the streamed connection even when
        # writing fails part-way through.
        with requests.get(url, stream=True, timeout=15) as resp:
            resp.raise_for_status()
            with open(local_path, "wb") as f:
                for chunk in resp.iter_content(1024):
                    f.write(chunk)
    except Exception as e:
        # Deliberately broad: one bad image must not abort the whole batch.
        # local_path did not exist before this call, so anything there now is
        # a truncated download that would otherwise shadow a later retry.
        try:
            os.remove(local_path)
        except OSError:
            # Nothing was written (or it is already gone); nothing to clean.
            pass
        print(f"Failed to download {url}: {e}")
        return None

    print(f"Downloaded: {url} -> {local_path}")
    return local_path
def update_markdown(md_path, assets_dir):
    """Localize the remote images referenced by one Markdown file.

    Remote image URLs are collected from Markdown image syntax
    (``![alt](http...)``) and from HTML ``<img src="http...">`` tags. Each
    distinct URL is downloaded into ``assets_dir`` with ``download_file``, and
    every occurrence of a successfully downloaded URL in the file is replaced
    by the path of the local copy. URLs whose download failed are left as-is.

    Args:
        md_path: Path of the Markdown file to rewrite in place.
        assets_dir: Directory that receives the downloaded images.

    Returns:
        None. Progress is reported on stdout.
    """
    with open(md_path, "r", encoding="utf-8") as file:
        content = file.read()

    pattern_md = re.compile(r"!\[.*?\]\((https?://[^\s)]+)\)")
    pattern_html = re.compile(r'<img\s+[^>]*src="(https?://[^"]+)"')
    # dict.fromkeys de-duplicates while keeping document order, so downloads
    # (and therefore any "_1"/"_2" collision suffixes) are deterministic.
    urls = list(
        dict.fromkeys(pattern_md.findall(content) + pattern_html.findall(content))
    )
    if not urls:
        print(f" No remote images found in {md_path}")
        return

    md_dir = os.path.dirname(md_path) or "."
    url_to_local = {}
    for url in urls:
        local_file = download_file(url, assets_dir)
        if not local_file:
            continue
        try:
            # Link relative to the Markdown file so the reference stays valid
            # wherever assets_dir actually lives.
            rel_path = os.path.relpath(local_file, md_dir).replace(os.sep, "/")
        except ValueError:
            # relpath cannot relate paths on different Windows drives; keep
            # the conventional "assets/<name>" form in that case.
            rel_path = "assets/" + os.path.basename(local_file)
        url_to_local[url] = rel_path

    if not url_to_local:
        # Every download failed: leave the file untouched.
        return

    # Replace longer URLs first: when one URL is a prefix of another (e.g. the
    # same image with and without a query string), replacing the shorter one
    # first would corrupt the longer one.
    for url in sorted(url_to_local, key=len, reverse=True):
        rel_path = url_to_local[url]
        content = content.replace(url, rel_path)
        print(f" Replaced {url} -> {rel_path}")

    with open(md_path, "w", encoding="utf-8") as file:
        file.write(content)
def process_all_markdowns(notes_dir="notes"):
    """Localize remote images for every ``.mmd`` file directly in ``notes_dir``.

    Images are stored in an ``assets`` sub-directory of ``notes_dir``, which is
    created if needed. Only regular files whose name ends in ``.mmd`` are
    processed; sub-directories are not searched.

    Args:
        notes_dir: Directory containing the ``.mmd`` files.

    Returns:
        None. Progress is reported on stdout.
    """
    if not os.path.isdir(notes_dir):
        # Bail out before makedirs below: it would otherwise create a
        # mistyped notes directory and then report that it is empty.
        print(f"Notes directory not found: {notes_dir}")
        return

    assets_dir = os.path.join(notes_dir, "assets")
    os.makedirs(assets_dir, exist_ok=True)

    any_found = False
    # Sorted so files are processed in a reproducible order.
    for fname in sorted(os.listdir(notes_dir)):
        md_path = os.path.join(notes_dir, fname)
        if os.path.isfile(md_path) and fname.endswith(".mmd"):
            print(f"Processing {md_path} ...")
            update_markdown(md_path, assets_dir)
            any_found = True

    if not any_found:
        print(f"No .mmd files found in {notes_dir}!")
    print("全部处理完成!")
if __name__ == "__main__":
    import sys

    # An optional first command-line argument selects the notes directory;
    # without it the function's own default is used.
    cli_dir = sys.argv[1:2]
    process_all_markdowns(*cli_dir)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment