Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save erbanku/ab9481facbd6b8f393706b99bb18fd3c to your computer and use it in GitHub Desktop.
Save erbanku/ab9481facbd6b8f393706b99bb18fd3c to your computer and use it in GitHub Desktop.
Download the online images referenced in a Mathpix Notes archive to local storage and update the image links in the files to point to the local copies.
import os
import re
import requests
from urllib.parse import urlparse
def download_file(url, folder):
    """Download *url* into *folder* and return the path of the saved file.

    The file name is the last component of the URL path (the query string is
    ignored). When that component is empty, ``image`` is used instead. If a
    file of the chosen name already exists in *folder*, a numeric suffix
    (``_1``, ``_2``, ...) is inserted before the extension so that no existing
    file is overwritten.

    Args:
        url: Address of the remote file.
        folder: Directory the file is written to; it is not created here.

    Returns:
        The local path of the downloaded file, or ``None`` if the download
        failed. Failures are printed rather than raised.
    """
    filename = os.path.basename(urlparse(url).path)
    if not filename:
        # URLs such as "https://host/" have no usable last path component.
        filename = "image"
    base, ext = os.path.splitext(filename)
    local_path = os.path.join(folder, filename)
    i = 1
    while os.path.exists(local_path):
        local_path = os.path.join(folder, f"{base}_{i}{ext}")
        i += 1
    try:
        # The context manager releases the streamed connection on every path.
        with requests.get(url, stream=True, timeout=15) as resp:
            resp.raise_for_status()
            with open(local_path, "wb") as f:
                for chunk in resp.iter_content(8192):
                    f.write(chunk)
    except Exception as e:
        # Deliberately broad: this is a best-effort batch step, and one bad
        # URL must not abort the processing of the remaining images.
        print(f"Failed to download {url}: {e}")
        # local_path did not exist before this call, so anything there now is
        # a truncated download that must not be mistaken for a valid image.
        try:
            os.remove(local_path)
        except OSError:
            pass  # Nothing was written, or it cannot be removed.
        return None
    print(f"Downloaded: {url} -> {local_path}")
    return local_path
def update_markdown(md_path, assets_dir):
    """Download the remote images used by one file and link to the local copies.

    Remote images are recognised in two forms: Markdown ``![alt](http...)``
    and HTML ``<img ... src="http...">``. Every distinct URL is downloaded
    once with ``download_file``; each occurrence of a successfully downloaded
    URL in the file is then replaced by the path of the local copy, written
    relative to the file's own directory with forward slashes.

    Args:
        md_path: Path of the UTF-8 text file to update in place.
        assets_dir: Directory the images are downloaded into.

    Returns:
        None. The file is rewritten only if at least one image was downloaded.
    """
    with open(md_path, "r", encoding="utf-8") as file:
        content = file.read()
    pattern_md = re.compile(r"!\[.*?\]\((https?://[^\s)]+)\)")
    pattern_html = re.compile(r'<img\s+[^>]*src="(https?://[^"]+)"')
    # dict.fromkeys de-duplicates while keeping a fixed order, so the numeric
    # suffixes picked for colliding file names are the same on every run.
    urls = list(
        dict.fromkeys(pattern_md.findall(content) + pattern_html.findall(content))
    )
    if not urls:
        print(f" No remote images found in {md_path}")
        return
    md_dir = os.path.dirname(md_path) or os.curdir
    url_to_local = {}
    for url in urls:
        local_file = download_file(url, assets_dir)
        if not local_file:
            continue
        try:
            rel_path = os.path.relpath(local_file, md_dir)
        except ValueError:
            # No relative path exists (e.g. different drives on Windows).
            rel_path = os.path.join("assets", os.path.basename(local_file))
        # Links inside the document always use forward slashes.
        url_to_local[url] = rel_path.replace(os.sep, "/")
    if not url_to_local:
        # Every download failed; leave the file untouched.
        return
    # Replace the longest URLs first: when one URL is a prefix of another
    # (e.g. "...x=2" and "...x=22"), replacing the shorter one first would
    # corrupt every occurrence of the longer one.
    for url in sorted(url_to_local, key=len, reverse=True):
        rel_path = url_to_local[url]
        content = content.replace(url, rel_path)
        print(f" Replaced {url} -> {rel_path}")
    with open(md_path, "w", encoding="utf-8") as file:
        file.write(content)
def process_all_markdowns(notes_dir="notes"):
    """Localise the remote images of every ``.mmd`` file directly in *notes_dir*.

    Each regular file whose name ends in ``.mmd`` is passed to
    ``update_markdown`` together with the ``assets`` sub-directory of
    *notes_dir*, which is created when there is at least one file to process.
    Sub-directories are not searched.

    Args:
        notes_dir: Directory that holds the ``.mmd`` files.

    Returns:
        None. Progress and problems are reported with ``print``.
    """
    if not os.path.isdir(notes_dir):
        # Do not create a missing (possibly mistyped) directory as a side
        # effect of preparing its assets folder.
        print(f"Notes directory not found: {notes_dir}")
        return
    # Sorted so that files are processed in the same order on every run.
    mmd_paths = [
        os.path.join(notes_dir, fname)
        for fname in sorted(os.listdir(notes_dir))
        if fname.endswith(".mmd")
        and os.path.isfile(os.path.join(notes_dir, fname))
    ]
    if not mmd_paths:
        print(f"No .mmd files found in {notes_dir}!")
    else:
        assets_dir = os.path.join(notes_dir, "assets")
        os.makedirs(assets_dir, exist_ok=True)
        for md_path in mmd_paths:
            print(f"Processing {md_path} ...")
            update_markdown(md_path, assets_dir)
    # Final status line ("all processing finished").
    print("全部处理完成!")
if __name__ == "__main__":
    import sys

    # An optional first command-line argument selects the notes directory;
    # without it the function's default directory is used.
    process_all_markdowns(*sys.argv[1:2])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment