Last active
November 12, 2024 09:42
-
-
Save SchneiderSam/6c3af18d5d5a4a9b9024d9f6f02f888a to your computer and use it in GitHub Desktop.
Markdown Note Exporter: Recursively Scan and Extract Document IDs and Headings to Create Notes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import re | |
# Alias dictionary for Bible books.
# Each key is the canonical German book name; its value lists every spelling
# or abbreviation that should resolve to that book. The canonical name is
# always the first entry of its own list. get_aliases() scans the entries in
# insertion order and stops at the first list containing the book name.
bible_aliases = {
    "1. Mose": ["1. Mose", "1Mo", "1. Mo", "1 Mo", "Genesis"],
    "2. Mose": ["2. Mose", "2Mo", "2. Mo", "2 Mo", "Exodus"],
    "3. Mose": ["3. Mose", "3Mo", "3. Mo", "3 Mo", "Levitikus"],
    "4. Mose": ["4. Mose", "4Mo", "4. Mo", "4 Mo", "Numeri"],
    "5. Mose": ["5. Mose", "5Mo", "5. Mo", "5 Mo", "Deuteronomium"],
    "Josua": ["Josua", "Jos"],
    "Richter": ["Richter", "Ri"],
    "Ruth": ["Ruth", "Ru"],
    "1. Samuel": ["1. Samuel", "1Sa", "1. Sa", "1 Sam"],
    "2. Samuel": ["2. Samuel", "2Sam", "2. Sam", "2 Sam"],
    "1. Könige": ["1. Könige", "1Kö", "1. Kö", "1 Kön"],
    "2. Könige": ["2. Könige", "2Kö", "2. Kö", "2 Kön"],
    "1. Chronika": ["1. Chronika", "1Chr", "1. Chr", "1 Chr"],
    "2. Chronika": ["2. Chronika", "2Chr", "2. Chr", "2 Chr"],
    "Esra": ["Esra", "Esr"],
    "Nehemia": ["Nehemia", "Ne"],
    "Esther": ["Esther", "Est"],
    "Hiob": ["Hiob", "Hi"],
    "Psalmen": ["Psalmen", "Ps"],
    "Sprüche": ["Sprüche", "Spr"],
    "Prediger": ["Prediger", "Pr"],
    "Das Hohelied": ["Das Hohelied", "Das Lied Salomos", "Hoh"],
    "Jesaja": ["Jesaja", "Jes"],
    "Jeremia": ["Jeremia", "Jer"],
    "Klagelieder": ["Klagelieder", "Klg"],
    "Hesekiel": ["Hesekiel", "Hes"],
    "Daniel": ["Daniel", "Da"],
    "Hosea": ["Hosea", "Hos"],
    "Joel": ["Joel"],
    "Amos": ["Amos", "Am"],
    "Obadja": ["Obadja", "Ob"],
    "Jona": ["Jona", "Jon"],
    "Micha": ["Micha", "Mi"],
    "Nahum": ["Nahum", "Nah"],
    "Habakuk": ["Habakuk", "Hab"],
    "Zephanja": ["Zephanja", "Ze"],
    "Haggai": ["Haggai", "Hag"],
    "Sacharja": ["Sacharja", "Sach"],
    "Maleachi": ["Maleachi", "Mal"],
    "Matthäus": ["Matthäus", "Mat"],
    "Markus": ["Markus", "Mar"],
    "Lukas": ["Lukas", "Luk"],
    "Johannes": ["Johannes", "Joh"],
    "Apostelgeschichte": ["Apostelgeschichte", "Apg"],
    "Römer": ["Römer", "Rö"],
    "1. Korinther": ["1. Korinther", "1Kor", "1. Kor", "1 Kor"],
    "2. Korinther": ["2. Korinther", "2Kor", "2. Kor", "2 Kor"],
    "Galater": ["Galater", "Gal"],
    "Epheser": ["Epheser", "Eph"],
    "Philipper": ["Philipper", "Phil"],
    "Kolosser": ["Kolosser", "Kol"],
    "1. Thessalonicher": ["1. Thessalonicher", "1Thess", "1. Thess", "1 Thess"],
    "2. Thessalonicher": ["2. Thessalonicher", "2Thess", "2. Thess", "2 Thess"],
    "1. Timotheus": ["1. Timotheus", "1Tim", "1. Tim", "1 Tim"],
    "2. Timotheus": ["2. Timotheus", "2Tim", "2. Tim", "2 Tim"],
    "Titus": ["Titus", "Tit"],
    "Philemon": ["Philemon", "Phlm"],
    "Hebräer": ["Hebräer", "Heb"],
    "Jakobus": ["Jakobus", "Jak"],
    "1. Petrus": ["1. Petrus", "1Petr", "1. Petr", "1 Petr"],
    "2. Petrus": ["2. Petrus", "2Petr", "2. Petr", "2 Petr"],
    "1. Johannes": ["1. Johannes", "1Joh", "1. Joh", "1 Joh"],
    "2. Johannes": ["2. Johannes", "2Joh", "2. Joh", "2 Joh"],
    "3. Johannes": ["3. Johannes", "3Joh", "3. Joh", "3 Joh"],
    "Judas": ["Judas", "Jud"],
    "Offenbarung": ["Offenbarung", "Off"]
}
def extract_info(content):
    """Extract the document ID, heading, and link from a Markdown file's text.

    Three ``key: value`` entries are searched for anywhere in *content*:

    * ``document: "[[<id>]]"`` -- the text between the wikilink brackets,
    * ``heading: <text>`` -- the remainder of that line, whitespace-trimmed,
    * ``link: <url>`` -- the first ``http``/``https`` URL after the key.

    Args:
        content: Full text of the Markdown file.

    Returns:
        A ``(document_id, heading, link)`` tuple. An element is ``None`` when
        its entry is missing; ``heading`` is also ``None`` when the entry is
        present but has no value on its line.
    """
    document_match = re.search(r'document:\s*"\[\[(.*?)\]\]"', content)
    # Only horizontal whitespace may follow the key: ``\s*`` would also
    # consume the newline of an empty ``heading:`` entry and capture the
    # *next* line as the heading. ``.*`` stops at the end of the line, so a
    # heading on the last line of a file without a trailing newline is found.
    heading_match = re.search(r'heading:[ \t]*(.*)', content)
    link_match = re.search(r'link:\s*(https?://[^\s]+)', content)

    document_id = document_match.group(1) if document_match else None
    # Trim trailing blanks and treat an empty value like a missing entry.
    heading = (heading_match.group(1).strip() or None) if heading_match else None
    link = link_match.group(1) if link_match else None
    return document_id, heading, link
def get_aliases(heading):
    """Return every alias spelling of the Bible reference in *heading*.

    The heading is expected to look like ``"<book> <chapter>:<verse>"``,
    e.g. ``"Johannes 3:16"`` or ``"1. Mose 1:1"``. The book part is looked up
    in ``bible_aliases``; on success each alias of that book is combined with
    the ``chapter:verse`` part. Text following the verse number is ignored.

    Progress is reported on stdout via ``print``.

    Args:
        heading: Heading text taken from the source note.

    Returns:
        A list of ``"<alias> <chapter>:<verse>"`` strings, or ``[heading]``
        (the unchanged input) when the heading does not have the expected
        shape or the book is not listed in ``bible_aliases``.
    """
    print(f"Input heading: {heading}")

    # Parse a cleaned copy so stray whitespace or YAML-style quotes around
    # the value do not prevent the book lookup.
    reference = heading.strip().strip("\"'").strip()

    # The book name may start with an ordinal ("1. Mose", "1Mo", "1 Mo").
    # A pattern that begins with [^0-9]+ can never match such headings, so
    # every numbered book used to fall back to a single alias.
    match = re.match(r"((?:\d\.?\s*)?[^0-9]+?)\s+(\d+:\d+)", reference)
    if not match:
        print(f"No match found for heading: {heading}")
        return [heading]

    book = match.group(1).strip()
    chapter_verse = match.group(2)
    print(f"Extracted book: {book}")
    print(f"Extracted chapter_verse: {chapter_verse}")

    # First alias list that contains the extracted book spelling, if any.
    book_aliases = next(
        (aliases for aliases in bible_aliases.values() if book in aliases),
        None,
    )
    if book_aliases is None:
        print(f"No base book found, using original heading: {heading}")
        return [heading]

    aliases_with_chapter_verse = [f"{alias} {chapter_verse}" for alias in book_aliases]
    print(f"Generated aliases: {aliases_with_chapter_verse}")
    return aliases_with_chapter_verse
def create_new_note(export_folder_path, document_id, heading, link):
    """Write ``<document_id>.md`` with YAML front matter into the export folder.

    The export folder is created if it does not exist. The note consists of a
    front-matter block only: a fixed tag, one ``aliases`` entry per value
    returned by ``get_aliases(heading)``, the ``link``, and the empty keys
    ``related``, ``created`` and ``last modified``. An existing file with the
    same name is overwritten.

    Args:
        export_folder_path: Directory that receives the generated note.
        document_id: Used as the file name (``.md`` is appended).
        heading: Heading text from which the alias list is derived.
        link: Value written to the ``link`` key.
    """
    os.makedirs(export_folder_path, exist_ok=True)

    # One list item per alias of the reference named in the heading.
    # NOTE(review): list-item indentation is reproduced from a copy of the
    # file whose whitespace may have been collapsed -- confirm if it matters.
    alias_lines = "".join(f" - {alias}\n" for alias in get_aliases(heading))
    note_path = os.path.join(export_folder_path, f"{document_id}.md")

    front_matter = (
        "---\n"
        "tags:\n"
        "- 📝-type/vers\n"
        "aliases:\n"
        f"{alias_lines}"
        "\n"
        f"link: {link}\n"
        "related:\n"
        "created:\n"
        "last modified:\n"
        "---\n"
    )

    with open(note_path, "w", encoding="utf-8") as note_file:
        note_file.write(front_matter)
def process_folder(folder_path, export_folder_path):
    """Create an export note for every usable Markdown file below *folder_path*.

    Walks *folder_path* recursively. Each ``.md`` file is read as UTF-8 and
    passed to ``extract_info``; files for which the document ID, heading, or
    link is missing are skipped. For the remaining files the aliases are
    printed for debugging and ``create_new_note`` writes the note.

    Args:
        folder_path: Root directory to scan.
        export_folder_path: Directory that receives the generated notes.
    """
    for current_dir, _subdirs, names in os.walk(folder_path):
        for name in names:
            if not name.endswith(".md"):
                continue

            source_path = os.path.join(current_dir, name)
            with open(source_path, "r", encoding="utf-8") as source_file:
                document_id, heading, link = extract_info(source_file.read())

            # All three fields are required to build a note.
            if not (document_id and heading and link):
                continue

            aliases = get_aliases(heading)
            print(f"Debug - Aliases for {heading}: {aliases}")  # debug output
            create_new_note(export_folder_path, document_id, heading, link)
# Paths of the source folder (scanned for notes) and of the export folder
# (where the generated notes are written).
folder_path = r"C:\Users\..."
export_folder_path = r"C:\Users\..."

process_folder(folder_path=folder_path, export_folder_path=export_folder_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Script only produces 1 alias