Last active
January 3, 2025 20:58
-
-
Save maikol-solis/fbf7afbf541dbe11f4ffc55cfec0472d to your computer and use it in GitHub Desktop.
org-roam to denote converter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
This script reads and transforms Org-mode files by analyzing the filename
and its contents. It determines whether the file has a date-based signature
(e.g., 20220225T070414) or an alphanumeric signature (e.g., 1c3a). If it is
date-based, that signature is used directly as the "Denote ID" and the rest of
the filename is treated as the descriptive part. If it is alphanumeric, the
script converts each letter into =N= form, parses the #+DATE: property if
present (or uses the current time), and constructs a new Denote ID from it.
The script then rewrites the file in a standardized format: it writes new
front matter (properties, title, date, etc.), preserves certain metadata
(like :Time-stamp:), removes the old :PROPERTIES: block and #+TITLE: line, and
comments out the old #+DATE: line. Finally, it saves the file under a new name
reflecting the transformed signature and/or Denote ID.
""" | |
import re | |
import os | |
from datetime import datetime | |
def letter_to_alphabet_index(matchobj):
    """
    Replacement callback for ``re.sub``: wrap a letter's alphabet rank in '='.

    The matched text is lower-cased first, so 'a'/'A' -> '=1=' and
    'z'/'Z' -> '=26='.
    """
    char = matchobj.group(0).lower()
    # Distance from 'a' is zero-based; the rank written out is one-based.
    zero_based = ord(char) - ord("a")
    return "=" + str(zero_based + 1) + "="
def convert_alphanumeric_signature(old_sig):
    """
    Rewrite an alphanumeric signature so that letters become numeric markers.

    Every ASCII letter is replaced by '=N=' (N = 1-based alphabet index, via
    letter_to_alphabet_index); all other characters are kept as they are.
    A single '=' at the very end of the result is then dropped.

    Examples: '1c3a' -> '1=3=3=1', '1a' -> '1=1'.

    Only the trailing '=' is removed: a signature that starts with a letter
    keeps its leading '=', and two adjacent letters leave '==' between their
    numbers (e.g. 'ab' -> '=1==2').
    """
    with_markers = re.sub(r"[A-Za-z]", letter_to_alphabet_index, old_sig)
    # A signature ending in a letter leaves one dangling '=' behind.
    return re.sub(r"=$", "", with_markers)
def parse_date_from_org(content):
    """
    Find a ``#+DATE: [...]`` keyword line and parse its bracketed timestamp.

    Lines are examined in order (after stripping surrounding whitespace, and
    ignoring keyword case). For each matching line the bracket contents are
    tried against the supported layouts; the first line that parses wins.
    A matching line that cannot be parsed is skipped and the scan continues.

    Returns (raw_str, datetime_obj), where raw_str is the text between the
    brackets, or (None, None) when no line yields a parseable timestamp.

    This only concerns the #+date: keyword inside the file; it has nothing
    to do with a date-based signature in the filename.
    """
    keyword_re = re.compile(r"^#\+DATE:\s*\[(.*?)\]", re.IGNORECASE | re.MULTILINE)
    layouts = (
        "%Y-%m-%d %a %H:%M:%S",  # 2022-03-17 Thu 15:47:00
        "%Y-%m-%d %a %H:%M",  # 2022-03-17 Thu 15:47
        "%Y-%m-%d %H:%M:%S",  # 2022-03-17 15:47:00
        "%Y-%m-%d %H:%M",  # 2022-03-17 15:47
    )

    def _first_parse(stamp):
        # Return the datetime for the first layout that fits, else None.
        for layout in layouts:
            try:
                return datetime.strptime(stamp, layout)
            except ValueError:
                continue
        return None

    for raw_line in content.splitlines():
        found = keyword_re.match(raw_line.strip())
        if found is None:
            continue
        stamp = found.group(1)
        parsed = _first_parse(stamp)
        if parsed is not None:
            return stamp, parsed
    return None, None
def datetime_to_denote_id(dt):
    """
    Format a datetime as a Denote identifier (YYYYmmddTHHMMSS).

    When dt is None the current local time is formatted instead.
    """
    moment = datetime.now() if dt is None else dt
    return moment.strftime("%Y%m%dT%H%M%S")
def _split_signature_and_description(base_no_ext):
    """Split a file stem at its first '_' or '-' into (signature, description).

    Splitting at whichever separator occurs first keeps the signature intact
    for stems such as '1c3a-principal_component_analysis'. A stem without any
    separator is returned as both signature and description.
    """
    parts = re.split(r"[_-]", base_no_ext, maxsplit=1)
    if len(parts) == 2:
        return parts[0], parts[1]
    return base_no_ext, base_no_ext


def _split_preamble(content):
    """Split Org text into (text before the first heading, the remainder)."""
    first_heading = re.search(r"^\*+[ \t]", content, flags=re.MULTILINE)
    cut = first_heading.start() if first_heading else len(content)
    return content[:cut], content[cut:]


def _strip_old_front_matter(preamble, body):
    """Remove the old file-level metadata and return the cleaned, stripped text."""
    # Only drawers before the first heading are file-level metadata; drawers
    # that belong to headings (further down) must survive the conversion.
    preamble = re.sub(r":PROPERTIES:.*?:END:\s*", "", preamble, flags=re.DOTALL)
    cleaned = preamble + body
    cleaned = re.sub(
        r"^#\+TITLE:.*$", "", cleaned, flags=re.MULTILINE | re.IGNORECASE
    )
    # Comment out the old #+DATE: line. "\g<0>" is the whole match; a plain
    # "\0" in a replacement template is an octal escape (a NUL character).
    # "# " is the Org comment prefix.
    cleaned = re.sub(
        r"^#\+DATE:.*$", r"# \g<0>", cleaned, flags=re.MULTILINE | re.IGNORECASE
    )
    return cleaned.strip()


def transform_org_file(old_filename):
    """
    Convert one Org file to the Denote naming scheme and front matter.

    Steps:
    1) Split the file stem into an old signature and a descriptive part at
       the first '_' or '-'.
    2) If the signature looks like YYYYmmddTHHMMSS it becomes the Denote ID
       and the new signature is empty: ``--<descr>@@<id>.org``.
       Otherwise the signature is converted with
       convert_alphanumeric_signature and the Denote ID is derived from the
       file's #+DATE: (or the current time):
       ``==<signature>--<descr>@@<id>.org``.
    3) Underscores in the descriptive part become hyphens.
    4) New front matter is written; the old file-level :PROPERTIES: drawer
       and #+TITLE: line are removed and the old #+DATE: line is commented
       out. A :Time-stamp: entry from the file-level metadata is carried over.

    The result is written, UTF-8 encoded, to a relative path, i.e. into the
    current working directory; the source file itself is left untouched.
    Files whose extension is not ``.org`` are skipped with a message.

    Parameters:
        old_filename: path of the Org file to convert.

    Returns:
        None.

    Raises:
        OSError: if the source cannot be read or the target cannot be written.
    """
    base = os.path.basename(old_filename)
    base_no_ext, ext = os.path.splitext(base)
    if ext.lower() != ".org":
        print(f"Skipping non-Org file: {old_filename}")
        return

    with open(old_filename, "r", encoding="utf-8") as f:
        original_content = f.read()

    # e.g. "20220225T070414_significance_testing"
    # e.g. "1c3a_principal_component_analysis"
    old_sig, old_descr = _split_signature_and_description(base_no_ext)
    new_descr = old_descr.replace("_", "-")

    # One timestamp for every "now" fallback so that the #+date: keyword and
    # the identifier can never disagree by a second.
    now = datetime.now()
    raw_date_str, dt_object = parse_date_from_org(original_content)
    if not raw_date_str:
        raw_date_str = now.strftime("%Y-%m-%d %a %H:%M:%S")

    if re.fullmatch(r"\d{8}T\d{6}", old_sig):
        # Date-based name: the old signature already is the Denote ID, so the
        # #+DATE: keyword is only used for the #+date: line, not for the name.
        new_signature = ""
        denote_id = old_sig
        new_filename = f"--{new_descr}@@{denote_id}.org"
    else:
        # Alphanumeric name such as "1c3a": convert the signature and take
        # the Denote ID from #+DATE: (falling back to the current time).
        new_signature = convert_alphanumeric_signature(old_sig)
        denote_id = datetime_to_denote_id(dt_object if dt_object is not None else now)
        new_filename = f"=={new_signature}--{new_descr}@@{denote_id}.org"

    # "[ \t]*" rather than "\s*": an empty "#+TITLE:" must not swallow the
    # following line as its value.
    title_match = re.search(
        r"^#\+TITLE:[ \t]*(.*)$", original_content, flags=re.IGNORECASE | re.MULTILINE
    )
    file_title = title_match.group(1).strip() if title_match else ""
    if not file_title:
        # The descriptive part may contain either separator.
        file_title = old_descr.replace("_", " ").replace("-", " ").title()

    preamble, body = _split_preamble(original_content)

    # Carry over a :Time-stamp: entry from the file-level metadata only;
    # heading-level drawers are preserved in place further down.
    ts_match = re.search(
        r"^:Time-stamp:\s*(<.*>)", preamble, flags=re.MULTILINE | re.IGNORECASE
    )
    ts_value = ts_match.group(1) if ts_match else None

    new_props = f":PROPERTIES:\n:ID: {new_signature}\n"
    if ts_value:
        new_props += f":Time-stamp: {ts_value}\n"
    new_props += ":END:\n"

    # The subtitle/signature lines are emitted even when the signature is empty.
    new_front_matter = (
        f"{new_props}"
        f"#+title: {file_title}\n"
        f"#+subtitle: {new_signature}\n"
        f"#+date: [{raw_date_str}]\n"
        f"#+filetags:\n"
        f"#+identifier: {denote_id}\n"
        f"#+signature: {new_signature}\n\n"
    )

    final_content = new_front_matter + _strip_old_front_matter(preamble, body) + "\n"

    with open(new_filename, "w", encoding="utf-8") as fw:
        fw.write(final_content)
    print(f"Renamed '{old_filename}' -> '{new_filename}'")
def main(source_dir="../2_ZK/"):
    """
    Convert every Org file found directly inside source_dir.

    Entries are visited in sorted order. Only regular files whose name ends
    in ``.org`` (in any letter case, matching what transform_org_file
    accepts) are processed; a directory that merely happens to be named
    ``*.org`` is skipped instead of failing when opened.

    Parameters:
        source_dir: directory to scan (not recursive). Defaults to the
            location the script was originally written for.

    Raises:
        OSError: if source_dir cannot be listed.
    """
    for fname in sorted(os.listdir(source_dir)):
        path = os.path.join(source_dir, fname)
        if fname.lower().endswith(".org") and os.path.isfile(path):
            transform_org_file(path)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment