Skip to content

Instantly share code, notes, and snippets.

@maikol-solis
Last active January 3, 2025 20:58
Show Gist options
  • Save maikol-solis/fbf7afbf541dbe11f4ffc55cfec0472d to your computer and use it in GitHub Desktop.
Save maikol-solis/fbf7afbf541dbe11f4ffc55cfec0472d to your computer and use it in GitHub Desktop.
org-roam to denote converter
"""
This script reads and transforms Org-mode files by analyzing the filename
and its contents. It determines whether the file has a date-based signature
(e.g., 20220225T070414) or an alphanumeric signature (e.g., 1c3a). If it’s
date-based, that signature is directly used as the “Denote ID” and the rest of
the filename is treated as a descriptive part. If it’s alphanumeric, the script
converts letters into =N= form, parses the #+DATE: property if present (or uses
the current time), and constructs a new Denote ID. The script then rewrites the
file in a standardized format. It updates the front matter (properties, title,
date, etc.), preserves certain metadata (like :Time-stamp:), removes the old
:PROPERTIES: block and #+TITLE: line, and comments out the old #+DATE: line.
Finally, it writes the converted copy to the current working directory under a
new name reflecting the transformed signature and/or Denote ID; the original
file is left in place.
"""
import re
import os
from datetime import datetime
def letter_to_alphabet_index(matchobj: "re.Match[str]") -> str:
    """Return the ``=N=`` replacement text for one matched letter.

    Meant to be used as a ``re.sub`` callback on a pattern that matches a
    single ASCII letter. ``N`` is the letter's 1-based position in the
    alphabet; the lookup is case-insensitive.

    E.g., 'a' -> '=1=', 'C' -> '=3=', 'z' -> '=26='.

    Args:
        matchobj: Match object whose whole match (group 0) is the letter.

    Returns:
        The string ``=N=``.
    """
    letter = matchobj.group(0).lower()
    # ord("a") maps to 1, ord("b") to 2, ...
    position = ord(letter) - ord("a") + 1
    return f"={position}="
def convert_alphanumeric_signature(old_sig: str) -> str:
    """Convert an alphanumeric signature to its ``=``-separated numeric form.

    - Digits (and any other non-letter characters) are kept as they are.
    - Each ASCII letter becomes '=N=' (see ``letter_to_alphabet_index``).
    - A single trailing '=' is removed afterwards.

    Examples:
        '1c3a' -> '1=3=3=1'
        '1a'   -> '1=1'
        'ab'   -> '=1==2'  (a leading letter keeps its leading '=', and
                            adjacent letters yield a doubled '==')

    Args:
        old_sig: The signature part of the old filename, e.g. '1c3a'.

    Returns:
        The converted signature string.
    """
    pattern_letters = re.compile(r"[A-Za-z]")
    new_sig = pattern_letters.sub(letter_to_alphabet_index, old_sig)
    # Only the end is trimmed: '1=1=' -> '1=1'.
    new_sig = re.sub(r"=$", "", new_sig)
    return new_sig
def parse_date_from_org(content):
    """Find and parse the first usable ``#+DATE: [...]`` line in *content*.

    The bracketed value is expected to look like an Org inactive timestamp:
    ``YYYY-mm-dd``, optionally followed by a day name, optionally followed by
    ``HH:MM`` or ``HH:MM:SS``. The day-name token is skipped rather than
    parsed, so the result does not depend on the process locale or on the
    language the day name was written in. A stamp without a time is read as
    midnight.

    Lines are examined in order; a ``#+DATE:`` line whose value cannot be
    parsed is skipped and the search continues with the following lines.

    Note: This is only for the #+date: property in the file content,
    NOT for the final filename if the file is date-based (2022...).

    Args:
        content: Full text of the Org file.

    Returns:
        ``(raw_str, datetime_obj)`` where ``raw_str`` is the text between the
        brackets exactly as written, or ``(None, None)`` if no parseable
        ``#+DATE:`` line exists.
    """
    keyword_pattern = re.compile(r"^#\+DATE:\s*\[(.*?)\]", re.IGNORECASE)
    stamp_pattern = re.compile(
        r"(\d{4})-(\d{1,2})-(\d{1,2})"  # date: 2022-03-17
        r"(?:\s+[^\W\d]+\.?)?"  # optional day name: Thu, jue, mié.
        r"(?:\s+(\d{1,2}):(\d{1,2})(?::(\d{1,2}))?)?"  # optional 15:47[:00]
    )
    for line in content.splitlines():
        keyword_match = keyword_pattern.match(line.strip())
        if not keyword_match:
            continue
        raw_date_str = keyword_match.group(1)
        stamp_match = stamp_pattern.fullmatch(raw_date_str.strip())
        if not stamp_match:
            continue
        # Missing time fields come back as None and default to 0.
        year, month, day, hour, minute, second = (
            int(part) if part else 0 for part in stamp_match.groups()
        )
        try:
            return raw_date_str, datetime(year, month, day, hour, minute, second)
        except ValueError:
            # Out-of-range field (e.g. month 13): keep looking further down.
            continue
    return None, None
def datetime_to_denote_id(dt: "datetime | None") -> str:
    """Format a datetime as a ``YYYYmmddTHHMMSS`` identifier string.

    Args:
        dt: The datetime to format. If None, the current local time
            (``datetime.now()``) is used instead.

    Returns:
        A 15-character string such as '20220225T070414'.
    """
    if dt is None:
        dt = datetime.now()
    return dt.strftime("%Y%m%dT%H%M%S")
def transform_org_file(old_filename):
    """Write a Denote-style copy of one Org file into the current directory.

    1) Split the base name into an old signature and a descriptive part at
       the first '_' (or, if there is no '_', at the first '-'). With no
       separator at all, the whole name is used for both.
    2) If the old signature matches YYYYmmddTHHMMSS, then:
       - Use that as the Denote ID
       - Use an EMPTY new signature
       - e.g. --<descr>@@<that_date>.org
       Otherwise, do the alphanumeric->=N= conversion and take the Denote ID
       from #+DATE: (or the current time if it is missing/unparseable),
       e.g. ==<signature>--<descr>@@<id>.org
    3) Convert underscores in the descriptive part -> hyphens.
    4) Rewrite the content with updated front matter: a new :PROPERTIES:
       block (keeping any :Time-stamp: value), #+title, #+subtitle, #+date,
       #+filetags, #+identifier and #+signature. The old leading
       :PROPERTIES: drawer and #+TITLE: line are removed and the old #+DATE:
       line is turned into an Org comment.

    The output file name has no directory component, so it is created in the
    current working directory; the input file is not modified or deleted.

    Args:
        old_filename: Path to the Org file to convert. Files whose extension
            is not ``.org`` (case-insensitive) are reported and skipped.

    Returns:
        None.
    """
    base = os.path.basename(old_filename)
    base_no_ext, ext = os.path.splitext(base)
    if ext.lower() != ".org":
        print(f"Skipping non-Org file: {old_filename}")
        return

    # Read original content
    with open(old_filename, "r", encoding="utf-8") as f:
        original_content = f.read()

    # Figure out the "old_signature" vs. the descriptive part:
    #   e.g. "20220225T070414_significance_testing.org"
    #   e.g. "1c3a_principal_component_analysis.org"
    if "_" in base_no_ext:
        old_sig, old_descr = base_no_ext.split("_", 1)
    elif "-" in base_no_ext:
        # If no underscore, fallback to dash
        old_sig, old_descr = base_no_ext.split("-", 1)
    else:
        old_sig = old_descr = base_no_ext
    new_descr = old_descr.replace("_", "-")

    # Sample the clock once so a generated identifier and a generated
    # #+date: value can never disagree.
    now = datetime.now()

    # #+DATE: always feeds the #+date: property; it only feeds the Denote ID
    # when the filename does not already carry a timestamp.
    raw_date_str, dt_object = parse_date_from_org(original_content)
    if not raw_date_str:
        raw_date_str = now.strftime("%Y-%m-%d %a %H:%M:%S")

    if re.match(r"^\d{8}T\d{6}$", old_sig):
        # Date-based name: the old signature IS the Denote ID.
        new_signature = ""
        denote_id = old_sig
        new_filename = f"--{new_descr}@@{denote_id}.org"
    else:
        # Older "alphanumeric" style, e.g. "1c3a" -> "1=3=3=1".
        new_signature = convert_alphanumeric_signature(old_sig)
        denote_id = datetime_to_denote_id(dt_object if dt_object is not None else now)
        new_filename = f"=={new_signature}--{new_descr}@@{denote_id}.org"

    # Parse #+TITLE from content. [ \t]* (not \s*) keeps an empty title from
    # swallowing the newline and capturing the following line instead.
    title_match = re.search(
        r"^#\+TITLE:[ \t]*(.*)$", original_content, re.IGNORECASE | re.MULTILINE
    )
    file_title = title_match.group(1).strip() if title_match else ""
    if not file_title:
        # Fallback: descriptive part with every separator turned into a space
        # (new_descr already has its underscores converted to hyphens).
        file_title = new_descr.replace("-", " ").title()

    # Preserve any :Time-stamp: line
    ts_match = re.search(
        r"^:Time-stamp:\s*(<.*>)", original_content, re.MULTILINE | re.IGNORECASE
    )
    ts_value = ts_match.group(1) if ts_match else None

    # Build new :PROPERTIES: block
    new_props = f":PROPERTIES:\n:ID: {new_signature}\n"
    if ts_value:
        new_props += f":Time-stamp: {ts_value}\n"
    new_props += ":END:\n"

    # Build front matter. An empty signature still produces the
    # "#+subtitle:" and "#+signature:" keywords, just without a value.
    new_front_matter = (
        f"{new_props}"
        f"#+title: {file_title}\n"
        f"#+subtitle: {new_signature}\n"
        f"#+date: [{raw_date_str}]\n"
        f"#+filetags:\n"
        f"#+identifier: {denote_id}\n"
        f"#+signature: {new_signature}\n\n"
    )

    # Remove only the file-level drawer at the very top; drawers that belong
    # to headings further down are part of the note body and must survive.
    content_cleaned = re.sub(
        r"\A\s*:PROPERTIES:.*?:END:\s*",
        "",
        original_content,
        count=1,
        flags=re.DOTALL,
    )
    content_cleaned = re.sub(
        r"^#\+TITLE:.*$", "", content_cleaned, flags=re.MULTILINE | re.IGNORECASE
    )
    # Comment out the old #+DATE: line. \g<0> is the whole match ("\0" would
    # be an octal escape for a NUL character) and "# " is Org's comment prefix.
    content_cleaned = re.sub(
        r"^#\+DATE:.*$",
        r"# \g<0>",
        content_cleaned,
        flags=re.MULTILINE | re.IGNORECASE,
    )

    final_content = new_front_matter + content_cleaned.strip() + "\n"

    # Write the new file
    with open(new_filename, "w", encoding="utf-8") as fw:
        fw.write(final_content)
    print(f"Renamed '{old_filename}' -> '{new_filename}'")
def main(directory="../2_ZK/"):
    """Convert every ``.org`` file found directly inside *directory*.

    Files are handled in sorted name order so repeated runs process them in
    the same sequence. The extension check is case-insensitive, matching the
    check done by ``transform_org_file``.

    Args:
        directory: Folder to scan (not recursive). Defaults to "../2_ZK/".
    """
    for fname in sorted(os.listdir(directory)):
        if fname.lower().endswith(".org"):
            transform_org_file(os.path.join(directory, fname))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment