maikol-solis · January 3, 2025 20:58
diff --git a/converter_org_roam_to_denote.py b/converter_org_roam_to_denote.py
 """
 This script reads and transforms Org-mode files by analyzing the filename
 and its contents. It determines whether the file has a date-based signature
 (e.g., 20220225T070414) or an alphanumeric signature (e.g., 1c3a). If it’s
 date-based, that signature is directly used as the “Denote ID” and the rest of
 the filename is treated as a descriptive part. If it’s alphanumeric, the script
 converts letters into =N= form, parses the #+DATE: property if present (or uses
 the current time), and constructs a new Denote ID. The script then rewrites the
 file in a standardized format. It updates the front matter (properties, title,
 date, etc.), preserves certain metadata (like :Time-stamp:), and removes old
 metadata lines such as the old :PROPERTIES: block, #+TITLE:, and #+DATE:
 (commented out). Finally, it saves the file under a new name reflecting the
 transformed signature and/or Denote ID.
 """

 import re
 import os
 from datetime import datetime


 def letter_to_alphabet_index(matchobj):
    """
    Turn one matched ASCII letter into its ``=N=`` token.

    Args:
        matchobj: A regex match whose group 0 is a single letter a-z/A-Z.

    Returns:
        ``"=N="`` where N is the letter's 1-based position in the alphabet
        (case-insensitive), e.g. 'a' -> '=1=', 'C' -> '=3='.
    """
    ordinal = ord(matchobj.group(0).lower()) - ord("a") + 1
    return f"={ordinal}="


 def convert_alphanumeric_signature(old_sig):
    """
    Convert an alphanumeric signature such as '1c3a' into '1=3=3=1'.

    Digits (and any other non-letter characters) are kept as they are; every
    ASCII letter is replaced by its 1-based alphabet index, with '=' used as
    the separator between components.

    Args:
        old_sig: The old signature taken from the file name.

    Returns:
        The converted signature with single '=' separators and no leading or
        trailing '=' (e.g. '1a' -> '1=1', 'ab' -> '1=2', '' -> '').
    """
    expanded = re.sub(r"[A-Za-z]", letter_to_alphabet_index, old_sig)
    # Each letter expands to "=N=", so adjacent letters give "==" and a
    # leading/trailing letter gives a stray "=" at that end.  Collapse the
    # runs and trim both ends so the result never clashes with the "=="
    # marker that introduces the signature in the final file name.
    return re.sub(r"=+", "=", expanded).strip("=")


 def parse_date_from_org(content):
    """
    Find the first parsable ``#+DATE: [YYYY-mm-dd (weekday) HH:MM(:SS)]`` line.

    The weekday token is optional and is ignored rather than validated, so
    timestamps written with weekday names in any language parse the same way
    regardless of the locale the script runs under.

    Note: This is only for the #+date: property in the file content,
    NOT for the final filename if the file is date-based (2022...).

    Args:
        content: Full text of the Org file.

    Returns:
        ``(raw_str, datetime_obj)`` where ``raw_str`` is the text between the
        brackets exactly as written, or ``(None, None)`` when no ``#+DATE:``
        line is present or none of them can be parsed.
    """
    date_line = re.compile(r"^#\+DATE:\s*\[(.*?)\]", re.IGNORECASE)
    # With and without seconds; the weekday is removed before parsing.
    stamp_formats = ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M")

    for line in content.splitlines():
        found = date_line.match(line.strip())
        if not found:
            continue
        raw_date_str = found.group(1)
        fields = raw_date_str.split()
        # "<date> <weekday> <time>": drop the middle token instead of feeding
        # it to %a, which only understands the current locale's day names.
        if len(fields) == 3 and not fields[1][0].isdigit():
            fields = [fields[0], fields[2]]
        normalized = " ".join(fields)
        for fmt in stamp_formats:
            try:
                return raw_date_str, datetime.strptime(normalized, fmt)
            except ValueError:
                # Not this layout; try the next one (or the next line).
                continue
    return None, None


 def datetime_to_denote_id(dt):
    """
    Format a datetime as a Denote identifier (YYYYmmddTHHMMSS).

    When ``dt`` is None, the value of ``datetime.now()`` is formatted instead.
    """
    moment = datetime.now() if dt is None else dt
    return moment.strftime("%Y%m%dT%H%M%S")


 def transform_org_file(old_filename):
    """
    Convert one Org file to Denote naming and front matter.

    The file stem is split once on "_" (or, failing that, on "-") into an
    old signature and a descriptive part:

    * Signature shaped like YYYYmmddTHHMMSS: it is used as the Denote ID,
      the new signature is empty and the output is named
      ``--<descr>@@<id>.org``.
    * Anything else: the signature goes through
      ``convert_alphanumeric_signature``, the Denote ID is derived from the
      file's #+DATE: (current time when missing or unparsable) and the
      output is named ``==<signature>--<descr>@@<id>.org``.

    Underscores in the descriptive part become hyphens.  The new file gets a
    fresh :PROPERTIES: drawer and front matter; the old file-level drawer and
    #+TITLE: lines are removed and old #+DATE: lines are kept with a "; "
    prefix.  The output is written relative to the current working
    directory and the source file is left untouched.

    Args:
        old_filename: Path of the Org file to convert.  A path whose
            extension is not ".org" is reported and skipped.

    Returns:
        None.

    Raises:
        OSError: If the source cannot be read or the output cannot be written.
    """
    base_no_ext, ext = os.path.splitext(os.path.basename(old_filename))
    if ext.lower() != ".org":
        print(f"Skipping non-Org file: {old_filename}")
        return

    with open(old_filename, "r", encoding="utf-8") as f:
        original_content = f.read()

    # Split "<signature><sep><description>" once, preferring "_" over "-":
    #   e.g. "20220225T070414_significance_testing"
    #   e.g. "1c3a_principal_component_analysis"
    for separator in ("_", "-"):
        if separator in base_no_ext:
            old_sig, old_descr = base_no_ext.split(separator, 1)
            break
    else:
        # No separator at all: the whole stem serves as both parts.
        old_sig = old_descr = base_no_ext
    new_descr = old_descr.replace("_", "-")

    # A single clock reading backs both the #+date: line and (for
    # alphanumeric signatures) the identifier, so the two cannot disagree.
    raw_date_str, dt_object = parse_date_from_org(original_content)
    if dt_object is None:
        dt_object = datetime.now()
    if not raw_date_str:
        raw_date_str = dt_object.strftime("%Y-%m-%d %a %H:%M:%S")

    if re.fullmatch(r"\d{8}T\d{6}", old_sig):
        # Date-based: the old signature already is the Denote ID; #+DATE:
        # only feeds the #+date: line, never the file name.
        new_signature = ""
        denote_id = old_sig
        new_filename = f"--{new_descr}@@{denote_id}.org"
    else:
        # Alphanumeric (e.g. "1c3a"): convert letters and take the ID from
        # the parsed date.
        new_signature = convert_alphanumeric_signature(old_sig)
        denote_id = datetime_to_denote_id(dt_object)
        new_filename = f"=={new_signature}--{new_descr}@@{denote_id}.org"

    # [ \t]* rather than \s*: an empty "#+TITLE:" must not swallow the
    # following line as its value.
    tm = re.search(
        r"^#\+TITLE:[ \t]*(.*)$", original_content, re.IGNORECASE | re.MULTILINE
    )
    file_title = tm.group(1).strip() if tm else ""
    if not file_title:
        # Fallback built from the hyphenated description so that underscores
        # from the old file name do not leak into the title.
        file_title = new_descr.replace("-", " ").title()

    # Preserve any :Time-stamp: line
    ts_match = re.search(
        r"^:Time-stamp:\s*(<.*>)", original_content, re.MULTILINE | re.IGNORECASE
    )

    new_props = f":PROPERTIES:\n:ID:       {new_signature}\n"
    if ts_match:
        new_props += f":Time-stamp: {ts_match.group(1)}\n"
    new_props += ":END:\n"

    # The signature lines are emitted even when the signature is empty.
    new_front_matter = (
        f"{new_props}"
        f"#+title:      {file_title}\n"
        f"#+subtitle:   {new_signature}\n"
        f"#+date:       [{raw_date_str}]\n"
        f"#+filetags:\n"
        f"#+identifier: {denote_id}\n"
        f"#+signature:  {new_signature}\n\n"
    )

    # Remove only the file-level drawer at the very top; property drawers
    # that belong to headings further down must survive the conversion.
    content_cleaned = re.sub(
        r"\A\s*:PROPERTIES:.*?^[ \t]*:END:[ \t]*\n?",
        "",
        original_content,
        count=1,
        flags=re.DOTALL | re.MULTILINE | re.IGNORECASE,
    )
    content_cleaned = re.sub(
        r"^#\+TITLE:.*$", "", content_cleaned, flags=re.MULTILINE | re.IGNORECASE
    )
    # Keep the old #+DATE: text behind a "; " prefix.  \g<0> is the whole
    # match; a bare \0 in a replacement template is an octal escape that
    # would write a NUL character in place of the line.
    # NOTE(review): Org comment lines start with "# "; confirm "; " is the
    # intended marker.
    content_cleaned = re.sub(
        r"^#\+DATE:.*$",
        r"; \g<0>",
        content_cleaned,
        flags=re.MULTILINE | re.IGNORECASE,
    )

    final_content = new_front_matter + content_cleaned.strip() + "\n"

    with open(new_filename, "w", encoding="utf-8") as fw:
        fw.write(final_content)

    print(f"Renamed '{old_filename}' -> '{new_filename}'")


 def main(source_dir="../2_ZK/"):
    """
    Convert every ``.org`` file found directly inside ``source_dir``.

    Args:
        source_dir: Directory to scan (not recursive).  Defaults to
            "../2_ZK/", the location that used to be hard-coded.

    Raises:
        OSError: If ``source_dir`` cannot be listed.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # processing order and the printed log reproducible between runs.
    for fname in sorted(os.listdir(source_dir)):
        if fname.endswith(".org"):
            transform_org_file(os.path.join(source_dir, fname))


 # Run the conversion only when this file is executed as a script, so that
 # importing the module has no side effects.
 if __name__ == "__main__":
    main()
	"""
	This script reads and transforms Org-mode files by analyzing the filename
	and its contents. It determines whether the file has a date-based signature
	(e.g., 20220225T070414) or an alphanumeric signature (e.g., 1c3a). If it’s
	date-based, that signature is directly used as the “Denote ID” and the rest of
	the filename is treated as a descriptive part. If it’s alphanumeric, the script
	converts letters into =N= form, parses the #+DATE: property if present (or uses
	the current time), and constructs a new Denote ID. The script then rewrites the
	file in a standardized format. It updates the front matter (properties, title,
	date, etc.), preserves certain metadata (like :Time-stamp:), and removes old
	metadata lines such as the old :PROPERTIES: block, #+TITLE:, and #+DATE:
	(commented out). Finally, it saves the file under a new name reflecting the
	transformed signature and/or Denote ID.
	"""

	import re
	import os
	from datetime import datetime


	def letter_to_alphabet_index(matchobj):
	    """
	    Turn one matched ASCII letter into its ``=N=`` token.

	    Args:
	        matchobj: A regex match whose group 0 is a single letter a-z/A-Z.

	    Returns:
	        ``"=N="`` where N is the letter's 1-based position in the alphabet
	        (case-insensitive), e.g. 'a' -> '=1=', 'C' -> '=3='.
	    """
	    ordinal = ord(matchobj.group(0).lower()) - ord("a") + 1
	    return f"={ordinal}="


	def convert_alphanumeric_signature(old_sig):
	    """
	    Convert an alphanumeric signature such as '1c3a' into '1=3=3=1'.

	    Digits (and any other non-letter characters) are kept as they are; every
	    ASCII letter is replaced by its 1-based alphabet index, with '=' used as
	    the separator between components.

	    Args:
	        old_sig: The old signature taken from the file name.

	    Returns:
	        The converted signature with single '=' separators and no leading or
	        trailing '=' (e.g. '1a' -> '1=1', 'ab' -> '1=2', '' -> '').
	    """
	    expanded = re.sub(r"[A-Za-z]", letter_to_alphabet_index, old_sig)
	    # Each letter expands to "=N=", so adjacent letters give "==" and a
	    # leading/trailing letter gives a stray "=" at that end.  Collapse the
	    # runs and trim both ends so the result never clashes with the "=="
	    # marker that introduces the signature in the final file name.
	    return re.sub(r"=+", "=", expanded).strip("=")


	def parse_date_from_org(content):
	    """
	    Find the first parsable ``#+DATE: [YYYY-mm-dd (weekday) HH:MM(:SS)]`` line.

	    The weekday token is optional and is ignored rather than validated, so
	    timestamps written with weekday names in any language parse the same way
	    regardless of the locale the script runs under.

	    Note: This is only for the #+date: property in the file content,
	    NOT for the final filename if the file is date-based (2022...).

	    Args:
	        content: Full text of the Org file.

	    Returns:
	        ``(raw_str, datetime_obj)`` where ``raw_str`` is the text between the
	        brackets exactly as written, or ``(None, None)`` when no ``#+DATE:``
	        line is present or none of them can be parsed.
	    """
	    date_line = re.compile(r"^#\+DATE:\s*\[(.*?)\]", re.IGNORECASE)
	    # With and without seconds; the weekday is removed before parsing.
	    stamp_formats = ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d %H:%M")

	    for line in content.splitlines():
	        found = date_line.match(line.strip())
	        if not found:
	            continue
	        raw_date_str = found.group(1)
	        fields = raw_date_str.split()
	        # "<date> <weekday> <time>": drop the middle token instead of feeding
	        # it to %a, which only understands the current locale's day names.
	        if len(fields) == 3 and not fields[1][0].isdigit():
	            fields = [fields[0], fields[2]]
	        normalized = " ".join(fields)
	        for fmt in stamp_formats:
	            try:
	                return raw_date_str, datetime.strptime(normalized, fmt)
	            except ValueError:
	                # Not this layout; try the next one (or the next line).
	                continue
	    return None, None


	def datetime_to_denote_id(dt):
	    """
	    Convert a Python datetime into YYYYmmddTHHMMSS string.

	    If dt is None, the value of ``datetime.now()`` is used.
	    """
	    if dt is None:
	        dt = datetime.now()
	    return dt.strftime("%Y%m%dT%H%M%S")


	def transform_org_file(old_filename):
	    """
	    Convert one Org file to Denote naming and front matter.

	    The file stem is split once on "_" (or, failing that, on "-") into an
	    old signature and a descriptive part:

	    * Signature shaped like YYYYmmddTHHMMSS: it is used as the Denote ID,
	      the new signature is empty and the output is named
	      ``--<descr>@@<id>.org``.
	    * Anything else: the signature goes through
	      ``convert_alphanumeric_signature``, the Denote ID is derived from the
	      file's #+DATE: (current time when missing or unparsable) and the
	      output is named ``==<signature>--<descr>@@<id>.org``.

	    Underscores in the descriptive part become hyphens.  The new file gets a
	    fresh :PROPERTIES: drawer and front matter; the old file-level drawer and
	    #+TITLE: lines are removed and old #+DATE: lines are kept with a "; "
	    prefix.  The output is written relative to the current working
	    directory and the source file is left untouched.

	    Args:
	        old_filename: Path of the Org file to convert.  A path whose
	            extension is not ".org" is reported and skipped.

	    Returns:
	        None.

	    Raises:
	        OSError: If the source cannot be read or the output cannot be written.
	    """
	    base_no_ext, ext = os.path.splitext(os.path.basename(old_filename))
	    if ext.lower() != ".org":
	        print(f"Skipping non-Org file: {old_filename}")
	        return

	    with open(old_filename, "r", encoding="utf-8") as f:
	        original_content = f.read()

	    # Split "<signature><sep><description>" once, preferring "_" over "-":
	    #   e.g. "20220225T070414_significance_testing"
	    #   e.g. "1c3a_principal_component_analysis"
	    for separator in ("_", "-"):
	        if separator in base_no_ext:
	            old_sig, old_descr = base_no_ext.split(separator, 1)
	            break
	    else:
	        # No separator at all: the whole stem serves as both parts.
	        old_sig = old_descr = base_no_ext
	    new_descr = old_descr.replace("_", "-")

	    # A single clock reading backs both the #+date: line and (for
	    # alphanumeric signatures) the identifier, so the two cannot disagree.
	    raw_date_str, dt_object = parse_date_from_org(original_content)
	    if dt_object is None:
	        dt_object = datetime.now()
	    if not raw_date_str:
	        raw_date_str = dt_object.strftime("%Y-%m-%d %a %H:%M:%S")

	    if re.fullmatch(r"\d{8}T\d{6}", old_sig):
	        # Date-based: the old signature already is the Denote ID; #+DATE:
	        # only feeds the #+date: line, never the file name.
	        new_signature = ""
	        denote_id = old_sig
	        new_filename = f"--{new_descr}@@{denote_id}.org"
	    else:
	        # Alphanumeric (e.g. "1c3a"): convert letters and take the ID from
	        # the parsed date.
	        new_signature = convert_alphanumeric_signature(old_sig)
	        denote_id = datetime_to_denote_id(dt_object)
	        new_filename = f"=={new_signature}--{new_descr}@@{denote_id}.org"

	    # [ \t]* rather than \s*: an empty "#+TITLE:" must not swallow the
	    # following line as its value.
	    tm = re.search(
	        r"^#\+TITLE:[ \t]*(.*)$", original_content, re.IGNORECASE | re.MULTILINE
	    )
	    file_title = tm.group(1).strip() if tm else ""
	    if not file_title:
	        # Fallback built from the hyphenated description so that underscores
	        # from the old file name do not leak into the title.
	        file_title = new_descr.replace("-", " ").title()

	    # Preserve any :Time-stamp: line
	    ts_match = re.search(
	        r"^:Time-stamp:\s*(<.*>)", original_content, re.MULTILINE | re.IGNORECASE
	    )

	    new_props = f":PROPERTIES:\n:ID:       {new_signature}\n"
	    if ts_match:
	        new_props += f":Time-stamp: {ts_match.group(1)}\n"
	    new_props += ":END:\n"

	    # The signature lines are emitted even when the signature is empty.
	    new_front_matter = (
	        f"{new_props}"
	        f"#+title:      {file_title}\n"
	        f"#+subtitle:   {new_signature}\n"
	        f"#+date:       [{raw_date_str}]\n"
	        f"#+filetags:\n"
	        f"#+identifier: {denote_id}\n"
	        f"#+signature:  {new_signature}\n\n"
	    )

	    # Remove only the file-level drawer at the very top; property drawers
	    # that belong to headings further down must survive the conversion.
	    content_cleaned = re.sub(
	        r"\A\s*:PROPERTIES:.*?^[ \t]*:END:[ \t]*\n?",
	        "",
	        original_content,
	        count=1,
	        flags=re.DOTALL | re.MULTILINE | re.IGNORECASE,
	    )
	    content_cleaned = re.sub(
	        r"^#\+TITLE:.*$", "", content_cleaned, flags=re.MULTILINE | re.IGNORECASE
	    )
	    # Keep the old #+DATE: text behind a "; " prefix.  \g<0> is the whole
	    # match; a bare \0 in a replacement template is an octal escape that
	    # would write a NUL character in place of the line.
	    # NOTE(review): Org comment lines start with "# "; confirm "; " is the
	    # intended marker.
	    content_cleaned = re.sub(
	        r"^#\+DATE:.*$",
	        r"; \g<0>",
	        content_cleaned,
	        flags=re.MULTILINE | re.IGNORECASE,
	    )

	    final_content = new_front_matter + content_cleaned.strip() + "\n"

	    with open(new_filename, "w", encoding="utf-8") as fw:
	        fw.write(final_content)

	    print(f"Renamed '{old_filename}' -> '{new_filename}'")


	def main(source_dir="../2_ZK/"):
	    """
	    Convert every ``.org`` file found directly inside ``source_dir``.

	    Args:
	        source_dir: Directory to scan (not recursive).  Defaults to
	            "../2_ZK/", the location that used to be hard-coded.

	    Raises:
	        OSError: If ``source_dir`` cannot be listed.
	    """
	    # os.listdir returns entries in arbitrary order; sorting keeps the
	    # processing order and the printed log reproducible between runs.
	    for fname in sorted(os.listdir(source_dir)):
	        if fname.endswith(".org"):
	            transform_org_file(os.path.join(source_dir, fname))


	# Run the conversion only when this file is executed as a script, so that
	# importing the module has no side effects.
	if __name__ == "__main__":
	    main()