nymous · January 17, 2026 18:54
diff --git a/README.md b/README.md
diff --git a/convert.sh b/convert.sh
 #!/usr/bin/env bash
 # Convert the DokuWiki page held in the X clipboard to Markdown and place the
 # converted text back into the clipboard.
 #
 # Usage: convert.sh <namespace:page>
 set -euo pipefail

 # Fail with a usage hint instead of bash's bare "unbound variable" error.
 if [[ $# -ne 1 ]]; then
     echo "Usage: $0 <namespace:page>" >&2
     exit 1
 fi

 # Page ids use ":" between namespaces; map them onto directory separators.
 pagename="${1//:/\/}"
 source_file="poly/${pagename}.txt"
 converted_file="poly/${pagename}.md"

 mkdir -p "poly/$(dirname "${pagename}")"
 xclip -out -selection clipboard > "${source_file}"

 # doku2md.py declares a python3 shebang; a plain "python" may be missing or
 # may still point at Python 2.
 python3 doku2md.py -f "${source_file}"

 # The diff is informational only, so its exit status must not abort the script.
 difft "${source_file}" "${converted_file}" || true

 xclip -selection clipboard "${converted_file}"
 echo "Converted file is in clipboard, just Ctrl+V!"

diff --git a/doku2md.py b/doku2md.py
 #!/usr/bin/env python3

 import argparse
 import os
 import re
 from functools import reduce


 class DokuWiki2MarkDown:
    """Convert DokuWiki markup to Markdown, for single files or directory trees.

    Every method is static; the class only serves as a namespace. Conversion is
    a fixed sequence of regex-based transforms driven by
    ``_dokuwiki_to_markdown``.
    """

    # Stand-in for a converted code block while the inline transforms run.
    # NUL characters are not expected in wiki text and are not whitespace, so
    # none of the transforms below can alter or swallow the token.
    _CODEBLOCK_TOKEN = '\x00CODEBLOCK{}\x00'

    @staticmethod
    def convert_file(filepath, lang, ts):
        """Convert one DokuWiki file and write the result next to it as ``.md``.

        Args:
            filepath: Path of the DokuWiki source file.
            lang: Language label for code blocks that declare none (or None).
            ts: When true, "Created ..." timestamp lines are kept.

        A missing input file is reported on stdout and otherwise ignored.
        """
        try:
            # Explicit UTF-8 so the result does not depend on the locale.
            with open(filepath, 'r', encoding='utf-8') as f:
                dokuwiki_text = f.read()
        except FileNotFoundError:
            print(f"Error: File {filepath} not found.")
            return

        markdown_text = DokuWiki2MarkDown._dokuwiki_to_markdown(dokuwiki_text, lang, ts)

        new_filepath = os.path.splitext(filepath)[0] + '.md'
        with open(new_filepath, 'w', encoding='utf-8') as f:
            print(f"Saving {new_filepath}")
            f.write(markdown_text)

    @staticmethod
    def convert_directory(directory, lang, ts):
        """Convert every ``.txt`` file found below ``directory``.

        Args:
            directory: Root directory that is walked recursively.
            lang: Language label for code blocks that declare none (or None).
            ts: When true, "Created ..." timestamp lines are kept.
        """
        # os.walk() silently yields nothing for a missing path, so the problem
        # has to be detected up front for the error message to ever appear.
        if not os.path.isdir(directory):
            print(f"Error: Directory {directory} not found.")
            return
        for root, _dirs, files in os.walk(directory):
            for file in files:
                if file.endswith('.txt'):
                    DokuWiki2MarkDown.convert_file(os.path.join(root, file), lang, ts)

    @staticmethod
    def _dokuwiki_to_markdown(dokuwiki_text, codeblk_lang, timestamps):
        """Run the full conversion pipeline on a DokuWiki document.

        Args:
            dokuwiki_text: The DokuWiki source text.
            codeblk_lang: Language label for code blocks that declare none.
            timestamps: When false, "Created ..." timestamp lines are removed.

        Returns:
            The Markdown text, prefixed with a ``<!DOCTYPE markdown>`` line.
        """
        # Remove timestamps if elected
        if not timestamps:
            dokuwiki_text = DokuWiki2MarkDown._rm_timestamp(dokuwiki_text)

        # Code blocks are converted first and parked behind tokens so that the
        # inline transforms cannot rewrite their content (e.g. "//" or "==").
        dokuwiki_text, fences = DokuWiki2MarkDown._stash_codeblocks(dokuwiki_text, codeblk_lang)

        # Transform the rest
        # - bold and block quotes share the same syntax in DokuWiki and MarkDown
        # - _tr_images and the _tr_links_* escape helpers are deliberately not
        #   part of the pipeline
        transforms = [
            DokuWiki2MarkDown._tr_external_links,
            DokuWiki2MarkDown._tr_headers,
            DokuWiki2MarkDown._tr_italic,
            DokuWiki2MarkDown._tr_underline,
            DokuWiki2MarkDown._tr_monospaced,
            DokuWiki2MarkDown._tr_strikethrough,
            DokuWiki2MarkDown._tr_footnotes,
            DokuWiki2MarkDown._tr_tables,
            DokuWiki2MarkDown._tr_lists,
            DokuWiki2MarkDown._tr_linebreaks,
            DokuWiki2MarkDown._rm_single_space_at_line_end,
            DokuWiki2MarkDown._rm_newlines,
        ]
        dokuwiki_text = reduce(lambda text, func: func(text), transforms, dokuwiki_text)
        dokuwiki_text = DokuWiki2MarkDown._restore_codeblocks(dokuwiki_text, fences)
        return "<!DOCTYPE markdown>\n\n" + dokuwiki_text

    @staticmethod
    def _rm_timestamp(text: str) -> str:
        """Drop lines such as ``Created Monday 01 January 2024``."""
        return re.sub(r' *Created \w+ \d{2} \w+ \d{4}\n', '', text)

    @staticmethod
    def _tr_italic(text: str) -> str:
        """Convert ``//italic//`` to ``*italic*``, skipping the ``//`` of http(s) URLs."""
        return re.sub(r'(?<!https:)(?<!http:)//(.*?)//', r'*\1*', text)

    @staticmethod
    def _tr_underline(text: str) -> str:
        """Convert ``__underline__`` to bold, as Markdown has no underline."""
        return re.sub(r'__(.*?)__', r'**\1**', text)

    @staticmethod
    def _tr_monospaced(text: str) -> str:
        """Convert ``''monospaced''`` to a backtick code span."""
        return re.sub(r'\'\'(.*?)\'\'', r'`\1`', text)

    @staticmethod
    def _tr_strikethrough(text: str) -> str:
        """Convert ``<del>text</del>`` to ``~~text~~``."""
        return re.sub(r'<del>(.*?)</del>', r'~~\1~~', text)

    @staticmethod
    def _tr_external_links(text: str) -> str:
        """Convert ``[[http(s)://url|title]]`` to ``[title](url)``.

        The title is matched lazily so that several links on one line stay
        separate. The closing ``]]`` must not be followed by a third ``]``,
        which keeps a title ending in ``]`` intact and also lets a link at the
        very end of the text match. Links without a title are left untouched.
        """
        return re.sub(r'\[\[(https?://[^]|]+)\|(.+?)\]\](?!\])', r'[\2](\1)', text)

    @staticmethod
    def _tr_links_initial_escape(text: str) -> str:
        """Convert ``[[url|title]]`` links and shield their ``/``, ``*`` and ``_``.

        The shielded characters are swapped for marker strings so that the
        italic, bold and underline transforms cannot mangle the link;
        ``_tr_links_unescape`` reverses the substitution. Not part of the
        default pipeline.
        """
        def shield(fragment):
            return (fragment.replace('/', "##URL#ESCAPED#SLASH##")
                    .replace('*', "##URL#ESCAPED#ASTERISK##")
                    .replace('_', "##URL#ESCAPED#UNDERSCORE##"))

        def replace_link(match):
            url, _, title = match.groups()
            if not title:
                title = url
            return f'[{shield(title)}]({shield(url)})'

        return re.sub(r'\[\[(.*?)(\|(.*?))?\]\]', replace_link, text)

    @staticmethod
    def _tr_links_unescape(text: str) -> str:
        """Undo the marker substitution made by ``_tr_links_initial_escape``."""
        return (text.replace("##URL#ESCAPED#SLASH##", "/")
                .replace("##URL#ESCAPED#ASTERISK##", "*")
                .replace("##URL#ESCAPED#UNDERSCORE##", "_"))

    @staticmethod
    def _tr_headers(text: str) -> str:
        """Convert ``====== H1 ======`` .. ``== H5 ==`` to ``#`` .. ``#####``.

        A headline has to occupy a whole line; anchoring the pattern keeps
        inline ``==`` (for example in ``a == b``) from being read as a header.
        Each header is followed by a blank line.
        """
        # Longest markers first, so a six-"=" headline is not consumed by a
        # pattern meant for a lower level.
        for i in range(6, 1, -1):
            marker = '=' * i
            text = re.sub(rf'^[ \t]*{marker}[ \t]*(.+?)[ \t]*{marker}[ \t]*$\n?',
                          rf"{'#' * (7 - i)} \1\n\n", text, flags=re.MULTILINE)
        return text

    @staticmethod
    def _stash_codeblocks(text: str, lang):
        """Convert ``<code>``/``<file>`` elements and park them behind tokens.

        Args:
            text: DokuWiki source text.
            lang: Language label used for blocks that declare none (or None).

        Returns:
            A ``(text, fences)`` tuple: ``text`` holds one token per block and
            ``fences`` the finished Markdown for each token, in order.
        """
        fences = []

        def stash(match):
            block_lang = match.group(1) or ''
            title = (match.group(2) or '').strip()
            if title == '-' or title.startswith('- '):
                # "<code - name>": the dash explicitly disables highlighting
                # and is not part of the file name.
                title = title[1:].strip()
            elif not block_lang and lang:
                block_lang = lang
            title_line = f'`{title}`\n' if title else ''
            fences.append(f'{title_line}```{block_lang}\n{match.group(3)}\n```')
            token = DokuWiki2MarkDown._CODEBLOCK_TOKEN.format(len(fences) - 1)
            return '\n\n' + token + '\n'

        text = re.sub(r'\n*<(?:code|file) ?([^> -]*)? ?([^>]*)?>\n{0,}(.*?)\n{0,}</(?:code|file)>',
                      stash, text, flags=re.DOTALL)
        return text, fences

    @staticmethod
    def _restore_codeblocks(text: str, fences) -> str:
        """Replace every code-block token with the Markdown stored for it."""
        # A function replacement keeps backslashes in the code from being
        # interpreted as regex escapes.
        return re.sub(r'\x00CODEBLOCK(\d+)\x00', lambda m: fences[int(m.group(1))], text)

    @staticmethod
    def _tr_codeblocks(text: str, lang) -> str:
        """Convert ``<code lang name>`` / ``<file lang name>`` to fenced blocks.

        A file name, when given, is emitted as a code span on the line above
        the fence. ``lang`` labels blocks that do not declare a language.
        """
        converted, fences = DokuWiki2MarkDown._stash_codeblocks(text, lang)
        return DokuWiki2MarkDown._restore_codeblocks(converted, fences)

    @staticmethod
    def _tr_images(text: str) -> str:
        """Convert ``{{target|caption}}`` to ``![caption](target)``.

        Not part of the default pipeline.
        """
        return re.sub(r'\{\{(.*?)(\|(.*?))?\}\}', r'![\3](\1)', text)

    @staticmethod
    def _tr_footnotes(text: str) -> str:
        """Convert ``((note))`` to numbered Markdown footnotes.

        Each footnote receives its own number in order of appearance. The
        definitions are appended to the end of the text so that they do not
        cut the surrounding sentence in two.
        """
        notes = []

        def mark(match):
            notes.append(match.group(1).strip())
            return f'[^{len(notes)}]'

        text = re.sub(r'\(\((.*?)\)\)', mark, text)
        if notes:
            definitions = '\n'.join(f'[^{number}]: {note}'
                                    for number, note in enumerate(notes, 1))
            text = text.rstrip('\n') + '\n\n' + definitions + '\n'
        return text

    @staticmethod
    def _tr_linebreaks(text: str) -> str:
        """Convert a trailing ``\\\\`` to a Markdown hard break (two spaces)."""
        return re.sub(r' *\\{2} *\n', r'  \n', text)

    @staticmethod
    def _tr_lists(text: str) -> str:
        """Convert DokuWiki list items (``*`` unordered, ``-`` ordered).

        A list item is indented by at least two spaces (a tab counts as two);
        every further two spaces add one nesting level. Ordered items are
        numbered per nesting level, and numbering restarts whenever a non-list
        line ends the list. Horizontal rules (four or more dashes) are left
        alone.
        """
        lines = text.split('\n')
        counters = {}  # nesting level -> last number used at that level
        for i, line in enumerate(lines):
            match = re.match(r'([ \t]+)([-*])(.*)', line)
            width = len(match.group(1).replace('\t', '  ')) if match else 0
            is_rule = re.fullmatch(r'-{4,}', line.strip()) is not None
            if width < 2 or is_rule:
                # Not a list item: any running list ends here.
                counters.clear()
                continue

            _, bullet, rest = match.groups()
            level = width // 2 - 1
            # Returning to this level closes every deeper sub-list.
            for deeper in [lvl for lvl in counters if lvl > level]:
                del counters[deeper]
            if bullet == '-':
                counters[level] = counters.get(level, 0) + 1
                bullet = f'{counters[level]}.'
            else:
                # An unordered item interrupts ordered numbering at this level.
                counters.pop(level, None)
            lines[i] = '  ' * level + bullet + rest
        return '\n'.join(lines)

    @staticmethod
    def _tr_tables(input_dokuwiki):
        """Convert DokuWiki tables to Markdown pipe tables.

        Header cells (``^``) become ordinary cells, every colspan marker gets
        an empty cell, rowspan markers (``:::``) are removed, and a separator
        row is inserted after the first row of each table. Surrounding
        whitespace of the whole text is stripped and one trailing newline is
        appended.
        """
        output_markdown = []
        added_separator = False
        for line in input_dokuwiki.strip().split('\n'):
            if not re.match(r'\s*[\^|]', line):
                # Outside a table: the next table needs a separator of its own.
                added_separator = False
                output_markdown.append(line)
                continue

            # Strip first so that indented rows are treated like any other.
            row = line.strip().replace('^', '|')
            # A pipe directly followed by another pipe is a colspan; the
            # lookahead also handles runs of three or more pipes.
            row = re.sub(r'\|(?=\|)', '| ', row)
            row = row.replace(':::', '')
            output_markdown.append(row)

            if not added_separator and re.match(r'\|.*\|', row):
                num_columns = row.count('|') - 1
                output_markdown.append('| ' + ' --- |' * num_columns)
                added_separator = True

        return '\n'.join(output_markdown) + '\n'

    @staticmethod
    def _rm_newlines(text: str) -> str:
        """Collapse runs of blank lines into a single blank line."""
        return re.sub(r'(\n\s*){2,}', r'\n\n', text)

    @staticmethod
    def _rm_single_space_at_line_end(text: str) -> str:
        """Remove a lone trailing space; two spaces (a hard break) are kept."""
        return re.sub(r'(?<! ) (?! )$', '', text, flags=re.MULTILINE)


 def main():
    """Parse the command line and convert the requested file or directory."""
    cli = argparse.ArgumentParser(description='Convert Dokuwiki to Markdown.')

    # Exactly one input source has to be named: a single file or a directory.
    source = cli.add_mutually_exclusive_group(required=True)
    source.add_argument('-f', '--file', help='File to convert.')
    source.add_argument('-d', '--directory', help='Directory of files to convert.')

    cli.add_argument('-l', '--lang', help='Codeblocks will be labeled with this Language (eg. shell).')
    cli.add_argument('-T', '--timestamps', dest='timestamps', action='store_true',
                     help='Keep textual timestamps in documents. (Default is to remove timestamps)')

    options = cli.parse_args()
    converter = DokuWiki2MarkDown()
    if options.file:
        converter.convert_file(options.file, options.lang, options.timestamps)
        return
    if options.directory:
        converter.convert_directory(options.directory, options.lang, options.timestamps)


 # Run the command-line interface only when executed as a script.
 if __name__ == '__main__':
    main()
	#!/usr/bin/env bash
	# Convert the DokuWiki page held in the X clipboard to Markdown and place the
	# converted text back into the clipboard.
	#
	# Usage: convert.sh <namespace:page>
	set -euo pipefail

	# Fail with a usage hint instead of bash's bare "unbound variable" error.
	if [[ $# -ne 1 ]]; then
	    echo "Usage: $0 <namespace:page>" >&2
	    exit 1
	fi

	# Page ids use ":" between namespaces; map them onto directory separators.
	pagename="${1//:/\/}"
	source_file="poly/${pagename}.txt"
	converted_file="poly/${pagename}.md"

	mkdir -p "poly/$(dirname "${pagename}")"
	xclip -out -selection clipboard > "${source_file}"

	# doku2md.py declares a python3 shebang; a plain "python" may be missing or
	# may still point at Python 2.
	python3 doku2md.py -f "${source_file}"

	# The diff is informational only, so its exit status must not abort the
	# script. The operator has to be a real "||"; escaped pipes would be handed
	# to difft as literal arguments.
	difft "${source_file}" "${converted_file}" || true

	xclip -selection clipboard "${converted_file}"
	echo "Converted file is in clipboard, just Ctrl+V!"
	#!/usr/bin/env python3

	import argparse
	import os
	import re
	from functools import reduce


	class DokuWiki2MarkDown:
	    """Convert DokuWiki markup to Markdown, for single files or directory trees.

	    Every method is static; the class only serves as a namespace. Conversion is
	    a fixed sequence of regex-based transforms driven by
	    ``_dokuwiki_to_markdown``.
	    """

	    # Stand-in for a converted code block while the inline transforms run.
	    # NUL characters are not expected in wiki text and are not whitespace, so
	    # none of the transforms below can alter or swallow the token.
	    _CODEBLOCK_TOKEN = '\x00CODEBLOCK{}\x00'

	    @staticmethod
	    def convert_file(filepath, lang, ts):
	        """Convert one DokuWiki file and write the result next to it as ``.md``.

	        Args:
	            filepath: Path of the DokuWiki source file.
	            lang: Language label for code blocks that declare none (or None).
	            ts: When true, "Created ..." timestamp lines are kept.

	        A missing input file is reported on stdout and otherwise ignored.
	        """
	        try:
	            # Explicit UTF-8 so the result does not depend on the locale.
	            with open(filepath, 'r', encoding='utf-8') as f:
	                dokuwiki_text = f.read()
	        except FileNotFoundError:
	            print(f"Error: File {filepath} not found.")
	            return

	        markdown_text = DokuWiki2MarkDown._dokuwiki_to_markdown(dokuwiki_text, lang, ts)

	        new_filepath = os.path.splitext(filepath)[0] + '.md'
	        with open(new_filepath, 'w', encoding='utf-8') as f:
	            print(f"Saving {new_filepath}")
	            f.write(markdown_text)

	    @staticmethod
	    def convert_directory(directory, lang, ts):
	        """Convert every ``.txt`` file found below ``directory``.

	        Args:
	            directory: Root directory that is walked recursively.
	            lang: Language label for code blocks that declare none (or None).
	            ts: When true, "Created ..." timestamp lines are kept.
	        """
	        # os.walk() silently yields nothing for a missing path, so the problem
	        # has to be detected up front for the error message to ever appear.
	        if not os.path.isdir(directory):
	            print(f"Error: Directory {directory} not found.")
	            return
	        for root, _dirs, files in os.walk(directory):
	            for file in files:
	                if file.endswith('.txt'):
	                    DokuWiki2MarkDown.convert_file(os.path.join(root, file), lang, ts)

	    @staticmethod
	    def _dokuwiki_to_markdown(dokuwiki_text, codeblk_lang, timestamps):
	        """Run the full conversion pipeline on a DokuWiki document.

	        Args:
	            dokuwiki_text: The DokuWiki source text.
	            codeblk_lang: Language label for code blocks that declare none.
	            timestamps: When false, "Created ..." timestamp lines are removed.

	        Returns:
	            The Markdown text, prefixed with a ``<!DOCTYPE markdown>`` line.
	        """
	        # Remove timestamps if elected
	        if not timestamps:
	            dokuwiki_text = DokuWiki2MarkDown._rm_timestamp(dokuwiki_text)

	        # Code blocks are converted first and parked behind tokens so that the
	        # inline transforms cannot rewrite their content (e.g. "//" or "==").
	        dokuwiki_text, fences = DokuWiki2MarkDown._stash_codeblocks(dokuwiki_text, codeblk_lang)

	        # Transform the rest
	        # - bold and block quotes share the same syntax in DokuWiki and MarkDown
	        # - _tr_images and the _tr_links_* escape helpers are deliberately not
	        #   part of the pipeline
	        transforms = [
	            DokuWiki2MarkDown._tr_external_links,
	            DokuWiki2MarkDown._tr_headers,
	            DokuWiki2MarkDown._tr_italic,
	            DokuWiki2MarkDown._tr_underline,
	            DokuWiki2MarkDown._tr_monospaced,
	            DokuWiki2MarkDown._tr_strikethrough,
	            DokuWiki2MarkDown._tr_footnotes,
	            DokuWiki2MarkDown._tr_tables,
	            DokuWiki2MarkDown._tr_lists,
	            DokuWiki2MarkDown._tr_linebreaks,
	            DokuWiki2MarkDown._rm_single_space_at_line_end,
	            DokuWiki2MarkDown._rm_newlines,
	        ]
	        dokuwiki_text = reduce(lambda text, func: func(text), transforms, dokuwiki_text)
	        dokuwiki_text = DokuWiki2MarkDown._restore_codeblocks(dokuwiki_text, fences)
	        return "<!DOCTYPE markdown>\n\n" + dokuwiki_text

	    @staticmethod
	    def _rm_timestamp(text: str) -> str:
	        """Drop lines such as ``Created Monday 01 January 2024``."""
	        return re.sub(r' *Created \w+ \d{2} \w+ \d{4}\n', '', text)

	    @staticmethod
	    def _tr_italic(text: str) -> str:
	        """Convert ``//italic//`` to ``*italic*``, skipping the ``//`` of http(s) URLs."""
	        return re.sub(r'(?<!https:)(?<!http:)//(.*?)//', r'*\1*', text)

	    @staticmethod
	    def _tr_underline(text: str) -> str:
	        """Convert ``__underline__`` to bold, as Markdown has no underline."""
	        return re.sub(r'__(.*?)__', r'**\1**', text)

	    @staticmethod
	    def _tr_monospaced(text: str) -> str:
	        """Convert ``''monospaced''`` to a backtick code span."""
	        return re.sub(r'\'\'(.*?)\'\'', r'`\1`', text)

	    @staticmethod
	    def _tr_strikethrough(text: str) -> str:
	        """Convert ``<del>text</del>`` to ``~~text~~``."""
	        return re.sub(r'<del>(.*?)</del>', r'~~\1~~', text)

	    @staticmethod
	    def _tr_external_links(text: str) -> str:
	        """Convert ``[[http(s)://url|title]]`` to ``[title](url)``.

	        The title is matched lazily so that several links on one line stay
	        separate. The closing ``]]`` must not be followed by a third ``]``,
	        which keeps a title ending in ``]`` intact and also lets a link at the
	        very end of the text match. Links without a title are left untouched.
	        """
	        return re.sub(r'\[\[(https?://[^]|]+)\|(.+?)\]\](?!\])', r'[\2](\1)', text)

	    @staticmethod
	    def _tr_links_initial_escape(text: str) -> str:
	        """Convert ``[[url|title]]`` links and shield their ``/``, ``*`` and ``_``.

	        The shielded characters are swapped for marker strings so that the
	        italic, bold and underline transforms cannot mangle the link;
	        ``_tr_links_unescape`` reverses the substitution. Not part of the
	        default pipeline.
	        """
	        def shield(fragment):
	            return (fragment.replace('/', "##URL#ESCAPED#SLASH##")
	                    .replace('*', "##URL#ESCAPED#ASTERISK##")
	                    .replace('_', "##URL#ESCAPED#UNDERSCORE##"))

	        def replace_link(match):
	            url, _, title = match.groups()
	            if not title:
	                title = url
	            return f'[{shield(title)}]({shield(url)})'

	        return re.sub(r'\[\[(.*?)(\|(.*?))?\]\]', replace_link, text)

	    @staticmethod
	    def _tr_links_unescape(text: str) -> str:
	        """Undo the marker substitution made by ``_tr_links_initial_escape``."""
	        return (text.replace("##URL#ESCAPED#SLASH##", "/")
	                .replace("##URL#ESCAPED#ASTERISK##", "*")
	                .replace("##URL#ESCAPED#UNDERSCORE##", "_"))

	    @staticmethod
	    def _tr_headers(text: str) -> str:
	        """Convert ``====== H1 ======`` .. ``== H5 ==`` to ``#`` .. ``#####``.

	        A headline has to occupy a whole line; anchoring the pattern keeps
	        inline ``==`` (for example in ``a == b``) from being read as a header.
	        Each header is followed by a blank line.
	        """
	        # Longest markers first, so a six-"=" headline is not consumed by a
	        # pattern meant for a lower level.
	        for i in range(6, 1, -1):
	            marker = '=' * i
	            text = re.sub(rf'^[ \t]*{marker}[ \t]*(.+?)[ \t]*{marker}[ \t]*$\n?',
	                          rf"{'#' * (7 - i)} \1\n\n", text, flags=re.MULTILINE)
	        return text

	    @staticmethod
	    def _stash_codeblocks(text: str, lang):
	        """Convert ``<code>``/``<file>`` elements and park them behind tokens.

	        Args:
	            text: DokuWiki source text.
	            lang: Language label used for blocks that declare none (or None).

	        Returns:
	            A ``(text, fences)`` tuple: ``text`` holds one token per block and
	            ``fences`` the finished Markdown for each token, in order.
	        """
	        fences = []

	        def stash(match):
	            block_lang = match.group(1) or ''
	            title = (match.group(2) or '').strip()
	            if title == '-' or title.startswith('- '):
	                # "<code - name>": the dash explicitly disables highlighting
	                # and is not part of the file name.
	                title = title[1:].strip()
	            elif not block_lang and lang:
	                block_lang = lang
	            title_line = f'`{title}`\n' if title else ''
	            fences.append(f'{title_line}```{block_lang}\n{match.group(3)}\n```')
	            token = DokuWiki2MarkDown._CODEBLOCK_TOKEN.format(len(fences) - 1)
	            return '\n\n' + token + '\n'

	        text = re.sub(r'\n*<(?:code|file) ?([^> -]*)? ?([^>]*)?>\n{0,}(.*?)\n{0,}</(?:code|file)>',
	                      stash, text, flags=re.DOTALL)
	        return text, fences

	    @staticmethod
	    def _restore_codeblocks(text: str, fences) -> str:
	        """Replace every code-block token with the Markdown stored for it."""
	        # A function replacement keeps backslashes in the code from being
	        # interpreted as regex escapes.
	        return re.sub(r'\x00CODEBLOCK(\d+)\x00', lambda m: fences[int(m.group(1))], text)

	    @staticmethod
	    def _tr_codeblocks(text: str, lang) -> str:
	        """Convert ``<code lang name>`` / ``<file lang name>`` to fenced blocks.

	        A file name, when given, is emitted as a code span on the line above
	        the fence. ``lang`` labels blocks that do not declare a language.
	        """
	        converted, fences = DokuWiki2MarkDown._stash_codeblocks(text, lang)
	        return DokuWiki2MarkDown._restore_codeblocks(converted, fences)

	    @staticmethod
	    def _tr_images(text: str) -> str:
	        """Convert ``{{target|caption}}`` to ``![caption](target)``.

	        Not part of the default pipeline.
	        """
	        return re.sub(r'\{\{(.*?)(\|(.*?))?\}\}', r'![\3](\1)', text)

	    @staticmethod
	    def _tr_footnotes(text: str) -> str:
	        """Convert ``((note))`` to numbered Markdown footnotes.

	        Each footnote receives its own number in order of appearance. The
	        definitions are appended to the end of the text so that they do not
	        cut the surrounding sentence in two.
	        """
	        notes = []

	        def mark(match):
	            notes.append(match.group(1).strip())
	            return f'[^{len(notes)}]'

	        text = re.sub(r'\(\((.*?)\)\)', mark, text)
	        if notes:
	            definitions = '\n'.join(f'[^{number}]: {note}'
	                                    for number, note in enumerate(notes, 1))
	            text = text.rstrip('\n') + '\n\n' + definitions + '\n'
	        return text

	    @staticmethod
	    def _tr_linebreaks(text: str) -> str:
	        """Convert a trailing ``\\\\`` to a Markdown hard break (two spaces)."""
	        return re.sub(r' *\\{2} *\n', r'  \n', text)

	    @staticmethod
	    def _tr_lists(text: str) -> str:
	        """Convert DokuWiki list items (``*`` unordered, ``-`` ordered).

	        A list item is indented by at least two spaces (a tab counts as two);
	        every further two spaces add one nesting level. Ordered items are
	        numbered per nesting level, and numbering restarts whenever a non-list
	        line ends the list. Horizontal rules (four or more dashes) are left
	        alone.
	        """
	        lines = text.split('\n')
	        counters = {}  # nesting level -> last number used at that level
	        for i, line in enumerate(lines):
	            match = re.match(r'([ \t]+)([-*])(.*)', line)
	            width = len(match.group(1).replace('\t', '  ')) if match else 0
	            is_rule = re.fullmatch(r'-{4,}', line.strip()) is not None
	            if width < 2 or is_rule:
	                # Not a list item: any running list ends here.
	                counters.clear()
	                continue

	            _, bullet, rest = match.groups()
	            level = width // 2 - 1
	            # Returning to this level closes every deeper sub-list.
	            for deeper in [lvl for lvl in counters if lvl > level]:
	                del counters[deeper]
	            if bullet == '-':
	                counters[level] = counters.get(level, 0) + 1
	                bullet = f'{counters[level]}.'
	            else:
	                # An unordered item interrupts ordered numbering at this level.
	                counters.pop(level, None)
	            lines[i] = '  ' * level + bullet + rest
	        return '\n'.join(lines)

	    @staticmethod
	    def _tr_tables(input_dokuwiki):
	        """Convert DokuWiki tables to Markdown pipe tables.

	        Header cells (``^``) become ordinary cells, every colspan marker gets
	        an empty cell, rowspan markers (``:::``) are removed, and a separator
	        row is inserted after the first row of each table. Surrounding
	        whitespace of the whole text is stripped and one trailing newline is
	        appended.
	        """
	        output_markdown = []
	        added_separator = False
	        for line in input_dokuwiki.strip().split('\n'):
	            if not re.match(r'\s*[\^|]', line):
	                # Outside a table: the next table needs a separator of its own.
	                added_separator = False
	                output_markdown.append(line)
	                continue

	            # Strip first so that indented rows are treated like any other.
	            row = line.strip().replace('^', '|')
	            # A pipe directly followed by another pipe is a colspan; the
	            # lookahead also handles runs of three or more pipes.
	            row = re.sub(r'\|(?=\|)', '| ', row)
	            row = row.replace(':::', '')
	            output_markdown.append(row)

	            if not added_separator and re.match(r'\|.*\|', row):
	                num_columns = row.count('|') - 1
	                output_markdown.append('| ' + ' --- |' * num_columns)
	                added_separator = True

	        return '\n'.join(output_markdown) + '\n'

	    @staticmethod
	    def _rm_newlines(text: str) -> str:
	        """Collapse runs of blank lines into a single blank line."""
	        return re.sub(r'(\n\s*){2,}', r'\n\n', text)

	    @staticmethod
	    def _rm_single_space_at_line_end(text: str) -> str:
	        """Remove a lone trailing space; two spaces (a hard break) are kept."""
	        return re.sub(r'(?<! ) (?! )$', '', text, flags=re.MULTILINE)


	def main():
	    """Parse the command line and convert the requested file or directory."""
	    parser = argparse.ArgumentParser(description='Convert Dokuwiki to Markdown.')

	    # Exactly one input source has to be named: a single file or a directory.
	    group = parser.add_mutually_exclusive_group(required=True)
	    group.add_argument('-f', '--file', help='File to convert.')
	    group.add_argument('-d', '--directory', help='Directory of files to convert.')

	    parser.add_argument('-l', '--lang', help='Codeblocks will be labeled with this Language (eg. shell).')
	    parser.add_argument('-T', '--timestamps', dest='timestamps', action='store_true',
	                        help='Keep textual timestamps in documents. (Default is to remove timestamps)')

	    args = parser.parse_args()
	    dw2md = DokuWiki2MarkDown()
	    if args.file:
	        dw2md.convert_file(args.file, args.lang, args.timestamps)
	    elif args.directory:
	        dw2md.convert_directory(args.directory, args.lang, args.timestamps)


	# Run the command-line interface only when executed as a script.
	if __name__ == '__main__':
	    main()