Skip to content

Instantly share code, notes, and snippets.

@nymous
Last active January 17, 2026 18:54
Show Gist options
  • Select an option

  • Save nymous/603dd31461a3ed351f5b784e84402974 to your computer and use it in GitHub Desktop.

Select an option

Save nymous/603dd31461a3ed351f5b784e84402974 to your computer and use it in GitHub Desktop.
Convert Dokuwiki to Markdown syntax

"Fork" of https://github.com/mm503/Dokuwiki2Markdown, with the following changes:

  • disable the internal link conversion, keep the external link conversion (anything starting with http(s)://)
  • disable images conversion
  • enhance the code/file conversion to reuse the syntax and prepend the file path if defined
  • prepend <!DOCTYPE markdown>\n\n to the file for https://github.com/clockoon/dokuwiki-plugin-commonmark compatibility

To use with convert.sh (needs xclip and difft installed; porting to Wayland is left as an exercise to the reader).

  1. Create a poly folder next to the Python script
  2. Go to a page on your Dokuwiki
  3. Click "Edit this page", copy the content to the clipboard
  4. Run ./convert.sh <path:to:your:page:in:dokuwiki>
  5. The script will convert it and show you the diff so you can check the result and make sure the syntax is correct (please check!)
  6. Result is already in your clipboard, you can Ctrl+V in Dokuwiki editor
#!/usr/bin/env bash
# Convert the DokuWiki page currently held in the X clipboard to Markdown.
#
# Usage: ./convert.sh <path:to:your:page:in:dokuwiki>
#
# The clipboard content is saved under poly/<path/to/page>.txt, converted by
# doku2md.py to poly/<path/to/page>.md, diffed for review, and the converted
# text is finally placed back in the clipboard.
# Requires: xclip, difft, python3. Run it from the directory holding doku2md.py.
set -euo pipefail

# Fail with a usage message instead of the cryptic "unbound variable" error
# that `set -u` would otherwise produce when the page name is missing.
if [[ $# -ne 1 || -z "${1}" ]]; then
    echo "Usage: ${0} <path:to:your:page:in:dokuwiki>" >&2
    exit 2
fi

# DokuWiki namespaces are colon-separated; map them onto directories.
pagename="${1//:/\/}"
mkdir -p "poly/$(dirname "${pagename}")"
xclip -out -selection clipboard > "poly/${pagename}.txt"
# doku2md.py declares a python3 shebang; a bare `python` may be missing or Python 2.
python3 doku2md.py -f "poly/${pagename}.txt"
# difft may exit non-zero; the diff is informational only, so never abort on it.
difft "poly/${pagename}.txt" "poly/${pagename}.md" || true
xclip -selection clipboard "poly/${pagename}.md"
echo "Converted file is in clipboard, just Ctrl+V!"
#!/usr/bin/env python3
import argparse
import os
import re
from functools import reduce
class DokuWiki2MarkDown:
    """Convert DokuWiki markup to Markdown.

    All methods are static. ``convert_file`` and ``convert_directory`` are the
    entry points; ``_dokuwiki_to_markdown`` runs the actual pipeline:

    1. optionally drop "Created ..." timestamp lines,
    2. render ``<code>``/``<file>`` blocks as fenced code and set them aside
       so the later inline transforms cannot alter their content,
    3. apply the inline/structural transforms in a fixed order,
    4. put the fenced code back and prepend ``<!DOCTYPE markdown>``.

    Internal links and images are deliberately left untouched by the default
    pipeline (the corresponding helpers exist but are not wired in).
    """

    # Marker prepended to every converted document.
    _DOCTYPE = "<!DOCTYPE markdown>\n\n"
    # Markdown hard line break: two trailing spaces followed by a newline.
    _HARD_BREAK = " " * 2 + "\n"
    # Indentation emitted per list nesting level; four spaces nest correctly
    # under both "* " and "1. " parents in CommonMark.
    _LIST_INDENT = " " * 4
    # <code [lang] [path]>...</code> and <file [lang] [path]>...</file>.
    # The word boundary keeps unrelated tags such as <filename> from matching.
    _CODEBLOCK_RE = re.compile(
        r'\n*<(?:code|file)\b ?([^> -]*)? ?([^>]*)?>\n{0,}(.*?)\n{0,}</(?:code|file)>',
        flags=re.DOTALL,
    )
    # Stand-in for a rendered code block while the other transforms run.
    _PLACEHOLDER_RE = re.compile(r'\x00CODEBLOCK(\d+)\x00')

    @staticmethod
    def convert_file(filepath, lang, ts):
        """Convert one DokuWiki file and write the result next to it as ``.md``.

        Args:
            filepath: Path of the DokuWiki source file.
            lang: Default language label for code blocks that declare none
                (may be ``None``).
            ts: When true, keep textual "Created ..." timestamps.

        A missing input file is reported on stdout and otherwise ignored.
        """
        try:
            # DokuWiki stores pages as UTF-8; do not depend on the locale.
            with open(filepath, 'r', encoding='utf-8') as f:
                dokuwiki_text = f.read()
        except FileNotFoundError:
            print(f"Error: File {filepath} not found.")
            return

        markdown_text = DokuWiki2MarkDown._dokuwiki_to_markdown(dokuwiki_text, lang, ts)

        new_filepath = os.path.splitext(filepath)[0] + '.md'
        with open(new_filepath, 'w', encoding='utf-8') as f:
            print(f"Saving {new_filepath}")
            f.write(markdown_text)

    @staticmethod
    def convert_directory(directory, lang, ts):
        """Convert every ``.txt`` file found below ``directory``.

        Args:
            directory: Root directory to walk recursively.
            lang: Default language label for code blocks (may be ``None``).
            ts: When true, keep textual "Created ..." timestamps.
        """
        # os.walk() silently yields nothing for a missing path (its errors are
        # ignored unless an ``onerror`` callback is given), so check up front.
        if not os.path.isdir(directory):
            print(f"Error: Directory {directory} not found.")
            return

        for root, _dirs, files in os.walk(directory):
            for file in files:
                if file.endswith('.txt'):
                    DokuWiki2MarkDown.convert_file(os.path.join(root, file), lang, ts)

    @staticmethod
    def _dokuwiki_to_markdown(dokuwiki_text, codeblk_lang, timestamps):
        """Run the full conversion pipeline on ``dokuwiki_text``.

        Args:
            dokuwiki_text: DokuWiki source text.
            codeblk_lang: Fallback fence language for code blocks, or ``None``.
            timestamps: When false, "Created ..." lines are removed.

        Returns:
            The Markdown text, prefixed with ``<!DOCTYPE markdown>``.
        """
        text = dokuwiki_text
        if not timestamps:
            text = DokuWiki2MarkDown._rm_timestamp(text)

        # Render code blocks first and swap them for placeholders: code must
        # stay verbatim, but the transforms below would otherwise rewrite
        # "//", "__", leading "-" and blank lines inside it.
        stash = []

        def protect(match):
            stash.append(DokuWiki2MarkDown._format_codeblock(match, codeblk_lang))
            return f"\n\n\x00CODEBLOCK{len(stash) - 1}\x00\n"

        text = DokuWiki2MarkDown._CODEBLOCK_RE.sub(protect, text)

        # Bold and block quotes share the same syntax in DokuWiki and Markdown,
        # so they need no transform. Order matters: links are converted before
        # italics, and whitespace clean-up runs last.
        transforms = [
            DokuWiki2MarkDown._tr_external_links,
            DokuWiki2MarkDown._tr_headers,
            DokuWiki2MarkDown._tr_italic,
            DokuWiki2MarkDown._tr_underline,
            DokuWiki2MarkDown._tr_monospaced,
            DokuWiki2MarkDown._tr_strikethrough,
            DokuWiki2MarkDown._tr_footnotes,
            DokuWiki2MarkDown._tr_tables,
            DokuWiki2MarkDown._tr_lists,
            DokuWiki2MarkDown._tr_linebreaks,
            DokuWiki2MarkDown._rm_single_space_at_line_end,
            DokuWiki2MarkDown._rm_newlines,
        ]
        text = reduce(lambda current, func: func(current), transforms, text)

        # Put the untouched code blocks back.
        text = DokuWiki2MarkDown._PLACEHOLDER_RE.sub(
            lambda match: stash[int(match.group(1))], text)
        return DokuWiki2MarkDown._DOCTYPE + text

    @staticmethod
    def _rm_timestamp(text: str) -> str:
        """Remove lines of the form ``Created <word> <dd> <word> <yyyy>``."""
        return re.sub(r' *Created \w+ \d{2} \w+ \d{4}\n', '', text)

    @staticmethod
    def _tr_italic(text: str) -> str:
        """Convert ``//italic//`` to ``*italic*``, skipping ``http(s)://``."""
        return re.sub(r'(?<!https:)(?<!http:)//(.*?)//', r'*\1*', text)

    @staticmethod
    def _tr_underline(text: str) -> str:
        """Convert ``__underline__`` to bold (Markdown has no underline)."""
        return re.sub(r'__(.*?)__', r'**\1**', text)

    @staticmethod
    def _tr_monospaced(text: str) -> str:
        """Convert ``''monospaced''`` to inline code."""
        return re.sub(r'\'\'(.*?)\'\'', r'`\1`', text)

    @staticmethod
    def _tr_strikethrough(text: str) -> str:
        """Convert ``<del>text</del>`` to ``~~text~~``."""
        return re.sub(r'<del>(.*?)</del>', r'~~\1~~', text)

    @staticmethod
    def _tr_external_links(text: str) -> str:
        """Convert ``[[http(s)://url|title]]`` to ``[title](url)``.

        Links without a title and internal links are left unchanged.
        """
        # The URL stops at the first "|" and the title is matched lazily so
        # that several links on one line are converted one by one. The
        # negative lookahead lets a title end in "]" and also accepts a link
        # at the very end of the text.
        return re.sub(r'\[\[(https?://[^]|]+)\|(.+?)\]\](?!\])', r'[\2](\1)', text)

    @staticmethod
    def _tr_links_initial_escape(text: str) -> str:
        """Convert every ``[[target|title]]`` link, escaping markup characters.

        Not part of the default pipeline. Slashes, asterisks and underscores
        in the target and title are replaced by tokens so later transforms do
        not treat them as markup; ``_tr_links_unescape`` reverses this.
        """
        tokens = (
            ('/', "##URL#ESCAPED#SLASH##"),
            ('*', "##URL#ESCAPED#ASTERISK##"),
            ('_', "##URL#ESCAPED#UNDERSCORE##"),
        )

        def escape(value):
            for char, token in tokens:
                value = value.replace(char, token)
            return value

        def replace_link(match):
            url, _, title = match.groups()
            if not title:
                title = url
            return f'[{escape(title)}]({escape(url)})'

        return re.sub(r'\[\[(.*?)(\|(.*?))?\]\]', replace_link, text)

    @staticmethod
    def _tr_links_unescape(text: str) -> str:
        """Undo the token escaping done by ``_tr_links_initial_escape``."""
        text = text.replace("##URL#ESCAPED#SLASH##", "/")
        text = text.replace("##URL#ESCAPED#ASTERISK##", "*")
        text = text.replace("##URL#ESCAPED#UNDERSCORE##", "_")
        return text

    @staticmethod
    def _tr_headers(text: str) -> str:
        """Convert ``====== H ======`` .. ``== H ==`` to ``#`` .. ``#####``."""
        # Longest markers first, so a level-1 header is not consumed by the
        # pattern of a lower level.
        for i in range(6, 1, -1):
            text = re.sub(rf" *{'=' * i} *(.*?) *{'=' * i} *\s+", rf"{'#' * (7 - i)} \1\n\n", text)
        return text

    @staticmethod
    def _format_codeblock(match, lang) -> str:
        """Render one ``_CODEBLOCK_RE`` match as a fenced Markdown block.

        The fence language is the one given in the tag, else ``lang``, else
        empty. A file path given in the tag is emitted as an inline-code
        caption on the line above the fence.
        """
        tag_lang = match.group(1) or ''
        path = (match.group(2) or '').strip()
        # "<code - name>" uses "-" to mean "no highlighting"; because the
        # language group cannot contain "-", the dash ends up in the path.
        if path == '-' or path.startswith('- '):
            path = path[1:].strip()
        fence_lang = tag_lang or (lang or '')
        caption = f"`{path}`\n" if path else ""
        return f"{caption}```{fence_lang}\n{match.group(3)}\n```"

    @staticmethod
    def _tr_codeblocks(text: str, lang) -> str:
        """Convert ``<code>``/``<file>`` blocks to fenced code blocks.

        Args:
            text: DokuWiki text.
            lang: Fence language used when the tag does not declare one
                (``None`` for no label).
        """
        return DokuWiki2MarkDown._CODEBLOCK_RE.sub(
            lambda match: "\n\n" + DokuWiki2MarkDown._format_codeblock(match, lang) + "\n",
            text,
        )

    @staticmethod
    def _tr_images(text: str) -> str:
        """Convert ``{{src|alt}}`` to ``![alt](src)`` (not in the default pipeline)."""
        return re.sub(r'\{\{(.*?)(\|(.*?))?\}\}', r'![\3](\1)', text)

    @staticmethod
    def _tr_footnotes(text: str) -> str:
        """Convert ``((note))`` to a Markdown footnote reference and definition.

        Footnotes are numbered in order of appearance so each one gets its own
        identifier; the definition is emitted right after the reference.
        """
        count = 0

        def replace_footnote(match):
            nonlocal count
            count += 1
            return f"[^{count}]\n\n[^{count}]: {match.group(1)}"

        return re.sub(r'\(\((.*?)\)\)', replace_footnote, text)

    @staticmethod
    def _tr_linebreaks(text: str) -> str:
        """Convert a DokuWiki forced line break at end of line to a Markdown hard break."""
        # Two trailing spaces are required: a single one is not a hard break
        # and would be stripped by _rm_single_space_at_line_end anyway.
        return re.sub(r' *\\{2} *\n', DokuWiki2MarkDown._HARD_BREAK, text)

    @staticmethod
    def _tr_lists(text: str) -> str:
        """Convert DokuWiki list items (``*`` unordered, ``-`` ordered).

        Two leading spaces correspond to one nesting level. Ordered items are
        numbered per nesting level, and numbering restarts whenever a
        non-list line ends the list.
        """
        lines = text.split('\n')
        counters = {}  # nesting depth -> last number used at that depth
        for i, line in enumerate(lines):
            match = re.match(r'(\s*)([-*])(.*)', line)
            if not match or line.startswith("----"):
                # Any other line (including a horizontal rule) ends the list.
                counters.clear()
                continue

            spaces, bullet, rest = match.groups()
            depth = max(len(spaces) // 2 - 1, 0)
            # Leaving a nested list discards its numbering.
            for deeper in [level for level in counters if level > depth]:
                del counters[deeper]

            if bullet == '-':
                counters[depth] = counters.get(depth, 0) + 1
                marker = f"{counters[depth]}."
            else:
                # An unordered item interrupts ordered numbering at its level.
                counters.pop(depth, None)
                marker = '*'
            lines[i] = DokuWiki2MarkDown._LIST_INDENT * depth + marker + rest
        return '\n'.join(lines)

    @staticmethod
    def _tr_tables(input_dokuwiki):
        """Convert DokuWiki table rows to Markdown pipe tables.

        Header cells (``^``) become ordinary cells, ``||`` colspans become
        empty cells, ``:::`` rowspan markers are removed, and a ``---``
        separator row is inserted after the first complete row of each table.
        The text is stripped and returned with a single trailing newline.
        """
        converted = []
        in_table = False
        separator_added = False

        for line in input_dokuwiki.strip().split('\n'):
            if not re.match(r'\s*(\^|\|).*', line):
                in_table = False
                converted.append(line)
                continue

            if not in_table:
                # First row of a new table: it still needs its separator.
                in_table = True
                separator_added = False

            row = line.replace('^', '|').replace('||', '| |').replace(':::', '').strip()
            converted.append(row)

            # Test the stripped row so indented tables get a separator too.
            if not separator_added and re.match(r'\|.*\|', row):
                converted.append('|' + ' --- |' * (row.count('|') - 1))
                separator_added = True

        return '\n'.join(converted) + '\n'

    @staticmethod
    def _rm_newlines(text: str) -> str:
        """Collapse runs of two or more (possibly blank-padded) newlines to two."""
        return re.sub(r'(\n\s*){2,}', r'\n\n', text)

    @staticmethod
    def _rm_single_space_at_line_end(text: str) -> str:
        """Strip a lone trailing space per line, keeping two-space hard breaks."""
        return re.sub(r'(?<! ) (?! )$', '', text, flags=re.MULTILINE)
def main():
    """Command-line entry point: convert a single file or a directory tree.

    Exactly one of ``-f/--file`` or ``-d/--directory`` must be given.
    ``-l/--lang`` and ``-T/--timestamps`` are passed through to the converter.
    """
    cli = argparse.ArgumentParser(description='Convert Dokuwiki to Markdown.')

    # The input is either one file or one directory, never both.
    source = cli.add_mutually_exclusive_group(required=True)
    source.add_argument('-f', '--file', help='File to convert.')
    source.add_argument('-d', '--directory', help='Directory of files to convert.')

    cli.add_argument(
        '-l', '--lang',
        help='Codeblocks will be labeled with this Language (eg. shell).',
    )
    cli.add_argument(
        '-T', '--timestamps',
        dest='timestamps',
        action='store_true',
        help='Keep textual timestamps in documents. (Default is to remove timestamps)',
    )
    options = cli.parse_args()

    # The converter only exposes static methods, so call them on the class.
    if options.file:
        DokuWiki2MarkDown.convert_file(options.file, options.lang, options.timestamps)
    elif options.directory:
        DokuWiki2MarkDown.convert_directory(options.directory, options.lang, options.timestamps)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment