Last active
June 27, 2022 18:53
-
-
Save ifiokjr/ef8722805e56086785ae8ab46ca0c261 to your computer and use it in GitHub Desktop.
Tiny markdown to html converter in TypeScript
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* @packageDocumentation | |
* | |
* Provides the main method used to convert markdown to html. | |
*/ | |
import { isString } from '@remirror/core'; | |
/** | |
* Converts the provided markdown to HTML. | |
*/ | |
export function markdownToHtml(markdown: string): string { | |
return parse(markdown); | |
} | |
interface Rule { | |
regex: RegExp; | |
replacer: StringReplacerFunction | string; | |
} | |
function parse(text: string): string { | |
// clean input | |
text = text | |
.replace(/[\b\v\f\r]/g, '') | |
.replace(/\\./g, (match) => `&#${match.charCodeAt(1)};`); | |
let temp = block(text); | |
if (temp === text && !temp.match(/^\s*$/i)) { | |
temp = inlineBlock(temp) | |
// handle paragraphs | |
.replace(/((.|\n)+?)(\n\n+|$)/g, (match, text) => tag('p', text)); | |
} | |
return temp.replace(/&#(\d+);/g, (_, code) => | |
String.fromCharCode(Number.parseInt(code, 10)), | |
); | |
} | |
/** | |
* Process the markdown with the rules provided. | |
*/ | |
function processMarkdown( | |
text: string, | |
rules: Rule[], | |
parse: ParseMatchFunction, | |
) { | |
for (const rule of rules) { | |
const { regex, replacer } = rule; | |
const content = regex.exec(text); | |
// No content found for the current rule therefore we can move to the next | |
// one. | |
if (!content) { | |
continue; | |
} | |
// Keep track of where the original content ended in relation to the text | |
// provided. | |
const endOfContentIndex = content.index + content[0].length; | |
const textBeforeHtmlReplacement = parse(text.slice(0, content.index)); | |
const textAfterHtmlReplacement = parse(text.slice(endOfContentIndex)); | |
// The replacement text that has been transformed to HTML. | |
let htmlReplacement: string; | |
if (isString(replacer)) { | |
// String `Replacer`s only support replacing the first digit - like `$1`. | |
htmlReplacement = replacer.replace( | |
/\$(\d)/g, | |
(_, firstDigit) => content[firstDigit], | |
); | |
} else { | |
// With function `Replacer`s the whole match and all content is provided | |
const [fullMatch, ...rest] = content; | |
htmlReplacement = replacer(fullMatch, ...rest); | |
} | |
return `${textBeforeHtmlReplacement}${htmlReplacement}${textAfterHtmlReplacement}`; | |
} | |
// No matches found in loop so we can return the text unchanged. | |
return text; | |
} | |
function inline(text: string): string { | |
return processMarkdown( | |
text, | |
[ | |
// - Bold => `**bold**` | |
// - Italic => `*italic*` | `_italic_` | |
// - Bold and Italic => `**_mixed_**` TODO this doesn't check for | |
// correctly matching tags. | |
{ | |
regex: /([*_]{1,3})((.|\n)+?)\1/g, | |
replacer: (_, tokens, content) => { | |
tokens = tokens.length; | |
content = inline(content); | |
if (tokens > 1) { | |
content = tag('strong', content); | |
} | |
if (tokens % 2) { | |
content = tag('em', content); | |
} | |
return content; | |
}, | |
}, | |
// - Underline => `~underline~` | |
// - Strikethrough => `~~strike-through~~` | |
// - Delete => `~~~delete~~` | |
{ | |
regex: /(~{1,3})((.|\n)+?)\1/g, | |
replacer: (_, tokens, content) => | |
tag(['u', 's', 'del'][tokens.length - 1], inline(content)), | |
}, | |
// - Replace remaining lines with a break tag => `<br />` | |
{ regex: / {2}\n|\n {2}/g, replacer: '<br />' }, | |
], | |
inline, | |
); | |
} | |
function inlineBlock(text = '', shouldInline = true) { | |
// A collection of all the tags created so far. | |
const gatheredTags: string[] = []; | |
function injectInlineBlock(text: string): string { | |
return text.replace(/\\(\d+)/g, (match, code) => | |
injectInlineBlock(gatheredTags[Number.parseInt(code, 10) - 1]), | |
); | |
} | |
text = text | |
.trim() | |
// inline code block | |
.replace( | |
/`([^`]*)`/g, | |
(_, text) => `\\${gatheredTags.push(tag('code', encodeHtml(text)))}`, | |
) | |
// inline media (a / img / iframe) | |
.replace( | |
/[!&]?\[([!&]?\[.*?\)|[^\]]*?)]\((.*?)( .*?)?\)|(\w+:\/\/[\w!$'()*+,./-]+)/g, | |
(match, text, href, title, link) => { | |
if (link) { | |
return shouldInline | |
? `\\${gatheredTags.push(tag('a', link, { href: link }))}` | |
: match; | |
} | |
if (match[0] === '&') { | |
text = text.match(/^(.+),(.+),([^ \]]+)( ?.+?)?$/); | |
return `\\${gatheredTags.push( | |
tag('iframe', '', { | |
width: text[1], | |
height: text[2], | |
frameborder: text[3], | |
class: text[4], | |
src: href, | |
title, | |
}), | |
)}`; | |
} | |
return `\\${gatheredTags.push( | |
match[0] === '!' | |
? tag('img', '', { src: href, alt: text, title }) | |
: tag('a', inlineBlock(text, false), { href, title }), | |
)}`; | |
}, | |
); | |
text = injectInlineBlock(shouldInline ? inline(text) : text); | |
return text; | |
} | |
function block(text: string) { | |
return processMarkdown( | |
text, | |
[ | |
// comments | |
{ regex: /<!--((.|\n)*?)-->/g, replacer: '<!--$1-->' }, | |
// pre format block | |
{ | |
regex: /^("""|```)(.*)\n((.*\n)*?)\1/gm, | |
replacer: (_, wrapper, classNames, text) => | |
wrapper === '"""' | |
? tag('div', parse(text), { class: classNames }) | |
: tag('pre', tag('code', encodeHtml(text), { class: classNames })), | |
}, | |
// blockquotes | |
{ | |
regex: /(^>.*\n?)+/gm, | |
replacer: chain('blockquote', /^> ?(.*)$/gm, '$1', inline), | |
}, | |
// tables | |
{ | |
regex: /((^|\n)\|.+)+/g, | |
replacer: chain('table', /^.*(\n\|---.*?)?$/gm, (match, subline) => | |
chain('tr', /\|(-?)([^|]+)\1(\|$)?/gm, (match, type, text) => | |
tag(type || subline ? 'th' : 'td', inlineBlock(text)), | |
)(match.slice(0, match.length - (subline || '').length)), | |
), | |
}, | |
// lists | |
{ regex: /(?:(^|\n)([+-]|\d+\.) +(.*(\n[\t ]+.*)*))+/g, replacer: list }, | |
//anchor | |
{ regex: /#\[([^\]]+?)]/g, replacer: '<a name="$1"></a>' }, | |
// headlines | |
{ | |
regex: /^(#+) +(.*)$/gm, | |
replacer: (_, headerSyntax, headerText) => | |
tag(`h${headerSyntax.length}`, inlineBlock(headerText)), | |
}, | |
// horizontal rule | |
{ regex: /^(===+|---+)(?=\s*$)/gm, replacer: '<hr>' }, | |
], | |
parse, | |
); | |
} | |
type ParseMatchFunction = (match: string) => string; | |
type StringReplacerFunction = (substring: string, ...args: any[]) => string; | |
/** | |
* Chain string replacement methods and output a function that returns the tag | |
* representation of the match. | |
*/ | |
function chain( | |
tagName: string, | |
regex: RegExp, | |
replacer: string | StringReplacerFunction, | |
parser?: ParseMatchFunction, | |
): ParseMatchFunction { | |
return (match: string) => { | |
match = match.replace(regex, replacer as string); | |
return tag(tagName, parser ? parser(match) : match); | |
}; | |
} | |
/** | |
* Handle lists in markdown. | |
*/ | |
function list(text: string) { | |
const wrapperTag = text.match(/^[+-]/m) ? 'ul' : 'ol'; | |
return text | |
? `<${wrapperTag}>${text.replace( | |
/(?:[+-]|\d+\.) +(.*)\n?(([\t ].*\n?)*)/g, | |
(_, listItemText, childList) => | |
`<li>${inlineBlock( | |
`${listItemText}\n${removeIndentation(childList || '').replace( | |
/(?:(^|\n)([+-]|\d+\.) +(.*(\n[\t ]+.*)*))+/g, | |
list, | |
)}`, | |
)}</li>`, | |
)}</${wrapperTag}>` | |
: ''; | |
} | |
/** | |
* Encode html tags within the markdown output. | |
*/ | |
function encodeHtml(text: string) { | |
return text | |
? text.replace(/"/g, '"').replace(/</g, '<').replace(/>/g, '>') | |
: ''; | |
} | |
function removeIndentation(text: string) { | |
return text.replace( | |
new RegExp(`^${(text.match(/^\S?\s+/) || '')[0]}`, 'gm'), | |
'', | |
); | |
} | |
/** | |
* Create a tag with the content provided. | |
*/ | |
function tag( | |
tag: string, | |
text: string, | |
attributes?: Record<string, string>, | |
): string { | |
return `<${ | |
tag + | |
(attributes | |
? ` ${Object.keys(attributes) | |
.map((k) => | |
attributes[k] ? `${k}="${encodeHtml(attributes[k]) || ''}"` : '', | |
) | |
.join(' ')}` | |
: '') | |
}>${text}</${tag}>`; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment