Last active
April 8, 2022 08:09
-
-
Save patarapolw/915ab9921dc0de11824f407483c0b258 to your computer and use it in GitHub Desktop.
Markdown pre-processing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** Base name (without extension) of the document that acts as a directory index. */
const MD_INDEX = 'README'

/**
 * Drops a trailing `/README<ext>` from a link target so that the link points
 * at the containing directory instead of at its index document.
 *
 * A `?query` and/or `#fragment` suffix is preserved and does not prevent the
 * index file name from being recognised
 * (`/docs/README.md#usage` becomes `/docs#usage`).
 *
 * @param path Link target to normalise.
 * @param ext File extension of the index document, including the leading dot.
 * @returns `path` without the index file name, or `path` unchanged when it
 * does not point at an index document.
 */
function mdRemoveIndex(path: string, ext = '.md'): string {
  // Only the part before "?"/"#" is a file path; keep the rest aside.
  const suffixStart = path.search(/[?#]/)
  const filePart = suffixStart < 0 ? path : path.substring(0, suffixStart)
  const suffix = suffixStart < 0 ? '' : path.substring(suffixStart)

  const indexFile = '/' + MD_INDEX + ext
  if (!filePart.endsWith(indexFile)) {
    return path
  }
  return filePart.substring(0, filePart.length - indexFile.length) + suffix
}
/**
 * Expands the custom Markdown extensions of this project into plain
 * Markdown/HTML. The stages run in this order:
 *
 * 1. Links `[txt](href)` (not images): relative targets are rooted at
 *    `rootURL` and a trailing index document is removed via `mdRemoveIndex`.
 * 2. BBCode-like tags: `[details=Summary]…[/details]` and `[quote]…[/quote]`;
 *    any other `[tag]…[/tag]` pair is reduced to its content.
 * 3. Furigana: `[base]{ruby}`, `<base>[ruby]` and `<base>{ruby}` become
 *    `<ruby>` markup.
 * 4. `@user` mentions become links to the WaniKani community profile.
 *
 * Nested fragments (link text, tag bodies, ruby parts) are processed
 * recursively with the same options.
 *
 * @param md Markdown source.
 * @param options.rootURL Prefix for relative link targets (no trailing slash).
 * @returns The pre-processed Markdown.
 */
function mdPreprocess(
  md: string,
  {
    rootURL
  }: {
    rootURL: string
  }
): string {
  /** Runs the whole pipeline again on a nested fragment. */
  const parseMd = (md: string) => mdPreprocess(md, { rootURL })

  /**
   * Builds `<ruby>` markup. A ruby text of `*` on a base that contains
   * Japanese characters puts a `●` over every Japanese character and leaves
   * the other characters of the base untouched.
   */
  const makeFurigana = (base: string, ruby: string): string => {
    // The capture group keeps the Japanese runs in the split() result, at the
    // odd indices; without it they would be discarded as separators.
    const reJaRun = /([\p{sc=Han}\p{sc=Katakana}\p{sc=Hiragana}]+)/u
    if (ruby === '*' && reJaRun.test(base)) {
      return base
        .split(reJaRun)
        .map((part, i) =>
          i % 2
            ? // Array.from iterates by code point, so astral Han characters
              // are not torn into surrogate halves.
              Array.from(
                part,
                (c) =>
                  `<ruby><rp> </rp>${c}<rp>[</rp><rt>●</rt><rp>]</rp></ruby>`
              ).join('')
            : part
        )
        .join('')
    }
    return `<ruby><rp> </rp>${parseMd(base)}<rp>[</rp><rt>${parseMd(
      ruby
    )}</rt><rp>]</rp></ruby>`
  }

  /** Replacer factory shared by the three furigana notations. */
  const rubyReplacer =
    (baseOn: string, baseOff: string, rubyOn: string, rubyOff: string) =>
    (raw: string) =>
      parseList(
        bracketMatcher(raw, [
          { name: 'base', on: baseOn, off: baseOff },
          { name: 'furigana', on: rubyOn, off: rubyOff }
        ]),
        'base',
        'furigana',
        makeFurigana
      )

  /** Renders one BBCode-like tag; unknown tags collapse to their content. */
  const renderTag = (tag: string, meta: string, content: string): string => {
    switch (tag) {
      case 'details': {
        let summary = ''
        if (meta[0] === '=' && meta.length > 1) {
          summary = `<summary>${parseMd(meta.substring(1))}</summary>`
        }
        return [
          '<details>',
          summary,
          '\n' +
            parseMd(content.replace(/^\n+/, '').replace(/\n+$/, '')) +
            '\n',
          '</details><br/>'
        ]
          .filter((s) => s)
          .join('\n')
      }
      case 'quote': {
        return parseMd(content.replace(/^\n+/, '').replace(/\n+$/, ''))
          .split('\n')
          .map((ln) => (ln ? '> ' + ln : '>'))
          .join('\n')
      }
    }
    return content
  }

  // Links. The first group is the character before "[" (or nothing at the
  // start of the input); it excludes images ("![") and is emitted unchanged.
  md = md.replace(
    /([^!]|^)\[(.+)\]\((.+)\)/g,
    (...[raw = '', f = '']: string[]) =>
      f +
      parseList(
        bracketMatcher(raw.substring(f.length), [
          { name: 'txt', on: '[', off: ']' },
          { name: 'href', on: '(', off: ')' }
        ]),
        'txt',
        'href',
        (txt, href) => {
          let url = href
          // Absolute URLs, scheme-only URIs (mailto:, tel:, ...) and
          // same-page anchors must not be rooted at rootURL.
          const hasScheme =
            url.includes('://') || /^[a-z][a-z0-9+.-]*:/i.test(url)
          if (!hasScheme && !url.startsWith('#')) {
            if (url.startsWith('./')) {
              url = url.substring(2)
            }
            if (url[0] !== '/') {
              url = rootURL + '/' + url
            }
            url = mdRemoveIndex(url)
          }
          return `[${parseMd(txt)}](${url})`
        }
      )
  )

  // Not working in Gitea, anyway.
  // md = md.replace(/<x-youtube id="([^"]+)" ?\/>/g, (m) => {
  //   return `<iframe width="560" height="315" src="https://www.youtube.com/embed/${m[1]}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>`
  // })

  /** BBCode-like */
  {
    const reTag = /\[([a-z]+)(.*)\][^]+\[\/\1\]/
    let m: RegExpExecArray | null
    let s = md
    const newMD: string[] = []
    while ((m = reTag.exec(s))) {
      // Everything before the opening tag is copied through untouched.
      newMD.push(s.substring(0, m.index))
      s = s.substring(m.index)

      const [, tag = '', rawMeta = ''] = m
      const openingTag = `[${tag}`
      const closingTag = `[/${tag}]`

      // The regex is greedy, so rawMeta may run past the real "]" of the
      // opening tag; the bracket matcher finds the balanced end instead.
      const opening = bracketMatcher(`[${tag}${rawMeta}]`, [
        { name: 'square', on: '[', off: ']' }
      ])[0]
      let meta = (opening ? opening.s : '').substring(openingTag.length)
      meta = meta.substring(0, meta.length - 1)

      const offset = openingTag.length + meta.length + 1
      let nextClose = s.indexOf(closingTag, offset)
      let nextOpen = s.indexOf(openingTag, offset)
      // Every nested opening tag before the current closing tag pushes the
      // matching closing tag one occurrence further.
      while (nextOpen >= 0 && nextOpen < nextClose) {
        const i = s.indexOf(closingTag, nextClose + 1)
        if (i < 0) {
          break
        }
        nextClose = i
        nextOpen = s.indexOf(openingTag, nextOpen + 1)
      }

      const content = s.substring(offset, nextClose)
      newMD.push(renderTag(tag, meta, content))
      s = s.substring(offset + content.length + closingTag.length)
    }
    if (newMD.length) {
      md = newMD.join('') + s
    }
  }

  /** Custom Markdown */
  {
    // furigana: [base]{ruby}
    md = md.replace(/\[(.+)\]\{(.+)\}/g, rubyReplacer('[', ']', '{', '}'))
    // IME2Furigana normal: <base>[ruby]
    md = md.replace(/<([^a-z-]+)>\[(.+)\]/g, rubyReplacer('<', '>', '[', ']'))
    // IME2Furigana spoiler: <base>{ruby}
    md = md.replace(/<([^a-z-]+)>\{(.+)\}/g, rubyReplacer('<', '>', '{', '}'))

    // user: every @name that is not already wrapped in [...] becomes a
    // profile link. Lookarounds keep the match limited to the mention itself,
    // so several mentions on one line are all converted.
    md = md.replace(
      /(?<!\[)@([a-z0-9]+)(?![a-z0-9\]])/gi,
      (_mention: string, name: string) =>
        `[@${name}](https://community.wanikani.com/u/${name})`
    )
  }

  return md
}
/** One bracket pair recognised by `bracketMatcher`. */
interface IBracket {
  /** Label used to identify the pair in the result. */
  name: string
  /** Opening character. */
  on: string
  /** Closing character. */
  off: string
}

/**
 * Splits `raw` into consecutive segments: balanced top-level bracket groups
 * (delimiters included) and the plain text between them.
 *
 * While a group is open, only brackets of that same type are counted for
 * nesting; characters belonging to other bracket types are plain content.
 *
 * @param raw Text to split.
 * @param bTypes Bracket pairs to recognise.
 * @returns The segments in input order. `b` is the bracket type of a group
 * and `undefined` for plain text. A group still open at the end of the input
 * is returned as the last segment with its bracket type.
 */
function bracketMatcher(raw: string, bTypes: IBracket[]) {
  const segments: {
    s: string
    b: IBracket | undefined
  }[] = []

  const onMap = new Map(bTypes.map((v) => [v.on, v] as const))
  const offMap = new Map(bTypes.map((v) => [v.off, v] as const))

  let s = ''
  // Bracket type of the group currently open; undefined outside of a group.
  let type: IBracket | undefined
  // Nesting depth of `type` brackets inside the current group.
  let depth = 0

  for (const c of raw) {
    const opener = onMap.get(c)
    if (opener) {
      if (!type && s) {
        // A new group starts: flush the plain text collected so far.
        segments.push({ s, b: undefined })
        s = ''
      }
      type = type ?? opener
      if (type.name === opener.name) {
        depth++
        s += c
        continue
      }
    }

    s += c

    const closer = offMap.get(c)
    if (closer && depth > 0 && type !== undefined && type.name === closer.name) {
      depth--
      if (depth === 0) {
        segments.push({ s, b: closer })
        s = ''
        type = undefined
      }
    }
  }

  if (s) {
    segments.push({ s, b: type })
  }
  return segments
}
/**
 * Re-assembles the segments produced by `bracketMatcher` into a string.
 *
 * Whenever a segment of bracket type `type1` is immediately followed by one
 * of type `type2`, the pair is replaced by `parser(inner1, inner2)`, where the
 * inner texts are the segments without their first and last character (the
 * delimiters). Every other segment is copied through unchanged.
 *
 * @param rt Segments in input order.
 * @param type1 Bracket name of the first segment of a pair.
 * @param type2 Bracket name of the second segment of a pair.
 * @param parser Produces the replacement for one pair.
 * @returns The concatenated result.
 */
function parseList(
  rt: {
    s: string
    b: IBracket | undefined
  }[],
  type1: string,
  type2: string,
  parser: (p1: string, p2: string) => string
) {
  const out: string[] = []
  for (let i = 0; i < rt.length; i++) {
    const head = rt[i]
    if (head === undefined) {
      continue
    }
    const tail = rt[i + 1]
    if (head.b?.name === type1 && tail?.b?.name === type2) {
      out.push(
        parser(
          head.s.substring(1, head.s.length - 1),
          tail.s.substring(1, tail.s.length - 1)
        )
      )
      // The second half of the pair has been consumed as well.
      i++
      continue
    }
    out.push(head.s)
  }
  return out.join('')
}
// Public entry point of this module.
export { mdPreprocess }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The real file is here, which might update faster - https://git.polv.cc/polv/jpdiary/src/branch/main/__packages__/next/markdown/_parser/preprocess.ts