Last active
April 25, 2025 11:00
-
-
Save kolay-v/f0b615a70c5fe6a7320e753cf53e6f3a to your computer and use it in GitHub Desktop.
Markdown to telegram entities
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import remarkMath from "npm:remark-math"; | |
import remarkParse from "npm:remark-parse"; | |
import { unified } from "npm:unified"; | |
import { type MessageEntity } from "npm:grammy/types"; | |
// @deno-types="npm:@types/mdast" | |
import type { RootContent } from "npm:mdast"; | |
/**
 * Converts a Markdown string into plain text plus a list of message entities
 * describing the formatting (bold, italic, code, pre, text_link).
 *
 * The Markdown is parsed with remark (with the math plugin enabled) and the
 * resulting tree is walked depth-first; text is appended to a single buffer and
 * every entity records its offset/length in that buffer (JavaScript string
 * lengths, i.e. UTF-16 code units).
 *
 * @param markdown Markdown source to convert.
 * @returns The flattened text and the entities that apply to it.
 */
export function mdV2(markdown: string): {
  text: string;
  entities: MessageEntity[];
} {
  const processor = unified().use(remarkParse).use(remarkMath);
  const ast = processor.parse(markdown);
  let text = "";
  // Nesting level of the list currently being rendered; -1 means "not in a list".
  let listDepth = -1;
  // Next number to print for the current ordered list, or null for a bullet list.
  let listOrder: number | null = null;
  const entities: MessageEntity[] = [];

  // Zero-length entities carry no formatting, so they are never recorded.
  function pushEntity(entity: MessageEntity): void {
    if (entity.length > 0) {
      entities.push(entity);
    }
  }

  function processNode(
    node: RootContent,
    formatters: ("bold" | "italic")[] = [],
  ): void {
    const walk = (
      children: readonly RootContent[],
      active: ("bold" | "italic")[] = formatters,
    ): void => {
      children.forEach((child) => processNode(child, active));
    };

    switch (node.type) {
      case "text": {
        // One entity per active formatter, all covering this text run.
        for (const formatter of formatters) {
          pushEntity({
            type: formatter,
            offset: text.length,
            length: node.value.length,
          });
        }
        text += node.value;
        return;
      }
      case "paragraph":
        walk(node.children);
        text += "\n";
        return;
      case "strong":
        walk(node.children, [...formatters, "bold"]);
        return;
      case "heading":
        // Headings are rendered as a bold line.
        walk(node.children, [...formatters, "bold"]);
        text += "\n";
        return;
      case "emphasis":
        walk(node.children, [...formatters, "italic"]);
        return;
      case "break":
        // Hard line break inside a paragraph.
        text += "\n";
        return;
      case "inlineCode": {
        const codeStart = text.length;
        text += node.value;
        pushEntity({
          type: "code",
          offset: codeStart,
          length: node.value.length,
        });
        return;
      }
      case "list": {
        // Remember the enclosing list's counter so a nested list neither
        // inherits it nor wipes it out when it finishes.
        const outerOrder = listOrder;
        listDepth++;
        listOrder = node.ordered ? (node.start ?? 1) : null;
        walk(node.children);
        listOrder = outerOrder;
        listDepth--;
        return;
      }
      case "listItem":
        // Indent by nesting depth; clamp so a stray item cannot make repeat() throw.
        text += " ".repeat(Math.max(listDepth, 0));
        if (listOrder != null) {
          text += `${listOrder}. `;
          listOrder++;
        } else {
          text += "* ";
        }
        walk(node.children);
        return;
      case "link": {
        const startOffset = text.length;
        walk(node.children);
        pushEntity({
          type: "text_link",
          offset: startOffset,
          length: text.length - startOffset,
          url: node.url,
        });
        return;
      }
      case "code": {
        const codeStart = text.length;
        text += node.value;
        pushEntity({
          type: "pre",
          language: node.lang ?? undefined,
          offset: codeStart,
          length: node.value.length,
        });
        // Block-level node: terminate it like paragraphs and headings so the
        // following block does not run straight on from the last code line.
        text += "\n";
        return;
      }
      case "math":
        // TODO: Process math
        return;
      case "inlineMath":
        // TODO: Process math
        return;
      default:
        // Unknown node: report it and still descend so nested text is not lost.
        console.warn("Unhandled node type", node.type);
        if ("children" in node) {
          walk(node.children);
        }
    }
  }

  ast.children.forEach((node) => processNode(node));
  return { text, entities };
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { type Token, type Tokens, lexer } from "npm:marked"; | |
/**
 * Escapes the characters that are significant in HTML text and attribute
 * values (`&`, `<`, `>`, `"`) by replacing them with named entities.
 *
 * @param text Raw text to embed in HTML.
 * @returns The text with the special characters replaced; other characters
 *   are returned unchanged.
 */
const escapeHTML = (text: string): string => {
  const entities: Record<string, string> = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
  };
  return text.replace(/[&<>"]/g, (c) => entities[c] ?? c);
};
/**
 * Serializes a list of marked tokens into a small HTML subset
 * (`<b>`, `<i>`, `<s>`, `<a>`, `<code>`, `<pre>`, `<blockquote>`); constructs
 * without a matching tag (lists, tables, rules, images) are rendered as text.
 * NOTE(review): presumably targets Telegram's HTML parse mode — confirm.
 *
 * All user-provided text and attribute values are passed through `escapeHTML`
 * so that Markdown content cannot inject or break tags.
 * NOTE(review): assumes the marked version in use leaves token `text`
 * unescaped (as the existing `text`/`code` handling already does) — confirm.
 *
 * @param tokens Tokens produced by marked's lexer (block or inline level).
 * @returns The concatenated HTML for all tokens.
 */
function lexerToHTML(tokens: Token[]): string {
  // Render nested inline tokens when present, otherwise fall back to the
  // escaped plain text of the token.
  const inlineHTML = (inner: Token[] | undefined, fallback: string): string =>
    inner ? lexerToHTML(inner) : escapeHTML(fallback ?? "");

  // Render one table row as "| cell | cell |".
  const rowHTML = (cells: Tokens.TableCell[]): string =>
    cells.reduce(
      (acc, current) => `${acc} ${lexerToHTML(current.tokens)} |`,
      `|`,
    );

  const tokenStrings: string[] = tokens.map((token) => {
    switch (token.type) {
      case "space":
        return escapeHTML(token.raw);
      case "code": {
        const body = token.escaped ? token.text : escapeHTML(token.text);
        // Only the first word of the info string names the language; omit the
        // class entirely when no language was given.
        const lang = String(token.lang ?? "").trim().split(/\s+/)[0] ?? "";
        return lang
          ? `<pre><code class="language-${escapeHTML(lang)}">${body}</code></pre>\n`
          : `<pre>${body}</pre>\n`;
      }
      case "blockquote":
        return `<blockquote>${lexerToHTML(token.tokens ?? [])}</blockquote>\n`;
      case "html":
        // Raw HTML is shown verbatim as a code block rather than interpreted.
        return `<pre><code class="language-html">${escapeHTML(
          token.text,
        )}</code></pre>\n`;
      case "heading":
        // Use the parsed inline tokens so inline markup is rendered and text
        // is escaped, instead of emitting the raw heading source.
        return `<b>${inlineHTML(token.tokens, token.text)}</b>\n`;
      case "hr":
        return "------------\n";
      case "list": {
        const items: Tokens.ListItem[] = token.items ?? [];
        const start: unknown = token.start;
        return items
          .map((item, idx) => {
            const bullet = typeof start === "number" ? `${start + idx}) ` : "- ";
            return `${bullet}${lexerToHTML(item.tokens)}\n`;
          })
          .join("");
      }
      case "checkbox":
        return token.checked ? "[x] " : "[ ] ";
      case "paragraph":
        return `${lexerToHTML(token.tokens ?? [])}\n`;
      case "table": {
        const header: Tokens.TableCell[] = token.header;
        const rows: Tokens.TableCell[][] = token.rows;
        const lines: string[] = [
          rowHTML(header),
          Array(header.length).fill("---").join(""),
          ...rows.map(rowHTML),
        ];
        return lines.map((line) => `${line}\n`).join("");
      }
      case "strong":
        return `<b>${lexerToHTML(token.tokens ?? [])}</b>`;
      case "em":
        return `<i>${lexerToHTML(token.tokens ?? [])}</i>`;
      case "codespan":
        return `<code>${escapeHTML(token.text)}</code>`;
      case "br":
        return escapeHTML(token.raw);
      case "del":
        return `<s>${inlineHTML(token.tokens, token.text)}</s>`;
      case "link":
        return `<a href="${escapeHTML(token.href)}">${inlineHTML(
          token.tokens,
          token.text,
        )}</a>`;
      case "image":
        return `${escapeHTML(token.href)} (${escapeHTML(token.text)})`;
      case "escape":
        // Backslash escape: emit the escaped character itself.
        return escapeHTML(token.text);
      case "text":
        return "tokens" in token && token.tokens
          ? lexerToHTML(token.tokens)
          : escapeHTML(token.text);
      default:
        break;
    }
    // Anything not handled above (including stray list_item tokens) is shown
    // in italics so the content is not silently dropped.
    console.warn("Unhandled token type", token.type);
    return `<i>${escapeHTML(token.raw)}</i>`;
  });
  return tokenStrings.join("");
}
/**
 * Renders Markdown to an HTML string by tokenizing it with marked's `lexer`
 * and serializing the resulting tokens with `lexerToHTML`.
 *
 * @param text Markdown source.
 * @returns The HTML produced by `lexerToHTML` for the lexed tokens.
 */
export function mdToHTML(text: string) {
  return lexerToHTML(lexer(text));
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment