@kolay-v
Last active April 25, 2025 11:00
Markdown to Telegram entities
import remarkMath from "npm:remark-math";
import remarkParse from "npm:remark-parse";
import { unified } from "npm:unified";
import { type MessageEntity } from "npm:grammy/types";
// @deno-types="npm:@types/mdast"
import type { RootContent } from "npm:mdast";
export function mdV2(markdown: string) {
  const processor = unified().use(remarkParse).use(remarkMath);
  const ast = processor.parse(markdown);

  let text = "";
  let listDepth = -1;
  let listOrder: number | null = null;
  const entities: MessageEntity[] = [];

  // Walks the mdast tree, appending plain text to `text` and recording
  // Telegram message entities as it goes. Offsets/lengths are taken from
  // JS string lengths (UTF-16 code units), which is what the Bot API expects.
  function processNode(
    node: RootContent,
    formatters: ("bold" | "italic")[] = [],
  ) {
    if (node.type === "text") {
      // One entity per inherited formatter covering this text run.
      for (const formatter of formatters) {
        entities.push({
          type: formatter,
          offset: text.length,
          length: node.value.length,
        });
      }
      text += node.value;
      return;
    }
    if (node.type === "paragraph") {
      node.children.forEach((child) => processNode(child, formatters));
      text += "\n";
      return;
    }
    if (node.type === "strong") {
      node.children.forEach((child) =>
        processNode(child, [...formatters, "bold"]),
      );
      return;
    }
    if (node.type === "heading") {
      node.children.forEach((child) =>
        processNode(child, [...formatters, "bold"]),
      );
      text += "\n";
      return;
    }
    if (node.type === "emphasis") {
      node.children.forEach((child) =>
        processNode(child, [...formatters, "italic"]),
      );
      return;
    }
    if (node.type === "inlineCode") {
      const codeStart = text.length;
      text += node.value;
      entities.push({
        type: "code",
        offset: codeStart,
        length: node.value.length,
      });
      return;
    }
    if (node.type === "list") {
      listDepth++;
      if (node.ordered) {
        listOrder = node.start ?? null;
      }
      node.children.forEach((child) => processNode(child, formatters));
      if (node.ordered) {
        listOrder = null;
      }
      listDepth--;
      return;
    }
    if (node.type === "listItem") {
      text += " ".repeat(listDepth);
      if (listOrder != null) {
        text += `${listOrder}. `;
        listOrder++;
      } else {
        text += "* ";
      }
      node.children.forEach((child) => processNode(child, formatters));
      return;
    }
    if (node.type === "link") {
      const startOffset = text.length;
      node.children.forEach((child) => processNode(child, formatters));
      entities.push({
        type: "text_link",
        offset: startOffset,
        length: text.length - startOffset,
        url: node.url,
      });
      return;
    }
    if (node.type === "code") {
      const codeStart = text.length;
      text += node.value;
      entities.push({
        type: "pre",
        language: node.lang ?? undefined,
        offset: codeStart,
        length: node.value.length,
      });
      return;
    }
    if (node.type === "math") {
      // TODO: Process math
      return;
    }
    if (node.type === "inlineMath") {
      // TODO: Process math
      return;
    }
    // Unknown node type: log it and fall back to processing its children.
    console.log(node);
    if ("children" in node) {
      node.children.forEach((child) => processNode(child, formatters));
      return;
    }
  }

  ast.children.forEach((node) => processNode(node));
  return { text, entities };
}
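
Usage sketch (not part of the gist): the { text, entities } pair returned by mdV2 can be passed straight to grammY's reply options, since entities is a standard sendMessage parameter. The bot wiring, the import path, and the sample Markdown below are illustrative assumptions.

import { Bot } from "npm:grammy";
// Hypothetical path; point this at wherever mdV2 is saved.
import { mdV2 } from "./markdown_to_entities.ts";

const bot = new Bot(Deno.env.get("BOT_TOKEN")!);
bot.command("demo", async (ctx) => {
  const { text, entities } = mdV2(
    "**Bold**, _italic_, `code`, and a [link](https://example.com).",
  );
  // Send the plain text together with the formatting entities.
  await ctx.reply(text, { entities });
});
bot.start();
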
import { type Token, type Tokens, lexer } from "npm:marked";
const escapeHTML = (text: string): string => {
  const escapeChar = (c: string): string => {
    switch (c) {
      case "&":
        return "&amp;";
      case '"':
        return "&quot;";
      case "<":
        return "&lt;";
      default:
        return c;
    }
  };
  return text.split("").map(escapeChar).join("");
};
// Renders a marked token stream as HTML restricted to the tags Telegram's
// HTML parse mode accepts (<b>, <i>, <s>, <code>, <pre>, <blockquote>, <a>),
// falling back to plain text for everything else.
function lexerToHTML(tokens: Token[]): string {
  const tokenStrings: string[] = tokens.map((token) => {
    if (token.type === "space") {
      return escapeHTML(token.raw);
    } else if (token.type === "code") {
      return `<pre><code class="language-${token.lang}">${
        token.escaped ? token.text : escapeHTML(token.text)
      }</code></pre>\n`;
    } else if (token.type === "blockquote") {
      return `<blockquote>${lexerToHTML(token.tokens!)}</blockquote>\n`;
    } else if (token.type === "html") {
      // Raw HTML is not passed through; it is shown as an escaped code block.
      return `<pre><code class="language-html">${escapeHTML(
        token.text,
      )}</code></pre>\n`;
    } else if (token.type === "heading") {
      return `<b>${token.text}</b>\n`;
    } else if (token.type === "hr") {
      return "------------\n";
    } else if (token.type === "list") {
      const items: Tokens.ListItem[] = token.items;
      return items
        .map((item, idx) => {
          const bullet = token.start === "" ? "- " : `${token.start + idx}) `;
          return `${bullet}${lexerToHTML(item.tokens)}\n`;
        })
        .reduce((acc, current) => `${acc}${current}`);
    } else if (token.type === "list_item") {
      // ?
    } else if (token.type === "checkbox") {
      return token.checked ? "[x] " : "[ ] ";
    } else if (token.type === "paragraph") {
      return `${lexerToHTML(token.tokens!)}\n`;
    } else if (token.type === "table") {
      // Tables are flattened into pipe-separated text rows.
      const res: string[] = [];
      const header: Tokens.TableCell[] = token.header;
      const rows: Tokens.TableCell[][] = token.rows;
      res.push(
        header.reduce(
          (acc, current) => `${acc} ${lexerToHTML(current.tokens)} |`,
          `|`,
        ),
      );
      res.push(Array(header.length).fill("---").join(""));
      rows.forEach((row) => {
        res.push(
          row.reduce(
            (acc, current) => `${acc} ${lexerToHTML(current.tokens)} |`,
            `|`,
          ),
        );
      });
      return res.reduce((acc, current) => `${acc}${current}\n`, ``);
    } else if (token.type === "strong") {
      return `<b>${lexerToHTML(token.tokens!)}</b>`;
    } else if (token.type === "em") {
      return `<i>${lexerToHTML(token.tokens!)}</i>`;
    } else if (token.type === "codespan") {
      return `<code>${token.text}</code>`;
    } else if (token.type === "br") {
      return escapeHTML(token.raw);
    } else if (token.type === "del") {
      return `<s>${token.text}</s>`;
    } else if (token.type === "link") {
      return `<a href="${token.href}">${token.text}</a>`;
    } else if (token.type === "image") {
      return `${token.href} (${token.text})`;
    } else if (token.type === "text") {
      return "tokens" in token && token.tokens
        ? lexerToHTML(token.tokens)
        : escapeHTML(token.text);
    }
    // Anything unrecognised is logged and rendered as italicised raw text.
    console.warn("Unhandled token type", token.type);
    return `<i>${escapeHTML(token.raw)}</i>`;
  });
  return tokenStrings.join("");
}
export function mdToHTML(text: string) {
  const lexerRes = lexer(text);
  return lexerToHTML(lexerRes);
}
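
Usage sketch (not part of the gist): mdToHTML produces markup for Telegram's HTML parse mode, so the result is sent with parse_mode: "HTML". The bot wiring and the import path are illustrative assumptions.

import { Bot } from "npm:grammy";
// Hypothetical path; point this at wherever mdToHTML is saved.
import { mdToHTML } from "./markdown_to_html.ts";

const bot = new Bot(Deno.env.get("BOT_TOKEN")!);
bot.command("html", async (ctx) => {
  const html = mdToHTML(
    "# Title\n\nSome *emphasis*, `code`, and a [link](https://example.com).",
  );
  await ctx.reply(html, { parse_mode: "HTML" });
});
bot.start();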