kolay-v · April 25, 2025 11:00
diff --git a/1markdown.ts b/1markdown.ts
 import remarkMath from "npm:remark-math";
 import remarkParse from "npm:remark-parse";
 import { unified } from "npm:unified";
 import { type MessageEntity } from "npm:grammy/types";
 // @deno-types="npm:@types/mdast"
 import type { RootContent } from "npm:mdast";

 /**
  * Converts Markdown into plain text plus formatting entities.
  *
  * The input is parsed with remark (including remark-math) and the resulting
  * tree is flattened: visible characters are appended to `text`, while
  * formatting is recorded as `MessageEntity` ranges whose offsets and lengths
  * are measured with `String.prototype.length` on the accumulated text.
  *
  * Math nodes are currently dropped (see the TODOs below); any other node type
  * without a dedicated branch is logged and, if it has children, traversed.
  *
  * @param markdown Markdown source to convert.
  * @returns The flattened text and the entities describing its formatting.
  */
 export function mdV2(markdown: string) {
 	const processor = unified().use(remarkParse).use(remarkMath);

 	const ast = processor.parse(markdown);
 	let text = "";
 	// Nesting level of the list currently being rendered; -1 means "not in a list".
 	let listDepth = -1;
 	// Next number to print for the current ordered list, or null for bullet lists.
 	let listOrder: number | null = null;
 	const entities: MessageEntity[] = [];

 	/**
 	 * Appends one node (and its descendants) to `text` / `entities`.
 	 *
 	 * @param node Node to render.
 	 * @param formatters Inline styles inherited from ancestor nodes; each text
 	 * node emits one entity per inherited style.
 	 */
 	function processNode(
 		node: RootContent,
 		formatters: ("bold" | "italic")[] = [],
 	) {
 		if (node.type === "text") {
 			for (const formatter of formatters) {
 				entities.push({
 					type: formatter,
 					offset: text.length,
 					length: node.value.length,
 				});
 			}
 			text += node.value;
 			return;
 		}
 		if (node.type === "paragraph") {
 			node.children.forEach((child) => processNode(child, formatters));
 			text += "\n";
 			return;
 		}
 		if (node.type === "strong") {
 			node.children.forEach((child) =>
 				processNode(child, [...formatters, "bold"]),
 			);
 			return;
 		}
 		if (node.type === "heading") {
 			// Headings are rendered as bold lines.
 			node.children.forEach((child) =>
 				processNode(child, [...formatters, "bold"]),
 			);
 			text += "\n";
 			return;
 		}
 		if (node.type === "emphasis") {
 			node.children.forEach((child) =>
 				processNode(child, [...formatters, "italic"]),
 			);
 			return;
 		}
 		if (node.type === "inlineCode") {
 			const codeStart = text.length;
 			text += node.value;
 			entities.push({
 				type: "code",
 				offset: codeStart,
 				length: node.value.length,
 			});
 			return;
 		}
 		if (node.type === "list") {
 			// Save the enclosing list's counter so a nested list neither inherits
 			// its numbering nor wipes it out when the nested list ends.
 			const outerOrder = listOrder;
 			listDepth++;
 			listOrder = node.ordered ? (node.start ?? 1) : null;
 			node.children.forEach((child) => processNode(child, formatters));
 			listDepth--;
 			listOrder = outerOrder;
 			return;
 		}
 		if (node.type === "listItem") {
 			// Clamp at 0 so a stray list item outside a list cannot make
 			// `repeat` throw on a negative count.
 			text += "  ".repeat(Math.max(listDepth, 0));
 			if (listOrder != null) {
 				text += `${listOrder}. `;
 				listOrder++;
 			} else {
 				text += "* ";
 			}
 			node.children.forEach((child) => processNode(child, formatters));
 			return;
 		}
 		if (node.type === "link") {
 			const startOffset = text.length;
 			node.children.forEach((child) => processNode(child, formatters));
 			// The link entity spans everything its children appended.
 			entities.push({
 				type: "text_link",
 				offset: startOffset,
 				length: text.length - startOffset,
 				url: node.url,
 			});
 			return;
 		}
 		if (node.type === "code") {
 			const codeStart = text.length;
 			text += node.value;
 			entities.push({
 				type: "pre",
 				language: node.lang ?? undefined,
 				offset: codeStart,
 				length: node.value.length,
 			});
 			// Terminate the block like paragraphs and headings do; otherwise the
 			// following node's text is glued directly onto the code.
 			text += "\n";
 			return;
 		}
 		if (node.type === "math") {
 			// TODO: Process math
 			return;
 		}
 		if (node.type === "inlineMath") {
 			// TODO: Process math
 			return;
 		}
 		// No dedicated branch for this node type: log it and fall back to
 		// rendering its children, if any.
 		console.log(node);
 		if ("children" in node) {
 			node.children.forEach((child) => processNode(child, formatters));
 			return;
 		}
 	}
 	ast.children.forEach((node) => processNode(node));
 	return { text, entities };
 }
diff --git a/2markdown-marked.ts b/2markdown-marked.ts
 import { type Token, type Tokens, lexer } from "npm:marked";

 /**
  * Escapes the characters that are significant in HTML markup.
  *
  * Replaces `&`, `"`, `<` and `>` with their entity equivalents so the result
  * can be placed inside element content or a double-quoted attribute value.
  *
  * @param text Raw text to escape.
  * @returns The text with `&`, `"`, `<` and `>` replaced by HTML entities.
  */
 const escapeHTML = (text: string): string => {
 	const replacements: Record<string, string> = {
 		"&": "&amp;",
 		'"': "&quot;",
 		"<": "&lt;",
 		">": "&gt;",
 	};
 	// A single pass over the string; every match is a key of `replacements`.
 	return text.replace(/[&"<>]/g, (c) => replacements[c] ?? c);
 };

 /**
  * Renders a list of marked lexer tokens as an HTML string.
  *
  * Block and inline tokens are mapped to a small set of tags (`<b>`, `<i>`,
  * `<s>`, `<code>`, `<pre>`, `<blockquote>`, `<a>`); lists, tables, rules and
  * images are rendered as plain text. Literal text and attribute values are
  * passed through `escapeHTML`; tokens that carry child tokens are rendered
  * recursively so nested inline formatting is preserved.
  *
  * Token types without a dedicated branch are reported with `console.warn` and
  * emitted as their escaped raw source wrapped in `<i>`.
  *
  * @param tokens Tokens produced by marked's `lexer`.
  * @returns The concatenated HTML for all tokens.
  */
 function lexerToHTML(tokens: Token[]): string {
 	const tokenStrings: string[] = tokens.map((token) => {
 		if (token.type === "space") {
 			return escapeHTML(token.raw);
 		} else if (token.type === "code") {
 			const body = token.escaped ? token.text : escapeHTML(token.text);
 			// Without a language, emit a bare <pre> instead of "language-undefined".
 			return token.lang
 				? `<pre><code class="language-${escapeHTML(token.lang)}">${body}</code></pre>\n`
 				: `<pre>${body}</pre>\n`;
 		} else if (token.type === "blockquote") {
 			return `<blockquote>${lexerToHTML(token.tokens ?? [])}</blockquote>\n`;
 		} else if (token.type === "html") {
 			// Raw HTML in the source is shown as code rather than passed through.
 			return `<pre><code class="language-html">${escapeHTML(
 				token.text,
 			)}</code></pre>\n`;
 		} else if (token.type === "heading") {
 			// Render the inline children so the heading text is escaped and keeps
 			// its nested formatting, instead of interpolating the raw source.
 			return `<b>${lexerToHTML(token.tokens ?? [])}</b>\n`;
 		} else if (token.type === "hr") {
 			return "------------\n";
 		} else if (token.type === "list") {
 			const items: Tokens.ListItem[] = token.items;
 			return items
 				.map((item, idx) => {
 					// An empty-string `start` selects a "- " bullet; otherwise items are
 					// numbered from `start`.
 					const bullet = token.start === "" ? "- " : `${token.start + idx}) `;
 					return `${bullet}${lexerToHTML(item.tokens)}\n`;
 				})
 				.join("");
 		} else if (token.type === "list_item") {
 			// Not handled on its own (items are rendered by the "list" branch);
 			// falls through to the unhandled-token fallback below.
 		} else if (token.type === "checkbox") {
 			return token.checked ? "[x] " : "[ ] ";
 		} else if (token.type === "paragraph") {
 			return `${lexerToHTML(token.tokens ?? [])}\n`;
 		} else if (token.type === "table") {
 			const res: string[] = [];
 			const header: Tokens.TableCell[] = token.header;
 			const rows: Tokens.TableCell[][] = token.rows;
 			res.push(
 				header.reduce(
 					(acc, current) => `${acc} ${lexerToHTML(current.tokens)} |`,
 					`|`,
 				),
 			);
 			res.push(Array(header.length).fill("---").join(""));
 			rows.forEach((row) => {
 				res.push(
 					row.reduce(
 						(acc, current) => `${acc} ${lexerToHTML(current.tokens)} |`,
 						`|`,
 					),
 				);
 			});
 			return res.reduce((acc, current) => `${acc}${current}\n`, ``);
 		} else if (token.type === "strong") {
 			return `<b>${lexerToHTML(token.tokens ?? [])}</b>`;
 		} else if (token.type === "em") {
 			return `<i>${lexerToHTML(token.tokens ?? [])}</i>`;
 		} else if (token.type === "codespan") {
 			// NOTE(review): assumes the lexer leaves codespan text unescaped, as
 			// the "text" branch below already does — confirm for the pinned marked version.
 			return `<code>${escapeHTML(token.text)}</code>`;
 		} else if (token.type === "br") {
 			return escapeHTML(token.raw);
 		} else if (token.type === "del") {
 			return `<s>${lexerToHTML(token.tokens ?? [])}</s>`;
 		} else if (token.type === "link") {
 			// Escape the URL so a quote in it cannot terminate the attribute.
 			return `<a href="${escapeHTML(token.href)}">${lexerToHTML(
 				token.tokens ?? [],
 			)}</a>`;
 		} else if (token.type === "image") {
 			return `${escapeHTML(token.href)} (${escapeHTML(token.text)})`;
 		} else if (token.type === "escape") {
 			// Backslash escape (e.g. `\*`): emit the escaped character itself.
 			return escapeHTML(token.text);
 		} else if (token.type === "text") {
 			return "tokens" in token && token.tokens
 				? lexerToHTML(token.tokens)
 				: escapeHTML(token.text);
 		}

 		console.warn("Unhandled token type", token.type);
 		return `<i>${escapeHTML(token.raw)}</i>`;
 	});

 	return tokenStrings.join("");
 }

 /**
  * Converts Markdown source to an HTML string.
  *
  * The text is tokenised with marked's `lexer` and the resulting tokens are
  * rendered by `lexerToHTML`.
  *
  * @param text Markdown source.
  * @returns The rendered HTML.
  */
 export function mdToHTML(text: string) {
 	return lexerToHTML(lexer(text));
 }
	import remarkMath from "npm:remark-math";
	import remarkParse from "npm:remark-parse";
	import { unified } from "npm:unified";
	import { type MessageEntity } from "npm:grammy/types";
	// @deno-types="npm:@types/mdast"
	import type { RootContent } from "npm:mdast";

	/**
	 * Converts Markdown into plain text plus formatting entities.
	 *
	 * The input is parsed with remark (including remark-math) and the resulting
	 * tree is flattened: visible characters are appended to `text`, while
	 * formatting is recorded as `MessageEntity` ranges whose offsets and lengths
	 * are measured with `String.prototype.length` on the accumulated text.
	 *
	 * Math nodes are currently dropped (see the TODOs below); any other node type
	 * without a dedicated branch is logged and, if it has children, traversed.
	 *
	 * @param markdown Markdown source to convert.
	 * @returns The flattened text and the entities describing its formatting.
	 */
	export function mdV2(markdown: string) {
		const processor = unified().use(remarkParse).use(remarkMath);

		const ast = processor.parse(markdown);
		let text = "";
		// Nesting level of the list currently being rendered; -1 means "not in a list".
		let listDepth = -1;
		// Next number to print for the current ordered list, or null for bullet lists.
		let listOrder: number | null = null;
		const entities: MessageEntity[] = [];

		/**
		 * Appends one node (and its descendants) to `text` / `entities`.
		 *
		 * @param node Node to render.
		 * @param formatters Inline styles inherited from ancestor nodes; each text
		 * node emits one entity per inherited style.
		 */
		function processNode(
			node: RootContent,
			formatters: ("bold" | "italic")[] = [],
		) {
			if (node.type === "text") {
				for (const formatter of formatters) {
					entities.push({
						type: formatter,
						offset: text.length,
						length: node.value.length,
					});
				}
				text += node.value;
				return;
			}
			if (node.type === "paragraph") {
				node.children.forEach((child) => processNode(child, formatters));
				text += "\n";
				return;
			}
			if (node.type === "strong") {
				node.children.forEach((child) =>
					processNode(child, [...formatters, "bold"]),
				);
				return;
			}
			if (node.type === "heading") {
				// Headings are rendered as bold lines.
				node.children.forEach((child) =>
					processNode(child, [...formatters, "bold"]),
				);
				text += "\n";
				return;
			}
			if (node.type === "emphasis") {
				node.children.forEach((child) =>
					processNode(child, [...formatters, "italic"]),
				);
				return;
			}
			if (node.type === "inlineCode") {
				const codeStart = text.length;
				text += node.value;
				entities.push({
					type: "code",
					offset: codeStart,
					length: node.value.length,
				});
				return;
			}
			if (node.type === "list") {
				// Save the enclosing list's counter so a nested list neither inherits
				// its numbering nor wipes it out when the nested list ends.
				const outerOrder = listOrder;
				listDepth++;
				listOrder = node.ordered ? (node.start ?? 1) : null;
				node.children.forEach((child) => processNode(child, formatters));
				listDepth--;
				listOrder = outerOrder;
				return;
			}
			if (node.type === "listItem") {
				// Two spaces per nesting level; clamp at 0 so a stray list item
				// outside a list cannot make `repeat` throw on a negative count.
				text += "  ".repeat(Math.max(listDepth, 0));
				if (listOrder != null) {
					text += `${listOrder}. `;
					listOrder++;
				} else {
					text += "* ";
				}
				node.children.forEach((child) => processNode(child, formatters));
				return;
			}
			if (node.type === "link") {
				const startOffset = text.length;
				node.children.forEach((child) => processNode(child, formatters));
				// The link entity spans everything its children appended.
				entities.push({
					type: "text_link",
					offset: startOffset,
					length: text.length - startOffset,
					url: node.url,
				});
				return;
			}
			if (node.type === "code") {
				const codeStart = text.length;
				text += node.value;
				entities.push({
					type: "pre",
					language: node.lang ?? undefined,
					offset: codeStart,
					length: node.value.length,
				});
				// Terminate the block like paragraphs and headings do; otherwise the
				// following node's text is glued directly onto the code.
				text += "\n";
				return;
			}
			if (node.type === "math") {
				// TODO: Process math
				return;
			}
			if (node.type === "inlineMath") {
				// TODO: Process math
				return;
			}
			// No dedicated branch for this node type: log it and fall back to
			// rendering its children, if any.
			console.log(node);
			if ("children" in node) {
				node.children.forEach((child) => processNode(child, formatters));
				return;
			}
		}
		ast.children.forEach((node) => processNode(node));
		return { text, entities };
	}
	import { type Token, type Tokens, lexer } from "npm:marked";

	/**
	 * Escapes the characters that are significant in HTML markup.
	 *
	 * Replaces `&`, `"`, `<` and `>` with their entity equivalents so the result
	 * can be placed inside element content or a double-quoted attribute value.
	 *
	 * @param text Raw text to escape.
	 * @returns The text with `&`, `"`, `<` and `>` replaced by HTML entities.
	 */
	const escapeHTML = (text: string): string => {
		const replacements: Record<string, string> = {
			"&": "&amp;",
			'"': "&quot;",
			"<": "&lt;",
			">": "&gt;",
		};
		// A single pass over the string; every match is a key of `replacements`.
		return text.replace(/[&"<>]/g, (c) => replacements[c] ?? c);
	};

	/**
	 * Renders a list of marked lexer tokens as an HTML string.
	 *
	 * Block and inline tokens are mapped to a small set of tags (`<b>`, `<i>`,
	 * `<s>`, `<code>`, `<pre>`, `<blockquote>`, `<a>`); lists, tables, rules and
	 * images are rendered as plain text. Literal text and attribute values are
	 * passed through `escapeHTML`; tokens that carry child tokens are rendered
	 * recursively so nested inline formatting is preserved.
	 *
	 * Token types without a dedicated branch are reported with `console.warn` and
	 * emitted as their escaped raw source wrapped in `<i>`.
	 *
	 * @param tokens Tokens produced by marked's `lexer`.
	 * @returns The concatenated HTML for all tokens.
	 */
	function lexerToHTML(tokens: Token[]): string {
		const tokenStrings: string[] = tokens.map((token) => {
			if (token.type === "space") {
				return escapeHTML(token.raw);
			} else if (token.type === "code") {
				const body = token.escaped ? token.text : escapeHTML(token.text);
				// Without a language, emit a bare <pre> instead of "language-undefined".
				return token.lang
					? `<pre><code class="language-${escapeHTML(token.lang)}">${body}</code></pre>\n`
					: `<pre>${body}</pre>\n`;
			} else if (token.type === "blockquote") {
				return `<blockquote>${lexerToHTML(token.tokens ?? [])}</blockquote>\n`;
			} else if (token.type === "html") {
				// Raw HTML in the source is shown as code rather than passed through.
				return `<pre><code class="language-html">${escapeHTML(
					token.text,
				)}</code></pre>\n`;
			} else if (token.type === "heading") {
				// Render the inline children so the heading text is escaped and keeps
				// its nested formatting, instead of interpolating the raw source.
				return `<b>${lexerToHTML(token.tokens ?? [])}</b>\n`;
			} else if (token.type === "hr") {
				return "------------\n";
			} else if (token.type === "list") {
				const items: Tokens.ListItem[] = token.items;
				return items
					.map((item, idx) => {
						// An empty-string `start` selects a "- " bullet; otherwise items
						// are numbered from `start`.
						const bullet = token.start === "" ? "- " : `${token.start + idx}) `;
						return `${bullet}${lexerToHTML(item.tokens)}\n`;
					})
					.join("");
			} else if (token.type === "list_item") {
				// Not handled on its own (items are rendered by the "list" branch);
				// falls through to the unhandled-token fallback below.
			} else if (token.type === "checkbox") {
				return token.checked ? "[x] " : "[ ] ";
			} else if (token.type === "paragraph") {
				return `${lexerToHTML(token.tokens ?? [])}\n`;
			} else if (token.type === "table") {
				const res: string[] = [];
				const header: Tokens.TableCell[] = token.header;
				const rows: Tokens.TableCell[][] = token.rows;
				res.push(
					header.reduce(
						(acc, current) => `${acc} ${lexerToHTML(current.tokens)} |`,
						`|`,
					),
				);
				res.push(Array(header.length).fill("---").join(""));
				rows.forEach((row) => {
					res.push(
						row.reduce(
							(acc, current) => `${acc} ${lexerToHTML(current.tokens)} |`,
							`|`,
						),
					);
				});
				return res.reduce((acc, current) => `${acc}${current}\n`, ``);
			} else if (token.type === "strong") {
				return `<b>${lexerToHTML(token.tokens ?? [])}</b>`;
			} else if (token.type === "em") {
				return `<i>${lexerToHTML(token.tokens ?? [])}</i>`;
			} else if (token.type === "codespan") {
				// NOTE(review): assumes the lexer leaves codespan text unescaped, as
				// the "text" branch below already does — confirm for the pinned marked version.
				return `<code>${escapeHTML(token.text)}</code>`;
			} else if (token.type === "br") {
				return escapeHTML(token.raw);
			} else if (token.type === "del") {
				return `<s>${lexerToHTML(token.tokens ?? [])}</s>`;
			} else if (token.type === "link") {
				// Escape the URL so a quote in it cannot terminate the attribute.
				return `<a href="${escapeHTML(token.href)}">${lexerToHTML(
					token.tokens ?? [],
				)}</a>`;
			} else if (token.type === "image") {
				return `${escapeHTML(token.href)} (${escapeHTML(token.text)})`;
			} else if (token.type === "escape") {
				// Backslash escape (e.g. `\*`): emit the escaped character itself.
				return escapeHTML(token.text);
			} else if (token.type === "text") {
				return "tokens" in token && token.tokens
					? lexerToHTML(token.tokens)
					: escapeHTML(token.text);
			}

			console.warn("Unhandled token type", token.type);
			return `<i>${escapeHTML(token.raw)}</i>`;
		});

		return tokenStrings.join("");
	}

	/**
	 * Converts Markdown source to an HTML string.
	 *
	 * The text is tokenised with marked's `lexer` and the resulting tokens are
	 * rendered by `lexerToHTML`.
	 *
	 * @param text Markdown source.
	 * @returns The rendered HTML.
	 */
	export function mdToHTML(text: string) {
		return lexerToHTML(lexer(text));
	}