Last active
July 8, 2025 11:51
-
-
Save 4msar/7119c35a59daad3f48a4ed1efe91bb21 to your computer and use it in GitHub Desktop.
Cloudflare Worker to parse metadata from a URL
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Static HTML documentation page for the Meta Parser API.
 *
 * The worker serves this page when a request arrives without a `url`
 * query parameter. It describes the endpoint usage and shows a sample
 * JSON response.
 */
const docsHtml = `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Meta Parser API</title>
<style>
body { font-family: system-ui, sans-serif; padding: 2rem; background: #f8f9fa; color: #333; }
code { background: #eee; padding: 2px 5px; border-radius: 4px; }
pre { background: #f1f1f1; padding: 1rem; border-radius: 6px; }
</style>
</head>
<body>
<h1>Meta Parser API</h1>
<p>This API extracts metadata from a given URL and returns it as JSON.</p>
<h2>Usage</h2>
<p>GET request with a <code>?url=</code> query:</p>
<pre><code>https://meta.msar.me/?url=https://msar.me</code></pre>
<h2>Response</h2>
<pre><code>{
"title": "Saiful Alam - The Architect of Software Solutions!",
"description": "I'm a passionate software engineer, and general technology enthusiast living in Dhaka, Bangladesh. I'm currently working at Cefalo Bangladesh as a Software Engineer. Specially interested in web technologies, and currently working with React and Laravel.",
"author": "Saiful Alam Rakib"
}</code></pre>
<footer style="margin-top: 3rem; font-size: 0.9em; color: #666;">
© <script>document.write(new Date().getFullYear())</script> Meta Parser | <a style="color:black;" target="_blank" rel="noopener noreferrer" href="https://msar.me">msar.me</a>
</footer>
</body>
</html>`;

export default docsHtml;
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import docsHtml from "./docs.js"; | |
/**
 * Cloudflare Worker entry point for the Meta Parser API.
 *
 * Behavior:
 * - No (or empty) `url` query parameter: serves the HTML documentation page.
 * - `url` is not a valid absolute http(s) URL: responds 400 with a JSON error.
 * - Otherwise: returns the cached metadata for that URL when present (unless
 *   the `refresh` query parameter is supplied), or fetches the page, extracts
 *   its metadata, caches the JSON for 24 hours, and returns it.
 * - Upstream non-2xx responses and fetch failures respond 500 with a JSON error.
 */
export default {
  /**
   * Handles an incoming request.
   *
   * @param {Request} request - Incoming HTTP request.
   * @param {object} env - Worker environment bindings (unused).
   * @param {object} ctx - Execution context; `waitUntil` keeps the cache
   *   write alive after the response has been returned.
   * @returns {Promise<Response>} HTML docs, JSON metadata, or a JSON error.
   */
  async fetch(request, env, ctx) {
    const { searchParams } = new URL(request.url);
    const targetUrl = searchParams.get("url");

    if (!targetUrl) {
      return new Response(docsHtml, {
        headers: { "Content-Type": "text/html; charset=UTF-8" },
      });
    }

    // Validate up front: constructing a Request from a malformed URL would
    // otherwise throw outside the try/catch and surface as an unhandled
    // worker exception.
    let parsedTarget;
    try {
      parsedTarget = new URL(targetUrl);
    } catch (err) {
      return new Response(
        JSON.stringify({ error: "Invalid 'url' parameter" }, null, 2),
        { status: 400, headers: { "Content-Type": "application/json" } },
      );
    }

    // Only plain web pages make sense as targets.
    if (parsedTarget.protocol !== "http:" && parsedTarget.protocol !== "https:") {
      return new Response(
        JSON.stringify({ error: "Only http(s) URLs are supported" }, null, 2),
        { status: 400, headers: { "Content-Type": "application/json" } },
      );
    }

    const cache = caches.default;
    // Key on the target URL with an explicit GET so that the incoming
    // request's method/body never leaks into the cache key (the Cache API
    // only stores GET requests).
    const cacheKey = new Request(parsedTarget.toString(), { method: "GET" });

    // `?refresh` bypasses the cache lookup and forces a re-fetch.
    if (!searchParams.has("refresh")) {
      const cached = await cache.match(cacheKey);
      if (cached) {
        return cached;
      }
    }

    try {
      // Browser-like headers: some sites reject requests that do not look
      // like they come from a regular browser.
      const res = await fetch(parsedTarget.toString(), {
        method: "GET",
        headers: {
          "User-Agent":
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
            "(KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36",
          "Accept":
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif," +
            "image/webp,image/apng,*/*;q=0.8",
          "Accept-Language": "en-US,en;q=0.9",
          "Cache-Control": "no-cache",
          "Pragma": "no-cache",
          "Connection": "keep-alive",
        },
        redirect: "follow",
      });

      const result = await res.text();

      if (!res.ok) {
        return new Response(
          JSON.stringify({ error: "Something went wrong", result }, null, 2),
          { status: 500, headers: { "Content-Type": "application/json" } },
        );
      }

      const metadata = extractMetaData(result);

      const response = new Response(JSON.stringify(metadata, null, 2), {
        headers: {
          // Without this the body would be served as text/plain.
          "Content-Type": "application/json",
          "Cache-Control": "public, max-age=86400",
        },
      });

      // Store a clone so the original body can still be streamed to the client.
      ctx.waitUntil(cache.put(cacheKey, response.clone()));
      return response;
    } catch (err) {
      return new Response(JSON.stringify({ error: err.message }), {
        status: 500,
        headers: { "Content-Type": "application/json" },
      });
    }
  },
};
/**
 * Extracts the document title and `<meta>` tag values from raw HTML.
 *
 * The result maps `title` to the trimmed text of the first `<title>` element
 * (when present) and, for every `<meta>` tag that has a non-empty `content`
 * attribute, maps its `name` (or, failing that, `property`) to that content.
 * Later tags overwrite earlier ones that share the same key.
 *
 * Parsing is regex-based, not a full HTML parser: only quoted attribute
 * values are recognized, HTML entities are not decoded, and a literal `>`
 * inside an attribute value ends the tag early.
 *
 * @param {string} html - Raw HTML source of a page.
 * @returns {Object<string, string>} Extracted metadata keyed by name/property.
 */
function extractMetaData(html) {
  const result = {};

  // Extract <title>
  const titleMatch = html.match(/<title[^>]*>([^<]*)<\/title>/i);
  if (titleMatch) {
    result.title = titleMatch[1].trim();
  }

  // Extract <meta> tags
  const metaRegex = /<meta\s+[^>]*>/gi;
  // Double- and single-quoted values are matched by separate alternatives so
  // that a value may contain the *other* quote character (e.g. content="I'm").
  // Attribute names may include `-` and `:` (e.g. http-equiv).
  const attrRegex = /([\w:-]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;

  const metaTags = html.match(metaRegex) || [];
  for (const tag of metaTags) {
    const attributes = {};

    // The regex is global and shared across tags; start each scan from 0.
    attrRegex.lastIndex = 0;
    let match;
    while ((match = attrRegex.exec(tag)) !== null) {
      const value = match[2] !== undefined ? match[2] : match[3];
      attributes[match[1].toLowerCase()] = value;
    }

    const key = attributes.name || attributes.property;
    if (key && attributes.content) {
      result[key] = attributes.content;
    }
  }

  return result;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment