Last active
March 4, 2021 10:45
-
-
Save GregBrimble/cf110434f6e48829125c3eb2ca70ce3d to your computer and use it in GitHub Desktop.
A Cloudflare Workers proxy for Archive.org
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Entry point: every incoming fetch event is answered by handleRequest,
// wrapped in handleError so that thrown errors become HTTP responses.
addEventListener('fetch', (event) => {
  const pendingResponse = handleError(handleRequest, event.request)
  event.respondWith(pendingResponse)
})
/**
 * Raised when a request pathname cannot be mapped to an Archive.org item and file.
 */
class InvalidURLPathnameError extends Error {
  /**
   * @param {string} pathname - The pathname that could not be parsed.
   * @param {{ cause?: unknown }} [options] - Optional underlying error, kept on `cause`.
   */
  constructor(pathname, { cause } = {}) {
    // Only forward an options bag when there is a cause, so `cause` stays absent otherwise.
    super(`Invalid URL pathname: ${pathname}`, cause === undefined ? undefined : { cause })
    this.name = "InvalidURLPathnameError"
    // Kept separately so callers can inspect the offending path without parsing the message.
    this.pathname = pathname
  }
}
/**
 * Matches bottle file names such as `wget-1.21.big_sur.bottle.1.tar.gz`.
 * Capture group 1 is everything in front of the `.<tag>.bottle` suffix.
 */
const BOTTLE_REGEX = /^(.*)\.\w+\.bottle(?:\.\d+)?\.tar\.gz$/i

/**
 * Maps a proxy URL of the form `/<org>/<repo>/<bottle file>` to an Archive.org
 * item identifier and a file path inside that item.
 *
 * - `linuxbrew`: item is the org, file is `bottles/<bottle file>`.
 * - `brewsci`: item is the org; repo `core` maps to `bottles`, any other repo to `bottles-<repo>`.
 * - any other org: item is `<org>-<repo>-<package>`, where repo `bottles` maps to `core`
 *   and `<prefix>-<tap>` maps to `<tap>`; `<package>` is the captured name up to its last `-`.
 *
 * @param {URL} url - Parsed request URL; only `pathname` is read.
 * @returns {{ item: string, file: string }} Archive.org item and file.
 * @throws {InvalidURLPathnameError} When the org is empty, the file name is not a bottle
 *   name, or (where they are needed) the repo or package name cannot be derived.
 */
const extractPartsFromURL = (url) => {
  const pathname = url.pathname
  const reject = () => {
    throw new InvalidURLPathnameError(pathname)
  }

  if (typeof pathname !== 'string') {
    reject()
  }

  // Index 0 is the empty string in front of the leading slash.
  const [, org, repo, fileName] = pathname.split('/')
  const match = fileName === undefined ? null : BOTTLE_REGEX.exec(fileName)
  if (!org || match === null) {
    reject()
  }

  switch (org) {
    case 'linuxbrew': {
      return {
        item: org,
        file: `bottles/${fileName}`
      }
    }
    case 'brewsci': {
      if (!repo) {
        reject()
      }
      const directory = repo === 'core' ? 'bottles' : `bottles-${repo}`
      return {
        item: org,
        file: `${directory}/${fileName}`
      }
    }
    default: {
      const tap = repo === 'bottles' ? 'core' : repo.split('-')[1]
      const packageNameVersion = match[1]
      const lastDash = packageNameVersion.lastIndexOf('-')
      // Without a tap or a non-empty package name the item identifier would contain
      // "undefined" or an empty segment, so treat the path as invalid instead.
      if (!tap || lastDash <= 0) {
        reject()
      }
      const packageName = packageNameVersion.substring(0, lastDash)
      return {
        item: `${org}-${tap}-${packageName}`,
        file: fileName
      }
    }
  }
}
/**
 * Builds a follow-up Request for the target of a redirect response.
 *
 * @param {Response} response - Response that may carry a `Location` header.
 * @param {Request|URL|string} request - The request (or its URL) that produced `response`;
 *   used as the base when the `Location` value is relative.
 * @returns {Request|undefined} A new Request for the redirect target, or `undefined`
 *   when the response has no `Location` header.
 */
const flattenRedirectResponseToRequest = (response, request) => {
  const location = response.headers.get('location')
  if (location === null) {
    return undefined
  }

  // A Location value may be relative and must be resolved against the request URI:
  // https://tools.ietf.org/html/rfc7231#section-7.1.2
  // The base has to be a URL, not the Request object itself: a Request stringifies to
  // "[object Request]", which the URL constructor rejects. Absolute Location values
  // ignore the base, so no separate "is it absolute?" check is needed.
  const baseURL = request?.url ?? request
  return new Request(new URL(location, baseURL).toString())
}
/**
 * Looks up metadata for a file by sending a HEAD request to the item's
 * `s3.us.archive.org` endpoint.
 *
 * @param {{ item: string, file: string }} target - Archive.org item and file path.
 * @returns {Promise<{ digest?: string }>} `{ digest: "MD5=<etag>" }` when the lookup
 *   succeeds and an ETag is present, otherwise an empty object.
 */
const getFileMetadata = async ({ item, file }) => {
  const request = new Request(`https://${item}.s3.us.archive.org/${file}`)
  let response = await fetch(request, { method: 'HEAD', cf: { cacheEverything: true }})

  // If the endpoint answered with a redirect, repeat the HEAD against the target and
  // cache it under a key derived from item and file rather than the target URL.
  const redirectRequest = flattenRedirectResponseToRequest(response, request)
  if (redirectRequest) {
    response = await fetch(redirectRequest, { method: 'HEAD', cf: { cacheEverything: true, cacheKey: `metadata:${item}:${file}` }})
  }

  if (!response.ok) {
    return {}
  }

  // Metadata is best-effort: a missing ETag must not fail the whole request.
  // NOTE(review): the ETag is reported as an MD5 digest — confirm the endpoint guarantees that.
  const etag = response.headers.get('ETag')
  if (!etag) {
    return {}
  }

  return {
    digest: `MD5=${etag.replaceAll('"', '')}`
    // Also available: upload time, file size, meta description etc.
  }
}
/**
 * Runs a request handler and converts thrown errors into HTTP responses.
 *
 * @param {(request: Request) => Promise<Response>} handler - The handler to run.
 * @param {Request} request - The incoming request, passed through to `handler`.
 * @returns {Promise<Response>} The handler's response, a 400 response for an
 *   InvalidURLPathnameError, or a generic 500 response for any other error.
 */
const handleError = async (handler, request) => {
  try {
    return await handler(request)
  } catch (error) {
    if (error instanceof InvalidURLPathnameError) {
      return new Response("Could not parse URL pathname", { status: 400 })
    }
    // Log the details, but never send internal error messages to the client.
    console.error(error)
    return new Response("Internal Error", { status: 500 })
  }
}
/**
 * Main handler function.
 *
 * Translates the incoming URL into an Archive.org download URL, fetches it with
 * caching enabled and relays the result, adding the archive URL and any file
 * metadata as response headers.
 *
 * @param {Request} request - The incoming request; only its URL is read.
 * @returns {Promise<Response>} The proxied response on success, "Not Found" (404)
 *   when the upstream returns 404, and "Bad Gateway" (502) for any other upstream failure.
 * @throws {InvalidURLPathnameError} Propagated from extractPartsFromURL.
 */
const handleRequest = async (request) => {
  const { item, file } = extractPartsFromURL(new URL(request.url))
  const transformedRequest = new Request(`https://archive.org/download/${item}/${file}`)

  // Makes a GET request to the transformed URL (on Archive.org) and caches the response.
  // We can easily customize the caching rules: https://developers.cloudflare.com/workers/examples/cache-using-fetch
  let response = await fetch(transformedRequest, { cf: { cacheEverything: true }})

  // Redirect detection needs the upstream *response* and the upstream request as the base
  // for relative Location values.
  // NOTE(review): a Location header is only visible if the fetch above does not follow
  // redirects itself — confirm the runtime's default redirect mode.
  const redirectRequest = flattenRedirectResponseToRequest(response, transformedRequest)
  if (redirectRequest) {
    response = await fetch(redirectRequest, { cf: { cacheEverything: true, cacheKey: `${item}:${file}` }})
  }

  if (response.status === 404) {
    return new Response("Not Found", { status: 404 })
  }
  if (!response.ok) {
    return new Response("Bad Gateway", { status: 502 })
  }

  const metadata = await getFileMetadata({ item, file })

  // Copy the upstream headers, then layer our own on top.
  const headers = new Headers(response.headers)
  headers.set("X-ARCHIVE-URL", transformedRequest.url)
  for (const [name, value] of Object.entries(metadata)) {
    headers.set(name, value)
  }

  return new Response(response.body, {
    status: response.status,
    statusText: response.statusText,
    headers
  })
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment