Last active
February 9, 2023 17:17
-
-
Save justinmeiners/24dcf5904490b621220bed643651f681 to your computer and use it in GitHub Desktop.
A simple caching HTTP proxy in Python 3.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A simple HTTP proxy which caches the responses to GET requests.
# Inspired by: https://gist.github.com/bxt/5195500
# but updated for Python 3, with some additional sanity improvements:
# - shutil is used to serve files in a streaming manner, so the entire data is not loaded into memory.
# - the HTTP response is written to a temp file and renamed on success
# - request headers are forwarded to the upstream server
import hashlib
import http.server
import os
import shutil
import socketserver
import sys
import urllib.error
import urllib.request
class CacheHandler(http.server.SimpleHTTPRequestHandler):
    """Request handler that answers GET requests from an on-disk cache.

    The request path is hashed with MD5 to derive a cache file name
    (``<md5>.cached``, relative to the current working directory). On a
    miss the resource is fetched from ``"http:/" + self.path`` and stored
    before being served; on a hit the stored bytes are streamed back.

    Only the response body is cached: every cached reply is sent with
    status 200 and without the upstream response headers.
    """

    def do_GET(self):
        """Serve ``self.path`` from the cache, fetching it first on a miss.

        If the upstream fetch fails, the failure status is relayed to the
        client with an empty body and nothing is cached.
        """
        m = hashlib.md5()
        m.update(self.path.encode("utf-8"))
        cache_filename = m.hexdigest() + ".cached"

        if not os.path.exists(cache_filename):
            print("cache miss")
            status = self._fetch_to_cache(cache_filename)
            if status is not None:
                self.send_response(status)
                self.end_headers()
                return
        else:
            print("cache hit")

        with open(cache_filename, "rb") as cached:
            self.send_response(200)
            self.end_headers()
            # Stream the file so large bodies are never held in memory.
            shutil.copyfileobj(cached, self.wfile)

    def _fetch_to_cache(self, cache_filename):
        """Download the upstream resource for ``self.path`` into the cache.

        The body is streamed into ``cache_filename + ".temp"`` and renamed
        to ``cache_filename`` only after the download completed, so a
        partial download never becomes a cache entry.

        Returns:
            ``None`` on success, otherwise the HTTP status code to relay to
            the client: the upstream error code for an ``HTTPError``, or
            502 (Bad Gateway) when the upstream could not be reached.
        """
        temp_filename = cache_filename + ".temp"
        req = urllib.request.Request("http:/" + self.path)
        # Copy the client's request headers; "Host" is skipped so it is not
        # forwarded with the proxy's own value.
        for k in self.headers:
            if k not in ["Host"]:
                req.add_header(k, self.headers[k])

        try:
            # urlopen() runs first: if the upstream rejects the request, no
            # temp file is created at all. Both objects are closed on exit.
            with urllib.request.urlopen(req) as resp, \
                    open(temp_filename, "wb") as output:
                shutil.copyfileobj(resp, output)
            # Rename only after the temp file is closed (renaming an open
            # file fails on Windows).
            os.rename(temp_filename, cache_filename)
        except urllib.error.HTTPError as err:
            err.close()  # release the underlying error response
            return err.code
        except urllib.error.URLError:
            # DNS failure, connection refused, etc.
            return 502
        finally:
            # Drop a partial download. After a successful rename the temp
            # file no longer exists, which is the expected case.
            try:
                os.remove(temp_filename)
            except FileNotFoundError:
                pass
        return None
def main():
    """Run the caching proxy on port 8000 (all interfaces) until Ctrl-C."""
    # Enable address reuse on the listening socket so the port can be bound
    # again right after a previous run exits.
    socketserver.TCPServer.allow_reuse_address = True
    with socketserver.TCPServer(("", 8000), CacheHandler) as httpd:
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            # Ctrl-C is the normal way to stop the server: exit cleanly.
            sys.exit(0)


# Only start the server when executed as a script, so importing this module
# (e.g. to reuse CacheHandler) does not block in serve_forever().
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Great job man !
https://gist.github.com/Jegeva/dafe74058ea30495c84c536a142a1144
Added signal handling and TCP address reuse so that it can easily be spawned from another script as a subprocess without socket-reuse problems.