-
-
Save fallenflint/118886b3e61903a20d99d528d70f0b58 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import SocketServer | |
import SimpleHTTPServer | |
import urllib | |
import socket | |
import argparse | |
import webbrowser | |
import re | |
class Processor(object):
    """Rewrite proxied HTML while it is streamed to the client.

    :meth:`process` applies two transformations:

    * a trademark sign is inserted after every run of exactly
      ``word_length`` alphanumeric characters found in text that is inside
      ``<body>`` and outside ``<script>``;
    * inside ``<a ...>`` tags, ``href`` values that point at ``site`` are
      re-pointed at the local proxy (``host:port``).
    """

    def __init__(self, host, port, site, word_length=6):
        """Prepare the link-rewriting pattern and remember the proxy address.

        :param host: host name the proxy is reachable at.
        :param port: port the proxy listens on; omitted from rewritten
            links when it is 80.
        :param site: host name of the proxied site, matched literally.
        :param word_length: length of the words that get the trademark sign.
        """
        # ``site`` is escaped so that e.g. the dot in "habrahabr.ru" only
        # matches a literal dot, and the scheme must carry its colon.
        self.replace_re = re.compile(
            r'(href=[\'"])(?P<proto>https?:)?(/*)%s(/?.*?["\'])'
            % re.escape(site),
            re.U | re.I | re.S | re.M)
        self.host = host
        self.port = port
        self.site = site
        self.word_length = word_length
        self.hostport = '{}:{}'.format(host, port) if port != 80 else host

    def replacer(self, matchobj):
        """Return the replacement text for one ``replace_re`` match.

        The site name is swapped for ``self.hostport``.  An explicit scheme
        is normalised to ``http:``; scheme-less and scheme-relative links
        keep their shape.  Only the scheme of the link itself is touched,
        so URLs embedded later in the value (e.g. in a query string) are
        left as they are.
        """
        prefix, proto, slashes, tail = matchobj.groups()
        scheme = 'http:' if proto else ''
        return prefix + scheme + slashes + self.hostport + tail

    def process(self, stream):
        """Yield the transformed document piece by piece.

        :param stream: iterable of single characters (e.g. a unicode string).
        :returns: generator of text chunks; joining them gives the result.
        """
        in_body = in_tag = in_script = tag_closing = False
        tag_content = []  # characters since the most recent '<'
        buf = []          # markup held back until its closing '>'
        run = 0           # length of the current alphanumeric run in text
        for letter in stream:
            is_word_char = letter.isalnum()
            if not is_word_char and run == self.word_length:
                yield u'™'
            if letter == '<':
                # A '<' inside an open tag (e.g. "a<b" in a script) restarts
                # name detection but keeps the text buffered so far.
                in_tag = True
                tag_content = []
                tag_closing = False
            if in_tag:
                run = 0
                buf.append(letter)
                tag_content.append(letter)
                if letter == '/' and len(tag_content) == 2:
                    tag_closing = True  # the tag started with "</"
                elif letter == '>':
                    in_tag = False
                    words = (''.join(tag_content[1:-1])
                             .replace('/', '').lower().split())
                    # Only the tag name decides the state; attribute text
                    # such as alt="no script here" must not.
                    name = words[0] if words else ''
                    if name == 'body':
                        in_body = not tag_closing
                    elif name == 'script':
                        in_script = not tag_closing
                    markup = ''.join(buf)
                    if name == 'a':
                        markup = self.replace_re.sub(self.replacer, markup)
                    yield markup
                    buf = []
                continue
            if in_body and not in_script and is_word_char:
                run += 1
            else:
                run = 0
            yield letter
        if buf:
            # Unterminated "<...": emit it unchanged instead of dropping it.
            yield ''.join(buf)
        elif run == self.word_length:
            # The stream ended right after a word of the target length.
            yield u'™'
def get_handler(site, processor):
    """Build the request-handler class that proxies GET requests to ``site``.

    :param site: upstream host name, with or without an ``http(s)://`` prefix.
    :param processor: object whose ``process(text)`` yields transformed text
        chunks; it is applied to HTML responses that declare a charset.
    :returns: a ``SimpleHTTPRequestHandler`` subclass.
    """
    # Test for a real scheme prefix; a substring test would misfire on
    # host names that merely contain "http".
    if site.lower().startswith(('http://', 'https://')):
        site_url = site
    else:
        site_url = 'http://' + site

    class Proxy(SimpleHTTPServer.SimpleHTTPRequestHandler):
        """Fetch the requested path upstream and relay the response body."""

        def do_GET(self):
            req = urllib.urlopen(site_url + self.path)
            try:
                data = req.read()
                status = req.getcode() or 200
                content_type = req.headers.get('content-type') or ''
            finally:
                req.close()

            lowered = content_type.lower()
            if 'text/html' in lowered and 'charset=' in lowered:
                # Keep only the charset token: drop any parameters that
                # follow it and surrounding quotes.
                charset = (lowered.split('charset=')[-1]
                           .split(';')[0].strip().strip('"\''))
                text = u''.join(processor.process(unicode(data, charset)))
                data = text.encode(charset)

            # Send a status line and headers before the body so the client
            # receives a well-formed HTTP response.
            self.send_response(status)
            if content_type:
                self.send_header('Content-Type', content_type)
            self.send_header('Content-Length', str(len(data)))
            self.end_headers()
            self.wfile.write(data)

    return Proxy
class ReusingServer(SocketServer.ForkingTCPServer):
    """TCP Server that doesn't fail if port is still in TIME_WAIT state"""

    # Use the base class's own switch for address reuse instead of
    # overriding server_bind(), so the rest of the stock bind logic
    # still runs.
    allow_reuse_address = True
def main(host, port, site):
    """Start the proxy, open it in a browser and serve until interrupted.

    :param host: host name to listen on.
    :param port: port to listen on.
    :param site: upstream site to proxy.
    """
    processor = Processor(host, port, site)
    httpd = ReusingServer((host, port), get_handler(site, processor))
    print("serving at {0}:{1} -> {2}".format(host, port, site))
    try:
        # although it's quite dirty, it should work in most cases
        webbrowser.open("http://{}:{}".format(host, port))
        httpd.serve_forever(poll_interval=0.1)
    except KeyboardInterrupt:
        # Ctrl-C is the expected way to stop the server.
        pass
    finally:
        # Release the listening socket however the loop ended.
        httpd.server_close()
if __name__ == '__main__':
    # Command-line entry point: declare the options in one table, register
    # them in order, then hand the parsed values to main().
    option_table = (
        (('-p', '--port'),
         dict(help="Local port used to listen to, defaults to 8000",
              default=8000, type=int)),
        (('--host',),
         dict(help="Hostname used to listen to, defaults to localhost",
              default='localhost')),
        (('-s', '--site'),
         dict(help="Site to use, defaults to habrahabr.ru",
              default='habrahabr.ru')),
    )
    cli = argparse.ArgumentParser()
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)
    options = cli.parse_args()
    main(options.host, options.port, options.site)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment