Last active
August 1, 2017 18:05
-
-
Save pathcl/58d485df76af2ca6b699 to your computer and use it in GitHub Desktop.
Bottle && selenium
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob
import os
import re

from bottle import run, request, Bottle, HTTPError
from jinja2 import Environment, FileSystemLoader
from selenium import webdriver
# Directory the process was started from; data files are resolved against it.
path = os.getcwd()
# Text file holding the URLs to screenshot (read by the /selenium route).
urls = path + '/urls.txt'
# Shell-style pattern matching the screenshots left over from a previous run.
images = path + '/img/*.jpg'

# Bottle application object on which the route handlers are registered.
app = Bottle()
def render_template(template_filename, context):
    """Render a Jinja2 template found in the ``templates`` directory.

    The ``templates`` directory is looked up next to this source file,
    not in the current working directory.

    Args:
        template_filename: Name of the template file inside ``templates``.
        context: Mapping of variables made available to the template.

    Returns:
        The rendered template as a string.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    loader = FileSystemLoader(os.path.join(here, 'templates'))
    # NOTE: autoescaping is disabled, so context values are inserted verbatim.
    env = Environment(autoescape=False, loader=loader, trim_blocks=False)
    template = env.get_template(template_filename)
    return template.render(context)
def create_index_html():
    """Write ``internet.html``, an index of the files found in ``img/``.

    Every entry of the ``img`` directory under ``path`` is turned into a
    ``/img/<name>`` string; the resulting list is passed to the
    ``index.html`` template as ``urls`` and the rendered page is written to
    ``internet.html`` (relative to the current working directory).
    """
    fname = "internet.html"
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # generated page stable from one run to the next.
    context = {
        'urls': ['/img/' + name for name in sorted(os.listdir(path + '/img/'))]
    }
    # Render before opening the output: mode 'w' truncates the file, so a
    # template error would otherwise leave an empty index behind.
    html = render_template('index.html', context)
    with open(fname, 'w') as f:
        f.write(html)
@app.route('/upload', method='POST')
def upload():
    """Store the uploaded URL list and redirect the client to ``/selenium``.

    Expects a multipart form with a file field named ``upload``. If a URL
    list from a previous run exists, it and the old ``*.jpg`` screenshots are
    removed first. The new file is always stored as ``urls.txt`` because that
    is the only name the ``/selenium`` route reads, whatever the client
    called the file.

    Returns:
        An HTML snippet that immediately redirects to ``/selenium`` on
        success, an HTTP 400 error when no file was sent, or an HTTP 500
        error when the file could not be stored.
    """
    uploaded = request.files.get('upload')
    if uploaded is None:
        # Check before touching the disk so a bad request does not wipe the
        # results of the previous run.
        return HTTPError(400, 'No file was sent in the "upload" form field.')

    try:
        if os.path.exists(urls):
            os.remove(urls)
            # Delete stale screenshots directly instead of shelling out to
            # "rm": no shell quoting issues and the deletion has finished
            # before the new run starts.
            for stale in glob.glob(images):
                if os.path.isfile(stale):
                    os.remove(stale)
        uploaded.save(urls, overwrite=True)
    except (IOError, OSError) as e:
        print(e)
        return HTTPError(500, 'Could not store the uploaded URL list.')

    return '''
<meta http-equiv="refresh" content="0; url=/selenium" />
'''
def _screenshot_name(url):
    """Return a base file name for the screenshot of *url* that is safe on disk and in links."""
    bare = re.sub(r'(^http://|^https://)', '', url)
    # Path separators, query strings etc. would otherwise produce nested or
    # invalid file names (e.g. "img/example.com/page.jpg").
    return re.sub(r'[^A-Za-z0-9._-]+', '_', bare)


@app.route('/selenium', method='GET')
def main():
    """Take a screenshot of every URL listed in ``urls.txt``.

    Each non-empty line of the URL list is opened in a fresh PhantomJS
    browser and saved as ``img/<sanitised url>.jpg``; afterwards the index
    page is regenerated via ``create_index_html()``.

    TODO: make it async

    Returns:
        An HTML snippet that immediately redirects to ``/results.html``.
    """
    img_dir = os.path.join(path, 'img')
    if not os.path.isdir(img_dir):
        os.makedirs(img_dir)

    with open(urls) as url_file:
        # Skip blank lines (e.g. a trailing newline) instead of asking the
        # browser to load an empty URL.
        targets = [line.strip() for line in url_file if line.strip()]

    for url in targets:
        browser = webdriver.PhantomJS()
        try:
            # The timeout must be set before get() to apply to this load.
            browser.set_page_load_timeout(15)
            browser.delete_all_cookies()
            browser.get(url)
            browser.save_screenshot(
                os.path.join(img_dir, _screenshot_name(url) + '.jpg'))
        finally:
            # Always shut the browser down so a failing URL does not leave
            # an orphaned PhantomJS process behind.
            browser.quit()

    create_index_html()
    # NOTE(review): create_index_html() writes "internet.html", while this
    # redirect targets "/results.html" -- confirm which name is intended.
    return '''
<meta http-equiv="refresh" content="0; url=/results.html" />
'''
# Start the server for the app on the local interface only.
run(
    app,
    host='127.0.0.1',
    port='8080',
    reloader=True,
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment