Skip to content

Instantly share code, notes, and snippets.

@jkctech
Created August 18, 2024 20:14
Show Gist options
  • Save jkctech/838ecc20e71c5de30b152fb437d66709 to your computer and use it in GitHub Desktop.
Save jkctech/838ecc20e71c5de30b152fb437d66709 to your computer and use it in GitHub Desktop.
Simple web relay using Flask and Selenium to return the raw content of a web page. Useful for scraping dynamically loaded web pages.
import logging
import threading
import time

from flask import Flask
from flask import request
from flask import Response
from selenium import webdriver
# SETTINGS
# TCP port handed to app.run().
port = 80
# Default number of seconds to wait after loading a page before reading it.
defaultwait = 3
# When False, the 'werkzeug' logger is disabled.
defaultlogging = False
# When True, getPage prints a "GET: <url>" line for each fetched page.
customlogging = True
# When True, Firefox is started with the --headless argument.
headless = True
# Build the Firefox options first (adding the headless flag when the
# `headless` setting is on), then start the single shared webdriver.
options = webdriver.FirefoxOptions()
if headless:
    options.add_argument('--headless')
driver = webdriver.Firefox(options=options)
# Werkzeug request logging: disabled unless defaultlogging is True
log = logging.getLogger('werkzeug')
log.disabled = not defaultlogging
# One browser session is shared by every request, and Flask's development
# server is threaded by default (Flask >= 1.0), so page loads are serialized
# to keep one request from navigating away under another.
_driver_lock = threading.Lock()


def getPage(url, delay=defaultwait):
    """Load *url* in the shared webdriver and return the page source.

    Args:
        url: Address passed to ``driver.get``.
        delay: Seconds to sleep after navigation before reading the page
            source, so the page has time to finish loading.

    Returns:
        The value of ``driver.page_source`` read after the wait.
    """
    # Hold the lock across navigate / wait / read so the source captured
    # belongs to the page this call asked for.
    with _driver_lock:
        driver.get(url)
        time.sleep(delay)
        raw = driver.page_source
    if customlogging:
        print("GET: {}".format(url))
    return raw
# Define flask app; the route below is registered on this object
app = Flask("server")
# Main route
@app.route('/')
def root():
    """Relay a page: load ``url`` through getPage and return its source.

    Query args:
        url: Address of the page to relay (required).
        delay: Whole seconds to wait for the page to finish loading;
            falls back to ``defaultwait`` when absent.

    Returns:
        The page source with mimetype ``text/html``, or a 400 plain-text
        response when ``url`` is missing or empty.
    """
    url = request.args.get('url', default=None, type=str)
    if not url:
        # Without this guard a request lacking ?url= would pass None on to
        # the webdriver instead of telling the client what is wrong.
        return Response("Missing required query parameter: url",
                        status=400, mimetype='text/plain')
    delay = request.args.get('delay', default=defaultwait, type=int)
    # time.sleep() raises ValueError for negative values; treat as no wait.
    delay = max(delay, 0)
    return Response(getPage(url, delay), mimetype='text/html')
# Run app until the server stops, then quit the webdriver so the browser
# session created at startup is not left running.
try:
    app.run(debug=False, port=port)
finally:
    driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment