Skip to content

Instantly share code, notes, and snippets.

@gradha
Created August 5, 2018 11:30
Show Gist options
  • Select an option

  • Save gradha/83385ac91ad279c978d7647e4db5f56f to your computer and use it in GitHub Desktop.

Select an option

Save gradha/83385ac91ad279c978d7647e4db5f56f to your computer and use it in GitHub Desktop.
Simple Python cgi-bin proxy that fetches Tumblr RSS feeds from Europe by accepting the GDPR consent page first (see https://github.com/ViennaRSS/vienna-rss/issues/1166)
#!/usr/bin/env python
# Emit the CGI response header first; everything printed afterwards is the body.
print "Content-Type: text/xml"
# A bare print writes the blank line that terminates the header section.
print
# To use this script, put it in the cgi-bin directory of a host that supports
# Python 2.x. Then test it from your browser with a URL such as
# https://your.host.com/cgi-bin/tumblr-gdpr-cgi-bin-proxy?u=hematocritico.tumblr.com/rss
# If everything works, you should get the feed's RSS XML back.
import cgi
import cgitb; cgitb.enable()
import urllib2
import re
import sys
def _read_url(request):
    """Open *request* with the installed opener and return the whole body.

    The response is always closed, even if reading fails.
    """
    response = urllib2.urlopen(request)
    try:
        return response.read()
    finally:
        response.close()


def fetch_rss(partial_url):
    """Fetch a Tumblr RSS feed, accepting the GDPR consent page on the way.

    The feed URL is requested once to obtain the ``tumblr_form_key`` embedded
    in the returned page, consent is then posted to Tumblr's privacy endpoint
    using that key, and finally the feed URL is requested again with the
    cookies collected by the cookie processor.

    Args:
        partial_url: Feed location without the scheme, e.g.
            ``"hematocritico.tumblr.com/rss"``; ``"https://"`` is prepended.

    Returns:
        The raw body of the final feed response. If the first response
        contains no form key, that first response body is returned as is.

    Raises:
        urllib2.URLError: (or its subclass ``HTTPError``) if a request fails.
    """
    # Local imports keep this block self-contained; both are standard library.
    import json

    REGEX = re.compile('tumblr_form_key.*?content="([^"]*)')
    # NOTE(review): partial_url is caller-supplied, so this function will
    # fetch any https host it is given. Consider restricting it to
    # *.tumblr.com hosts if the script is exposed publicly.
    FEED_URL = "https://" + partial_url
    CONSENT_URL = "https://www.tumblr.com/svc/privacy/consent"
    REFERER = ("https://www.tumblr.com/privacy/consent?redirect=" +
               urllib2.quote(FEED_URL, safe=""))
    CONTENT_TYPE = "application/json"
    USER_AGENT_KEY = 'User-Agent'
    USER_AGENT_DATA = ('Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; '
                       'en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11')

    # Serialize with json.dumps so that quotes or backslashes in the feed URL
    # are escaped instead of corrupting the JSON document.
    DATA = json.dumps({
        "eu_resident": True,
        "gdpr_is_acceptable_age": True,
        "gdpr_consent_core": True,
        "gdpr_consent_first_party_ads": True,
        "gdpr_consent_third_party_ads": True,
        "gdpr_consent_search_history": True,
        "redirect_to": FEED_URL,
    })

    # The cookie processor carries the consent cookies from the POST below
    # over to the final feed request. The opener is installed globally so
    # that urllib2.urlopen() uses it.
    hsh = urllib2.HTTPSHandler()
    cookie_processor = urllib2.HTTPCookieProcessor()
    opener = urllib2.build_opener(hsh, cookie_processor)
    urllib2.install_opener(opener)

    # Step 1: fetch the feed URL and look for the form key in the response.
    request = urllib2.Request(FEED_URL, headers={
        USER_AGENT_KEY: USER_AGENT_DATA,
    })
    first_body = _read_url(request)
    m = REGEX.search(first_body)
    if m is None:
        # No form key in the response: there is nothing to post consent with.
        # Presumably the feed itself was served (no consent page) -- TODO
        # confirm. Return what we got instead of crashing on m.group().
        return first_body
    tumblr_form_key = m.group(1)

    # Step 2: post the consent choices; the response body is not needed, only
    # the cookies it sets.
    request = urllib2.Request(CONSENT_URL, DATA, {
        "Content-Type": CONTENT_TYPE,
        "Origin": "https://www.tumblr.com",
        USER_AGENT_KEY: USER_AGENT_DATA,
        "x-tumblr-form-key": tumblr_form_key,
        "referer": REFERER})
    _read_url(request)

    # Step 3: request the feed again, now with the consent cookies attached.
    request = urllib2.Request(FEED_URL, headers={
        USER_AGENT_KEY: USER_AGENT_DATA,
    })
    return _read_url(request)
arguments = cgi.FieldStorage()
print fetch_rss(arguments.getfirst("u", "nourl"))
@gyab
Copy link
Copy Markdown

gyab commented Jul 17, 2019

Thank you for this.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment