Last active
November 13, 2016 18:31
-
-
Save tesu/3516618b582611daa6c2e3be0164a165 to your computer and use it in GitHub Desktop.
tallies up your sadpanda favorite tags
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import codecs | |
import csv | |
import json | |
import operator | |
import re | |
import requests | |
import time | |
# Credentials sent as the `ipb_member_id` / `ipb_pass_hash` cookies.
# Both are blank here and have to be filled in before the script is run.
MEMBER_ID = ''
PASS_HASH = ''

# HTTP request settings. `headers` is passed to the favorites page requests;
# `cookies` is passed to every request the script makes.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.87 Safari/537.36',
}
cookies = {
    'ipb_member_id': MEMBER_ID,
    'ipb_pass_hash': PASS_HASH,
    # Hard-coded value sent as the `s` cookie.
    's': 'f3fefd0f1b529496b358ce7912b0da55cd7809984dbd4ed66ecaab18891240516ab5653bb516af8a23dd53c5121d328c37e173530aa8ff8af889403f4869db2f',
}

# `sleep` is the pause in seconds handed to time.sleep() between API calls
# (the favorites pages wait sleep/5); `page` is the favorites page counter.
sleep, page = 5, 0

# Accumulators filled in while the script runs:
#   d       - [numeric id, path segment] pairs scraped from gallery URLs
#   doujins - entries taken from the API's 'gmetadata' responses
#   freq    - occurrence count per tag, exactly as the tag was returned
#   freqnn  - occurrence count per tag with its leading "xxx:" prefix removed
d, doujins = [], []
freq, freqnn = {}, {}
def fetch_api(d):
    """Fetch metadata for the galleries in *d* and append it to ``doujins``.

    *d* is a list of ``[id, token]`` pairs as collected from the favorites
    pages. The list is sent to the ``gdata`` API method in batches of 25 and
    every entry of each response's ``'gmetadata'`` list is appended to the
    module-level ``doujins`` list.

    The batches are processed in a loop rather than by recursion, so the
    number of favorites is not limited by the interpreter's recursion depth.
    An empty *d* results in no request at all.

    Returns ``None``. Stops early (keeping what was fetched so far) when the
    response reports a temporary IP ban.
    """
    batch_size = 25
    total = len(d)
    for start in range(0, total, batch_size):
        # Named `payload` so the imported `json` module is not shadowed.
        payload = {
            'method': 'gdata',
            'gidlist': d[start:start + batch_size],
            'namespace': 1,
        }
        r = requests.post('https://exhentai.org/api.php', json=payload, cookies=cookies)
        if 'Your IP address has been temporarily banned' in r.text:
            # Strip non-ASCII so the message prints on any console encoding.
            print(r.text.encode('ascii', errors='ignore').decode('ascii'), flush=True)
            return
        doujins.extend(r.json()['gmetadata'])
        # Report what is actually still outstanding after this batch.
        remaining = max(total - (start + batch_size), 0)
        print(str(remaining) + ' doujins left to fetch from api.', flush=True)
        if remaining:
            # Throttle only when another request is going to follow.
            time.sleep(sleep)
def _write_frequency_csv(path, counts):
    """Write the tag -> count mapping *counts* to *path*, most frequent first."""
    # newline='' is required by the csv module so it controls line endings
    # itself (otherwise every row is followed by a blank line on Windows).
    # UTF-8 keeps an unusual tag from aborting the write under a narrow
    # locale encoding.
    with open(path, 'w', newline='', encoding='utf-8') as file:
        w = csv.DictWriter(file, fieldnames=['tag', 'frequency'])
        w.writeheader()
        for t, c in sorted(counts.items(), key=operator.itemgetter(1), reverse=True):
            w.writerow({'tag': t, 'frequency': c})


# Main script: walk the favorites pages collecting gallery ids, fetch their
# metadata through the API, then tally the tags. The tally and the output
# files are produced in `finally` so that whatever was fetched before an
# error or interruption is still written out.
try:
    with requests.Session() as s:
        r = s.get('https://exhentai.org/favorites.php', cookies=cookies, headers=headers)
        while 'No hits found' not in r.text:
            if 'Your IP address has been temporarily banned' in r.text:
                # Strip non-ASCII so the message prints on any console encoding.
                print(r.text.encode('ascii', errors='ignore').decode('ascii'), flush=True)
                break
            found = [
                [int(m.group(1)), m.group(2)]
                for m in re.finditer(r'https://exhentai\.org/g/(\d+)/([^/]+)/', r.text)
            ]
            if not found:
                # A page with neither gallery links nor the end marker (for
                # example an error or login page) would otherwise keep this
                # loop requesting pages forever.
                break
            d.extend(found)
            page += 1
            time.sleep(sleep / 5)
            r = s.get('https://exhentai.org/favorites.php?page=' + str(page), cookies=cookies, headers=headers)
            print(str(len(d)) + ' doujins fetched from favorites.', flush=True)
    print('Stopped fetching doujins from favorites.', flush=True)
    fetch_api(d)
    print('Stopped fetching tags from api.', flush=True)
finally:
    for doujin in doujins:
        # .get(): an entry without a 'tags' key must not abort the tally and
        # prevent the output files from being written.
        for tag in doujin.get('tags', ()):
            freq[tag] = freq.get(tag, 0) + 1
            # Same tag with its leading "namespace:" prefix removed.
            bare = re.sub(r'^[^:]+:', '', tag)
            freqnn[bare] = freqnn.get(bare, 0) + 1
    _write_frequency_csv('freq.csv', freq)
    _write_frequency_csv('freqnn.csv', freqnn)
    # newline='\n' disables newline translation, so the file keeps plain
    # '\n' line endings on every platform.
    with open('doujins.json', 'w', encoding='utf-8', newline='\n') as file:
        json.dump(doujins, file, indent=4, sort_keys=True, ensure_ascii=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment