Last active
February 2, 2019 01:11
-
-
Save ulgens/bc7732ad1a454291063c08bebebb460b to your computer and use it in GitHub Desktop.
Humble Bundle Book Download
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download the CBZ edition of every book listed on a saved Humble Bundle
# purchase/download page.
#
# pip install wget
# Save purchase/download page to index.html first

from lxml import html
import wget

# Absolute XPath expressions into the saved page. The two per-book
# expressions take the 1-based position of the book's <div> within the list
# selected by ``book_list_xpath``.
book_list_xpath = """//*[@id="papers-content"]/div[11]/div[4]/div/div/div/div"""
book_name_xpath = """//*[@id="papers-content"]/div[11]/div[4]/div/div/div/div[{index}]/div/div[2]/div[1]/a/text()"""
download_links_path = """//*[@id="papers-content"]/div[11]/div[4]/div/div/div/div[{index}]/div/div[3]/div/div/div/div[1]/a/@href"""

# Read raw bytes so lxml can detect the page's declared encoding instead of
# decoding with the platform default (which may not match the saved file).
with open("index.html", "rb") as file:
    content = file.read()

tree = html.fromstring(content)
books = tree.xpath(book_list_xpath)

# XPath positional predicates are 1-based and the expressions above are
# absolute, so the index substituted into them must be the element's 1-based
# position. Counting from 0 made the first lookup match nothing and left the
# last entry of the list unprocessed.
# NOTE(review): this assumes all entries share a single parent <div>, as the
# original indexing scheme did -- confirm against the saved page.
for index, book in enumerate(books, start=1):
    names = book.xpath(book_name_xpath.format(index=index))

    # Garbage (header, download etc.)
    if not names:
        continue

    name = names[0].strip()
    download_links = book.xpath(download_links_path.format(index=index))
    cbz_links = [link for link in download_links if "cbz" in link]

    if not cbz_links:
        print(f"Couldn't find CBZ for {name}")
        continue

    cbz_link = cbz_links[0]
    print(f"{index}/{len(books)} -> {name}")

    # A "/" in a title would be interpreted as a directory separator and make
    # the final move of the downloaded file fail, so replace it.
    safe_name = name.replace("/", "-")
    wget.download(cbz_link, f"{safe_name}.cbz")
    print("\n")

# Sample output:
#
# 6/80 -> Wynonna Earp Legends: Doc Holliday #1
# 100% [........................................................................] 14278645 / 14278645
# 7/80 -> Wynonna Earp Legends: Doc Holliday #2
# 100% [........................................................................] 16174759 / 16174759
# 8/80 -> The Last Fall
# 100% [........................................................................] 69379908 / 69379908
# 9/80 -> Comic Book History of Comics
# 9% [......                                                                  ] 14303232 / 148509839
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment