Last active
February 17, 2025 21:33
-
-
Save hcallen/56717cf0d9f1b6a880e4ea2ba06ce852 to your computer and use it in GitHub Desktop.
Download all files from a Gumroad purchase in your library
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Download all files from Gumroad directory | |
""" | |
import re | |
import json | |
import requests | |
from selenium import webdriver | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as EC | |
# URL of the Gumroad content page whose files should be downloaded,
# e.g. https://gumroad.com/d/16f471d07602451bfff70ce676728ca3
# Must be filled in before running the script.
CONTENT_URL = ''
def sanitize_filename(filename):
    """Return ``filename`` with the characters ``<>:"/\\|?*`` removed.

    Every other character (including spaces and dots) is kept unchanged
    and in its original order.

    Args:
        filename: The candidate file name as a string.

    Returns:
        A copy of ``filename`` without any of the listed characters.
    """
    invalid_chars = '<>:"/\\|?*'
    # One translate pass replaces the character-by-character concatenation.
    return filename.translate(str.maketrans('', '', invalid_chars))
def download_file(url, filename, file_size):
    """Stream ``url`` into the local file ``filename``, printing progress.

    Args:
        url: URL to fetch with a plain ``requests.get`` (no session).
        filename: Path of the file to write; opened in ``'wb'`` mode, so an
            existing file is overwritten.
        file_size: Expected total size in bytes, used only to compute the
            printed percentage. When falsy (``0`` or ``None``) the number of
            bytes received so far is printed instead.

    Raises:
        requests.HTTPError: If the response has an error status code.
    """
    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        progress = 0
        with open(filename, 'wb') as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
                progress += len(chunk)
                if file_size:
                    percent_progress = (progress / file_size) * 100
                    print(f'{filename} - {percent_progress:.2f}%')
                else:
                    # Size unknown: avoid dividing by zero / None.
                    print(f'{filename} - {progress} bytes')
def init_session():
    """Create a ``requests.Session`` carrying cookies from a Chrome session.

    Opens Chrome through Selenium, navigates to ``CONTENT_URL`` and blocks
    until the browser's current URL contains ``CONTENT_URL`` (presumably so
    the user can finish logging in when redirected elsewhere first -- confirm).
    Every cookie from the browser is then copied by name and value into a new
    ``requests.Session``.

    Returns:
        The ``requests.Session`` populated with the browser's cookies.
    """
    session = requests.Session()
    driver = webdriver.Chrome()
    try:
        driver.get(CONTENT_URL)
        # url_matches treats its argument as a regular expression, so escape
        # the URL to match it literally ('.' and '?' are regex metacharacters).
        WebDriverWait(driver, 9999999999999).until(
            EC.url_matches(re.escape(CONTENT_URL)))
        for cookie in driver.get_cookies():
            session.cookies.set(cookie['name'], cookie['value'])
    finally:
        # quit() ends the whole driver session; close() only closes the
        # current window. Doing it in ``finally`` avoids leaking the browser
        # if navigation or the wait raises.
        driver.quit()
    return session
def main():
    """Download every downloadable item listed on the ``CONTENT_URL`` page.

    Steps:
      1. Build an authenticated session via ``init_session``.
      2. Fetch the content page and extract the JSON payload embedded in the
         ``DownloadPageWithContent`` ``<script>`` tag.
      3. For each entry in ``content.content_items`` that has a non-empty
         ``download_url``, resolve the final (post-redirect) URL and save the
         file as ``<sanitized file_name>.<extension>`` in the working directory.

    Raises:
        SystemExit: If ``CONTENT_URL`` has not been set.
        RuntimeError: If the embedded JSON payload cannot be found in the page.
        requests.HTTPError: If any HTTP request returns an error status.
    """
    if not CONTENT_URL:
        raise SystemExit('CONTENT_URL is empty; set it to the Gumroad content page URL.')

    s = init_session()
    r = s.get(CONTENT_URL)
    r.raise_for_status()

    # ``[^>]*`` stays inside the opening tag and the lazy ``(.*?)`` stops at the
    # first ``</script>``, so the capture is exactly this tag's JSON payload
    # even when the page is minified or the payload spans several lines.
    match = re.search(
        r'<script type="application/json" class="js-react-on-rails-component" '
        r'data-component-name="DownloadPageWithContent"[^>]*>(.*?)</script>',
        r.text,
        re.DOTALL)
    if match is None:
        raise RuntimeError(
            'Could not find the DownloadPageWithContent data in the page; '
            'the login may have failed or the page layout may have changed.')
    content = json.loads(match.group(1))

    to_download = [d for d in content['content']['content_items']
                   if d.get('download_url')]
    for item in to_download:
        # Different inner/outer quotes keep these f-strings valid before 3.12.
        dl_url = f"https://gumroad.com/{item['download_url']}"
        # stream=True follows the redirects without downloading the body, so
        # the file is not fetched twice just to learn its final URL.
        with s.get(dl_url, stream=True) as redirect_response:
            redirect_response.raise_for_status()
            attachment_url = redirect_response.url
        clean_filename = sanitize_filename(item['file_name'])
        filename = f"{clean_filename}.{item['extension']}"
        download_file(attachment_url, filename, item['file_size'])
# Run the downloader only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment