Last active
March 28, 2021 14:30
-
-
Save WP-LKL/8900138462e6003f3ce7793e64e15ca4 to your computer and use it in GitHub Desktop.
Minimalist parallel image downloading with Python. Please consult the target site's terms of service and robots.txt before use.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import concurrent.futures
import os
import urllib.request
import uuid

import requests
def getURLs(filename : str) -> list:
    """Read a list of URLs from a text file, one URL per line.

    Args:
        filename: Path of the text file to read.

    Returns:
        The non-blank lines of the file, in file order, with surrounding
        whitespace removed.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        stripped = (line.strip() for line in f.read().splitlines())
        # Blank lines (e.g. a trailing empty line) are not URLs; dropping
        # them keeps callers from being handed an empty string to fetch.
        return [url for url in stripped if url]
# URLs to download, loaded once when the module is executed.
URLS = getURLs("image_urls.txt")
# Output path prefix; load_url appends a short random id and ".jpg" to it.
# NOTE: this name shadows the builtin dir(); it is kept because load_url
# reads it by this name.
dir = "img/b01_"  # folder/batchPrefix
def load_url(url, timeout=30):
    """Retrieve a single URL and save its body as an image file.

    The file is written to ``<dir><7 hex chars>.jpg``, where ``dir`` is the
    module-level folder/batch prefix and the hex chars come from a random
    UUID.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait on the server before giving up, so a
            stalled connection cannot block a worker thread forever.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        OSError: If the output folder or file cannot be created/written.
    """
    response = requests.get(url, timeout=timeout)
    # Fail on 4xx/5xx instead of saving an error page with a .jpg name.
    response.raise_for_status()

    # ./dir/BatchPrefix_uuid.jpg
    path = dir + uuid.uuid4().hex[:7] + ".jpg"

    # Create the target folder on first use; exist_ok makes this safe when
    # several worker threads get here at the same time.
    folder = os.path.dirname(path)
    if folder:
        os.makedirs(folder, exist_ok=True)

    # The context manager closes the file even if the write raises.
    with open(path, "wb") as file:
        file.write(response.content)
# Download every URL concurrently using a pool of worker threads.
with concurrent.futures.ThreadPoolExecutor(max_workers=None) as executor:
    future_to_url = {executor.submit(load_url, url): url for url in URLS}
    # as_completed() returns a lazy iterator: it has to be iterated, and
    # result() has to be called, otherwise any exception raised inside
    # load_url is silently discarded.
    for future in concurrent.futures.as_completed(future_to_url):
        url = future_to_url[future]
        try:
            future.result()
        except Exception as exc:  # report and continue with the remaining downloads
            print(f"{url}: download failed: {exc!r}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment