Last active
December 30, 2021 19:36
-
-
Save fmalina/03c84100e84ecc2ae2cd23d60e11959e to your computer and use it in GitHub Desktop.
Download a product image from Amazon by ASIN code with no Amazon affiliate account
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Install dependencies using:
    pip3 install Pillow requests lxml cssselect
Edit PRODUCTS to list your product ASIN IDs.
Enter your API key from scraperapi.com in PROXY_API_KEY.
"""
from io import BytesIO
from pathlib import Path
from urllib.parse import quote

import requests
from lxml.html import fromstring
from PIL import Image, UnidentifiedImageError
# ASIN codes of the products whose images should be downloaded.
PRODUCTS = ['B00RVXBFWQ']

# Placeholder: replace with your own key from scraperapi.com.
PROXY_API_KEY = "<see scraperapi.com>"

# Prefix for proxied requests; the target URL is appended as the `url` value.
PROXY = f"http://api.scraperapi.com?api_key={PROXY_API_KEY}&url="
def save_img(url, name):
    """Download an image through the proxy and save it as a JPEG file.

    Args:
        url: Direct URL of the image; it is requested via ``PROXY``.
        name: File stem; the image is written to
            ``static/bookimg/{name}.jpg``.

    HTTP error statuses, unreadable image data and file-system errors are
    printed instead of raised, so one bad image does not stop the caller.
    Connection-level failures raised by ``requests.get`` itself (including
    timeouts) still propagate.
    """
    # Percent-encode the target so any '?' or '&' it contains stays inside
    # the proxy's `url` parameter instead of being read as proxy options.
    # A timeout is set because requests otherwise waits indefinitely.
    response = requests.get(PROXY + quote(url, safe=''), timeout=60)
    out_path = Path('static/bookimg') / f'{name}.jpg'
    try:
        # requests' HTTPError derives from OSError, so the handler below
        # reports a bad status instead of trying to decode an error page.
        response.raise_for_status()
        out_path.parent.mkdir(parents=True, exist_ok=True)
        with Image.open(BytesIO(response.content)) as img:
            # JPEG cannot store alpha or palette modes (e.g. RGBA, P).
            if img.mode not in ('L', 'RGB', 'CMYK'):
                img = img.convert('RGB')
            img.save(out_path)
    except (UnidentifiedImageError, OSError) as e:
        print(e)
def get_img_by_asin(asin, save_name):
    """Fetch the Amazon UK product page for an ASIN and save its image.

    Args:
        asin: Product ASIN code, inserted into the ``/dp/{asin}/`` page URL.
        save_name: File stem passed on to ``save_img``.

    The page is requested through ``PROXY``. The last ``<img>`` inside
    ``#ebooks-img-canvas`` is used. If the response is unusable or no image
    URL is found, a message is printed and nothing is saved.
    """
    url = PROXY + f'https://www.amazon.co.uk/dp/{asin}/'
    print(url)
    # A timeout is set because requests otherwise waits indefinitely.
    response = requests.get(url, timeout=60)

    # An empty body would make the HTML parser raise, so bail out first.
    if not response.ok or not response.content.strip():
        print('No image or bad response')
        return

    dom = fromstring(response.content)
    images = dom.cssselect("#ebooks-img-canvas img")
    # Guard against both "no matching <img>" and "<img> without src".
    src = images[-1].get('src') if images else None
    if not src:
        print('No image or bad response')
        return

    save_img(src, save_name)
if __name__ == '__main__':
    # Save each product's image under its own ASIN as the file name.
    # The keyword must be `save_name` to match get_img_by_asin's signature.
    for asin in PRODUCTS:
        get_img_by_asin(asin, save_name=asin)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment