Created
November 5, 2020 18:13
-
-
Save RyanKung/648213bc9761af0f707331cafead7f5b to your computer and use it in GitHub Desktop.
test
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io
import os
from itertools import starmap
from urllib.parse import urljoin

import requests
from lxml import html
from PIL import Image
def grab_list(url):
    """Fetch a listing page and return its (title, link) pairs.

    Every ``<a>`` element that is a direct child of a
    ``<td class="huanhang">`` cell contributes one pair.

    Args:
        url: Address of the listing page.

    Returns:
        A tuple of ``(title, link)`` tuples in document order. ``title`` is
        the anchor's own text with surrounding whitespace removed; ``link``
        is the anchor's ``href`` resolved against ``url``.
    """
    document = html.fromstring(requests.get(url).text)
    entries = []
    # Build each pair from a single anchor element. Zipping two independent
    # XPath result lists would shift every later pair as soon as one anchor
    # had no text node, several text nodes, or no href.
    for anchor in document.xpath('//td[@class="huanhang"]/a'):
        href = anchor.get("href")
        if href is None:
            continue
        title = "".join(anchor.xpath("text()")).strip()
        # urljoin leaves absolute links untouched and makes relative ones
        # fetchable.
        entries.append((title, urljoin(url, href)))
    return tuple(entries)
def grab_target_base_url(src):
    """Return the ``value`` of the ``<input id="dp">`` element found at ``src``.

    Args:
        src: URL of the page to fetch.

    Returns:
        The first matching ``value`` attribute.

    Raises:
        IndexError: If the page contains no matching ``<input>`` element.
    """
    page = requests.get(src)
    document = html.fromstring(page.text)
    matches = document.xpath('//input[@id="dp"]/@value')
    return matches[0]
def grab_target_gifs(base_url):
    """Download consecutively numbered GIF pages until one is missing.

    Requests ``base_url + "1.gif"``, ``base_url + "2.gif"``, ... and stops at
    the first response that is an HTTP error or whose media type is not
    ``image/gif``.

    Args:
        base_url: URL prefix to which ``<n>.gif`` is appended.

    Returns:
        A list with the raw bytes of every page that was downloaded, in
        page order. The list is empty when even the first page is missing.
    """
    pages = []
    index = 1
    while True:
        resp = requests.get(base_url + str(index) + ".gif")
        # The header may be absent or carry parameters ("image/gif; ...");
        # compare only the media type, case-insensitively.
        content_type = resp.headers.get("Content-Type", "")
        media_type = content_type.split(";", 1)[0].strip().lower()
        # An error status ends the sequence too, so an error page served
        # as a GIF is never appended as a real page.
        if not resp.ok or media_type != "image/gif":
            break
        pages.append(resp.content)
        index += 1
    return pages
def compose_to_pdf(gifs_data, save_path):
    """Combine raw GIF images into a single multi-page PDF.

    Args:
        gifs_data: Iterable of ``bytes`` objects, one encoded image per
            page, in page order.
        save_path: Destination path without extension; ``".pdf"`` is
            appended.

    Returns:
        True when the PDF was written, False when ``gifs_data`` held no
        pages (nothing is written in that case).
    """
    pages = [Image.open(io.BytesIO(raw)) for raw in gifs_data]
    if not pages:
        # Indexing pages[0] here would raise a bare IndexError; report the
        # skip instead so a batch run can carry on.
        print("Skipped (no pages): %s" % save_path)
        return False
    try:
        first, *rest = pages
        first.save(
            save_path + ".pdf",
            "PDF",
            resolution=100.0,
            save_all=True,
            append_images=rest,
        )
    finally:
        # Release the decoded images whether or not saving succeeded.
        for page in pages:
            page.close()
    print("Success! %s" % save_path)
    return True
def download_pdf(filename, src, dir="reports/"):
    """Download one report and store it as ``dir + filename + ".pdf"``.

    Args:
        filename: Name of the PDF, without extension.
        src: URL of the report page, handed to ``grab_target_base_url``.
        dir: Prefix prepended verbatim to ``filename``; include the trailing
            path separator when it names a directory.

    Returns:
        The result of ``compose_to_pdf`` for the downloaded pages.
    """
    save_path = dir + filename
    gifs_data = grab_target_gifs(grab_target_base_url(src))
    # Nothing else in this file creates the target directory, so make sure
    # it exists before the PDF is written.
    parent = os.path.dirname(save_path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    return compose_to_pdf(gifs_data, save_path)
def lets_go(url):
    """Download every report listed at ``url``.

    Args:
        url: Address of the listing page, handed to ``grab_list``.

    Returns:
        A list with the ``download_pdf`` result for each listed entry, in
        listing order.
    """
    return [download_pdf(title, link) for title, link in grab_list(url)]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment