Skip to content

Instantly share code, notes, and snippets.

@RyanKung
Created November 5, 2020 18:13
Show Gist options
  • Save RyanKung/648213bc9761af0f707331cafead7f5b to your computer and use it in GitHub Desktop.
Save RyanKung/648213bc9761af0f707331cafead7f5b to your computer and use it in GitHub Desktop.
test
import requests
from lxml import html
from PIL import Image
import io
from itertools import starmap
def grab_list(url):
    """Scrape ``(title, link)`` pairs from the listing page at ``url``.

    Every ``<a href=...>`` that is a direct child of a
    ``<td class="huanhang">`` cell contributes one pair. The title is the
    anchor's own text with surrounding whitespace stripped; the link is the
    anchor's ``href`` resolved against ``url``, so relative links come back
    as absolute URLs.

    Args:
        url: Address of the listing page.

    Returns:
        A tuple of ``(title, link)`` tuples in document order. Anchors
        without any text are skipped.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    from urllib.parse import urljoin

    response = requests.get(url, timeout=30)
    response.raise_for_status()
    tree = html.fromstring(response.text)

    entries = []
    for anchor in tree.xpath('//td[@class="huanhang"]/a[@href]'):
        # Title and href are read from the same element; querying them
        # separately and zipping the results shifts every later pair as soon
        # as one anchor has no text, several text nodes, or no href.
        title = "".join(anchor.xpath("text()")).strip()
        if not title:
            continue
        entries.append((title, urljoin(url, anchor.get("href"))))
    return tuple(entries)
def grab_target_base_url(src):
    """Return the ``value`` of the ``<input id="dp">`` element on page ``src``.

    ``download_pdf`` hands the returned string to ``grab_target_gifs``, which
    appends ``"<n>.gif"`` to it to build the image URLs.

    Args:
        src: Address of the page to inspect.

    Returns:
        The ``value`` attribute of the first matching input element.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        IndexError: If the page has no ``<input id="dp">`` with a ``value``.
    """
    response = requests.get(src, timeout=30)
    response.raise_for_status()
    values = html.fromstring(response.text).xpath('//input[@id="dp"]/@value')
    if not values:
        # Same exception type as the bare [0] lookup would raise, but with a
        # message that says which page was missing the element.
        raise IndexError('no <input id="dp"> value found at %s' % src)
    return values[0]
def grab_target_gifs(base_url):
    """Download consecutively numbered GIFs until the sequence ends.

    Requests ``base_url + "1.gif"``, ``base_url + "2.gif"``, ... and collects
    the response bodies. The loop stops at the first response that is not a
    successful ``image/gif`` reply; that response is not included.

    Args:
        base_url: URL prefix to which ``"<n>.gif"`` is appended.

    Returns:
        A list of ``bytes`` objects, one per downloaded GIF, in request
        order. Empty if the very first request is not a GIF.

    Raises:
        requests.RequestException: If a request fails or times out.
    """
    frames = []
    index = 1
    while True:
        resp = requests.get(base_url + str(index) + ".gif", timeout=30)
        # The header may be absent or carry parameters / different casing
        # (e.g. "image/gif; charset=binary"), so compare only the media type.
        content_type = resp.headers.get("Content-Type", "")
        media_type = content_type.split(";", 1)[0].strip().lower()
        if not resp.ok or media_type != "image/gif":
            break
        frames.append(resp.content)
        index += 1
    return frames
def compose_to_pdf(gifs_data, save_path):
    """Write the given images into one multi-page PDF at ``save_path + ".pdf"``.

    Args:
        gifs_data: Iterable of raw image file contents (``bytes``), one per
            page, in page order.
        save_path: Output path without the ``.pdf`` extension. Missing parent
            directories are created.

    Returns:
        ``True`` after the PDF has been written, ``False`` if ``gifs_data``
        is empty (nothing is written in that case).
    """
    import os

    pages = [Image.open(io.BytesIO(raw)) for raw in gifs_data]
    if not pages:
        # Without a first page there is nothing to call .save() on.
        print("No images to save for %s" % save_path)
        return False

    target = save_path + ".pdf"
    parent = os.path.dirname(target)
    if parent:
        # Saving into a directory that does not exist yet would fail.
        os.makedirs(parent, exist_ok=True)

    first_page, other_pages = pages[0], pages[1:]
    first_page.save(
        target,
        "PDF",
        resolution=100.0,
        save_all=True,
        append_images=other_pages,
    )
    print("Success! %s" % save_path)
    return True
def download_pdf(filename, src, dir="reports/"):
    """Fetch the GIF pages linked from ``src`` and save them as one PDF.

    Chains the three helpers: ``grab_target_base_url`` reads the image URL
    prefix from ``src``, ``grab_target_gifs`` downloads the numbered GIFs,
    and ``compose_to_pdf`` writes them to ``dir + filename`` (that helper
    appends the ``.pdf`` extension).

    Args:
        filename: Output file name without extension.
        src: Address of the page that holds the image URL prefix.
        dir: Prefix prepended to ``filename`` by plain string concatenation.

    Returns:
        Whatever ``compose_to_pdf`` returns.
    """
    image_prefix = grab_target_base_url(src)
    page_blobs = grab_target_gifs(image_prefix)
    output_stem = dir + filename
    return compose_to_pdf(page_blobs, output_stem)
def lets_go(url):
    """Download a PDF for every entry listed on the page at ``url``.

    Each tuple produced by ``grab_list(url)`` is unpacked into the positional
    arguments of ``download_pdf``.

    Args:
        url: Address of the listing page.

    Returns:
        A list with the return value of ``download_pdf`` for each entry, in
        listing order.
    """
    return [download_pdf(*entry) for entry in grab_list(url)]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment