Last active
January 4, 2016 17:09
-
-
Save wuchong/8651764 to your computer and use it in GitHub Desktop.
Python爬虫:暴走漫画上的GIF趣图
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
#--------------------------------------- | |
# 程序:暴走漫画的GIF趣图爬虫 | |
# 版本: 0.1 | |
# 作者:WuChong | |
# 日期:2014-01-27 | |
# 语言:Python 3.3 | |
# 说明:能自定义下载页数,默认全部下载,未加多线程功能 | |
#--------------------------------------- | |
import urllib.request | |
import bs4,os | |
page_sum = 1 #设置下载页数 | |
path = os.getcwd() | |
path = os.path.join(path,'暴走GIF') | |
if not os.path.exists(path): | |
os.mkdir(path) #创建文件夹 | |
url = "http://baozoumanhua.com/gif/month/page/" #url地址 | |
headers = { #伪装浏览器 | |
'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)' | |
' Chrome/32.0.1700.76 Safari/537.36' | |
} | |
for count in range(page_sum): | |
req = urllib.request.Request( | |
url = url+str(count+1), | |
headers = headers | |
) | |
print(req.full_url) | |
content = urllib.request.urlopen(req).read() | |
soup = bs4.BeautifulSoup(content) # BeautifulSoup | |
img_content = soup.findAll('img',attrs={'style':'width:460px'}) | |
url_list = [img['src'] for img in img_content] #列表推导 url | |
title_list = [img['alt'] for img in img_content] #图片名称 | |
for i in range(url_list.__len__()) : | |
imgurl = url_list[i] | |
filename = path + os.sep +title_list[i] + ".gif" | |
print(filename+":"+imgurl) #打印下载信息 | |
urllib.request.urlretrieve(imgurl,filename) #下载图片 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment