Last active
March 21, 2019 06:57
-
-
Save yingziwu/ce161ffbbd4adf34a9bb2e24d5e93c42 to your computer and use it in GitHub Desktop.
输入实例地址,tag名称,下载相应tag下的所有图片。
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
import requests | |
import os | |
def request_status_list(instance_uri, tag_name, max_id=None, timeout=30):
    """Fetch one page of the hashtag timeline from an instance.

    Sends a GET request to
    ``<instance_uri>/api/v1/timelines/tag/<tag_name>``.

    Args:
        instance_uri: Base URL of the instance, e.g.
            ``'https://example.social'``. A trailing slash is tolerated.
        tag_name: Hashtag name; appended to the endpoint path as-is.
        max_id: When truthy, sent as the ``max_id`` query parameter so the
            caller can page through the timeline. When falsy, no query
            parameters are sent.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block forever on a stalled
            connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    # Strip a trailing slash so the joined URL never contains '//api/...'.
    uri = instance_uri.rstrip('/') + '/api/v1/timelines/tag/' + tag_name
    params = {'max_id': max_id} if max_id else None
    resp = requests.get(uri, params=params, timeout=timeout)
    # Fail loudly here instead of handing an error payload to the caller,
    # which would otherwise be mistaken for a list of statuses.
    resp.raise_for_status()
    return resp.json()
def handle_status(status_list):
    """Extract media information from one page of statuses.

    Args:
        status_list: List of status dicts. Each must provide ``'id'``,
            ``'media_attachments'`` (a list of dicts with a ``'url'``
            key) and, for statuses that carry media, ``'account'`` (with
            ``'username'``) and ``'created_at'``.

    Returns:
        A ``(result_list, min_id, num)`` tuple:

        * ``result_list`` - one dict per status that has at least one
          attachment, with keys ``'id'``, ``'author'``, ``'created_at'``
          and ``'media_uri_list'`` (attachment URLs with any query string
          removed).
        * ``min_id`` - the smallest status id on the page, or ``None``
          when the page is empty.
        * ``num`` - the number of statuses on the page, with or without
          media.
    """
    # An empty page has no first element to seed the minimum from.
    if not status_list:
        return [], None, 0

    def id_order(status):
        # Ids arrive as strings of digits; comparing them as plain strings
        # is lexicographic ('99' > '100'). Ordering by length first and by
        # text second matches numeric order without assuming the id fits
        # any integer type.
        status_id = str(status['id'])
        return len(status_id), status_id

    min_id = min(status_list, key=id_order)['id']

    result_list = []
    for status in status_list:
        # Drop the cache-busting query string so the URL ends in the bare
        # file name.
        media_uri_list = [
            attachment['url'].split('?')[0]
            for attachment in status['media_attachments']
        ]
        if not media_uri_list:
            continue
        result_list.append({
            'id': status['id'],
            'author': status['account']['username'],
            'created_at': status['created_at'],
            'media_uri_list': media_uri_list,
        })
    return result_list, min_id, len(status_list)
def download_image(result, timeout=30):
    """Download every media file referenced by one processed status.

    Each file is written to the current working directory and named
    ``<date>-<author>-<status id>-<remote file name>``, where ``<date>``
    is the part of ``result['created_at']`` before the ``'T'``.

    Args:
        result: Dict with the keys ``'created_at'``, ``'author'``,
            ``'id'`` and ``'media_uri_list'``, as built by
            ``handle_status``.
        timeout: Seconds to wait for each download before giving up.

    Returns:
        The list of file names written, in download order.

    Raises:
        requests.HTTPError: If a media URL answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    filename_list = []
    for media_uri in result['media_uri_list']:
        date = result['created_at'].split('T')[0]
        remote_name = os.path.basename(media_uri)
        filename = f"{date}-{result['author']}-{result['id']}-{remote_name}"
        r = requests.get(media_uri, timeout=timeout)
        # Check the status before opening the file so an error page is
        # never saved to disk under an image file name.
        r.raise_for_status()
        with open(filename, 'wb') as f:
            f.write(r.content)
        filename_list.append(filename)
    return filename_list
if __name__ == '__main__':
    # Script entry point: walk the whole hashtag timeline page by page,
    # collect every status that has media, then download the media.
    instance_uri = 'https://cmx.social'
    tag_name = '中午吃啥'
    # The loop assumes a full page holds 20 statuses (the endpoint's
    # default page size); a shorter page means the timeline is exhausted.
    page_size = 20

    all_results = []
    max_id = None
    while True:
        status_list = request_status_list(instance_uri, tag_name, max_id)
        # An empty page occurs when the tag has no posts or when the total
        # is an exact multiple of the page size; there is nothing left to
        # process and no id to continue from.
        if not status_list:
            break
        page_results, min_id, num = handle_status(status_list)
        all_results.extend(page_results)
        if num < page_size:
            break
        # Continue from the oldest status seen on this page.
        max_id = min_id

    for result in all_results:
        download_image(result)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment