Skip to content

Instantly share code, notes, and snippets.

@nix010
Last active February 26, 2019 09:49
Show Gist options
  • Select an option

  • Save nix010/b8d658c00761098aecd8d1a09ea5e9d3 to your computer and use it in GitHub Desktop.

Select an option

Save nix010/b8d658c00761098aecd8d1a09ea5e9d3 to your computer and use it in GitHub Desktop.
Crawl pictures from Pinterest by searching for a keyword | 26 Jan, 2018 (TESTED)
from bs4 import BeautifulSoup as BS
import requests
class BaseCrawler(object):
    """Skeleton for HTTP crawlers that share one ``requests`` session.

    ``crawl_now`` runs ``call_request``, ``parse_response_data`` and
    ``save_data_to_db`` in that order; all three are no-ops here and are
    meant to be overridden by subclasses.
    """

    # Endpoint of the crawled API; left unset in the base class.
    api_url = None

    # Headers sent with every request. This dict is shared by all
    # instances, so it must never be mutated: per-call headers are merged
    # into a copy (see ``_merged_headers``).
    default_headers = {
        # Quality values are separated from the language tag by ';'.
        'Accept-Language': 'en-US,en;q=0.9,vi;q=0.8',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
        'Accept': '*/*',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/62.0.3202.94 Chrome/62.0.3202.94 Safari/537.36'
    }

    def __init__(self, email, password, user_id, **kwargs):
        """Open the HTTP session used by ``_get`` and ``_post``.

        ``email``, ``password``, ``user_id`` and ``kwargs`` are accepted
        for interface compatibility but are not used by this class.
        """
        self.r = requests.Session()

    def _merged_headers(self, headers):
        """Return a new dict: ``default_headers`` overlaid with ``headers``."""
        merged = dict(self.default_headers)
        if headers:
            merged.update(headers)
        return merged

    def _get(self, url, params=None, headers=None, cookies=None):
        """Send a GET request through the session and return its response.

        :param url: target URL.
        :param params: query-string parameters (defaults to an empty dict).
        :param headers: extra headers for this call only; they override
            ``default_headers`` on key conflicts.
        :param cookies: cookies for this call (defaults to an empty dict).
        """
        if params is None:
            params = {}
        if cookies is None:
            cookies = {}
        return self.r.get(
            url,
            params=params,
            headers=self._merged_headers(headers),
            cookies=cookies,
        )

    def _post(self, url, params=None, data=None, headers=None):
        """Send a POST request through the session and return its response.

        :param url: target URL.
        :param params: query-string parameters, forwarded to the session.
        :param data: request body.
        :param headers: extra headers for this call only; they override
            ``default_headers`` on key conflicts.
        """
        return self.r.post(
            url,
            params=params,
            data=data,
            headers=self._merged_headers(headers),
        )

    def save_data_to_db(self):
        """Persist the parsed data. No-op in the base class."""
        pass

    def crawl_now(self):
        """Run one crawl: request, parse the response, then save."""
        response = self.call_request()
        self.parse_response_data(response)
        self.save_data_to_db()

    def call_request(self):
        """Perform the request. No-op in the base class (returns ``None``)."""
        pass

    def parse_response_data(self, response):
        """Extract data from ``response``. No-op in the base class."""
        pass
import json
from core.crawlers.base_crawler import BaseCrawler # Just some helpers to call API
class PinterestCrawler(BaseCrawler):
    """Collect original-size image URLs from Pinterest search results.

    After ``crawl_now()`` (inherited) the URLs are available in
    ``self.parsed_data``.
    """

    api_url = 'https://www.pinterest.com/resource/SearchResource/get/'

    # Pinterest-specific headers combined with the base defaults. On a key
    # conflict the base value wins, because it is applied last.
    default_headers = dict({
        'X-Requested-With': 'XMLHttpRequest',
        'X-Pinterest-AppState': 'active',
    }, **BaseCrawler.default_headers)

    def __init__(self, keyword):
        """Prepare the session and the query parameters for ``keyword``.

        ``BaseCrawler.__init__`` is not called (it requires
        email/password/user_id); the session is created here instead.

        :param keyword: search term sent to the Pinterest search endpoint.
        """
        self.r = requests.Session()
        self.keyword = keyword
        self.params = {
            'source_url': '/search/pins/?q=%s' % keyword,
            'rs': 'typed',
            # The endpoint takes its options as a JSON document in 'data'.
            'data': json.dumps({
                'options': {
                    # Hard-coded bookmark token captured from a browser session.
                    'bookmarks': ['Y2JVSG81V2sxcmNHRlpWM1J5VFVad1YxWlVSbGhXTVZwSlZGWlZNVlV3TVZkalJFSlhUVzVTVkZWWGN6RldNa3BKVW14S1YxSnNjR2hYVm1ONFpXc3hWMVZ1U2xaaE0wSnpWVzAxUTJWR1pIRlVibVJXVW10d1NGa3dhRWRXVjBWNFUyeFNXbFpGV2pOV01GcExWMWRPUms5V1pGTmhNMEl5Vm1wSmQyVkdUblJXYkdScVVsWmFWMWxzYUVOaFJteFlaRVphYkdKSFVucFhhMXAzVkRGS2RHVkVRbGRXZWtJMFZrZDRXbVF4V2xWUmJGWnBWa1ZhV1ZkV1ZtRmtNVTVIVm14c1lWSlViRlJWYWtwUFRrWmFTR1ZHVGxWTmEzQlhWREZhVjFWc1pFaFVWR3hRWWtaYVNsbHVjRk5pUmtsNFkwVmFWazFxUm5wV1IzaEtaVVprZFZGc1ZtbFNNVXBOVjFaV1ZrMVdaRWRVYmxKT1ZqQmFXRlZ0ZEhkTlJscEZVbXhPYW1GNlZsZFVNVlpYVmtaa1NWRnNSbGRoTVhCSFZGWmFVMVpzY0VkVGJYaFRWa1phU2xaVVNYZGxSbEp6VjJ0YVYyRnNXbGxaYTFwTFVURndXR042VmxSU2EzQXdXVlZWTVdKSFJYZGpTR2hYVFc1U1ZGVnFTa2RXTWs1SFZteGFWMUpyY0ZKV1YzUnJWVEpPYzFWdVVtcFNWWEJ6V1Zod1IyVkdWbk5WYTA1WVlYcEdlVlJWVWtkV1YwWnlZMFpDV21KR1ZqUmFSVnBoVmxVeFJVMVVhR0ZoYTJ0M1ZGVmtVbVZHY0VoU2JURk9aVzF6ZDFkWE1WWk5SVFZGV2tkNFlXRnNSVEZVVmxKYVpWWndWV0pIYUdGaVZtdDRWMnhTYjJGc2NIRmhSM1JhWld4V00xUlZVbk5pUlRWeFZGUk9ZVlpHUlhsVU1HUkxZV3N4TmxGVVFrNVNSVlV3Vkd0U1QySkZNWEZYYldoYVZWUXdPV1pIVW1wWmVrbDZUVlJWTkU0eVVtMVpiVkUwVGpKWmVFOVVXbWhPYlVsNlRucGpNRTFIU1RKTlZFRTBXWHBDYUU5VVFYaFphbFV4V1hwV2JGa3lSbXROUjBsNlQxUmplRTVFVG14T2VrSnBUV3BhYkZwWFVUMD06VUhvNVQySXlOV3htUkU1clQxUk5NRnBVUVRKUFJHczFUVEpSZUU0eVZUVmFSRVV6VDBkT2JGbFVTVFJaVjFwdFRsUm5OVTU2U1hoYWFscG9UbnBzYkU1RVFYaGFWRUY1V2tkWk1rNHlSVFJOZW1ocVdWUmFhMDlYVVhoYVJGazl8ZmQyYWVhMzUwMjEyNzUzMTVhZTdmNDIxNzJkZjU0NDk0N2IxNjZmNTViOTkxOTQ0N2FjYTczZmE3OGJlMjliZg=='],
                    'filters': "",
                    'query': "%s" % keyword,
                    'scope': "pins"
                },
                "context": {}
            })
        }

    def parse_response_data(self, response):
        """Store the original-size image URL of every result in ``parsed_data``.

        :param response: object whose ``json()`` returns the decoded API
            payload containing a ``resource_response`` entry.
        :raises Exception: when the payload carries a truthy ``error``.
        """
        resp = response.json()['resource_response']
        error = resp.get('error')
        if error:
            # str() keeps this working when the error payload is a dict or
            # other non-string, instead of failing with a TypeError.
            raise Exception(str(response) + str(error))

        # This is the results after parse
        urls = []
        # 'data' may be missing or null; result items without an
        # images/orig/url entry are skipped rather than aborting the parse.
        for pic in resp.get('data') or []:
            original = (pic.get('images') or {}).get('orig') or {}
            url = original.get('url')
            if url:
                urls.append(url)
        self.parsed_data = urls

    def call_request(self):
        """GET the search endpoint with the prepared params; return the response."""
        return self._get(self.api_url, params=self.params)
''' Sample Pinterest request params
:bookmarks : a string you can get by capturing the request to "/resource/SearchResourceBase/get/"
in Chrome DevTools (find it in the XHR section when you enter a search on the web). The one I use
is hard-coded into the request because it works. (TESTED) 26 Jan 2018
{"options":{"bookmarks":["Y2JVSG81V2sxcmNHRlpWM1J5VFVad1YxWlVSbGhXTVZwSlZGWlZNVlV3TVZkalJFSlhUVzVTVkZWWGN6RldNa3BKVW14S1YxSnNjR2hYVm1ONFpXc3hWMVZ1U2xaaE0wSnpWVzAxUTJWR1pIRlVibVJXVW10d1NGa3dhRWRXVjBWNFUyeFNXbFpGV2pOV01GcExWMWRPUms5V1pGTmhNMEl5Vm1wSmQyVkdUblJXYkdScVVsWmFWMWxzYUVOaFJteFlaRVphYkdKSFVucFhhMXAzVkRGS2RHVkVRbGRXZWtJMFZrZDRXbVF4V2xWUmJGWnBWa1ZhV1ZkV1ZtRmtNVTVIVm14c1lWSlViRlJWYWtwUFRrWmFTR1ZHVGxWTmEzQlhWREZhVjFWc1pFaFVWR3hRWWtaYVNsbHVjRk5pUmtsNFkwVmFWazFxUm5wV1IzaEtaVVprZFZGc1ZtbFNNVXBOVjFaV1ZrMVdaRWRVYmxKT1ZqQmFXRlZ0ZEhkTlJscEZVbXhPYW1GNlZsZFVNVlpYVmtaa1NWRnNSbGRoTVhCSFZGWmFVMVpzY0VkVGJYaFRWa1phU2xaVVNYZGxSbEp6VjJ0YVYyRnNXbGxaYTFwTFVURndXR042VmxSU2EzQXdXVlZWTVdKSFJYZGpTR2hYVFc1U1ZGVnFTa2RXTWs1SFZteGFWMUpyY0ZKV1YzUnJWVEpPYzFWdVVtcFNWWEJ6V1Zod1IyVkdWbk5WYTA1WVlYcEdlVlJWVWtkV1YwWnlZMFpDV21KR1ZqUmFSVnBoVmxVeFJVMVVhR0ZoYTJ0M1ZGVmtVbVZHY0VoU2JURk9aVzF6ZDFkWE1WWk5SVFZGV2tkNFlXRnNSVEZVVmxKYVpWWndWV0pIYUdGaVZtdDRWMnhTYjJGc2NIRmhSM1JhWld4V00xUlZVbk5pUlRWeFZGUk9ZVlpHUlhsVU1HUkxZV3N4TmxGVVFrNVNSVlV3Vkd0U1QySkZNWEZYYldoYVZWUXdPV1pIVW1wWmVrbDZUVlJWTkU0eVVtMVpiVkUwVGpKWmVFOVVXbWhPYlVsNlRucGpNRTFIU1RKTlZFRTBXWHBDYUU5VVFYaFphbFV4V1hwV2JGa3lSbXROUjBsNlQxUmplRTVFVG14T2VrSnBUV3BhYkZwWFVUMD06VUhvNVQySXlOV3htUkU1clQxUk5NRnBVUVRKUFJHczFUVEpSZUU0eVZUVmFSRVV6VDBkT2JGbFVTVFJaVjFwdFRsUm5OVTU2U1hoYWFscG9UbnBzYkU1RVFYaGFWRUY1V2tkWk1rNHlSVFJOZW1ocVdWUmFhMDlYVVhoYVJGazl8ZmQyYWVhMzUwMjEyNzUzMTVhZTdmNDIxNzJkZjU0NDk0N2IxNjZmNTViOTkxOTQ0N2FjYTczZmE3OGJlMjliZg=="],"filters":"","query":"harry potter","scope":"pins"},"context":{}}
'''
"request_identifier":"874307195690",
"resource_data_cache":[...],
"resource":{...},
"client_context":{...},
"resource_response":{
"data":[
{
"domain":"Uploaded by user",
"done_by_me":false,
"requires_advertiser_attribution":false,
"videos":null,
"tracking_params":"CwABAAAADDg3NDMwNzE5NTY5MAA",
"aggregated_pin_data":{
"did_it_data":{
"recommend_scores":[
{
"count":0,
"score":1
},
{
"count":0,
"score":0.5
},
{
"count":0,
"score":0
}
],
"rating":-1,
"user_count":3,
"tags":[
],
"images_count":0,
"recommended_count":3,
"details_count":3,
"type":"aggregateddiditdata"
},
"id":"4793619930918430080",
"aggregated_stats":{
"saves":38841,
"done":3
}
},
"image_signature":"c839d59c4cf008662871ed797ee84357",
"like_count":0,
"images":{
"736x":{
"url":"https://i.pinimg.com/736x/c8/39/d5/c839d59c4cf008662871ed797ee84357--lily-potter-harry-potter-.jpg",
"width":347,
"height":498
},
"474x":{
"url":"https://i.pinimg.com/474x/c8/39/d5/c839d59c4cf008662871ed797ee84357--lily-potter-harry-potter-.jpg",
"width":347,
"height":498
},
"orig":{
"url":"https://s-media-cache-ak0.pinimg.com/originals/c8/39/d5/c839d59c4cf008662871ed797ee84357.jpg",
"width":347,
"height":498
},
"136x136":{
"url":"https://i.pinimg.com/136x136/c8/39/d5/c839d59c4cf008662871ed797ee84357--lily-potter-harry-potter-.jpg",
"width":136,
"height":136
},
"236x":{
"url":"https://i.pinimg.com/236x/c8/39/d5/c839d59c4cf008662871ed797ee84357--lily-potter-harry-potter-.jpg",
"width":236,
"height":338
}
},
"id":"324259241902931702",
"price_currency":"USD",
"is_promoted":false,
"description_html":"C",
"privacy":"public",
"grid_description":"C",
"comments":{
"bookmark":null,
"data":[
],
"uri":"/v3/pins/324259241902931702/comments/"
},
"access":[
],
"comment_count":0,
"board":{
"is_collaborative":false,
"layout":"default",
"name":"Creative",
"privacy":"public",
"url":"/phamthaominh197/creative/",
"owner":{
"id":"324259379329222479"
},
"followed_by_me":false,
"type":"board",
"id":"324259310610038895",
"image_thumbnail_url":"https://s-media-cache-ak0.pinimg.com/upload/324259310610038895_board_thumbnail_2017-12-23-05-56-05_51593_60.jpg"
},
"type":"pin",
"method":"uploaded",
"attribution":null,
"description":"C",
"price_value":0.0,
"additional_hide_reasons":[
],
"native_creator":null,
"is_playable":false,
"debug_info_html":null,
"ad_match_reason":0,
"link":null,
"has_required_attribution_provider":false,
"view_tags":[
],
"is_repin":true,
"pin360":null,
"liked_by_me":false,
"rich_summary":null,
"is_uploaded":true,
"pinner":{
"username":"phamthaominh197",
"explicitly_followed_by_me":false,
"image_xlarge_url":"https://i.pinimg.com/280x280_RS/97/2b/c4/972bc474022f188d4684f22f1032f127.jpg",
"full_name":"Minh Pham",
"image_small_url":"https://i.pinimg.com/30x30_RS/97/2b/c4/972bc474022f188d4684f22f1032f127.jpg",
"type":"user",
"id":"324259379329222479",
"image_large_url":"https://i.pinimg.com/140x140_RS/97/2b/c4/972bc474022f188d4684f22f1032f127.jpg"
},
"repin_count":0,
"created_at":"Tue, 31 May 2016 07:40:52 +0000",
"is_native":false,
"promoter":null,
"promoted_is_removable":false,
"buyable_product":null,
"dominant_color":"#232b2c",
"title":"",
"embed":null,
"is_quick_promotable":false,
"is_video":false,
"is_downstream_promotion":false
},
...
]
}
}
}
@pedroribeirodev
Copy link
Copy Markdown

Can you tell me if it works with django?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment