@nix010
Last active September 27, 2022 21:40
Crawl the Facebook profile contact info of a user by their fbid
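In outline, the script below reuses the cookies of an already logged-in session, POSTs to Facebook's async infopage endpoint (/profile/async/infopage/nav/) with the viewer and profile ids, strips the "for (;;);" guard Facebook prepends to its JSON responses, pulls the contact block's __html out of domops, scrapes email/address/phone/website/job with BeautifulSoup, looks up the display name through the Graph API, and writes everything to data_output.csv.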
import csv
import json

import requests
from bs4 import BeautifulSoup as BS
import facebook  # provided by the facebook-sdk package
class FbBaseCrawler(object):

    default_headers = {
        'Accept': '*/*',
        'Cache-Control': 'no-cache',
        'upgrade-insecure-requests': '1',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/62.0.3202.94 Chrome/62.0.3202.94 Safari/537.36'
        # or whatever User-Agent you want
    }

    _FB_API_PROFILE_CONTACT_URL = 'https://www.facebook.com/profile/async/infopage/nav/'

    def __init__(self, email, password, users_fbid: list = None):
        self.r = requests.Session()
        self._user = email
        self._pass = password
        self._users_fbid = users_fbid or []
        self.fbgraph = facebook.GraphAPI('..your fb token..')
        # Reuse the cookies of an already logged-in session.
        self.r.cookies.update({
            'c_user': '<user_fbid>',
            # ... other attributes of the cookies
        })
    def crawl_now(self):
        print('Crawl now...')
        parsed_data = []
        for user_fbid in self._users_fbid:
            resp = self._post(self._FB_API_PROFILE_CONTACT_URL,
                              params=self._param_query(user_fbid),
                              data=self._data_payload(user_fbid),
                              headers={'Content-Type': 'application/x-www-form-urlencoded'})
            # Facebook prefixes JSON responses with "for (;;);" (9 characters), so strip it first.
            json_resp = json.loads(resp.text[9:])
            html = json_resp.get('domops', [[{}]])[0][-1].get('__html')
            if not html:
                print('Id error %s' % user_fbid)
                continue
            data = self._extract_contact_data_from_html(html)
            data.update({
                'name': self.fbgraph.get_object(user_fbid).get('name', '')
            })
            parsed_data.append(data)
        print('Export now...')
        self._export_to_csv(parsed_data)
        print('Export done')
    def _export_to_csv(self, data):
        with open('data_output.csv', 'w') as csv_file:
            fieldnames = ['name', 'email', 'job', 'address', 'phone', 'website']
            writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
            writer.writeheader()
            for dat in data:
                writer.writerow(dat)
    def _extract_contact_data_from_html(self, html):
        tree = self.parser(html)
        email = tree.select_one("span._50f9._50f7") or tree.select_one("span._c24._2ieq a[href^='mailto']")
        address = tree.select_one("span.fsm")
        phone = tree.select_one('span[dir="ltr"]')
        website = tree.select_one('a[rel="me noopener nofollow"]')
        job = tree.select_one('div._c24._50f4')
        return {
            'email': email.text if email else '',
            'address': address.text.strip() if address else '',
            'phone': phone.text if phone else '',
            'website': website.text if website else '',
            'job': job.text.lstrip() if job else '',
        }
    def _get(self, url, params=None, headers=None, cookies=None):
        if params is None:
            params = {}
        if cookies is None:
            cookies = {}
        h = dict(self.default_headers)  # copy so per-request headers don't mutate the class default
        if headers:
            h.update(headers)
        return self.r.get(url, params=params, headers=h, cookies=cookies, timeout=10)

    def _post(self, url, params=None, data=None, headers=None):
        h = dict(self.default_headers)
        if headers is not None:
            h.update(headers)
        return self.r.post(url, params=params, data=data, headers=h, allow_redirects=False, timeout=10)
    def _fblink(self, link):
        return 'https://www.facebook.com%s' % str(link)

    def parser(self, html):
        return BS(html, 'html.parser')

    def _login_fb(self):
        print('Fresh login')
        try:
            self._get('https://www.facebook.com')
            data = {
                'email': self._user,
                'pass': self._pass,
            }
            login = self._post('https://www.facebook.com/login.php?login_attempt=1&lwv=110', data=data, headers={
                'Content-Type': 'application/x-www-form-urlencoded'
            })
        except Exception as e:
            print('Error login')
            raise e
        self._user_fbid = self.r.cookies.get('c_user')
        # A 302 redirect plus a c_user cookie means the login succeeded.
        return login.status_code == 302 and self._user_fbid
    def _data_payload(self, profile_id):
        return {
            '__user': '<user_fbid>',
            '__a': '1',
            '__req': 'bg',
            '__be': '1',
            '__pc': 'PHASED:DEFAULT',
            'fb_dtsg': 'AQEOmmADmczS:',
            '__spin_b': 'trunk',
        }

    def _param_query(self, profile_id):
        return {
            'viewer_id': '<user_fbid>',  # self._user_fbid
            'profile_id': '%s' % profile_id,
            'dom_section_id': 'u_fetchstream_21_0',
            'section': 'overview',
            'dpr': '1',
            'lst': '<user_fbid>:%s:<current_timestamp>' % profile_id,
        }
crawler = FbBaseCrawler(
    email='xxx',
    password='yyy',
    users_fbid=['10000xxx'],  # list of Facebook profile ids to crawl
)
crawler.crawl_now()
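As a quick, self-contained illustration of what _extract_contact_data_from_html does with the returned markup, the same CSS selectors can be run against a hand-written fragment. The HTML below is invented for the example, and Facebook's real class names change over time, so treat it as a sketch rather than current markup:

from bs4 import BeautifulSoup as BS

# Hypothetical fragment that mimics the class names the crawler's selectors look for.
sample_html = '''
<div>
  <span class="_50f9 _50f7">jane@example.com</span>
  <span class="fsm"> Hanoi, Vietnam </span>
  <span dir="ltr">+84 123 456 789</span>
  <div class="_c24 _50f4">  Software Engineer</div>
</div>
'''

tree = BS(sample_html, 'html.parser')
print(tree.select_one('span._50f9._50f7').text)          # jane@example.com
print(tree.select_one('span.fsm').text.strip())          # Hanoi, Vietnam
print(tree.select_one('span[dir="ltr"]').text)           # +84 123 456 789
print(tree.select_one('div._c24._50f4').text.lstrip())   # Software Engineer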

ghost commented Sep 27, 2022

Hey nix,
How do I export uploaded phone contacts from Facebook Messenger?
I noticed there is some information about my contacts on Facebook. Clicking on a contact's name shows their number, but there are 1000 contacts and I don't want to go through them one by one, so I want to export all the contacts from that page.
