-
-
Save tehviking/0788906e9a49e9234119 to your computer and use it in GitHub Desktop.
Fork of fb_scrape to accommodate changes in Ruby 2, native JSON and CSV libs, and insert pictures and links of posts when applicable.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# gem install rest-client | |
require 'json' | |
require 'rest_client' | |
require 'csv' | |
require 'cgi' | |
require 'uri' | |
# To use, get an access token here by clicking "get access token"
# and checking user.groups in the dialog box:
# https://developers.facebook.com/tools/explorer?method=GET&path=209024949216061%2Ffeed
#
# Run `ruby fb_scrape.rb ACCESS_TOKEN GROUP_ID`
#
# Your CSV should show up as "fb_posts_GROUP_ID.csv" in the same directory.
class GroupScraper | |
def initialize(access_token, group_id) | |
@access_token = access_token | |
@group_id = group_id | |
@url = "https://graph.facebook.com/#{@group_id}/feed?access_token=#{@access_token}" | |
@data = [] | |
end | |
def start | |
scrape(@url) | |
end | |
def orig_url(fb_url) | |
uri = URI.parse(fb_url) | |
query = uri.query | |
puts "query" | |
puts query | |
if query | |
parsed_query = CGI.parse query | |
end | |
if parsed_query | |
parsed_query["url"].join(" ") | |
else | |
fb_url | |
end | |
end | |
def scrape(url) | |
json_response = RestClient.get(url) | |
resp = JSON.parse(json_response.strip) | |
if resp['data'] && resp['data'].length > 0 | |
resp['data'].each do |fb_post| | |
puts "PICTURE" | |
puts fb_post["picture"] | |
post = { | |
:fb_id => fb_post['id'], | |
:fb_author => fb_post["from"]["name"], | |
:fb_author_id => fb_post["from"]["id"], | |
:message => fb_post["message"], | |
:fb_created_time => fb_post["created_time"], | |
:fb_updated_time => fb_post["updated_time"], | |
:picture => (orig_url(fb_post["picture"]) if fb_post["picture"]), | |
:link => fb_post["link"] | |
} | |
p post | |
@data << post | |
if fb_post['comments'] && fb_post['comments']['data'] | |
fb_post['comments']['data'].each do |fb_comment| | |
comment = { | |
:fb_id => fb_comment['id'], | |
:fb_author => (fb_comment["from"]["name"] if fb_comment && fb_comment["from"]), | |
:fb_author_id => (fb_comment["from"]["id"] if fb_comment && fb_comment["from"]), | |
:message => fb_comment["message"], | |
:fb_created_time => fb_comment["created_time"], | |
:fb_likes => fb_comment['likes'], | |
:picture => (orig_url(fb_comment["picture"]) if fb_comment["picture"]), | |
:link => fb_comment["link"] | |
} | |
p comment | |
@data << comment | |
end | |
end | |
end | |
if resp['paging']['next'] | |
scrape(resp['paging']['next']) | |
end | |
else | |
return | |
end | |
end | |
def to_csv | |
CSV.open("fb_posts_#{@group_id}.csv", "w") do |csv| | |
csv << %w[name fb_id date text url picture link] | |
@data.each do |post| | |
csv << [post[:fb_author], post[:fb_id], post[:fb_created_time], post[:message], "https://www.facebook.com/groups/#{post[:fb_id].split(/_/)[0]}/permalink/#{post[:fb_id].split(/_/)[1]}", post[:picture], post[:link]] | |
end | |
end | |
end | |
end | |
# Command-line entry point: `ruby fb_scrape.rb ACCESS_TOKEN GROUP_ID`.
# Scrapes the group's feed and writes the CSV file.
if __FILE__ == $0
  access_token, group_id = ARGV
  # Without both arguments the request URL would be malformed, so stop with a
  # usage hint (abort prints to stderr and exits with status 1).
  abort "Usage: ruby #{File.basename($0)} ACCESS_TOKEN GROUP_ID" unless access_token && group_id

  gs = GroupScraper.new(access_token, group_id)
  gs.start
  gs.to_csv
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I am getting this error:
Traceback (most recent call last):
4: from fb_scrape.rb:102:in `<main>'
3: from fb_scrape.rb:25:in `start'
2: from fb_scrape.rb:49:in `scrape'
1: from fb_scrape.rb:49:in `each'
fb_scrape.rb:54:in `block in scrape': undefined method `[]' for nil:NilClass (NoMethodError)