Skip to content

Instantly share code, notes, and snippets.

@andrewkolesnikov
Forked from sxua/get.rb
Last active December 13, 2015 23:39

Revisions

  1. andrewkolesnikov revised this gist Feb 20, 2013. 1 changed file with 26 additions and 8 deletions.
    34 changes: 26 additions & 8 deletions get.rb
    Original file line number Diff line number Diff line change
    @@ -1,29 +1,47 @@
    # Usage:

    # 1. Install required gems
    # $ sudo gem install mechanize progressbar
    #
    # 2. Run with [vk.com|vkontakte.ru] [remixsid cookie] and [your_profile_id]
    # $ ruby ./get.rb vk.com a528621366f13fb859a120b3a8c357dc98c1f7d93f5e66666662 8666676
    # 54% |ooooooooooooooooooooooooooooooooooooooooooooooooooooo | ETA: 0:01:49
    #
    # 3. Enjoy the output
    # $ cat ./export_vk_wall.csv


    #!/usr/bin/env ruby
    # NOTE(review): this revision places the usage comment above the shebang;
    # a shebang is only honoured when it is the very first line of the file.
    #
    # NOTE(review): this listing is a rendered diff without +/- markers. Where a
    # line is directly followed by a near-identical one (the two CSV requires, the
    # two ARGV/url pairs, the two `.each` openers, ...), the first matches the
    # original revision and the second appears to be its replacement.
    require 'rubygems'
    require 'mechanize'
    require 'fastercsv'
    require "csv"
    require 'progressbar'

    # Original revision: two arguments and a hard-coded vkontakte.ru wall-<gid> URL.
    cookie, gid = ARGV
    url = URI.parse("http://vkontakte.ru/wall-#{gid}")
    # This revision: the site name becomes the first argument and the path is
    # wall<gid> (no dash).
    sitename, cookie, gid = ARGV
    url = URI.parse("http://#{sitename}/wall#{gid}")

    agent = Mechanize.new
    agent.user_agent_alias = 'Mac Safari'
    # Put the remixsid session cookie into the agent's cookie jar for this URL.
    Mechanize::Cookie.parse(url, "remixsid=" + cookie) { |c| agent.cookie_jar.add(url, c) }

    # Page count: the value after the last '=' in the final pager link's href,
    # integer-divided by 20.
    pages = agent.get(url.to_s).search('#fw_summary_wrap .pg_lnk:last').attr('href').value.split('=').last.to_i/20

    FasterCSV.open('export_vk_wall.csv','w') do |csv|
    # Progress total is pages * 20; pbar.inc below runs once per scraped post.
    pbar = ProgressBar.new("test", pages * 20)

    CSV.open('export_vk_wall.csv','w') do |csv|
    # Header row, then one row per post.
    csv << ['name', 'message', 'time']
    (0...pages).to_a.each_with_progressbar('Progress') do |p|
    (0...pages).to_a.each do |p|
    # Request the page of posts starting at offset p * 20.
    url.query = "offset=#{p * 20}"
    page = agent.get(url.to_s).search("#page_wall_posts")
    page.search('.post .info').each do |post|
    page.search('.post_info').each do |post|
    pbar.inc
    csv << [
    post.search('a.author').text,
    post.search('.wall_text div div').text,
    post.search('.wall_post_text').text,
    post.search('.rel_date').text
    ]
    end
    # Pause 5 + rand(11) seconds after every page.
    sleep(5 + rand(11))
    end
    end
    # NOTE(review): only three blocks are opened in either revision, so one of the
    # `end` lines here looks like the removed copy of the original's final `end`.
    end
    pbar.finish
  2. Olexandr Skrypnyk created this gist Oct 8, 2011.
    29 changes: 29 additions & 0 deletions get.rb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,29 @@
    #!/usr/bin/env ruby
    #
    # Exports the posts of a VKontakte group wall to export_vk_wall.csv.
    #
    # Usage:
    #   ruby get.rb <remixsid cookie> <group id>
    #
    # Every CSV row holds three pieces of text scraped from a post's HTML:
    # the `a.author` link, the `.wall_text div div` body and the `.rel_date` label.
    require 'rubygems'
    require 'mechanize'
    require 'csv' # FasterCSV became the standard CSV library in Ruby 1.9
    require 'progressbar'

    POSTS_PER_PAGE = 20
    OUTPUT_FILE = 'export_vk_wall.csv'

    cookie, gid = ARGV
    # Fail early with a usage line instead of a TypeError on nil further down.
    abort "Usage: #{$PROGRAM_NAME} <remixsid cookie> <group id>" unless cookie && gid

    url = URI.parse("http://vkontakte.ru/wall-#{gid}")

    agent = Mechanize.new
    agent.user_agent_alias = 'Mac Safari'
    # Put the remixsid session cookie into the agent's cookie jar for this URL.
    Mechanize::Cookie.parse(url, "remixsid=#{cookie}") { |c| agent.cookie_jar.add(url, c) }

    # The page count is derived from the value after the last '=' in the href of
    # the final pager link.
    last_link = agent.get(url.to_s).search('#fw_summary_wrap .pg_lnk:last').first
    abort "No pager link found at #{url} - check the cookie and the group id" unless last_link

    # NOTE(review): if that value is the offset of the last page, offset / 20 is a
    # zero-based index and the final page is never fetched - confirm against the
    # live markup before changing.
    pages = last_link['href'].to_s.split('=').last.to_i / POSTS_PER_PAGE

    CSV.open(OUTPUT_FILE, 'w') do |csv|
      csv << ['name', 'message', 'time']

      # assumes the installed progressbar gem provides Array#each_with_progressbar
      # - TODO confirm
      (0...pages).to_a.each_with_progressbar('Progress') do |p|
        url.query = "offset=#{p * POSTS_PER_PAGE}"
        page = agent.get(url.to_s).search('#page_wall_posts')

        page.search('.post .info').each do |post|
          csv << [
            post.search('a.author').text,
            post.search('.wall_text div div').text,
            post.search('.rel_date').text
          ]
        end

        # Random 5-15 second pause between page requests; skipped after the last
        # page, where there is nothing left to wait for.
        sleep(5 + rand(11)) unless p == pages - 1
      end
    end