Skip to content

Instantly share code, notes, and snippets.

@c7
Created January 29, 2010 03:14

Revisions

  1. @peterhellberg peterhellberg created this gist Jan 29, 2010.
    157 changes: 157 additions & 0 deletions shrug-blocket-scraper.rb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,157 @@
    #!/usr/bin/env ruby
    #
    # SHRUG Blocket Scraper
    #
    # Author: Peter Hellberg <@peterhellberg>
    # Microformat: hListing <http://microformats.org/wiki/hlisting-proposal>
    # License: WTFPL

    require 'cgi'
    require 'rubygems'
    require 'open-uri'
    require 'nokogiri'
    require 'sinatra'
    require 'haml'
    require 'sass'

    # Scrapes the Blocket search result page for the Stockholm region and
    # turns each listing row into a BlocketAd.
    class ShrugBlocketScraper
      # Date.parse / Time.parse only recognise English month abbreviations.
      # The Swedish names that do not share an English three-letter prefix
      # are "maj" and "okt(ober)".
      # NOTE(review): assumes the listing date cell looks like "28 jan" --
      # confirm against a live result page.
      SWEDISH_MONTHS = { 'maj' => 'may', 'okt' => 'oct' }.freeze

      # Runs a search and returns the ads found on the first result page.
      #
      # q       - free-text search query (String) or nil.
      #
      # Returns an Array of BlocketAd; empty when q is nil. Rows without a
      # subject link (header/spacer rows) are skipped.
      # Raises whatever open-uri raises on network/HTTP failure, and
      # ArgumentError when a listing date cannot be parsed.
      def self.scrape(q)
        return [] if q.nil?

        url = "http://www.blocket.se/stockholm?q=#{CGI.escape(q)}&cg=0&w=1&st=s&ca=11&md=li"

        # Development (Faking the response for fun and profit)
        if File.file?('powerbook.html')
          require 'fakeweb'
          FakeWeb.allow_net_connect = false
          FakeWeb.register_uri(:get, url, :body => IO.read('powerbook.html'))
        end

        # URI#open (mixed in by open-uri) instead of Kernel#open: the latter
        # no longer fetches URLs on Ruby >= 3.0 and can spawn processes or
        # read local files when handed an unexpected string.
        doc = Nokogiri::HTML(URI.parse(url).open)

        doc.css('table.listing tr').map { |row| build_ad(row) }.compact
      end

      # Builds a BlocketAd from one table row, or returns nil when the row
      # has no subject link and therefore is not a listing.
      def self.build_ad(row)
        subject_link = row.css('td.subject a').first
        return nil if subject_link.nil?

        date_str = row.css('th.listing_date').inner_text.strip
        time_str = row.css('td.listing_time').inner_text.strip
        price_str = row.css('td.align_right').inner_text.strip

        BlocketAd.new(
          subject_link.inner_text.strip,
          parse_price(price_str),
          subject_link['href'],
          Time.parse(listing_timestamp(date_str, time_str))
        )
      end

      # Converts a price cell such as "12 000:-" to an Integer, or nil when
      # the cell is empty. [[:space:]] also removes non-breaking spaces used
      # as thousands separators, which a plain ' ' replacement would miss.
      def self.parse_price(price_str)
        return nil if price_str.empty?

        price_str.gsub(/[[:space:]]/, '').to_i
      end

      # Returns a string Time.parse understands for the given date and time
      # cells. "Idag" (today) and "Igår" (yesterday) are resolved relative
      # to the current date. Any other value is a day/month without a year;
      # if it would fall in the future it must belong to the previous year.
      def self.listing_timestamp(date_str, time_str)
        case date_str
        when 'Idag' then "#{Date.today} #{time_str}"
        when 'Igår' then "#{Date.today - 1} #{time_str}"
        else
          day_month = english_months(date_str)
          if Date.parse(day_month) > Date.today
            "#{day_month} #{Date.today.year - 1} #{time_str}"
          else
            "#{day_month} #{time_str}"
          end
        end
      end

      # Rewrites Swedish month names that Date.parse cannot read into their
      # English abbreviation ("5 maj" -> "5 may", "3 oktober" -> "3 october",
      # which Date.parse accepts because it only looks at the prefix).
      def self.english_months(date_str)
        date_str.gsub(/\b(maj|okt)/i) { SWEDISH_MONTHS[Regexp.last_match(1).downcase] }
      end

      private_class_method :build_ad, :parse_price, :listing_timestamp, :english_months
    end

    # Read-only value object describing one scraped listing: its title,
    # price (nil when the listing has none), link and listing time.
    class BlocketAd
      attr_reader :title
      attr_reader :price
      attr_reader :link
      attr_reader :time

      # Stores the four attributes exactly as given; no conversion is done.
      def initialize(title, price, link, time)
        @title, @price, @link, @time = title, price, link, time
      end
    end

    # Search page: scrapes Blocket for the "q" request parameter and renders
    # the :index template with the result in @ads.
    get '/' do
      query = params[:q]
      @ads = ShrugBlocketScraper.scrape(query)
      haml(:index)
    end

    # Serves the inline :stylesheet Sass template as CSS.
    get '/got_style.css' do
      # Declare the response as CSS explicitly; Sinatra versions that do not
      # set a template-specific default would otherwise answer with
      # text/html, which browsers may refuse to apply as a stylesheet.
      content_type 'text/css', :charset => 'utf-8'
      sass :stylesheet
    end

    __END__

    @@ layout
    %html{ :xmlns => "http://www.w3.org/1999/xhtml", :lang => "en", "xml:lang" => "en"}
    %head
    %title= 'SHRUG Blocket Scraper'
    %meta{ "http-equiv" => "Content-Type", :content => "text/html; charset=utf-8"}
    %meta{ :name => "viewport", :content => "width=320; initial-scale=0.9; maximum-scale=2.0; user-scalable=false;"}
    %link{ :rel => :stylesheet, :type => 'text/css', :href => '/got_style.css' }
    %body
    = yield

    @@ index
    %h1.title SHRUG Blocket Scraper
    %form{:action => '/', :method => :get}
    %input{:type => :text, :name => :q, :size => 20, :value => params[:q]}
    %input{:type => :submit, :value => 'Sök'}
    %div.listings
    - @ads.each do |ad|
    %div.hlisting
    %span.item
    %a.url{:href => ad.link}
    %span.fn= ad.title

    %abbr.dtlisted{:title => ad.time.iso8601}= ad.time.strftime('%Y-%m-%d')
    - if ad.price
    %span.price= "#{ad.price}:-"

    @@ stylesheet
    body
    border-top: 8px solid #7CAF3C
    margin: 0
    padding: 2px 20px
    background: #fff
    color: #333


    h1
    color: #433C2A
    font-size: 100px
    letter-spacing: -11px
    line-height: 75px
    font-family: Helvetica Neue, cursive
    margin-top: 0.4em


    form
    input
    padding: 0.3em

    .hlisting
    margin-bottom: 1em
    background-color: #fff
    border: 1px solid #ccc
    padding: 0.7em
    max-width: 700px
    -moz-border-radius: 8px
    -webkit-border-radius: 8px

    .price
    color: #7CAF3C
    font-size: 1.8em
    margin-top: -1em
    float: right

    a
    color: #433C2A
    text-decoration: none

    .fn
    max-width: 70%
    color: #433C2A
    font-size: 1.3em
    margin-bottom: 0.6em
    display: block

    .dtlisted
    color: #ccc