Created
January 29, 2010 03:14
Revisions
-
peterhellberg created this gist
Jan 29, 2010. There are no files selected for viewing
#!/usr/bin/env ruby
#
# SHRUG Blocket Scraper
#
# Author:      Peter Hellberg <@peterhellberg>
# Microformat: hListing <http://microformats.org/wiki/hlisting-proposal>
# License:     WTFPL
#
# Small Sinatra app: GET /?q=<term> fetches the Blocket (Stockholm) result
# list for the term, scrapes the listing table and renders the ads using the
# hListing microformat. Templates live in the inline section after __END__.

require 'cgi'
require 'date'
require 'time'
require 'rubygems'
require 'open-uri'
require 'nokogiri'
require 'sinatra'
require 'haml'
require 'sass'

# Fetches a Blocket search result page and converts its rows to BlocketAd objects.
class ShrugBlocketScraper
  # Local fixture; when this file exists the HTTP response is faked via FakeWeb.
  FIXTURE_FILE = 'powerbook.html'

  # Scrape the ads matching a search term.
  #
  # @param q [String, nil] search term (CGI-escaped before it is put in the URL)
  # @return [Array<BlocketAd>] one entry per listing row that has a subject
  #   link; an empty array when +q+ is nil
  # @raise [ArgumentError] when a row's date/time text cannot be parsed
  def self.scrape(q)
    return [] if q.nil?

    url = "http://www.blocket.se/stockholm?q=#{CGI.escape(q)}&cg=0&w=1&st=s&ca=11&md=li"
    fake_response_for(url) if File.file?(FIXTURE_FILE)

    today = Date.today
    fetch_document(url).css('table.listing tr').map { |row| build_ad(row, today) }.compact
  end

  # Development (Faking the response for fun and profit)
  # Registers the fixture file as the body returned for +url+ and blocks real
  # network access. fakeweb is required lazily so it is only needed when the
  # fixture is present.
  def self.fake_response_for(url)
    require 'fakeweb'
    FakeWeb.allow_net_connect = false
    FakeWeb.register_uri(:get, url, :body => IO.read(FIXTURE_FILE))
  end

  # Downloads +url+ and parses it with Nokogiri.
  # URI.open is used because Kernel#open no longer accepts URLs on Ruby 3.0+;
  # the block form closes the handle once the document has been parsed.
  def self.fetch_document(url)
    URI.open(url) { |io| Nokogiri::HTML(io) }
  end

  # Builds a BlocketAd from one table row.
  # Returns nil for rows without a subject link so the caller can drop them
  # instead of failing on a nil element.
  def self.build_ad(row, today)
    subject_link = row.css('td.subject a').first
    return nil if subject_link.nil?

    date_str = row.css('th.listing_date').inner_text.strip
    time_str = row.css('td.listing_time').inner_text.strip
    price    = parse_price(row.css('td.align_right').inner_text.strip)

    BlocketAd.new(subject_link.inner_text.strip,
                  price,
                  subject_link['href'],
                  parse_listed_at(date_str, time_str, today))
  end

  # Converts the price cell text to an Integer, or nil when the cell is blank.
  # All whitespace is removed first; [[:space:]] also matches non-breaking
  # spaces, which a plain ' ' replacement would leave in place.
  def self.parse_price(price_str)
    compact = price_str.gsub(/[[:space:]]/, '')
    compact.empty? ? nil : compact.to_i
  end

  # Combines the date and time cells into a Time.
  # 'Idag' and 'Igår' are mapped to today and yesterday. Any other date text
  # that parses to a date after +today+ is re-read with the previous year
  # appended; otherwise it is parsed as-is.
  # NOTE(review): Date.parse/Time.parse are given the page's date text
  # unchanged; month names they do not recognise would need translating first.
  def self.parse_listed_at(date_str, time_str, today = Date.today)
    stamp =
      case date_str
      when 'Idag' then "#{today} #{time_str}"
      when 'Igår' then "#{today - 1} #{time_str}"
      else
        if Date.parse(date_str) > today
          "#{date_str} #{today.year - 1} #{time_str}"
        else
          "#{date_str} #{time_str}"
        end
      end

    Time.parse(stamp)
  end

  private_class_method :fake_response_for, :fetch_document, :build_ad,
                       :parse_price, :parse_listed_at
end

# Read-only value object for one scraped ad.
class BlocketAd
  # title: ad subject text, price: Integer or nil, link: href of the subject
  # link, time: Time the ad was listed.
  attr_reader :title, :price, :link, :time

  def initialize(title, price, link, time)
    @title = title
    @price = price
    @link  = link
    @time  = time
  end
end

# Search page; without a q parameter the list of ads is empty.
get '/' do
  @ads = ShrugBlocketScraper.scrape(params[:q])
  haml :index
end

# Stylesheet compiled from the inline Sass template.
get '/got_style.css' do
  sass :stylesheet
end

__END__

@@ layout
%html{ :xmlns => "http://www.w3.org/1999/xhtml", :lang => "en", "xml:lang" => "en"}
  %head
    %title= 'SHRUG Blocket Scraper'
    %meta{ "http-equiv" => "Content-Type", :content => "text/html; charset=utf-8"}
    %meta{ :name => "viewport", :content => "width=320; initial-scale=0.9; maximum-scale=2.0; user-scalable=false;"}
    %link{ :rel => :stylesheet, :type => 'text/css', :href => '/got_style.css' }
  %body
    = yield

@@ index
%h1.title SHRUG Blocket Scraper
%form{:action => '/', :method => :get}
  %input{:type => :text, :name => :q, :size => 20, :value => params[:q]}
  %input{:type => :submit, :value => 'Sök'}

%div.listings
  - @ads.each do |ad|
    %div.hlisting
      %span.item
        %a.url{:href => ad.link}
          -# Scraped text is untrusted, so it is always HTML-escaped.
          %span.fn&= ad.title
      %abbr.dtlisted{:title => ad.time.iso8601}= ad.time.strftime('%Y-%m-%d')
      - if ad.price
        %span.price= "#{ad.price}:-"

@@ stylesheet
body
  border-top: 8px solid #7CAF3C
  margin: 0
  padding: 2px 20px
  background: #fff
  color: #333

h1
  color: #433C2A
  font-size: 100px
  letter-spacing: -11px
  line-height: 75px
  font-family: Helvetica Neue, cursive
  margin-top: 0.4em

form
  input
    padding: 0.3em

.hlisting
  margin-bottom: 1em
  background-color: #fff
  border: 1px solid #ccc
  padding: 0.7em
  max-width: 700px
  -moz-border-radius: 8px
  -webkit-border-radius: 8px

  .price
    color: #7CAF3C
    font-size: 1.8em
    margin-top: -1em
    float: right

  a
    color: #433C2A
    text-decoration: none

  .fn
    max-width: 70%
    color: #433C2A
    font-size: 1.3em
    margin-bottom: 0.6em
    display: block

  .dtlisted
    color: #ccc