Skip to content

Instantly share code, notes, and snippets.

@wagurano
Last active August 29, 2015 13:57
Show Gist options
  • Save wagurano/9578979 to your computer and use it in GitHub Desktop.
Save wagurano/9578979 to your computer and use it in GitHub Desktop.
# Rebuilds every Place of type :aed from the e-gen AED web pages.
#
# Task arguments (passed as strings, converted with to_i):
#   begin_n - first list page to fetch (default "1")
#   thru_n  - exclusive upper bound of the page range (default "1277")
#
# Existing :aed places are deleted up front. Each list page is then fetched
# with egen_get_list, and every linked entry is resolved with egen_print_info
# and stored through Place.create!.
desc "Get AED Places from web pages"
task :get_aeds, [:begin_n, :thru_n] => :environment do |_t, args|
  args.with_defaults(:begin_n => "1", :thru_n => "1277") # [..) 1277
  type = :aed
  Place.type(type).delete
  begin_n = args.begin_n.to_i
  thru_n = args.thru_n.to_i
  # Pages form the half-open range [begin_n, thru_n); page begin_n is always
  # fetched, even when thru_n <= begin_n.
  last_n = [thru_n - 1, begin_n].max

  (begin_n..last_n).each do |n|
    cnt = 0
    puts "EGEN,FETCH,#{n}"
    # fetch list
    aed_places = egen_get_list(n)
    next if aed_places.nil?

    aed_places.each do |el|
      href = el['href']
      # Anchors without a usable href carry no entry; they are skipped but
      # still followed by the politeness delay below.
      if href && !href.empty?
        cnt += 1
        # The href is split on single quotes: element 1 is used as the name,
        # element 9 as the serial number.
        parts = href.split("'")
        name = parts[1].tr(',', '_')
        sn = parts[9]
        begin
          info = {}
          # jobs
          info[:category], address, phone, info[:address_desc], _nop, info[:created_at], info[:model] = egen_print_info(sn)
          info[:sn] = sn
          if Rake.application.options.trace
            # to_s keeps the trace line from raising when no address was scraped.
            print type, ",", name, ",", address.to_s[13..-1], ",", phone, ",", info, "\n"
          end
          Place.create!(
            type: type,
            name: name,
            address: address,
            phone: phone,
            info: info
          )
        rescue OpenURI::HTTPError
          puts "ERROR,#{n},#{cnt},NOT_FOUND,-"
          next
        end
      end
      sleep 0.42
    end
  end
end
# Scraping helpers for the e-gen.or.kr AED pages.
#
# Both public methods download a page, parse it with Nokogiri and retry on
# timeouts; the shared download/retry logic lives in egen_with_document.
module DatabaseHelper
  require 'net/http'
  require 'nokogiri'
  require 'open-uri'
  require 'timeout'

  # Total number of attempts per page before a Timeout::Error is re-raised.
  TIMEOUT_CNT = 42

  # Fetches the detail page for +code+ and collects one value per <ul>/<li>.
  #
  # Each item's text is cleaned (tabs removed, CRLF turned into a space,
  # commas turned into underscores) and split on ": "; the second piece is
  # kept, or nil when the item has no ": " separator.
  #
  # @param code [#to_s] identifier interpolated into the yearSeq parameter
  # @return [Array<String, nil>] values in document order
  # @raise [Timeout::Error] once every attempt has timed out
  def egen_print_info(code)
    # NOTE(review): the second "?" (before HPID) looks like it should be "&";
    # kept byte-for-byte because the server's behaviour is not verifiable here.
    url = "http://www.e-gen.or.kr/egen/inf.AED2.do?yearSeq=#{code}?HPID="
    egen_with_document(url) do |doc|
      doc.xpath('//ul/li').map do |item|
        _header, data = item.text.delete("\t").gsub("\r\n", ' ').tr(',', '_').split(': ')
        data
      end
    end
  end # def print_info

  # Fetches list page +n+ and returns its //td/b/a anchor nodes.
  #
  # @param n [#to_s] value interpolated into the page parameter
  # @return [Nokogiri::XML::NodeSet] matching anchors
  # @raise [Timeout::Error] once every attempt has timed out
  def egen_get_list(n)
    url = "http://www.e-gen.or.kr/egen/inf.AED1.do?lon=&lat=&cnt=2444&str_cnt=0&page_num=1&page_size=100000&radius=400000&x=20&y=15&page=#{n}"
    egen_with_document(url) { |doc| doc.xpath('//td/b/a') }
  end

  private

  # Downloads +url+, parses it as HTML and yields the document, all inside a
  # 5 second timeout. A timed-out attempt prints "sleep", waits 1.42 seconds
  # and starts over; after TIMEOUT_CNT attempts it prints "raise" and
  # re-raises. Returns the block's value.
  def egen_with_document(url)
    retries = TIMEOUT_CNT
    begin
      Timeout.timeout(5) do
        # URI#open with a block closes the response handle when the block
        # returns, and keeps working where Kernel#open no longer accepts URLs.
        URI.parse(url).open { |io| yield Nokogiri::HTML(io) }
      end
    rescue Timeout::Error
      retries -= 1
      if retries > 0
        puts "sleep"
        sleep 1.42
        retry
      else
        puts "raise"
        raise
      end
    end
  end
end
# Address lookup through the Daum addr2coord HTTP API.
module GeocodeHelper
  # API key read from the "daum" entry of config/apikey.yml at load time.
  DAUM_APIKEY = YAML.load_file(Rails.root.join('config','apikey.yml'))['daum']

  # Looks up +address+ and returns the first match.
  #
  # @param address [#to_s] free-form address text
  # @return [Array, nil] the first item's ['lng'] and ['lat'] values, in that
  #   order, or nil when the response contains no items
  def geocode(address)
    # encode_www_form escapes every parameter on its own, so characters such
    # as "&", "#" or "=" inside the address cannot corrupt the query string.
    query = URI.encode_www_form(apikey: DAUM_APIKEY, output: 'json', q: address.to_s)
    # URI#read downloads the body and closes the connection handle itself.
    body = URI.parse("http://apis.daum.net/local/geo/addr2coord?#{query}").read
    channel = JSON.parse(body)['channel']
    items = channel && channel['item']
    return if items.blank?

    item = items.first
    [item['lng'], item['lat']]
  end
end
# Mongoid document describing a place record (type, name, address, phone,
# coordinates and a free-form info hash).
class Place
  include Mongoid::Document
  include Mongoid::Timestamps
  include Geocoder::Model::Mongoid

  # Classification and descriptive fields.
  field :type
  field :category
  field :category_desc
  field :name
  field :description
  field :info, :type => Hash

  # Location and contact fields.
  field :zipcode
  field :address
  field :coordinates, :type => Array
  field :phone

  # :address is registered as the geocoding source, and geocode runs after
  # each validation.
  geocoded_by :address
  after_validation :geocode

  index(:coordinates => "2d")

  class << self
    # scope
    # Returns the criteria matching records whose type field equals +type+.
    def type(type)
      where(:type => type)
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment