Last active
July 28, 2019 18:10
-
-
Save Bijendra/ef101920ec25e2e7c78554e64847e378 to your computer and use it in GitHub Desktop.
Web crawler that fetches job details and builds a JSON-ready data structure to consume. It crawls through every available results page to collect all job listings.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'nokogiri' | |
require 'httparty' | |
require 'byebug' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'nokogiri' | |
require 'httparty' | |
require 'byebug' | |
def scraper | |
url = "https://in.fashionjobs.com/s/" | |
unparsed_page = HTTParty.get(url) | |
parsed_page = Nokogiri::HTML(unparsed_page) | |
total = parsed_page.css('div.page-nav__results').text.strip.split("\n").first.to_i | |
puts "******total jobs #{total}*******" | |
page = 1 | |
jobs = [] | |
all_jobs = parsed_page.css('div.media') | |
per_page_jobs = all_jobs.count | |
total_pages = (total.to_f/per_page_jobs.to_f).ceil | |
puts "**** total pages: #{total_pages} ****" | |
while page <= total_pages | |
page_url = "https://in.fashionjobs.com/s/#{page}.html" | |
job_unparsed_page = HTTParty.get(page_url) | |
job_parsed_page = Nokogiri::HTML(job_unparsed_page) | |
page_all_jobs = job_parsed_page.css('div.media') | |
page_all_jobs.each do |job| | |
hsh = {} | |
hsh["Company"] = job.css('div.media__img a span').text | |
hsh["title"] = job.css('div.media__body a span').text | |
hsh["job_url"] = job.css('div.media__body a').first['href'] | |
jobs << hsh | |
end | |
page +=1 | |
puts "******** Page No Display: #{page}********" | |
end | |
end | |
# Start the crawl as soon as this file is run.
scraper
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment