This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ | |
{ | |
"single": "14.56", | |
"avg": "15.61", | |
"round": "Final", | |
"competition": "Kingscube Toulouse Open 2014" | |
}, | |
{ | |
"single": "13.50", | |
"avg": "13.70", |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import requests | |
# Endpoint plus query string asking the local crawl service to run the
# 'wca' spider from its start requests (presumably ScrapyRT -- confirm).
url = (
    'http://localhost:9080/crawl.json'
    '?start_requests=1'
    '&spider_name=wca'
)

# Fetch the endpoint and decode the raw response body as JSON.
data = json.loads(requests.get(url).content)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class WcaSpider(scrapy.Spider): | |
name = 'wca' | |
start_urls = ['https://www.worldcubeassociation.org/persons/2012LAMO01'] | |
allowed_domains = ['worldcubeassociation.org'] | |
def parse(self, response): | |
# Identify all rows from the desired table | |
rows = response.css('div.results-by-event table tbody.event-333 tr.result') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scrapy | |
class WcaSpider(scrapy.Spider):
    """Skeleton spider named 'wca' for worldcubeassociation.org.

    Only the crawl configuration is filled in; ``parse`` is still a stub.
    """

    name = 'wca'
    start_urls = ['http://worldcubeassociation.org/']
    allowed_domains = ['worldcubeassociation.org']

    def parse(self, response):
        """Receive a response; currently extracts nothing and returns None."""
        return None
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class airbnbSpider(scrapy.Spider):
    """Spider whose start URLs and progress trackers are set per instance."""

    name = "airbnbSpider"

    def __init__(self, *args, **kwargs):
        """Initialise the spider.

        Positional and keyword arguments are forwarded unchanged to
        ``scrapy.Spider.__init__`` so that spider arguments keep working
        and the base class can finish its own setup.
        """
        # Without this call the base-class initialisation is skipped, and
        # any argument passed by the crawler would raise a TypeError.
        super().__init__(*args, **kwargs)

        # Urls of the cities to scrap
        # NOTE(review): url_city_1 / url_city_2 are not defined in the
        # visible code -- they must be provided elsewhere in the module.
        self.start_urls = [url_city_1, url_city_2]

        # Trackers, both starting at zero (presumably pages visited and
        # items scraped -- confirm against the parse callbacks).
        self.page = 0
        self.object = 0
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def parse_hotel(self, response): | |
# Get hotel informations | |
info_1 = response.css('info_1_selector') | |
info_2 = response.css('info_2_selector') | |
# ... | |
yield { | |
"info_1":info_1, | |
"info_2":info_2 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def parse(self, response): | |
# Getting the hotels list | |
hotels = response.css('hotels_selector') | |
# Following hotels pages | |
for hotel in hotels: | |
info_1 = hotel.css('info_1_selector') | |
info_2 = hotel.css('info_2_selector') | |
yield { |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def parse(self, response): | |
# Getting the hotels list | |
hotel_links = response.css('hotel_selector') | |
# Following hotels pages | |
for hotel in hotel_links: | |
yield response.follow(url=link, callback=self.parse_hotel) | |
# Get Next Page of hotels |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def prepare_request(city, checkin=None, checkout=None, price_min=None, price_max=None, currency='USD'):
    """Return the Airbnb search URL to scrape for a city.

    Args:
        city: City slug placed in the URL path. It is percent-encoded;
            sequences that are already percent-encoded are left intact.
        checkin: Optional check-in date, a string formatted 'YYYY-MM-DD'.
        checkout: Optional check-out date, a string formatted 'YYYY-MM-DD'.
        price_min: Optional lower price bound. ``0`` is a valid bound.
        price_max: Optional upper price bound.
        currency: Display currency, only sent together with a price range.

    Returns:
        The search URL. The date filter is added only when both dates are
        given, and the price filter only when both bounds are given.
    """
    from urllib.parse import quote, urlencode

    params = {}
    if checkin and checkout:
        params['checkin'] = checkin
        params['checkout'] = checkout
    # Compare against None rather than truthiness so that a minimum price
    # of 0 does not silently drop the whole price filter.
    if price_min is not None and price_max is not None:
        params['price_min'] = price_min
        params['price_max'] = price_max
        params['display_currency'] = currency

    # '%' stays safe so an already-encoded city is not encoded twice.
    city_segment = quote(str(city), safe='%')
    # urlencode joins parameters with '&' only between them, which avoids
    # the malformed '?&' prefix produced by manual concatenation.
    return f'https://www.airbnb.com/s/{city_segment}/homes/?{urlencode(params)}'
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
price_info = annonce.css(' div._ls0e43 ::text').extract() | |
['Price:', '$140', ' / night', 'Price:', '$974 total', 'Show details'] |
NewerOlder