Created
July 7, 2016 10:28
-
-
Save sroy8091/73b39453e97ccac27814d809116a8016 to your computer and use it in GitHub Desktop.
Script to fetch Hotel names, price and ratings to be inserted into a MySQL table
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##Importing Required Libraries | |
from bs4 import BeautifulSoup | |
import requests | |
from __future__ import print_function | |
from sqlalchemy import * | |
import MySQLdb as my | |
import urllib2 | |
##URl of Booking.Com to be fetched | |
##Default Location : Kolkata | |
##Date taken 20th June | |
url="http://www.booking.com/searchresults.html?label=gen173nr-1FCAEoggJCAlhYSDNiBW5vcmVmaGyIAQGYATG4AQ_IAQ_YAQHoAQH4AQKoAgM;sid=4de818f29a8ff3af42baceb8abc8c66e;dcid=1;checkin_monthday=20&checkin_year_month=2016-6&checkout_monthday=21&checkout_year_month=2016-6&class_interval=1&dest_id=-2092511&dest_type=city&dtdisc=0&group_adults=2&group_children=0&hlrd=0&hyb_red=0&inac=0&label_click=undef&nha_red=0&no_rooms=1&offset=0&postcard=0&redirected_from_city=0&redirected_from_landmark=0&redirected_from_region=0&review_score_group=empty&room1=A%2CA&sb_price_type=total&score_min=0&src=index&src_elem=sb&ss=Kolkata%2C%20West%20Bengal%2C%20India&ss_all=0&ss_raw=Kolkata&ssb=empty&sshis=0&" | |
##Set of Commands Used to Scrape data as follows :- | |
##hotels:- contain full data | |
##name:-Contains Name of hotels in Kolkata(minimum 10,max 16) | |
##score:- Contains Ratings of all the Hotels | |
##price:- Contains the prices of Hotels as on booking to be done on 20th June,2016 | |
##details:- A list that contains name,rating,price accordingly for all the hotels | |
page=urllib2.urlopen(url) | |
soup=BeautifulSoup(page) | |
head = {"User-Agent":"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36"} | |
res = requests.get(url, headers=head) | |
soup = BeautifulSoup(res.text,"html.parser") | |
hotels = soup.select("#hotellist_inner div.sr_item.sr_item_new") | |
details = [] | |
##details is a list that stores the details which fetches the required data##and stores them together to be accessed serially | |
##name,rating and price of hotels scraped all together | |
for hotel in hotels: | |
name = hotel.select("span.sr-hotel__name") | |
score = hotel.select("span.average.js--hp-scorecard-scoreval") | |
price = hotel.select("table div.sr-prc--num.sr-prc--final") | |
for i in range(0,len(name)): | |
#print name[i] | |
#print type(name[i].getText().encode("ascii", "ignore")) | |
details.append(name[i].getText().encode("ascii", "ignore")) | |
#print score[i].getText() | |
details.append(score[i].getText().encode("ascii", "ignore")) | |
for i in range(0,len(price)): | |
#print price[i].getText() | |
details.append(price[i].getText().encode("ascii", "ignore")) | |
##print(details) | |
##details = map(lambda s: s.strip(), details) | |
##print(details) | |
###print(details[0]+details[1]+details[2]+details[3]+details[4]) | |
##Using SQLAlchemy | |
##engine for mysql database created | |
engine = create_engine('mysql://harimohan:root@localhost/foo',echo=True) | |
##MetaData() stores MetaData for the table to be created | |
metadata = MetaData() | |
##Hotel table created | |
hotel = Table('hotel', metadata, | |
Column('Hotel_Name', String(40), nullable=False), | |
Column('Hotel_Rating', String(6), nullable=False), | |
Column('Hotel_Price', String(20), nullable=False) | |
) | |
hotel=hotel.create(engine) | |
##MetaData of Table gathered | |
hotel = Table('hotel', metadata, autoload=True) | |
##Insert Query generated from metadata like INSERT INTO ..... Values('') | |
##Using hotel.insert() method | |
i = hotel.insert() | |
##Data inserted into the Table serially from the details list | |
k=0 | |
if k < range(0,len(details))): | |
i.execute(Hotel_Name=details[k], Hotel_Rating=details[k+1], Hotel_Price=details[k+2]) | |
k=k+3 | |
##After Completion of Insertion Process all the data is selected like | |
##SELECT * FROM command using hotel.select() method.....and then using fetchall() method | |
s = hotel.select() | |
rs = s.execute() | |
rs.fetchall() | |
##Using MySQL DB | |
db = my.connect(host="127.0.0.1",user="root",passwd="",db="world") | |
data=details | |
cursor = db.cursor() | |
sql = "insert into hotel(Hotel_Name, Hotel_Rating, Hotel_Price) VALUES(%s, %s, %s)" | |
number_of_rows = cursor.executemany(sql, data) | |
db.commit() | |
db.close() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment