This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Drive Firefox through Selenium and load successive Airbnb search-result
# pages for Shenzhen, one page per `section_offset` value.
from selenium import webdriver
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
import time

# Ask for the Marionette protocol in the Firefox capabilities.
# NOTE(review): `caps` is not passed to webdriver.Firefox() below; whether this
# setting reaches the driver depends on Selenium internals -- confirm.
caps = webdriver.DesiredCapabilities().FIREFOX
caps["marionette"] = True

# Explicit path to the local Firefox executable (Windows install location).
binary = FirefoxBinary(r'f:\firefox\firefox.exe')
driver = webdriver.Firefox(firefox_binary=binary)

# Visit offsets 0..17. The last query parameter is `section_offset`; the
# original text had it garbled to "§ion_offset" because "&sect" was decoded
# as an HTML entity.
for i in range(18):
    driver.get(
        "https://zh.airbnb.com/s/shenzhen--china/homes"
        "?refinement_paths%5B%5D=%2Fhomes&page1=&cdn_cn=1&s_tag=_bVwCdeI"
        "&allow_override%5B%5D=&section_offset=" + str(i)
    )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# BeautifulSoup scraping practice: housing price data
import requests
import pandas as pd
import csv
from bs4 import BeautifulSoup
import MySQLdb

# Open a connection to the `ershoufang` database on the local MySQL server
# and create a cursor for executing statements.
# NOTE(review): 'yourpasswd' looks like a placeholder -- replace it with the
# real password (ideally read from the environment, not hard-coded).
conn = MySQLdb.connect(
    host='localhost',
    user='root',
    passwd='yourpasswd',
    db='ershoufang',
    charset="utf8",
)
cur = conn.cursor()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fetch the blog home page and print the title of its first post.
import requests
from bs4 import BeautifulSoup

link = "http://www.santostang.com/"
# Send a desktop-browser User-Agent instead of the default requests one.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36'}

# A timeout keeps the script from hanging forever if the server never answers.
r = requests.get(link, headers=headers, timeout=10)
soup = BeautifulSoup(r.text, "html.parser")

# The first <h1 class="post-title"> wraps an <a> whose text is the post title.
# NOTE(review): raises AttributeError if no such heading/link is on the page.
first_title = soup.find("h1", class_="post-title").a.text.strip()
# Message reads: "The title of the first post is:"
print("第一篇文章标题是:", first_title)