Created
February 29, 2020 08:00
-
-
Save hongdonghyun/26e8e7a0340f8e7be346a2a90be41888 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Print name, thumbnail URL and price for products on a Danawa listing page.

The script drives Chrome through Selenium to open the category page, clicks
three controls in sequence (located by the XPaths below) to apply a filter,
then parses the rendered HTML with BeautifulSoup and prints one record per
non-advertisement product entry.
"""
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def main():
    """Open the listing page, apply the filter clicks and print each product."""
    browser = webdriver.Chrome('./chromedriver')
    try:
        browser.implicitly_wait(5)
        browser.get("http://prod.danawa.com/list/?cate=112758&15main_11_02")

        # Controls that must be clicked, in this order, before scraping.
        click_targets = (
            '//*[@id="frmProductList"]/div/div[1]/div[2]/button',
            '// *[ @ id = "dlMaker_extend"] / dd / div[2] / button[1]',
            '//*[@id="selectMaker_extend_priceCompare_A"]/li[13]/label',
        )
        for xpath in click_targets:
            # Wait until the element can actually be clicked; mere presence
            # in the DOM does not guarantee it is visible and enabled yet.
            WebDriverWait(browser, 3).until(
                EC.element_to_be_clickable((By.XPATH, xpath))).click()

        # Give the page time to re-render the list after the last click.
        time.sleep(4)

        soup = BeautifulSoup(browser.page_source, 'html.parser')
        products = soup.select("div.main_prodlist.main_prodlist_list > ul > li")
        for item in products:
            # Entries containing an "ad_header" div are skipped.
            if item.find('div', class_="ad_header"):
                continue

            name_tag = item.select_one('p.prod_name > a')
            img_tag = item.select_one('a.thumb_link > img')
            price_tag = item.select_one('p.price_sect > a')
            # List items that lack any of the expected nodes are skipped
            # instead of aborting the whole run with an IndexError.
            if name_tag is None or img_tag is None or price_tag is None:
                continue

            print(name_tag.text.strip())
            # Debugging hint: print(img_tag) here to inspect the current
            # markup. The page structure keeps changing, so the selectors
            # must be adapted whenever it does.
            if "data-original" in img_tag.attrs:
                print(img_tag['data-original'])
            else:
                print(img_tag.get('src', ''))
            print(price_tag.text)
            print()
    finally:
        # Always end the driver session, even when a wait or parse step fails.
        browser.quit()


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment