get-jdebuit-shoplist
from bs4 import BeautifulSoup
import urllib.request as req
import pandas as pd
import time
import re

shoplist = []

# J-Debit shop-list search pages; maxpage is the number of result pages at the time of writing.
url = 'http://jdebit.jp/pc/shoplist/search?name=&address=&area_id[]&page='
maxpage = 1114

# Captures the leading prefecture name of an address (e.g. 東京都, 北海道, 神奈川県).
addrpattern = u'^([^市区町村]{2}[都道府県]|[^市区町村]{3}県)'
repatter = re.compile(addrpattern)
for page in range(1, maxpage + 1):
    print(url + str(page))
    res = req.urlopen(url + str(page))
    soup = BeautifulSoup(res, 'html.parser')

    # Each row of the #search_tbl table is one shop.
    trs = soup.select("#search_tbl")[0].find_all('tr')
    for tr in trs:
        tds = tr.find_all('td')
        if len(tds) > 0:
            addr = tds[2].text.strip()
            shoplist.append([tds[0].text.strip(),          # shop name
                             tds[1].text.strip(),          # category
                             addr,                         # address
                             repatter.match(addr).group(), # prefecture
                             tds[3].text.strip(),          # tel
                             tds[4].text.strip()])         # url
    time.sleep(2)  # pause between page requests to be polite to the server

pd.DataFrame(shoplist,
             columns=['shopname', 'category', 'addr', 'pref', 'tel', 'url']
             ).to_csv('shoplist.csv', index=False)
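
# For reference, a quick check of what the prefecture pattern extracts.
# The addresses below are made-up examples (not data scraped from jdebit.jp);
# the pattern returns the leading prefecture name of each address string.
for addr in ['東京都千代田区丸の内1-1-1', '北海道札幌市中央区2-2-2', '神奈川県横浜市西区3-3-3']:
    m = repatter.match(addr)
    print(addr, '->', m.group() if m else 'no match')
# 東京都千代田区丸の内1-1-1 -> 東京都
# 北海道札幌市中央区2-2-2 -> 北海道
# 神奈川県横浜市西区3-3-3 -> 神奈川県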