Skip to content

Instantly share code, notes, and snippets.

@littmus
Forked from darjeeling/gist:fe58e76e2ed7af6fa1b9
Last active August 29, 2015 14:14
Show Gist options
  • Save littmus/04e936e5261b4417b670 to your computer and use it in GitHub Desktop.
Save littmus/04e936e5261b4417b670 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import bs4
import requests
import time
def main():
    """Scrape Caffe Bene store addresses and append them to ``cafebene.text``.

    The region codes are read from the first class name of every ``<area>``
    inside the ``<map id="Map">`` element of the store-search page.  For each
    region the paginated result list is walked page by page and the text of
    the third ``<td>`` of every result row is appended to the output file,
    one UTF-8 encoded line per row.

    Paging for a region stops at the first page that yields no row with at
    least three cells.  This covers the single-cell placeholder row the
    original check looked for, as well as a page with only a header row.
    """
    base_url = "http://www.caffebene.co.kr/sub03/searchStore/map"
    # Without an explicit timeout a stalled connection blocks forever.
    timeout = 30

    r = requests.get(base_url, timeout=timeout)
    b = bs4.BeautifulSoup(r.text)
    sido_data = b.find_all("map", id="Map")[0].find_all("area")

    for area in sido_data:
        classes = area.get("class")
        if not classes:
            # An <area> without a class attribute carries no region code.
            continue
        sido = classes[0]

        page_idx = 0
        while True:
            # Wait two seconds before every page request.
            time.sleep(2)
            page_idx += 1
            r = requests.get(
                "%s/page/%d/stx/%s" % (base_url, page_idx, sido),
                timeout=timeout)
            r.encoding = "utf-8"
            b = bs4.BeautifulSoup(r.text)
            items = b.find_all("table", class_="table_default03")[0].find_all("tr")

            # skip thead; keep only rows that actually have an address cell
            addrs = []
            for item in items[1:]:
                cells = item.find_all("td")
                if len(cells) > 2:
                    addrs.append(cells[2].text)
            if not addrs:
                break

            # Open once per page and close deterministically.  Binary mode
            # lets the encoded bytes be written unchanged on Python 2 and 3.
            with open("cafebene.text", "ab") as out:
                for addr in addrs:
                    out.write(addr.encode("utf-8") + b"\n")
# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()
#!/usr/bin/env python
import bs4
import requests
import time
import json
def main():
    """Scrape Ediya store addresses and append them to ``ediya.text``.

    For every ``sido`` code (the integers 10..26 right-padded with ``'0'`` to
    three characters, e.g. 10 -> ``"100"``) the ``gugun`` codes are fetched
    from the ``/board/gugun`` endpoint, which answers with JSON whose
    ``data`` list holds objects with a ``CODE`` key.  For each
    (sido, gugun) pair the paginated store listing is walked and the text of
    the third ``<td>`` of every result row is appended to the output file,
    one UTF-8 encoded line per row.

    Paging for a pair stops at the first page that yields no row with at
    least three cells.  This covers the single-cell placeholder row the
    original check looked for, as well as a page with only a header row.
    """
    # Without an explicit timeout a stalled connection blocks forever.
    timeout = 30

    for sido_idx in range(10, 27):
        sido = str(sido_idx).ljust(3, '0')
        r = requests.post("http://www.ediya.com/board/gugun",
                          data={'sido': sido}, timeout=timeout)
        gugun_data = json.loads(r.text)['data']

        for entry in gugun_data:
            gugun = entry['CODE']
            page_idx = 0
            while True:
                # Wait two seconds before every page request.
                time.sleep(2)
                page_idx += 1
                r = requests.get(
                    "http://www.ediya.com/board/listing/brd/store/sido/%s/gugun/%s/page/%d" %
                    (sido, gugun, page_idx),
                    timeout=timeout)
                r.encoding = "utf-8"
                b = bs4.BeautifulSoup(r.text)
                items = b.find_all("table", class_="list")[0].find_all("tr")

                # skip thead; keep only rows that actually have an address cell
                addrs = []
                for item in items[1:]:
                    cells = item.find_all("td")
                    if len(cells) > 2:
                        addrs.append(cells[2].text)
                if not addrs:
                    break

                # Open once per page and close deterministically.  Binary
                # mode lets the encoded bytes be written unchanged on
                # Python 2 and 3.
                with open("ediya.text", "ab") as out:
                    for addr in addrs:
                        out.write(addr.encode("utf-8") + b"\n")
# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment