redutan · September 21, 2017 07:54
diff --git a/requirements.txt b/requirements.txt
 requests==2.18.4
 BeautifulSoup4==4.6.0
 lxml==3.8.0
diff --git a/top_instagram_hashtags_crawler.py b/top_instagram_hashtags_crawler.py
 import requests
 from bs4 import BeautifulSoup
 import re


 def crawl(max_pages, timeout=10):
     """Print Korean hashtags scraped from top-hashtags.com listing pages.

     Fetches ``max_pages`` listing pages (URL offsets 1, 101, 201, ...) and
     prints every tag whose text, minus its first character, starts with a
     Hangul syllable and does not contain '그램'.

     Args:
         max_pages: Integer number of listing pages to fetch; a value of
             zero or less fetches nothing.
         timeout: Seconds to wait for each HTTP response; passed straight
             to ``requests.get``.
     """
     # Compiled once for the whole crawl; match() anchors at the start only,
     # so a tag merely has to begin with a Hangul syllable.
     hangul_prefix = re.compile('[가-힣]+')

     for page in range(max_pages):
         url = 'https://top-hashtags.com/instagram/' + str(page * 100 + 1) + '/'
         # Without a timeout a stalled connection would block the crawl forever.
         response = requests.get(url, timeout=timeout)
         soup = BeautifulSoup(response.text, 'lxml')

         for tag_link in soup.select('div.tht-tag > a'):
             # .string is None when the anchor has no single text child
             # (empty, or several child nodes); slicing None would raise.
             if tag_link.string is None:
                 continue
             # Drop the first character (presumably the leading '#').
             tag_string = tag_link.string[1:]
             if not hangul_prefix.match(tag_string):
                 continue
             if '그램' in tag_string:
                 continue
             print(tag_string)

 # Runs as soon as the module is executed or imported: crawl 1000 listing pages.
 crawl(1000)
	import requests
	from bs4 import BeautifulSoup
	import re


	def crawl(max_pages, timeout=10):
	    """Print Korean hashtags scraped from top-hashtags.com listing pages.

	    Fetches ``max_pages`` listing pages (URL offsets 1, 101, 201, ...) and
	    prints every tag whose text, minus its first character, starts with a
	    Hangul syllable and does not contain '그램'.

	    Args:
	        max_pages: Integer number of listing pages to fetch; a value of
	            zero or less fetches nothing.
	        timeout: Seconds to wait for each HTTP response; passed straight
	            to ``requests.get``.
	    """
	    # Compiled once for the whole crawl; match() anchors at the start only,
	    # so a tag merely has to begin with a Hangul syllable.
	    hangul_prefix = re.compile('[가-힣]+')

	    for page in range(max_pages):
	        url = 'https://top-hashtags.com/instagram/' + str(page * 100 + 1) + '/'
	        # Without a timeout a stalled connection would block the crawl forever.
	        response = requests.get(url, timeout=timeout)
	        soup = BeautifulSoup(response.text, 'lxml')

	        for tag_link in soup.select('div.tht-tag > a'):
	            # .string is None when the anchor has no single text child
	            # (empty, or several child nodes); slicing None would raise.
	            if tag_link.string is None:
	                continue
	            # Drop the first character (presumably the leading '#').
	            tag_string = tag_link.string[1:]
	            if not hangul_prefix.match(tag_string):
	                continue
	            if '그램' in tag_string:
	                continue
	            print(tag_string)

	# Runs as soon as the module is executed or imported: crawl 1000 listing pages.
	crawl(1000)