Last active
June 6, 2019 12:31
-
-
Save picwellwisher12pk/e41f6c9ac75990131145eed5042308ed to your computer and use it in GitHub Desktop.
Python-based scraper for smartybro
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import libraries
import os
import subprocess
import sys
import time
import urllib.request
import webbrowser
from contextlib import closing

import requests
from bs4 import BeautifulSoup
from requests import get
from requests.exceptions import RequestException
# User-Agent header used for outgoing HTTP requests.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/71.0.3578.98 Safari/537.36'
    )
}
# Start page handed to fetch() when the script runs.
url = 'https://smartybro.com/'
# Post links collected by fetch() from the start page.
smartybro1 = []
# Links harvested from each collected post by the top-level loop.
udemylinks = []
# Links whose call-to-action text contains 'Enroll now'.
udemyFree = []
# Links whose call-to-action text contains 'Buy now'.
udemyPaid = []
def simple_get(url):
    """
    Attempt to get the content at `url` by making an HTTP GET request.

    Returns the raw response body (bytes) when `is_good_response` accepts
    the response, otherwise None. A failed request is reported through
    `log_error` and also yields None, so callers must handle None.
    """
    try:
        # The timeout keeps a single unresponsive host from blocking the
        # scraper forever; requests signals expiry with a RequestException
        # subclass, which the handler below already covers.
        with closing(get(url, stream=True, headers=headers, timeout=30)) as resp:
            if is_good_response(resp):
                return resp.content
            return None
    except RequestException as e:
        log_error('Error during requests to {0} : {1}'.format(url, str(e)))
        return None
def is_good_response(resp):
    """
    Return True if the response seems to be HTML, False otherwise.

    `resp` must expose `status_code` and a mapping-like `headers`.
    A response is accepted only when its status is 200 and its
    Content-Type header contains 'html' (case-insensitive). A missing
    Content-Type header counts as "not HTML" instead of raising KeyError.
    """
    # `or ''` also covers a header that is present but set to None.
    content_type = (resp.headers.get('Content-Type') or '').lower()
    return resp.status_code == 200 and 'html' in content_type
def log_error(e):
    """
    Report an error.

    Currently this only prints `e` to standard output; it is the single
    place to change if errors should go somewhere else.
    """
    print(e)
def fetch(url):
    """
    Collect the link of every <h2> heading found on the page at `url`.

    Each link is printed and appended to the module-level `smartybro1`
    list. Nothing is returned. If the page cannot be retrieved the problem
    is logged and the list is left untouched.
    """
    raw_html = simple_get(url)
    if raw_html is None:
        # simple_get returns None for failed or non-HTML responses;
        # BeautifulSoup cannot parse None.
        log_error('No HTML received from {0}'.format(url))
        return
    html = BeautifulSoup(raw_html, 'html.parser')
    for item in html.select('h2'):
        link = item.a
        # Skip headings that carry no anchor or an anchor without href.
        if link is None or not link.get('href'):
            continue
        print(link['href'])
        smartybro1.append(link['href'])
# Detect if a udemy course is free or not
def detectFree(url):
    """
    Return True when the course page at `url` shows an 'Enroll now' button.

    The page is fetched with `simple_get` and the first element matching
    'a:is(.course-cta)' is inspected. Returns False when the page cannot be
    retrieved, when no such element exists, or when its text does not
    contain 'Enroll now'.
    """
    raw_html = simple_get(url)
    if raw_html is None:
        return False
    html = BeautifulSoup(raw_html, 'html.parser')
    print("detectFree")
    a = html.select('a:is(.course-cta)')
    # `.text` is the element's string content; `.contents` is a list of
    # child nodes and has no substring search.
    if a and 'Enroll now' in a[0].text:
        print(a)
        return True
    return False
# Open Udemy link in new tab
def openUdemy(url):
    """
    Ask the operating system to open `url` with its default handler.

    Windows uses `os.startfile`; macOS runs the `open` command and every
    other platform runs `xdg-open`. If the launcher command cannot be
    started, a hint asking the user to open the URL manually is printed.
    """
    if sys.platform == 'win32':
        os.startfile(url)
        return
    launcher = 'open' if sys.platform == 'darwin' else 'xdg-open'
    try:
        # Argument list (no shell), so the URL is passed as one argument.
        subprocess.Popen([launcher, url])
    except OSError:
        print('Please open a browser on: ' + url)
# Step 1: collect post links from the start page into `smartybro1`.
fetch(url)
print("___________________")

# Step 2: visit every collected post and harvest the links it contains.
for url in smartybro1:
    raw_html2 = simple_get(url)
    if raw_html2 is None:
        # simple_get returns None for failed or non-HTML responses.
        continue
    html2 = BeautifulSoup(raw_html2, 'html.parser')
    title = html2.select('span:is(.entry-title)')
    if title:
        print(title[0].contents)
    for a in html2.select('div:is(.sing-spacer) p a'):
        href = a.get('href')
        if href:
            udemylinks.append(href)

# Step 3: classify each harvested link by its call-to-action text.
for url in udemylinks:
    try:
        with closing(get(url, stream=True, headers=headers, timeout=30)) as resp:
            # NOTE(review): the original indentation was lost; this assumes
            # ' None' was printed for responses rejected by is_good_response.
            if not is_good_response(resp):
                print(' None')
                continue
            html = BeautifulSoup(resp.content, 'html.parser')
    except RequestException as e:
        # One unreachable link must not abort the whole run.
        log_error('Error during requests to {0} : {1}'.format(url, str(e)))
        continue
    a = html.select('a:is(.course-cta)')
    if not a:
        # No call-to-action element on the page: cannot classify the link.
        print(' None')
        continue
    cta_text = a[0].text
    print(cta_text + " : " + url)
    if 'Enroll now' in cta_text:
        udemyFree.append(url)
    if 'Buy now' in cta_text:
        udemyPaid.append(url)

# Step 4: let the user choose which group of links to open.
print('a. Open All links.')
print('b. Open only Free links.')
print('c. Open only Paid links.')
print('d. Open no link. and exit')
openLinks = input("What you want to do : ")
# Tolerate surrounding whitespace and upper-case answers.
choice = openLinks.strip().lower()
if choice == "a":
    for url in udemylinks:
        openUdemy(url)
elif choice == "b":
    for url in udemyFree:
        openUdemy(url)
elif choice == "c":
    for url in udemyPaid:
        openUdemy(url)
else:
    print('exiting')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Basic attempt. Gathers all links from the first page of the Smartybro site, extracts the relevant Udemy URLs, and stores them as free or paid. Finally, you can open only the free links, only the paid links, or all links in your default browser. You should be logged in to your Udemy account to enroll in your desired courses.