@picwellwisher12pk
Last active June 6, 2019 12:31
Python-based scraper for SmartyBro
# Import libraries
import os
import subprocess
import sys
from contextlib import closing

from bs4 import BeautifulSoup
from requests import get
from requests.exceptions import RequestException

headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}
url = 'https://smartybro.com/'
smartybro1 = []
udemylinks = []
udemyFree = []
udemyPaid = []
def simple_get(url):
    """
    Attempts to get the content at `url` by making an HTTP GET request.
    If the content-type of the response is some kind of HTML/XML, return
    the raw content, otherwise return None.
    """
    try:
        with closing(get(url, stream=True, headers=headers)) as resp:
            if is_good_response(resp):
                return resp.content
            else:
                return None
    except RequestException as e:
        log_error('Error during requests to {0} : {1}'.format(url, str(e)))
        return None
def is_good_response(resp):
    """
    Returns True if the response seems to be HTML, False otherwise.
    """
    content_type = resp.headers['Content-Type'].lower()
    return (resp.status_code == 200
            and content_type is not None
            and content_type.find('html') > -1)


def log_error(e):
    """
    It is always a good idea to log errors.
    This function just prints them, but you can
    make it do anything.
    """
    print(e)
def fetch(url):
    # Collect the post links from the SmartyBro front page; each post
    # title is an <h2> wrapping a link to the full article.
    raw_html = simple_get(url)
    html = BeautifulSoup(raw_html, 'html.parser')
    for item in html.select('h2'):
        print(item.a['href'])
        smartybro1.append(item.a['href'])
# Detect if a Udemy course is free or not
def detectFree(url):
    raw_html = simple_get(url)
    html = BeautifulSoup(raw_html, 'html.parser')
    print("detectFree")
    a = html.select('a.course-cta')
    if a and a[0].text.find('Enroll now') > -1:
        print(a)
# Open Udemy link in a new tab
def openUdemy(url):
    if sys.platform == 'win32':
        os.startfile(url)
    elif sys.platform == 'darwin':
        subprocess.Popen(['open', url])
    else:
        try:
            subprocess.Popen(['xdg-open', url])
        except OSError:
            print('Please open a browser on: ' + url)
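
# Note: the standard-library webbrowser module is a portable
# alternative to the platform dispatch above, e.g.:
#   import webbrowser
#   webbrowser.open_new_tab(url)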
fetch(url)
print("___________________")

# Visit each SmartyBro post and pull the Udemy course links out of the body.
for url in smartybro1:
    raw_html2 = simple_get(url)
    html2 = BeautifulSoup(raw_html2, 'html.parser')
    title = html2.select('span.entry-title')
    print(title[0].contents)
    for a in html2.select('div.sing-spacer p a'):
        udemylinks.append(a['href'])
# Classify each Udemy link as free ("Enroll now") or paid ("Buy now")
# by inspecting the call-to-action button on the course page.
for url in udemylinks:
    with closing(get(url, stream=True, headers=headers)) as resp:
        if is_good_response(resp):
            html = BeautifulSoup(resp.content, 'html.parser')
            a = html.select('a.course-cta')
            print(a[0].text + " : " + url)
            if a[0].text.find('Enroll now') > -1:
                udemyFree.append(url)
            if a[0].text.find('Buy now') > -1:
                udemyPaid.append(url)
        else:
            print(' None')
print('a. Open all links.')
print('b. Open only free links.')
print('c. Open only paid links.')
print('d. Open no links and exit.')
openLinks = input("What do you want to do: ")
if openLinks == "a":
    for url in udemylinks:
        openUdemy(url)
elif openLinks == "b":
    for url in udemyFree:
        openUdemy(url)
elif openLinks == "c":
    for url in udemyPaid:
        openUdemy(url)
else:
    print('exiting')
@picwellwisher12pk (Author)

Basic attempt. Gather all links from the first page of the SmartyBro site, extract the relevant Udemy URLs, and store them as free or paid. Finally, you can open all links, only the free ones, or only the paid ones in your active/default browser. You should be logged in to your Udemy account to enroll in the courses you want.
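
If you want to sanity-check the free/paid detection without hitting the network, here is a minimal sketch (not part of the gist) that runs the same a.course-cta selector and button-text check against a static HTML snippet; the sample markup is a made-up stand-in for a Udemy course page:

from bs4 import BeautifulSoup

# Hypothetical stand-in for a Udemy course page's call-to-action button.
sample_html = '<a class="course-cta" href="#">Enroll now</a>'

soup = BeautifulSoup(sample_html, 'html.parser')
cta = soup.select('a.course-cta')

# Mirrors the script's classification: "Enroll now" means free,
# "Buy now" means paid.
if cta and cta[0].text.find('Enroll now') > -1:
    print('free')
elif cta and cta[0].text.find('Buy now') > -1:
    print('paid')
else:
    print('unknown')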
