picwellwisher12pk · June 6, 2019 12:31 · picwellwisher12pk · Jun 6, 2019
diff --git a/scrapper.py b/scrapper.py
 # Import libraries
 from bs4 import BeautifulSoup
 from contextlib import closing
 import requests
 from requests import get
 from requests.exceptions import RequestException
 import time
 import sys
 import os
 import webbrowser
 import urllib.request

 # Browser-like User-Agent header passed along with the HTTP requests below.
 headers = {
     'User-Agent': (
         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) '
         'AppleWebKit/537.36 (KHTML, like Gecko) '
         'Chrome/71.0.3578.98 Safari/537.36'
     ),
 }
 # Landing page scanned by fetch() at the start of the script.
 url = 'https://smartybro.com/'
 # Accumulators filled in while the script runs.
 smartybro1 = list()  # hrefs taken from the <h2> links of the landing page
 udemylinks = list()  # hrefs taken from 'div.sing-spacer p a' on each post
 udemyFree = list()   # links whose call-to-action text contains 'Enroll now'
 udemyPaid = list()   # links whose call-to-action text contains 'Buy now'

 def simple_get(url, timeout=30):
     """
     Fetch `url` with an HTTP GET and return the raw response body.

     Returns the body as bytes (`resp.content`) when `is_good_response`
     accepts the response, otherwise None. Request failures, including
     timeouts, are reported through `log_error` and also yield None.

     `timeout` is the number of seconds handed to `requests.get`; without
     one, a server that never answers would block the script forever.
     """
     try:
         # closing() releases the streamed connection on every exit path.
         with closing(get(url, stream=True, headers=headers, timeout=timeout)) as resp:
             return resp.content if is_good_response(resp) else None
     except RequestException as e:
         log_error('Error during requests to {0} : {1}'.format(url, str(e)))
         return None

 def is_good_response(resp):
     """
     Return True if `resp` has status 200 and an HTML Content-Type.

     A response that carries no Content-Type header is treated as
     "not HTML" (False) rather than raising KeyError.
     """
     # .get() tolerates a missing header; `or ''` also covers a None value.
     content_type = resp.headers.get('Content-Type') or ''
     return resp.status_code == 200 and 'html' in content_type.lower()

 def log_error(e):
     """
     Report an error by printing it to standard output.

     Kept as a separate function so the reporting mechanism can be
     replaced in one place.
     """
     message = str(e)
     print(message)

 def fetch(url):
     """
     Collect the heading links of the page at `url` into `smartybro1`.

     Every <h2> element that contains an <a> with an href contributes that
     href, which is printed and appended to the module-level `smartybro1`
     list. Nothing is collected when the page cannot be downloaded.
     """
     raw_html = simple_get(url)
     if raw_html is None:
         # simple_get returns None on failure; BeautifulSoup cannot parse None.
         log_error('No HTML returned for {0}'.format(url))
         return
     html = BeautifulSoup(raw_html, 'html.parser')
     for item in html.select('h2'):
         link = item.a
         # Skip headings without an anchor, or whose anchor has no href.
         if link is None or not link.get('href'):
             continue
         print (link['href'])
         smartybro1.append(link['href'])

 # Detect if a udemy course is free or not
 def detectFree(url):
     """
     Report whether the course page at `url` looks free.

     A page counts as free when its first `a.course-cta` element contains
     the text 'Enroll now'. Returns True in that case and False otherwise,
     including when the page cannot be downloaded or has no such element.
     """
     raw_html = simple_get(url)
     print ("detectFree")
     if raw_html is None:
         return False
     html = BeautifulSoup(raw_html, 'html.parser')
     cta = html.select('a:is(.course-cta)')
     # `.contents` is a list of child nodes and has no find(); the visible
     # label is read through `.text` instead.
     if cta and 'Enroll now' in cta[0].text:
         print (cta)
         return True
     return False

 # Open Udemy link in new tab
 def openUdemy(url):
     """
     Open `url` with the platform's default handler.

     Uses `os.startfile` on Windows, the `open` command on macOS and
     `xdg-open` on every other platform. If the launcher command cannot be
     started, a message asking the user to open the URL manually is printed.
     """
     # Imported locally: the file's import block does not bring in subprocess.
     import subprocess

     if sys.platform == 'win32':
         os.startfile(url)
         return
     launcher = 'open' if sys.platform == 'darwin' else 'xdg-open'
     try:
         subprocess.Popen([launcher, url])
     except OSError:
         # Raised when the launcher executable is missing or cannot be run.
         print ('Please open a browser on: '+url)

 # Step 1: collect the post links from the landing page into smartybro1.
 fetch(url)
 print ("___________________")

 # Step 2: visit every collected post and gather the links found in its body.
 for url in smartybro1:
     raw_html2 = simple_get(url)
     if raw_html2 is None:
         # Download failed or was not HTML; there is nothing to parse.
         continue
     html2 = BeautifulSoup(raw_html2, 'html.parser')
     title = html2.select('span:is(.entry-title)')
     if title:
         print (title[0].contents)
     for a in html2.select('div:is(.sing-spacer) p a'):
         href = a.get('href')
         # Anchors without an href cannot be followed later on.
         if href:
             udemylinks.append(href)


 # Step 3: classify each gathered link by the text of its call-to-action link.
 for url in udemylinks:
     # simple_get applies the same response check and also absorbs
     # request errors, so one unreachable link does not abort the run.
     page = simple_get(url)
     if page is None:
         print(' None')
         continue
     html = BeautifulSoup(page, 'html.parser')
     a = html.select('a:is(.course-cta)')
     if not a:
         # Without a call-to-action element the link cannot be classified.
         print ('No course button found : '+ url)
         continue
     label = a[0].text
     print (label+" : "+ url)
     if 'Enroll now' in label:
         udemyFree.append(url)
     if 'Buy now' in label:
         udemyPaid.append(url)

 # Step 4: let the user choose which group of links to open.
 print ('a. Open All links.')
 print ('b. Open only Free links.')
 print ('c. Open only Paid links.')
 print ('d. Open no link. and exit')
 openLinks = input ("What you want to do : ")
 # Tolerate surrounding whitespace and upper-case answers.
 selected = {"a": udemylinks, "b": udemyFree, "c": udemyPaid}.get(openLinks.strip().lower())
 if selected is None:
     print ('exiting')
 else:
     for url in selected:
         openUdemy(url)
	# Import libraries
	from bs4 import BeautifulSoup
	from contextlib import closing
	import requests
	from requests import get
	from requests.exceptions import RequestException
	import time
	import sys
	import os
	import webbrowser
	import urllib.request

	# Browser-like User-Agent header passed along with the HTTP requests below.
	headers = {
	    'User-Agent': (
	        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) '
	        'AppleWebKit/537.36 (KHTML, like Gecko) '
	        'Chrome/71.0.3578.98 Safari/537.36'
	    ),
	}
	# Landing page scanned by fetch() at the start of the script.
	url = 'https://smartybro.com/'
	# Accumulators filled in while the script runs.
	smartybro1 = list()  # hrefs taken from the <h2> links of the landing page
	udemylinks = list()  # hrefs taken from 'div.sing-spacer p a' on each post
	udemyFree = list()   # links whose call-to-action text contains 'Enroll now'
	udemyPaid = list()   # links whose call-to-action text contains 'Buy now'

	def simple_get(url, timeout=30):
	    """
	    Fetch `url` with an HTTP GET and return the raw response body.

	    Returns the body as bytes (`resp.content`) when `is_good_response`
	    accepts the response, otherwise None. Request failures, including
	    timeouts, are reported through `log_error` and also yield None.

	    `timeout` is the number of seconds handed to `requests.get`; without
	    one, a server that never answers would block the script forever.
	    """
	    try:
	        # closing() releases the streamed connection on every exit path.
	        with closing(get(url, stream=True, headers=headers, timeout=timeout)) as resp:
	            return resp.content if is_good_response(resp) else None
	    except RequestException as e:
	        log_error('Error during requests to {0} : {1}'.format(url, str(e)))
	        return None

	def is_good_response(resp):
	    """
	    Return True if `resp` has status 200 and an HTML Content-Type.

	    A response that carries no Content-Type header is treated as
	    "not HTML" (False) rather than raising KeyError.
	    """
	    # .get() tolerates a missing header; `or ''` also covers a None value.
	    content_type = resp.headers.get('Content-Type') or ''
	    return resp.status_code == 200 and 'html' in content_type.lower()

	def log_error(e):
	    """
	    Report an error by printing it to standard output.

	    Kept as a separate function so the reporting mechanism can be
	    replaced in one place.
	    """
	    print(e)

	def fetch(url):
	    """
	    Collect the heading links of the page at `url` into `smartybro1`.

	    Every <h2> element that contains an <a> with an href contributes that
	    href, which is printed and appended to the module-level `smartybro1`
	    list. Nothing is collected when the page cannot be downloaded.
	    """
	    raw_html = simple_get(url)
	    if raw_html is None:
	        # simple_get returns None on failure; BeautifulSoup cannot parse None.
	        log_error('No HTML returned for {0}'.format(url))
	        return
	    html = BeautifulSoup(raw_html, 'html.parser')
	    for item in html.select('h2'):
	        link = item.a
	        # Skip headings without an anchor, or whose anchor has no href.
	        if link is None or not link.get('href'):
	            continue
	        print (link['href'])
	        smartybro1.append(link['href'])

	# Detect if a udemy course is free or not
	def detectFree(url):
	    """
	    Report whether the course page at `url` looks free.

	    A page counts as free when its first `a.course-cta` element contains
	    the text 'Enroll now'. Returns True in that case and False otherwise,
	    including when the page cannot be downloaded or has no such element.
	    """
	    raw_html = simple_get(url)
	    print ("detectFree")
	    if raw_html is None:
	        return False
	    html = BeautifulSoup(raw_html, 'html.parser')
	    cta = html.select('a:is(.course-cta)')
	    # `.contents` is a list of child nodes and has no find(); the visible
	    # label is read through `.text` instead.
	    if cta and 'Enroll now' in cta[0].text:
	        print (cta)
	        return True
	    return False

	# Open Udemy link in new tab
	def openUdemy(url):
	    """
	    Open `url` with the platform's default handler.

	    Uses `os.startfile` on Windows, the `open` command on macOS and
	    `xdg-open` on every other platform. If the launcher command cannot be
	    started, a message asking the user to open the URL manually is printed.
	    """
	    # Imported locally: the file's import block does not bring in subprocess.
	    import subprocess

	    if sys.platform == 'win32':
	        os.startfile(url)
	        return
	    launcher = 'open' if sys.platform == 'darwin' else 'xdg-open'
	    try:
	        subprocess.Popen([launcher, url])
	    except OSError:
	        # Raised when the launcher executable is missing or cannot be run.
	        print ('Please open a browser on: '+url)

	# Step 1: collect the post links from the landing page into smartybro1.
	fetch(url)
	print ("___________________")

	# Step 2: visit every collected post and gather the links found in its body.
	for url in smartybro1:
	    raw_html2 = simple_get(url)
	    if raw_html2 is None:
	        # Download failed or was not HTML; there is nothing to parse.
	        continue
	    html2 = BeautifulSoup(raw_html2, 'html.parser')
	    title = html2.select('span:is(.entry-title)')
	    if title:
	        print (title[0].contents)
	    for a in html2.select('div:is(.sing-spacer) p a'):
	        href = a.get('href')
	        # Anchors without an href cannot be followed later on.
	        if href:
	            udemylinks.append(href)


	# Step 3: classify each gathered link by the text of its call-to-action link.
	for url in udemylinks:
	    # simple_get applies the same response check and also absorbs
	    # request errors, so one unreachable link does not abort the run.
	    page = simple_get(url)
	    if page is None:
	        print(' None')
	        continue
	    html = BeautifulSoup(page, 'html.parser')
	    a = html.select('a:is(.course-cta)')
	    if not a:
	        # Without a call-to-action element the link cannot be classified.
	        print ('No course button found : '+ url)
	        continue
	    label = a[0].text
	    print (label+" : "+ url)
	    if 'Enroll now' in label:
	        udemyFree.append(url)
	    if 'Buy now' in label:
	        udemyPaid.append(url)

	# Step 4: let the user choose which group of links to open.
	print ('a. Open All links.')
	print ('b. Open only Free links.')
	print ('c. Open only Paid links.')
	print ('d. Open no link. and exit')
	openLinks = input ("What you want to do : ")
	# Tolerate surrounding whitespace and upper-case answers.
	selected = {"a": udemylinks, "b": udemyFree, "c": udemyPaid}.get(openLinks.strip().lower())
	if selected is None:
	    print ('exiting')
	else:
	    for url in selected:
	        openUdemy(url)