downloads unimelb echo lectures
import requests
import os.path
import json
import re
#URL for echo
baseurl="https://echo360.org.au"
#list of subject names with which to name folders
subjectname=["COMP10001", "another_subject"]
#sectionurl is the special name that echo gives to each subject (check image)
sectionurl= ["url_for_COMP10001_here", "URL_for_another_subject_here"]
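#(no real value is shown above; as a hypothetical example of the shape, a
#section url looks something like "/section/<long-id>/syllabus" among the
#requests your browser makes while the echo page is open; copy the real
#value, as in the image the comments mention)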
#cookies are required to make this script work, but they never seem to expire once you've recorded them (check image)
available_cookie = ["cookie_for_COMP10001_here", "cookie_for_another_subject_here"]
#disable logging warnings so I can have Fiddler4 open when I run the script
requests.packages.urllib3.disable_warnings()
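#(the silenced warnings are urllib3's InsecureRequestWarning, raised because
#every request below uses verify=False; a narrower equivalent would be
#urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning))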
for idx, subject in enumerate(subjectname):
    print("%s in progress..." % subject)
    #headers to send with the json request
    h={}
    h['User-Agent']=("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit"
                     "/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36")
    h['Host']="echo360.org.au"
    h['Cookie']=available_cookie[idx]
    #contains the list of lecture urls
    lecturelist=[]
    #contains a list of lecture dates to hopefully avoid downloading dupe lectures
    #(it doesn't actually work because the second COMP10001 lecture has an
    #incremented date, and checking for this would probably break other
    #subjects or miss advanced lectures)
    nodupes_pls=[]
    #opens the section url to obtain the json listing the video files
    r = requests.get('%s%s' % (baseurl, sectionurl[idx]), headers=h, verify=False)
    #parse the resulting json
    val = r.json()
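    #for reference, the lookups below assume the json is shaped roughly like
    #this (field names taken from this script; the exact schema is Echo360's
    #and may differ):
    #  {"data": [{"lesson": {
    #      "startTimeUTC": "2018-03-05T01:00:00.000Z",
    #      "video": {"media": {"media": {"current": {"primaryFiles": [
    #          {...}, {"s3Url": "https://content.echo360.org.au/..."}
    #      ]}}}}}}]}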
    for i in range(len(val["data"])):
        datefor = val["data"][i]['lesson']['startTimeUTC']
        #split the UTC timestamp into a nice looking date and a time part
        datefor = re.split("(?<=-[0-9]{2})T(?=[0-9]{2}:)", datefor)
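        #e.g. re.split(r"(?<=-[0-9]{2})T(?=[0-9]{2}:)", "2018-03-05T01:00:00.000Z")
        #returns ["2018-03-05", "01:00:00.000Z"]; index 0 is the date used below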
        #record the lecture url after checking the date hasn't already been seen
        if datefor[0] not in nodupes_pls:
            try:
                video = val["data"][i]['lesson']['video']['media']['media']['current']['primaryFiles'][1]['s3Url']
                nodupes_pls.append(datefor[0])
                lecturelist.append(video)
            except KeyError:
                pass
    #check if the subject folder exists; if not, make one
    if not os.path.exists(subject):
        os.mkdir(subject)
    #downloads lectures and saves them in the folder of the subject name
    for i, lecture in enumerate(lecturelist):
        #the s3Url links point at a different host and appear to be
        #pre-signed, so the subject cookie isn't needed for the download
        h['Cookie']="DNT:1"
        h['Accept']="text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
        h['Accept-Encoding']="gzip,deflate,br"
        h['Accept-Language']="en-AU,en;q=0.9"
        h['Host']="content.echo360.org.au"
        filename=os.path.join(subject, nodupes_pls[i]+".mp4")
        if not os.path.exists(filename):
            print("Downloading the lecture for %s" % nodupes_pls[i])
            r = requests.get(lecture, headers=h, verify=False)
            with open(filename, "wb") as f:
                f.write(r.content)
        else:
            print("%s lecture file already exists so skipping!" % nodupes_pls[i])