Last active
July 3, 2018 11:01
-
-
Save swablueme/d9c71e9a8f7def6d3d98fb0f659a803f to your computer and use it in GitHub Desktop.
Downloads University of Melbourne lecture recordings from Echo360.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import os.path | |
import json | |
import re | |
# Base URL of the Echo360 (Australia) service; all section URLs are relative to it.
baseurl="https://echo360.org.au"
# Subject codes, used verbatim as the names of the download folders.
subjectname=["COMP10001", "another_subject"]
# Echo360's opaque per-subject section path, one entry per subject above
# (captured from the browser's network traffic — "check image" in the gist).
sectionurl= ["url_for_COMP10001_here", "URL_for_another_subject_here"]
# Session cookie per subject; required for the section-JSON request.
# Recorded once from the browser — they never seem to expire.
available_cookie = ["cookie_for_COMP10001_here", "cookie_for_another_subject_here"]
# Silence urllib3's InsecureRequestWarning: requests are made with
# verify=False so Fiddler4 can intercept the TLS traffic while the script runs.
requests.packages.urllib3.disable_warnings()
# Main loop: for each configured subject, fetch the section JSON listing the
# recorded lessons, collect one video URL per calendar date, then download any
# lecture not already saved as <subject>/<date>.mp4.
for idx, subject in enumerate(subjectname):
    print("%s in progress..." % subject)

    # Headers for the section-JSON request; the per-subject cookie carries the
    # Echo360 session authentication.
    h = {}
    h['User-Agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit /537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36"
    h['Host'] = "echo360.org.au"
    h['Cookie'] = available_cookie[idx]

    # s3 URLs of the lecture videos queued for download this subject.
    lecturelist = []
    # Dates already queued — used both to skip same-day duplicates and to name
    # the saved files.  Known limitation (noted by the original author): a
    # second lecture on the same day is dropped, and a lecture whose
    # startTimeUTC rolls into the next day slips through as a duplicate.
    nodupes_pls = []

    # verify=False is deliberate so Fiddler4 can intercept the TLS traffic.
    r = requests.get('%s%s' % (baseurl, sectionurl[idx]), headers=h, verify=False)
    val = r.json()

    for entry in val["data"]:
        # startTimeUTC looks like "YYYY-MM-DDTHH:MM..."; split on the "T" to
        # keep just the date portion for de-duplication and file naming.
        datefor = re.split("(?<=-[0-9]{2})T(?=[0-9]{2}:)",
                           entry['lesson']['startTimeUTC'])
        if datefor[0] in nodupes_pls:
            continue
        try:
            # Lessons without a processed video lack these keys and raise
            # KeyError; they are skipped (best-effort, as in the original).
            video = entry['lesson']['video']['media']['media']['current']['primaryFiles'][1]['s3Url']
        except KeyError:
            continue
        nodupes_pls.append(datefor[0])
        lecturelist.append(video)

    # One folder per subject.
    if not os.path.exists(subject):
        os.mkdir(subject)

    # Headers for the media downloads (different host; the signed s3 URLs need
    # no session cookie).  These are loop-invariant, so set them once instead
    # of on every iteration as the original did.
    h['Cookie'] = "DNT:1"
    h['Accept'] = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
    h['Accept-Encoding'] = "gzip,deflate,br"
    h['Accept-Language'] = "en-AU,en;q=0.9"
    h['Host'] = "content.echo360.org.au"

    for i, lecture in enumerate(lecturelist):
        target = os.path.join(subject, nodupes_pls[i] + ".mp4")
        if os.path.exists(target):
            print("%s lecture file already exists so skipping!" % nodupes_pls[i])
            continue
        print("Downloading the lecture for %s" % nodupes_pls[i])
        r = requests.get(lecture, headers=h, verify=False)
        # BUG FIX: the original wrote `with open(...) as nodupes_pls[i]:`,
        # which clobbered the stored date string with a (closed) file object.
        # Bind the file handle to its own name instead.
        with open(target, "wb") as fh:
            fh.write(r.content)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment