Skip to content

Instantly share code, notes, and snippets.

@rdbuf
Last active October 13, 2019 21:49
Some dumb lecture downloader for lectoriy.mipt.ru
import argparse
import shutil
import os.path
import sys
import requests
from urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter
from selenium import webdriver
from requests_html import HTML
import logging
# Command-line interface: a required course URL and an optional output directory.
parser = argparse.ArgumentParser()
parser.add_argument(
    'url',
    help='example: https://lectoriy.mipt.ru/course/LinearAlgebra/lectures',
)
# --outdir defaults to the empty string.
parser.add_argument(
    '--outdir',
    default='',
)
args = parser.parse_args()
course_url, outdir = args.url, args.outdir
# Load the course page in a real Chrome instance and keep the resulting HTML.
# NOTE(review): presumably a browser is used because the lecture list is
# rendered client-side -- confirm.
driver = webdriver.Chrome()
try:
    driver.get(course_url)
    content = driver.page_source
finally:
    # Always shut the browser down, even when navigation fails, so that no
    # Chrome process is left running after an error.
    driver.quit()
# Shared HTTP session used for both the lecture pages and the video downloads.
s = requests.Session()
# NOTE(review): urllib3's Retry also has a `total` budget (default 10) that
# takes precedence over the per-category counts below -- confirm whether the
# 500 was meant to apply to it as well.
retry_policy = Retry(connect=500, read=500, redirect=500, status=500)
# requests selects the adapter with the longest matching URL prefix, so an
# adapter mounted on '' never beats the built-in 'http://' / 'https://' ones.
# Mount on the real prefixes so the retry policy is actually used.
for url_prefix in ('http://', 'https://'):
    s.mount(url_prefix, HTTPAdapter(max_retries=retry_policy))
# Verbose logging: basicConfig() sets up the default root handler (if none is
# configured yet), then the root logger is lowered to DEBUG.
logging.basicConfig()
root_logger = logging.getLogger()
root_logger.setLevel(logging.DEBUG)

# The "requests.packages.urllib3" logger is set to DEBUG explicitly and its
# records are passed up to the root logger's handlers.
requests_log = logging.getLogger("requests.packages.urllib3")
requests_log.propagate = True
requests_log.setLevel(logging.DEBUG)
# Parse the rendered course page; passing `url` lets relative links be
# resolved into absolute ones later on.
html = HTML(html=content, url=course_url)

# The first <h1> on the page is used as the course title / folder name.
title_element = html.find('h1', first=True)
if title_element is None:
    # Fail with a readable message instead of an AttributeError on None.
    raise SystemExit(f'no <h1> course title found at {course_url}')
course_title = title_element.text

# All lectures of the course go into <outdir>/<course title>/.
baseoutdir = os.path.join(outdir, course_title)
# exist_ok avoids the check-then-create race and makes re-runs a no-op.
os.makedirs(baseoutdir, exist_ok=True)
# Download every lecture linked from the course page, resuming partial files.
for i, block in enumerate(html.find('.lecture-title')):
    lecture_title = block.text
    # absolute_links is a set in requests_html, so if a title block holds
    # several links, which one is picked here is unspecified.
    page_url = list(block.absolute_links)[0]
    lecture_page = HTML(html=s.get(page_url).text)
    source_element = lecture_page.find('video > source', first=True)
    # Strip stray escaped quotes (\") from the src attribute.
    video_url = source_element.attrs['src'].replace(r'\"', '')

    # A path separator inside the title would otherwise be interpreted as a
    # sub-directory and make open() fail.
    safe_title = lecture_title.replace('/', '-').replace(os.sep, '-')
    filename = os.path.join(baseoutdir, f'{i+1:02d}. {safe_title}.mp4')

    # Resume from the current size of a partially downloaded file, if any.
    startbyte = os.path.getsize(filename) if os.path.exists(filename) else 0

    print(f'\033[92mdownloading: {filename}\033[0m')
    sys.stdout.flush()

    # The context manager releases the streamed connection on every path.
    with s.get(video_url, stream=True, headers={'Range': f'bytes={startbyte}-'}) as r:
        if r.status_code == 416 and startbyte:
            # Range Not Satisfiable: the server has nothing past what is
            # already on disk. Do not append its error body to the file.
            # NOTE(review): this also triggers if the local file is larger
            # than the remote one -- confirm that skipping is acceptable.
            print(f'already complete: {filename}')
            continue
        # Any other error response must not be written out as video data.
        r.raise_for_status()
        # 206 means the Range was honoured, so append; a plain 200 carries
        # the whole file again, so start over instead of appending to it.
        mode = 'ab' if r.status_code == 206 else 'wb'
        with open(filename, mode) as outfile:
            shutil.copyfileobj(r.raw, outfile)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment