# Source: GitHub Gist zorbax/5e7e8e815d0dbe90cd88b7f9b2656274
# Author: @zorbax
# Created: September 29, 2023 09:30
# (GitHub page navigation text from the original capture has been reduced to
# this header; it was not part of the script.)
import json
import time
from pathlib import Path
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
# Chapter URLs to process: ~/ysblf.txt is read as UTF-8 text and split on any
# whitespace, so the links may be separated by spaces or newlines.
ysblf_links = (Path.home() / "ysblf.txt").read_text(encoding="utf-8").split()
def chrome_driver():
    """Create and return a Selenium Chrome WebDriver instance.

    The driver is started from the chromedriver executable at
    ``~/bin/opt/chromedriver`` and launches the browser binary at
    ``/usr/bin/brave-browser`` with the ``-incognito`` and ``headless``
    arguments. The ``enable-automation`` switch is excluded through the
    ``excludeSwitches`` experimental option.

    Returns:
        The ``webdriver.Chrome`` object; the caller is responsible for
        shutting it down.
    """
    opts = webdriver.ChromeOptions()
    opts.binary_location = r"/usr/bin/brave-browser"
    for flag in ("-incognito", "headless"):
        opts.add_argument(flag)
    opts.add_experimental_option("excludeSwitches", ["enable-automation"])

    driver_service = Service(str(Path.home() / "bin/opt/chromedriver"))
    return webdriver.Chrome(service=driver_service, options=opts)
def get_video_url(chapter: str):
    """Load a chapter page in a browser and extract its video URL and name.

    The page is opened with the driver returned by ``chrome_driver()``, given
    30 seconds to finish loading, and the resulting HTML is parsed with
    BeautifulSoup. The text of the selected ``<script>`` element is also
    written to ``script.json`` in the current working directory.

    Args:
        chapter: URL of the chapter page to load.

    Returns:
        A ``(file_name, url_video)`` tuple. ``file_name`` is
        ``"<season> - <title>"`` taken from the JSON embedded in the page;
        ``url_video`` is ``"https:"`` followed by the ``src`` attribute of the
        third ``<iframe>`` on the page.

    Raises:
        IndexError: If the page has fewer than 3 iframes or 43 scripts, the
            selected script text contains no ``=``, or ``"pages"`` has fewer
            than two entries.
        KeyError: If the iframe has no ``src`` or an expected JSON key is
            missing.
        json.JSONDecodeError: If the text after the first ``=`` is not valid
            JSON.
    """
    browser = chrome_driver()
    try:
        browser.get(chapter)
        # Fixed wait so that dynamically inserted content is present in
        # page_source before it is captured.
        time.sleep(30)
        html = browser.page_source
    finally:
        # quit() ends the whole driver session (close() only closes the
        # window), and the finally clause guarantees it runs even when
        # loading the page fails.
        browser.quit()

    soup = bs(html, "html.parser")

    # Positional lookups: the third iframe carries the video source.
    # NOTE(review): the "https:" prefix assumes src is protocol-relative
    # ("//host/...") -- confirm against the target site.
    iframe = soup.find_all("iframe")
    url_video = f"https:{iframe[2]['src']}"

    # The 43rd <script> holds a "<name> = <json>" assignment.
    script = soup.find_all("script")
    script_text = script[42].text

    # Keep a copy of the raw script text on disk. The file must be opened for
    # writing; the JSON is parsed from the in-memory text rather than read
    # back from the file.
    with open("script.json", "w", encoding="utf-8") as f:
        f.write(script_text)

    # Everything after the first "=" is parsed as the JSON payload.
    json_data = json.loads(script_text.split("=", 1)[1])

    # The second entry of "pages" is the one describing this chapter.
    ysblf_key = list(json_data["pages"])[1]
    page = json_data["pages"][ysblf_key]
    season = page["metadata"]["title"].split(" | ")[0]
    title = page["analytics"]["convivaAssetName"]
    file_name = f"{season} - {title}"
    return file_name, url_video
# Maps each "<season> - <title>" file name to its video URL.
data_ysblf = {}
for x in ysblf_links:
    print(f"Processing: {x}")
    # Up to 5 attempts per link, pausing 20 seconds before each one.
    for attempt in range(1, 6):
        time.sleep(20)
        try:
            filename, urlvideo = get_video_url(x)
        except (TypeError, IndexError, KeyError, json.JSONDecodeError) as err:
            # A page that has not fully loaded lacks the expected
            # iframe/script elements or JSON keys, which surfaces as one of
            # these lookup/parse errors; report it and try again.
            print(f"Attempt {attempt}/5 failed for {x}: {err!r}")
            continue
        data_ysblf[filename] = urlvideo
        break
    else:
        # Every attempt failed: say so instead of dropping the link silently.
        print(f"Giving up on {x} after 5 attempts")
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment