Created
September 29, 2023 09:30
-
-
Save zorbax/5e7e8e815d0dbe90cd88b7f9b2656274 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import json | |
| import time | |
| from pathlib import Path | |
| from bs4 import BeautifulSoup as bs | |
| from selenium import webdriver | |
| from selenium.webdriver.chrome.service import Service | |
# Chapter URLs to process: every whitespace-separated token in ~/ysblf.txt.
links_path = Path.home() / "ysblf.txt"
ysblf_links = links_path.read_text(encoding="utf-8").split()
def chrome_driver():
    """Create the Selenium Chrome driver used for scraping.

    The driver is started through the chromedriver executable at
    ``~/bin/opt/chromedriver`` and is pointed at the Brave binary in
    ``/usr/bin/brave-browser``. It runs with the ``-incognito`` and
    ``headless`` arguments, and with ``enable-automation`` listed under the
    ``excludeSwitches`` experimental option.

    Returns:
        The configured ``webdriver.Chrome`` instance.
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.binary_location = r"/usr/bin/brave-browser"
    for flag in ("-incognito", "headless"):
        chrome_options.add_argument(flag)
    chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])

    driver_path = Path.home() / "bin/opt/chromedriver"
    return webdriver.Chrome(service=Service(str(driver_path)), options=chrome_options)
def _file_name_from_script(script_text: str) -> str:
    """Build ``"<season> - <title>"`` from a ``name = {json}`` script body."""
    # Everything after the first "=" is expected to be a JSON document;
    # tolerate surrounding whitespace and a trailing JavaScript ";".
    payload = script_text.split("=", 1)[1].strip().rstrip(";")
    json_data = json.loads(payload)
    pages = json_data["pages"]
    # The second key of "pages" is the entry used for the metadata lookups.
    page = pages[list(pages)[1]]
    season = page["metadata"]["title"].split(" | ")[0]
    title = page["analytics"]["convivaAssetName"]
    return f"{season} - {title}"


def get_video_url(chapter: str):
    """Open a chapter page and extract its video URL and a file name.

    Args:
        chapter: URL of the chapter page to load in the browser.

    Returns:
        A ``(file_name, url_video)`` tuple. ``file_name`` has the form
        ``"<season> - <title>"`` and ``url_video`` is the ``src`` of the
        third ``<iframe>`` on the page, prefixed with ``https:``.

    Raises:
        IndexError: If the page has fewer than 3 iframes or 43 scripts, or
            the selected script contains no ``=``.
        KeyError: If the iframe has no ``src`` or the JSON lacks the
            expected keys.
        ValueError: If the script payload is not valid JSON.
    """
    browser = chrome_driver()
    try:
        browser.get(chapter)
        # Fixed wait before reading the DOM (presumably so the page's own
        # scripts can finish rendering the player; TODO confirm).
        time.sleep(30)
        html = browser.page_source
    finally:
        # quit() ends the whole driver session, even when loading fails,
        # so no browser/chromedriver process is left behind.
        browser.quit()

    soup = bs(html, "html.parser")
    # NOTE(review): positions 2 and 42 are hard-coded and presumably tied to
    # the current layout of the target site.
    iframes = soup.find_all("iframe")
    url_video = f"https:{iframes[2]['src']}"
    scripts = soup.find_all("script")
    # Parse the script text in memory; no intermediate file is needed.
    file_name = _file_name_from_script(scripts[42].text)
    return file_name, url_video
# Maps each "<season> - <title>" file name to its video URL.
data_ysblf = {}
for x in ysblf_links:
    print(f"Processing: {x}")
    # Try each link up to five times before giving up on it.
    for attempt in range(1, 6):
        # Pause before every attempt, including the first one.
        time.sleep(20)
        try:
            filename, urlvideo = get_video_url(x)
        except (TypeError, IndexError, KeyError) as err:
            # The page lookups fail with these when the expected elements
            # are missing; report the failure and retry.
            print(f"Attempt {attempt}/5 failed for {x}: {err!r}")
            continue
        data_ysblf[filename] = urlvideo
        break
    else:
        # Every attempt failed: say so instead of silently dropping the link.
        print(f"Giving up on {x} after 5 attempts")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment