Last active
March 10, 2026 01:25
-
-
Save luckylittle/58363570af7c432ad3e429bcd4b2155f to your computer and use it in GitHub Desktop.
AutoBrr List that contains "New on Netflix" from three genres (true crime, documentary films, documentary series)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/python | |
import json
from typing import FrozenSet, List, Set

import requests
from bs4 import BeautifulSoup
# Netflix genre browse pages that are scraped for their "New on Netflix" row.
TRUE_CRIME_URL = "https://www.netflix.com/browse/genre/108820"
DOCUMENTARY_URL = "https://www.netflix.com/browse/genre/2243108"
DOCUMENTARY_SERIES_URL = "https://www.netflix.com/browse/genre/10105"

# Stable section title in the UI (not content-dependent).
SECTION_TITLE = "New on Netflix"

# Button/label text we must not treat as titles when scraping the row.
# Annotated as FrozenSet because the value is an immutable frozenset; the
# mutable ``Set`` annotation does not match it under a type checker.
_NON_TITLE_TEXT: FrozenSet[str] = frozenset(
    {"Join now", "Play", "More Info", "Add to My List", "Next", "Previous"}
)
def _titles_from_html(soup: BeautifulSoup) -> List[str]:
    """Extract titles from the 'New on Netflix' section using the section heading only.

    Every ``<h2>`` whose stripped text equals ``SECTION_TITLE`` is examined.
    For each such heading two candidate rows are tried, in order:

    1. the heading's next sibling, and
    2. the next sibling of the heading's parent.

    The second candidate is tried whenever the first is missing *or* yields
    no titles, so a heading that is followed by a non-title sibling no longer
    hides the real row.

    Args:
        soup: Parsed page to search.

    Returns:
        The stripped text of each ``<p>`` in the first candidate row that
        produces any titles, skipping empty strings and entries listed in
        ``_NON_TITLE_TEXT``. An empty list when no row produces a title.
    """
    for heading in soup.find_all("h2"):
        if heading.get_text(strip=True) != SECTION_TITLE:
            continue

        candidate_rows = [heading.find_next_sibling()]
        parent = heading.find_parent()
        if parent is not None:
            candidate_rows.append(parent.find_next_sibling())

        for row in candidate_rows:
            if row is None:
                continue
            # Strip each paragraph once, then filter on the stripped text.
            texts = (p.get_text(strip=True) for p in row.find_all("p"))
            titles = [text for text in texts if text and text not in _NON_TITLE_TEXT]
            if titles:
                return titles
    return []
def _titles_from_json_ld(soup: BeautifulSoup) -> List[str]:
    """Use JSON-LD only when an ItemList is explicitly named 'New on Netflix'.

    Each ``<script type="application/ld+json">`` payload is decoded. A payload
    may be a single object or an array of objects; anything that is not a
    dict is skipped instead of raising. The first object whose ``@type`` is
    ``"ItemList"`` and whose ``name`` equals ``SECTION_TITLE`` decides the
    result.

    Args:
        soup: Parsed page to search.

    Returns:
        The ``item.name`` value of every ``itemListElement`` entry that has a
        dict ``item`` with a truthy ``name``. An empty list when no matching
        ItemList is present or when it has no usable entries.
    """
    for script in soup.find_all("script", type="application/ld+json"):
        try:
            data = json.loads(script.string)
        except (TypeError, json.JSONDecodeError):
            # Empty/multi-node script (``.string`` is None) or malformed JSON.
            continue

        # The top level may be one object or an array of objects.
        documents = data if isinstance(data, list) else [data]
        for document in documents:
            if not isinstance(document, dict):
                continue
            if document.get("@type") != "ItemList" or document.get("name") != SECTION_TITLE:
                continue

            elements = document.get("itemListElement") or []
            if not isinstance(elements, list):
                # A lone element may be serialized without the enclosing array.
                elements = [elements]
            return [
                element["item"]["name"]
                for element in elements
                if isinstance(element, dict)
                and isinstance(element.get("item"), dict)
                and element["item"].get("name")
            ]
    return []
def get_new_on_netflix_titles(html: str) -> List[str]:
    """Return the 'New on Netflix' titles found in *html*.

    The markup is parsed with the ``html.parser`` backend. Titles scraped from
    the HTML section headed "New on Netflix" are preferred; when that yields
    nothing, the JSON-LD ItemList named "New on Netflix" is used instead
    (if Netflix adds it).
    """
    parsed = BeautifulSoup(html, "html.parser")
    # ``or`` falls through to the JSON-LD lookup only when the HTML scrape
    # comes back empty.
    return _titles_from_html(parsed) or _titles_from_json_ld(parsed)
def main(timeout: float = 30.0) -> None:
    """Fetch the three genre pages and print their 'New on Netflix' titles.

    Titles from all pages are lowercased, de-duplicated and printed one per
    line in sorted order. Lowercasing happens *before* de-duplication so that
    titles differing only in letter case are printed once.

    Args:
        timeout: Seconds to wait for each HTTP request. Without a timeout
            ``requests`` may block indefinitely on an unresponsive server.

    Raises:
        requests.HTTPError: If any page responds with an error status
            (raised by ``response.raise_for_status()``).
    """
    headers = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) Chrome/141.0.7150.0"}
    unique_titles: Set[str] = set()
    for url in (TRUE_CRIME_URL, DOCUMENTARY_URL, DOCUMENTARY_SERIES_URL):
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        unique_titles.update(
            title.lower() for title in get_new_on_netflix_titles(response.text)
        )
    for title in sorted(unique_titles):
        print(title)


# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| a friend, a murderer | |
| bad boy billionaires: india | |
| ballkids | |
| being gordon ramsay | |
| bill bailey’s wild west australia | |
| breakdown: 1975 | |
| careless | |
| cover-up | |
| deeper | |
| depeche mode: m | |
| elway | |
| evil influencer: the jodi hildebrandt story | |
| famous last words: eric dane | |
| first weapons | |
| forever auckland fc | |
| formula 1: drive to survive | |
| glitter & gold: ice dancing | |
| kidnapped: elizabeth smart | |
| koshien: japan’s field of dreams | |
| magda’s big national health check | |
| mark rober's crunchlabs | |
| matter of time | |
| miracle: the boys of '80 | |
| murder in monaco | |
| one last adventure: the making of stranger things 5 | |
| paparazzi king | |
| people’s republic of mallacoota | |
| queen of chess | |
| reality check: inside america's next top model | |
| scotty james: pipe dream | |
| simon cowell: the next act | |
| starto countdown 2025→2026 | |
| take that | |
| the dinosaurs | |
| the homes that built australia | |
| the investigation of lucy letby | |
| the museum of innocence: inside the story | |
| the philosopher's kitchen jeong kwan | |
| the tiktok killer | |
| the whiteley art scandal | |
| timelesz project -real- | |
| unlocked: a jail experiment | |
| wwe: unreal |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
How to use this in AutoBrr?
Open https://<AUTOBRR_IP>:<AUTOBRR_PORT>/settings/lists and add this list URL: https://gist.githubusercontent.com/luckylittle/58363570af7c432ad3e429bcd4b2155f/raw/netflix.txt