Created
February 28, 2022 06:51
-
-
Save TheWhatis/9b85eebf4d187211c5ddf1c81cb59742 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import | |
import re | |
import os | |
import sys | |
import json | |
import js2py | |
import random | |
import requests | |
import tldextract | |
# From | |
from time import sleep | |
from translate import Translator | |
from bs4 import BeautifulSoup as bs | |
from user_agent import generate_navigator as get_uagent | |
# Browser-like request headers; the User-Agent is randomized per run so
# repeated scrapes don't present an identical fingerprint.
headers = {
    "Host": "rt.pornhub.com",
    "User-Agent": get_uagent()['user_agent'],
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
    "Accept-Language": "ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3",
    "Accept-Encoding": "gzip, deflate, br",
    "Referer": "https://duckduckgo.com/",
    "DNT": "1",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "cross-site",
    "Sec-GPC": "1",
    "Cache-Control": "max-age=0",
    "TE": "trailers"
}
# No proxy by default. `None` is the value requests documents for "no
# proxies"; the original `False` only worked because Session.request
# coerces any falsy value with `proxies or {}`.
proxies = None
def get_domain(url):
    """Return the ``tldextract`` result (subdomain/domain/suffix) for *url*.

    The query string is dropped first, then a regex pulls out the
    ``http(s)://host...`` prefix for extraction.

    Fix: the original indexed ``findall(...)[0]`` unconditionally, which
    raised IndexError for URLs without an http(s) scheme; we now fall
    back to the raw (query-stripped) URL, which tldextract also accepts.
    """
    # Everything after '?' can never influence the domain.
    base = url.split("?")[0]
    matches = re.findall(
        r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
        base,
    )
    target = matches[0] if matches else base
    return tldextract.extract(target)
def get_video_urls(page = 1):
    """Collect absolute video-page URLs from one listing page.

    :param page: 1-based listing page number; pages > 1 request the
                 ``/video?page=N`` endpoint, page 1 requests the root.
    :return: list of absolute URLs (possibly empty).

    Fix: a non-200 response now yields ``[]`` instead of ``False``.
    The empty list is equally falsy for existing ``if urls_video:``
    callers, but it no longer breaks ``len(urls_video)`` in __main__.
    """
    domain = "https://rt.pornhub.com"
    url = domain
    if page > 1:
        url = url + "/video?page=" + str(page)
    response = requests.get(url)
    if response.status_code != 200:
        # Missing / blocked page: keep the return type iterable.
        return []
    soup = bs(response.text, 'html.parser')
    links = []
    for item in soup.select(".sectionWrapper li .wrap .title a"):
        links.append(domain + item.get("href").strip())
    return links
def get_video(url, allow_quality = False):
    """Fetch one video page, extract its media definitions, download via ffmpeg.

    :param url: absolute video-page URL.
    :param allow_quality: ``False`` -> default ``['720']``; the string
        ``'all'`` -> every known quality; any other single quality string
        or a list of quality strings otherwise. A list containing none of
        the known qualities falls back to the default.
    :return: ``True`` when ffmpeg exits 0, ``False`` when the target file
        already exists / ffmpeg fails / the player script cannot be
        parsed, ``None`` when no media definition matched.
    """
    response = requests.get(url=url, headers=headers, proxies=proxies)
    soup = bs(response.text, 'html.parser')
    scripts = soup.select("#player script")
    name_title = soup.select("h1.title .inlineFree")

    allows = ['240', '480', '720', '1080']
    if allow_quality:
        if isinstance(allow_quality, str):
            if allow_quality == 'all':
                allow_quality = allows
            else:
                # Fix: a bare string like '480' used to stay a string, so
                # the later `not in` test was a substring check.
                allow_quality = [allow_quality]
        elif isinstance(allow_quality, list):
            # Fix: the original computed this "no known quality requested"
            # flag (add_default) but never applied it.
            if not any(q in allow_quality for q in allows):
                allow_quality = ['720']
        else:
            allow_quality = ['720']
    else:
        allow_quality = ['720']

    # The first inline <script> of the player block carries the flashvars JS.
    script = ""
    for item in scripts:
        script = item.text
        break

    # Sanitize the title: these characters would break the shell command
    # or the filesystem path built below.
    name = ""
    exclude_chars = ['[', ']', '(', ')', "'", '"', "\\", "/", "|", "&", "*", "%", "$"]
    for item in name_title:
        name = item.text
        for char in exclude_chars:
            name = name.replace(char, "")
        name = name.strip()
        break

    # Locate the `var flashvars_XXXX = {...}` variable name.
    var_media = None
    for line in script.split("\n"):
        match = re.match(r'.*var flashvars.* =', line)
        if match:
            var_media = match.group(0).replace("var ", '').replace("=", "").replace("{", "").strip()
            break
    if not script or var_media is None:
        # Fix: page layout changed or the request was blocked. The
        # original concatenated None here and raised TypeError.
        return False

    # Wrap the page script so js2py can hand back the media definitions.
    script = (
        "function get_elem(){\n var playerObjList = {};\n"
        + script + "\n"
        + "return(" + var_media + "['mediaDefinitions']);\n}"
    )
    result = js2py.eval_js(script)
    videos = result().to_list()

    if not os.path.exists('./videos'):
        os.mkdir("./videos")
    if not os.path.exists("./logs_downloads"):
        os.mkdir("./logs_downloads")

    for video in videos:
        if 'get_media' in video['videoUrl'] or not isinstance(video['quality'], str):
            continue
        # Fix: strip before comparing (the original compared the raw value
        # and only stripped afterwards, so padded qualities never matched).
        video['quality'] = video['quality'].strip()
        if video['quality'] not in allow_quality:
            continue
        print("Downloading video with name '" + name + "' and quality '" + video['quality'] + "'")
        path_video = './videos/' + video['quality']
        if not os.path.exists(path_video):
            os.mkdir(path_video)
        path_video = path_video + "/" + name + ".mp4"
        if os.path.exists(path_video):
            # Already downloaded on a previous run.
            return False
        log_name = name.replace(" ", "").replace(".", "")
        # NOTE(security): videoUrl comes from a remote page and is
        # interpolated into a shell command; prefer
        # subprocess.run([...], shell=False) in a future revision.
        downloaded = os.system(
            'ffmpeg -i "' + video['videoUrl'] + '" -c copy -bsf:a aac_adtstoasc "'
            + path_video + '" 2> ./logs_downloads/' + log_name + '_downloads.log'
        )
        print(downloaded)
        return downloaded == 0
def get_arg(param, onlyvalue = False):
    """Scan ``sys.argv`` for ``--<param>`` and return its value.

    :param param: option name without the leading dashes.
    :param onlyvalue: when true, return the bare value; otherwise return
        a ``{'key': param, 'value': ...}`` dict.
    :return: the captured value (the token following the flag, stripped),
        ``True`` when the flag is the final token with no value, or
        ``False`` when the flag does not appear at all.
    """
    tokens = sys.argv
    last = len(tokens) - 1
    flag = '--' + param
    found = False
    expecting = False
    for idx, token in enumerate(tokens):
        if idx == 0:
            # Skip the program name itself.
            continue
        if expecting:
            captured = token.strip()
            found = captured if onlyvalue else {'key': param, 'value': captured}
            expecting = False
        if token == flag:
            expecting = True
            if idx == last:
                # Flag given as the very last token: treat as boolean.
                found = True if onlyvalue else {'key': param, 'value': True}
    return found
if __name__ == '__main__':
    help_string = """
    --limit - limit how many videos are downloaded (example: download_pornhub --limit 10)
    --start-page - start from the given listing page (example: download_pornhub --start-page 5)
    --quality - choose the video quality (example: download_pornhub --quality 'all/240, 720/240')
    """
    # get_arg returns False when a flag is absent; int(False) == 0 keeps
    # the "no limit" / "no start page" cases falsy.
    limit = int(get_arg('limit', True))
    start_page = int(get_arg('start-page', True))
    quality = get_arg('quality', True)
    help_v = get_arg('help', True)
    if help_v:
        print(help_string)
    else:
        if not limit and not start_page and not quality:
            print("If you want to close, click Ctrl-c")
            print("Print main.py --help for help")
        # Normalize --quality into the list form get_video() expects;
        # 'all' is passed through as-is and expanded there.
        if quality:
            if ',' in quality:
                quality = [qual.strip() for qual in quality.split(",")]
            elif not quality == 'all':
                quality = [quality]
        page = start_page - 1 if start_page else 0
        # Count of successfully downloaded videos, compared against --limit.
        downloaded_count = 0
        break_while = False
        while True:
            page = page + 1
            if break_while:
                break
            urls_video = get_video_urls(page)
            if urls_video:
                for url in urls_video:
                    downloaded_count = downloaded_count + 1
                    video = get_video(url, quality)
                    if not video:
                        # Already present (or download failed): do not
                        # count it against the limit.
                        print("Downloaded!")
                        downloaded_count = downloaded_count - 1
                    if limit:
                        if limit == downloaded_count:
                            break_while = True
                            break
            else:
                # Fix: the original concatenated the int page into the
                # message (TypeError) and called len() on a False result
                # before this truthiness check.
                print("Page '" + str(page) + "' not found")
                break_while = True
                break
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
backports.zoneinfo==0.2.1 | |
beautifulsoup4==4.10.0 | |
bs4==0.0.1 | |
certifi==2021.10.8 | |
charset-normalizer==2.0.12 | |
click==8.0.4 | |
filelock==3.6.0 | |
idna==3.3 | |
iso8601==1.0.2 | |
Js2Py==0.71 | |
libretranslatepy==2.1.1 | |
lxml==4.8.0 | |
pycryptodome==3.14.1 | |
pyee==9.0.4 | |
pyjsparser==2.7.1 | |
pytz-deprecation-shim==0.1.0.post0 | |
requests==2.27.1 | |
requests-file==1.5.1 | |
six==1.16.0 | |
soupsieve==2.3.1 | |
tldextract==3.2.0 | |
translate==3.6.1 | |
typing-extensions==4.1.1 | |
tzdata==2021.5 | |
tzlocal==4.1 | |
urllib3==1.26.8 | |
user-agent==0.1.10 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment