Last active
December 27, 2015 01:18
-
-
Save neoadventist/7243507 to your computer and use it in GitHub Desktop.
Downloads Naruto videos from www.narutonine.com! Run: python getNaruto.py <a> [<b>], where a and b are Naruto episode numbers with a <= b; episodes a through b are downloaded (omit b to download only episode a).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# BELIEVE IT!!
# Startup banner, printed once when the script is loaded.
print("Jesus Saves")
#import libraries
import sys; | |
import time; | |
import urllib; | |
from urllib import FancyURLopener; | |
from random import choice; | |
# Episode range taken from the command line: <first> [<last>].
# `num` is the episode currently being processed (main() advances it and
# getMp4() uses it to name the output file); `numEnd` is the last episode.
# Both are plain module-level names; a `global` statement is only needed
# inside functions that rebind them.
if len(sys.argv) < 2:
    # Exit with a readable message instead of an IndexError traceback.
    sys.exit("Usage: python getNaruto.py <first_episode> [<last_episode>]")
try:
    num = int(sys.argv[1])
    # With a single argument only that one episode is downloaded.
    numEnd = num if len(sys.argv) == 2 else int(sys.argv[2])
except ValueError:
    sys.exit("Episode numbers must be integers, e.g.: python getNaruto.py 1 5")
# Pool of User-Agent strings. Each opener picks one at random so that the
# server doesn't kick us out for always presenting the same client.
user_agents = [
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
    'Opera/9.25 (Windows NT 5.1; U; en)',
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
    'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
    'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
    'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9',
]
def getWebPage(episode):
    """Download the narutonine.com page for *episode* to a local file.

    The page is saved under the fixed relative name 'file' (overwritten on
    every call) using an opener whose User-Agent is picked at random from
    ``user_agents``.

    Returns the name of the saved file.
    """
    pageUrl = 'http://www.narutonine.com/NarutoEpisode' + str(episode) + 'EnglishDubbed.html'
    print("Naruto URL: " + pageUrl)

    class RandomAgentOpener(FancyURLopener, object):
        # URLopener uses the class attribute `version` as its User-Agent.
        version = choice(user_agents)

    savedAs = 'file'
    RandomAgentOpener().retrieve(pageUrl, savedAs)
    return savedAs
def verifyPage(fileName):
    """Extract the embedded player URL from a saved episode page.

    Scans the file line by line for a line containing "unescape"
    (presumably the JavaScript ``unescape("...")`` call that writes the
    player <iframe>), URL-decodes the text following "unescape" and returns
    the value after ``src=``, with double quotes removed and truncated at
    the first "&".

    Returns:
        str: the extracted URL on success.
        406: as soon as a line containing "Error 406" (and not "unescape")
            is met; callers treat this as "retry later".
        "ERROR": if no line contains "unescape", or the decoded text has no
            ``src=`` attribute.
    """
    # urllib.unquote only exists on Python 2; fall back to the Python 3
    # location so this function does not depend on the interpreter version.
    try:
        from urllib import unquote
    except ImportError:
        from urllib.parse import unquote

    # `with` guarantees the file is closed even if reading fails.
    with open(fileName, 'r') as f:
        lines = f.readlines()

    escaped = None
    for line in lines:
        if "unescape" in line:
            # No break: as in the original scan, the last matching line wins.
            escaped = line
        elif "Error 406" in line:
            print("ERROR 406 FOUND!!!")
            return 406

    if escaped is None:  # if for some reason it doesn't work, return an error!
        return "ERROR"

    # Keep the text after the first "unescape" and drop the call's
    # parentheses before decoding the percent-escapes.
    payload = escaped.split("unescape")[1].replace("(", "").replace(")", "")
    iframe = unquote(payload)

    parts = iframe.split("src=")
    if len(parts) < 2:
        # Decoded markup has no src attribute: report the same sentinel as
        # a missing "unescape" line instead of raising IndexError.
        return "ERROR"

    url = parts[1].replace('"></iframe><br>', '').replace('"', '')
    return url.split("&")[0]
def getMp4(url):
    """Download the episode video referenced by the player page at *url*.

    The player page is saved to a scratch file named 'closer' and scanned
    line by line for ``so.addVariable('file','<video url>');``. Every video
    URL found that contains "mp4" is downloaded to ``<num>.mp4``, where
    ``num`` is the module-level current episode number.

    Returns 400 as soon as a line containing "Bad Request" is met;
    otherwise returns None (also when no video URL was found).
    """
    global num
    marker = "so.addVariable('file','"

    class MyOpener(FancyURLopener, object):
        # Random User-Agent for this opener.
        version = choice(user_agents)

    myopener = MyOpener()
    myopener.retrieve(url, 'closer')
    # `with` guarantees the scratch file is closed even if reading fails.
    with open('closer', 'r') as f:
        lines = f.readlines()

    for idx, line in enumerate(lines):
        if "Bad Request" in line:
            print("ERROR: " + str(idx) + ' = ' + line)
            return 400
        # Bug fix: the original tested `find(...) != 1` instead of `!= -1`,
        # so this branch ran for almost every line and any line mentioning
        # "mp4" was treated as a download URL.
        if marker not in line:
            continue
        # Only text that follows the marker can be the file argument, so the
        # piece before the first marker is skipped.
        for piece in line.split(marker)[1:]:
            if "mp4" in piece:
                # Drop the closing `');` of the call and the trailing newline.
                mp4 = piece.replace("');", "").strip()
                print("MP4 URL: " + str(mp4))
                name = str(num) + '.mp4'
                myopener.retrieve(mp4, name)
                print(name + " DONE!!")
def main():
    """Download every episode from the module-level ``num`` up to ``numEnd``.

    For each episode the page is fetched and parsed; while verifyPage()
    reports 406 the fetch is retried with a growing delay (10s, 20s, ...),
    giving up after the retry counter exceeds 10. Episodes whose player URL
    could not be determined are skipped, and only episodes for which
    getMp4() did not report 400 are counted as downloaded.

    ``num`` is advanced in place because getMp4() reads it to name the
    output file.
    """
    global num
    print("Downloading Naruto Episodes " + str(num) + " to " + str(numEnd))
    completedCount = 0
    while num <= numEnd:
        retries = 0
        url = ''
        while True:
            url = verifyPage(getWebPage(num))
            if url != 406 or retries > 10:
                break
            retries += 1
            # if we can't get the webpage, wait and retry--we might be
            # asking for too much too fast!
            time.sleep(retries * 10)
        if url == 406 or url == "ERROR" or url == '':
            # Bug fix: the original passed these sentinels on to getMp4(),
            # which then tried to download "406"/"ERROR" as if it were a URL.
            print("Skipping episode " + str(num) + ": could not find the video page")
        elif getMp4(url) != 400:
            # Count only episodes that were actually handed to the
            # downloader without a "Bad Request" response.
            completedCount += 1
        num += 1
    print("Finished! Downloaded " + str(completedCount) + " Episodes!")
# Start the download only when executed as a script, not when imported.
if __name__ == "__main__":
    main()  # go!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment