Created
February 9, 2019 16:23
-
-
Save Nosgoroth/0fab5bf3888e114ab42b6b1dcf4159da to your computer and use it in GitHub Desktop.
Download all chapters of Taishou Otome Otogibanashi from Sea Otter Scans as .cbz files, for archival.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os, sys, zipfile, re, requests, json, tempfile, time | |
from pprint import pprint | |
class Chapter:
    """A single chapter scraped from the reader: its URL and display title."""

    # Defaults before __init__ assigns per-instance values.
    url = None
    title = None

    def __init__(self, tup):
        """Unpack a (url, title) pair as produced by the chapter-list regex."""
        url, title = tup
        self.url = url
        self.title = title
def getImageListForChapterWithUrl(url):
    """Fetch a chapter reader page and extract its page-image URLs.

    The reader embeds the page list as a JavaScript literal
    (``var pages = [...];``); pull it out with a regex and parse it as JSON.

    Returns a list of image URL strings, or None on any failure.
    """
    resp = requests.get(url)
    match = re.search(r'var pages = (\[.*\]);', resp.text)
    if not match:
        print("Couldn't retrieve image list from page")
        return None
    try:
        pages = json.loads(match.group(1))
    except ValueError:
        # json raises JSONDecodeError, a ValueError subclass, on bad input.
        print("Couldn't parse image list JSON")
        return None
    return [page["url"] for page in pages]
def getChapterListFromBaseUrl(url):
    """Fetch the series index page and scrape its chapter links.

    Matches anchors shaped like
    ``<a href="https://.../read/..." title="Chapter N: ...">``.

    Returns a list of Chapter objects, or None when no links were found.
    """
    resp = requests.get(url)
    # Using regex to parse html. Zalgo is Tony the Pony he COMES
    matches = re.findall(r'<a href="(http[^"]+\/read\/[^"]+)" title="([^"]+)"', resp.text)
    if not matches:
        print("Couldn't retrieve chapter list from page")
        return None
    return [Chapter(pair) for pair in matches]
def downloadImageListToZip(title, images):
    """Download every URL in *images* into a .cbz (zip) archive.

    The archive is named after *title*, sanitized for the filesystem, and
    written to the current directory. Each image is fetched into a temp
    directory, added to the zip (by basename, so the archive has a flat
    layout), then deleted.

    Returns True on full success; False when the target file already
    exists or any image failed. Ctrl-C removes partial output and
    re-raises so the caller can stop cleanly.
    """
    tempdir = tempfile.mkdtemp("taishoo")
    success = True
    zipname = None
    try:
        # Sanitize the title into a safe filename, collapsing runs of
        # disallowed characters/whitespace into single spaces.
        zipname = re.sub(r'[^\d\w\.\-\_]', ' ', title) + ".cbz"
        zipname = re.sub(r'[\s]+', ' ', zipname)
        zipname = re.sub(r'[\s]+\.cbz$', '.cbz', zipname)
        if os.path.exists(zipname):
            print("File already exists")
            return False
        with zipfile.ZipFile(zipname, 'w') as zipobj:
            for i, imageurl in enumerate(images, start=1):
                filepath = None
                try:
                    _, ext = os.path.splitext(imageurl)
                    # Zero-padded index keeps pages sorted in CBZ readers.
                    filepath = os.path.join(tempdir, str(i).zfill(3) + ext)
                    r = requests.get(imageurl, allow_redirects=True)
                    # Don't silently archive an HTML error page as an image.
                    r.raise_for_status()
                    with open(filepath, 'wb') as f:
                        f.write(r.content)
                    # Store by basename so the temp path doesn't leak into
                    # the archive's internal layout.
                    zipobj.write(filepath, os.path.basename(filepath))
                    os.remove(filepath)
                    print("Downloaded image " + str(i))
                except KeyboardInterrupt:
                    if filepath:
                        try:
                            os.remove(filepath)
                        except OSError:
                            pass
                    raise
                except Exception:
                    if filepath:
                        try:
                            os.remove(filepath)
                        except OSError:
                            pass
                    print("Error getting image " + str(i))
                    success = False
                    # Back off briefly before trying the next image.
                    time.sleep(2)
    except KeyboardInterrupt:
        # Remove the partial archive before propagating the interrupt.
        if zipname:
            try:
                os.remove(zipname)
            except OSError:
                pass
        raise
    except Exception:
        if zipname:
            try:
                os.remove(zipname)
            except OSError:
                pass
        print("An error occurred")
        success = False
    finally:
        # Best-effort cleanup of the (now empty) temp directory.
        try:
            os.rmdir(tempdir)
        except OSError:
            pass
    return success
def main():
    """Scrape the series chapter list and download each chapter as a .cbz."""
    try:
        print("Retrieving chapter list...")
        chapters = getChapterListFromBaseUrl("https://reader.seaotterscans.com/series/taishau_wotome_otogibanashi/")
        if not chapters:
            # The scraper returns None on failure; don't crash on len(None).
            print("No chapters found; aborting.")
            return
        print("Found " + str(len(chapters)) + " chapters.")
        errors = False
        for chapter in chapters:
            print("Processing chapter: " + chapter.title)
            images = getImageListForChapterWithUrl(chapter.url)
            if not images:
                # Per-chapter scrape failure: record it and move on.
                print("No images found; skipping chapter.")
                errors = True
                continue
            print("Found " + str(len(images)) + " images.")
            res = downloadImageListToZip(chapter.title, images)
            if res:
                print("Chapter downloaded successfully")
            else:
                print("Chapter downloaded with errors, or not downloaded")
                errors = True
        if errors:
            print("Finished with errors")
        else:
            print("Finished successfully!")
    except KeyboardInterrupt:
        # Ctrl-C ends the run quietly; partial files were already cleaned up.
        pass


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I don't really know, but you could try adding the
verify=False
parameter to the requests calls, according to Stack Overflow. Be aware this disables TLS certificate verification, so only do it if you trust the connection.