Last active
November 1, 2016 14:20
-
-
Save wyyqyl/9a5ea9b5ffe675514585 to your computer and use it in GitHub Desktop.
Download subtitles from shooter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import contextlib
import hashlib
import json
import math
import os
import sys
import urllib
import urllib2

try:
    import chardet
except ImportError:
    print 'Install chardet via `pip install chardet`'
    sys.exit(1)
def download(url, headers):
    """Fetch *url* with the given request headers and return the raw body.

    Args:
        url: Address to request.
        headers: Dict of HTTP headers sent with the request.

    Returns:
        The complete response body as returned by ``read()``.

    Raises:
        Whatever ``urllib2.urlopen`` raises on network or HTTP failure;
        nothing is caught here.
    """
    # An empty data string is passed on purpose-preserving grounds: urllib2
    # issues a POST instead of a GET whenever a data argument is supplied.
    req = urllib2.Request(url, '', headers)
    # urllib2 responses are not context managers in Python 2, so closing()
    # guarantees the connection is released even if read() raises.
    with contextlib.closing(urllib2.urlopen(req)) as response:
        return response.read()
def get_hash(name):
    """Return the four MD5 block digests used to identify a video file.

    Up to 4096 bytes are read at each of four offsets, in this order:
    4096, two thirds of the file size, one third of the file size, and
    8192 bytes before the end. Offsets are clamped to the file, so short
    files yield digests of truncated (possibly empty) blocks.

    Args:
        name: Path of the file to hash.

    Returns:
        A list of four hexadecimal MD5 digest strings, in the order above.
    """
    block_size = 4096
    digests = []
    with open(name, 'rb') as f:
        f.seek(0, os.SEEK_END)
        size = f.tell()
        # Floor division keeps every offset an exact integer regardless of
        # whether "/" means integer or true division in the running
        # interpreter; note the 2/3 offset is (size // 3) * 2, not
        # floor(size * 2 / 3).
        offsets = (
            min(size, 4096),
            size // 3 * 2,
            size // 3,
            max(0, size - 8192),
        )
        for start in offsets:
            end = min(start + block_size, size)
            f.seek(start)
            digests.append(hashlib.md5(f.read(end - start)).hexdigest())
    return digests
def _detect_encoding(content):
    """Return chardet's encoding guess for *content*, or None if it has none."""
    encoding = chardet.detect(content).get('encoding')
    if encoding is None:
        # Is it UTF-16LE without BOM?
        encoding = chardet.detect(b'\xFF\xFE' + content).get('encoding')
    if encoding is None:
        # Is it UTF-16BE without BOM?
        # NOTE(review): a BOM prefix may make chardet always report an
        # encoding, which would leave this fallback unreachable -- confirm.
        encoding = chardet.detect(b'\xFE\xFF' + content).get('encoding')
    return encoding


def sub_downloader(path, lang):
    """Download subtitles for one video file from the shooter.cn API.

    Files whose extension is not .mkv, .avi or .mp4 (compared
    case-insensitively) are ignored. Each downloaded subtitle is re-encoded
    to UTF-8 and written next to the video as
    ``<video name>.<lang>.<index>.<ext>``. Progress is reported on stdout.

    Args:
        path: Path of the video file.
        lang: Language code passed to the API as the ``lang`` parameter.

    Returns:
        None.

    Raises:
        Errors from the initial API request or from parsing its JSON are
        not caught; errors while fetching or saving an individual subtitle
        are printed and skipped.
    """
    dirname = os.path.dirname(path)
    filename = os.path.basename(path)
    name, ext = os.path.splitext(filename)
    if ext.lower() not in ('.mkv', '.avi', '.mp4'):
        return

    headers = {'User-Agent': 'wyyqyl'}
    # The four block digests are joined with a URL-encoded ';'.
    filehash = '%3B'.join(get_hash(path))
    url = ('http://www.shooter.cn/api/subapi.php?filehash=' + filehash +
           '&format=json&pathinfo=' + urllib.pathname2url(filename) +
           '&lang=' + lang)
    print(url)
    response = download(url, headers)
    # A body starting with byte 0xff is the "no subtitle" marker; an empty
    # body is treated the same way instead of failing on response[0].
    if not response or response[:1] == b'\xff':
        print('β [{}] It doesn\'t exist'.format(lang))
        return
    print('π’ [{}] It exists, downloading'.format(lang))

    sub_infos = json.loads(response)
    idx = 0
    for sub_info in sub_infos:
        if 'Files' not in sub_info:
            continue
        success = False
        for file_info in sub_info['Files']:
            try:
                # Replaces a literal backslash-u0026 sequence left in the
                # link text (not the already-decoded '&' character).
                link = file_info['Link'].replace('\\u0026', '&')
                content = download(link, headers)
                encoding = _detect_encoding(content)
                if encoding is None:
                    # I got no idea, skip downloading subtitle
                    continue
                content = content.decode(encoding, 'ignore').encode('utf-8')
                # format() avoids the implicit ASCII decode that str + unicode
                # concatenation triggers for non-ASCII video names.
                subtitle_name = '{}.{}.{}.{}'.format(
                    name, lang, idx, file_info['Ext'])
                with open(os.path.join(dirname, subtitle_name), 'wb') as subtitle:
                    subtitle.write(content)
                success = True
            except Exception as e:
                # Best effort: report the failure and move on to the next file.
                print(e)
                print('β [{}] Error occured'.format(lang))
        # The index advances once per subtitle entry that saved any file.
        if success:
            idx += 1
    print('β [{}] {} subtitles downloaded'.format(lang, idx))
def main():
    """Walk the directory given as the first CLI argument and fetch subtitles.

    Every file found under the directory is passed to ``sub_downloader``
    once for 'chn' and once for 'eng'. When no directory argument is given,
    a usage line is written to stderr and the process exits with status 1.
    """
    if len(sys.argv) < 2:
        sys.stderr.write('Usage: python <script> <directory>\n')
        sys.exit(1)

    for root, _, files in os.walk(sys.argv[1]):
        for filename in files:
            path = os.path.join(root, filename)
            print('πͺ Processing {}'.format(path))
            for lang in ['chn', 'eng']:
                sub_downloader(path, lang)
            # Blank separator between files.
            print('\n')


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment