Created
August 19, 2020 02:32
-
-
Save piyh/ad2c45091228f71ee18a1ed707145639 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sometimes the "best" format that youtube-dl selects changes.
# This causes a second file of the same video to download if you're archiving entire channels.
# When that happens, this script clears out the duplicates, provided the YouTube id was included in the filename.
import os
import re
from datetime import datetime
from pathlib import Path
from pprint import pprint as pp

import pandas as pd
def returnDupes(li):
    """Return the set of elements that occur more than once in ``li``.

    ``li`` is walked exactly once, so any iterable of hashable items works.
    An element is reported the second time it is encountered; later repeats
    add nothing new because the result is a set.
    """
    alreadyMet = set()
    repeated = set()
    for item in li:
        if item in alreadyMet:
            repeated.add(item)
            continue
        alreadyMet.add(item)
    return repeated
# Directory that holds the downloaded channel archive.
wd = Path('D:/kingcobrajfs/')

# Container extensions treated as video files (compared case-sensitively).
videoSuffixes = ('.mkv', '.webm', '.mp4')

# The 11-character id is anchored to the END of the file name:
#   <anything><id>[ "]" ][ ".f<digits>" ]"."<extension>
# Measuring back from the combined length of Path.suffixes is not safe: when
# the title contains a dot, everything after the first dot counts as a suffix,
# the slice comes back empty, and unrelated videos then share the id ''.
# The optional ".f<digits>" part keeps names such as "title-ID.f137.mp4"
# working (presumably per-format intermediate files -- verify against your
# downloader); the optional "]" tolerates a "title [ID].ext" layout.
ytIDPattern = re.compile(r'([0-9A-Za-z_-]{11})\]?(?:\.f\d+)?\.[^.]+$')


def extractYtID(fileName):
    """Return the YouTube id embedded at the end of ``fileName``, or None.

    Only an 11-character run of id characters (letters, digits, ``_``, ``-``)
    sitting directly before the extension is accepted, so a file whose name
    carries no id is reported as None instead of yielding a garbage key.
    """
    found = ytIDPattern.search(fileName)
    return found.group(1) if found else None


def collectVideos(directory):
    """Describe every video file in ``directory`` that carries a YouTube id.

    Returns a list of dicts with the keys 'ytID', 'path', 'size' and 'mtime'.
    Non-video files, sub-directories and videos without a recognisable id
    are left out, so they can never be considered for deletion.
    """
    videos = []
    for file in directory.iterdir():
        if file.suffix not in videoSuffixes or not file.is_file():
            continue
        ytID = extractYtID(file.name)
        if ytID is None:
            continue
        info = file.stat()  # one stat() call serves both size and mtime
        videos.append({'ytID': ytID,
                       'path': file,
                       'size': info.st_size,
                       'mtime': info.st_mtime})
    return videos


def deleteOlderDupes(videos):
    """Delete all but the most recently modified file for each YouTube id.

    ``videos`` is an iterable of dicts as produced by ``collectVideos``.
    Ids that occur only once are left alone. When modification times tie,
    the entry that appears last in ``videos`` is the one kept.

    Returns the list of paths that were deleted.
    Raises OSError (from ``Path.unlink``) if a file cannot be removed.
    """
    byID = {}
    for video in videos:
        entry = {'path': video['path'], 'mtime': video['mtime']}
        byID.setdefault(video['ytID'], []).append(entry)

    deleted = []
    for copies in byID.values():
        if len(copies) < 2:
            continue
        # Stable sort: oldest first, newest (the keeper) last.
        copies.sort(key=lambda entry: entry['mtime'])
        saving = copies[-1]
        for deleting in copies[:-1]:
            print(f"deleting {deleting} as it's older than {saving}")
            deleting['path'].unlink()
            deleted.append(deleting['path'])
    return deleted


def main():
    """Scan ``wd`` and remove the older copies of duplicated videos."""
    # Kept from the original script; also fails fast when wd is missing.
    os.chdir(wd)
    videos = collectVideos(wd)
    dupeSet = returnDupes([video['ytID'] for video in videos])
    deleteOlderDupes([video for video in videos if video['ytID'] in dupeSet])


# Only touch the disk when run as a script, never on a plain import.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment