Skip to content

Instantly share code, notes, and snippets.

@gamblore
Created April 3, 2013 23:40
Show Gist options
  • Save gamblore/5306465 to your computer and use it in GitHub Desktop.
Save gamblore/5306465 to your computer and use it in GitHub Desktop.
Find folder based on filename. Needs Season Episode stripping.
#!/usr/bin/env python
import os, sys
import re, collections
from operator import itemgetter
# Directory whose entries are the candidate names to match against.
PATH = "/TV"
# Names of the entries directly under PATH, listed once when this module is
# loaded (os.listdir raises OSError if PATH cannot be listed).
DICTIONARY = os.listdir(PATH)
def damerau_levenshtein_distance(s1, s2):
    """Return the edit distance between the sequences ``s1`` and ``s2``.

    The distance counts single-element deletions, insertions and
    substitutions, plus swaps of two adjacent elements, needed to turn
    ``s1`` into ``s2``.

    Args:
        s1: First sequence (typically a string).
        s2: Second sequence (typically a string).

    Returns:
        The distance as a non-negative int; ``0`` when both are equal,
        and ``len`` of the other sequence when one of them is empty.
    """
    len1 = len(s1)
    len2 = len(s2)

    # dist[(i, j)] is the distance between s1[:i + 1] and s2[:j + 1].
    # Index -1 stands for the empty prefix, so the borders hold the cost of
    # building a prefix purely out of insertions or deletions.
    dist = {}
    for i in range(-1, len1):
        dist[(i, -1)] = i + 1
    for j in range(-1, len2):
        dist[(-1, j)] = j + 1

    for i in range(len1):
        for j in range(len2):
            cost = 0 if s1[i] == s2[j] else 1
            dist[(i, j)] = min(
                dist[(i - 1, j)] + 1,         # deletion
                dist[(i, j - 1)] + 1,         # insertion
                dist[(i - 1, j - 1)] + cost,  # substitution
            )
            # Both indices must be >= 1 so that a preceding pair exists to
            # compare against for an adjacent swap.
            if i and j and s1[i] == s2[j - 1] and s1[i - 1] == s2[j]:
                dist[(i, j)] = min(
                    dist[(i, j)],
                    dist[(i - 2, j - 2)] + cost,  # transposition
                )

    # For empty inputs this reads a border cell, e.g. dist[(-1, -1)] == 0.
    return dist[(len1 - 1, len2 - 1)]
def find_match(filename):
    """Print the entry of ``DICTIONARY`` closest to ``filename``.

    Closeness is measured with ``damerau_levenshtein_distance``; when
    several entries share the smallest distance, the one that appears
    first in ``DICTIONARY`` is printed.

    Args:
        filename: The name to look up among the known entries.

    Returns:
        None. The best match is written to standard output. If
        ``DICTIONARY`` is empty, a note is written to standard error and
        nothing is printed.
    """
    if not DICTIONARY:
        # Nothing to compare against; avoid failing on an empty candidate list.
        sys.stderr.write("No entries found in %s\n" % PATH)
        return

    # min() returns the first minimal element, matching what a stable sort
    # followed by taking element 0 would yield, without sorting everything.
    best = min(
        DICTIONARY,
        key=lambda name: damerau_levenshtein_distance(name, filename),
    )
    print(best)
def main():
    """Command-line entry point: look up the first argument with find_match.

    Reads the name to look up from ``sys.argv[1]``. When no argument is
    given, writes "Need a parameter" to standard error and exits with
    status 1 so that calling scripts can detect the usage error.
    """
    if len(sys.argv) < 2:
        sys.stderr.write("Need a parameter\n")
        sys.exit(1)
    find_match(sys.argv[1])


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment