Last active
August 29, 2015 14:07
-
-
Save 5263/7e7b74c43b4bb500eff1 to your computer and use it in GitHub Desktop.
blobhash recursive
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
def blobhash(str1):
    """Return the git blob object id (hex SHA-1) for the content *str1*.

    The digest is computed over the header ``"blob <size>\\0"`` followed by
    the content itself.

    str1 -- file content.  Byte strings are hashed as they are; text
            strings are encoded as UTF-8 first so that the size written
            into the header is a byte count, not a character count.

    Returns the 40 character hexadecimal digest.
    """
    import hashlib
    if not isinstance(str1, bytes):
        # Text input: hash its UTF-8 encoding.
        str1 = str1.encode('utf-8')
    # hashlib only accepts bytes on Python 3, so the header is encoded too.
    header = ("blob %d\0" % len(str1)).encode('ascii')
    digest = hashlib.sha1(header)
    digest.update(str1)
    return digest.hexdigest()
def walk(dir1, extension=".py"):
    """Print ``<blob hash> <path>`` for every matching file below *dir1*.

    dir1      -- directory that is searched recursively with os.walk().
    extension -- file name suffix to select; compared case-insensitively.

    Paths are printed with backslashes replaced by forward slashes.
    Nothing is returned.
    """
    import os
    # The file name is lower-cased before the comparison, so the suffix has
    # to be lower-cased as well or an argument like ".PY" could never match.
    suffix = extension.lower()
    for root, dirs, files in os.walk(dir1):
        for name in files:
            if not name.lower().endswith(suffix):
                continue
            filepath = os.path.join(root, name)
            # Read in binary mode and close the handle deterministically.
            with open(filepath, 'rb') as handle:
                content = handle.read()
            print("%s %s" % (blobhash(content), filepath.replace('\\', '/')))
def findblobs(hashes, ref='HEAD', stopat=None):
    """Scan commits and record in which of them the given blobs are present.

    hashes -- iterable of blob ids (full or abbreviated); an entry of a
              tree matches when its object id starts with the given hash.
              It is iterated several times, so pass a list or tuple.
    ref    -- revision argument(s) handed to ``git log``; when None the
              commits listed by ``git reflog`` are scanned instead.
    stopat -- None: stop once every blob has been seen and none of them is
              in the tree of the current commit.
              True: additionally stop at the first commit whose tree
              contains every blob.
              string: additionally stop at the commit whose id starts with
              that string.

    Returns ``(results, commit)``.  ``results`` maps every hash to a tuple
    ``(first, last)``: ``last`` is the first scanned commit containing the
    blob and ``first`` the last scanned one (git lists the newest commit
    first by default, so ``first`` is the oldest).  Both are None when the
    blob was never seen.  ``commit`` is the id of the last commit that was
    examined, or None if no commit was listed.
    """
    import subprocess

    def git_lines(args):
        """Run ``git <args>`` without a shell; return its non-blank lines."""
        try:
            proc = subprocess.Popen(['git'] + args, stdout=subprocess.PIPE,
                                    universal_newlines=True)
        except OSError:
            # git is not installed: behave like a command without output.
            return []
        # communicate() reads everything and closes the pipe.
        output = proc.communicate()[0]
        return [line for line in output.split('\n') if line.strip()]

    if ref is not None:
        # Split on whitespace so several revision arguments still work,
        # but never let a shell interpret the text.
        log_args = ['log'] + ref.split() + ['--pretty=format:%T %H %s']
    else:
        log_args = ['reflog', '--pretty=format:%T %H %s']

    firsts = {}
    lasts = {}
    commit = None
    for cline in git_lines(log_args):
        try:
            tree, commit, _subject = cline.split(' ', 2)
        except ValueError:
            # Not a "<tree> <commit> <subject>" line: report and skip it.
            print(cline)
            continue
        for bline in git_lines(['ls-tree', '-r', tree]):
            # Entry format: "<mode> <type> <object id>\t<path>"
            fields = bline.split('\t', 1)[0].split(' ', 2)
            if len(fields) != 3:
                continue
            ohash = fields[2]
            for h1 in hashes:
                if ohash.startswith(h1):
                    # Overwritten on every hit, ends at the last match.
                    firsts[h1] = commit
                    if lasts.get(h1) is None:
                        lasts[h1] = commit
        # firsts[h1] == commit exactly when h1 is in the current tree.
        if all((lasts.get(h1) is not None and firsts.get(h1) != commit)
               for h1 in hashes):
            # Every blob was seen before and none is present any more.
            break
        if stopat is True and all((lasts.get(h1) is not None and
                                   firsts.get(h1) == commit)
                                  for h1 in hashes):
            # Every blob is present in this commit.
            break
        if stopat is not None and stopat is not True and \
                commit.startswith(stopat):
            break
    return (dict((h1, (firsts.get(h1), lasts.get(h1))) for h1 in hashes),
            commit)
def findblob(h1, ref='HEAD'):
    """Search the trees of the commits listed by ``git log <ref>`` for a blob.

    h1  -- blob id (full or abbreviated); an entry of a tree matches when
           its object id starts with h1.
    ref -- revision argument(s) handed to ``git log``.

    Every occurrence is printed as ``<commit> <subject> <ls-tree line>``.
    The scan ends at the first commit that no longer contains the blob
    after it has been seen, or at the end of the log.  The most recently
    printed occurrence is then repeated below the line
    ``first appeard in``.

    Returns ``(first, last)`` as abbreviated commit ids: ``last`` is the
    first listed commit containing the blob, ``first`` the last listed one
    (git lists the newest commit first by default).  Returns None when the
    blob was not found or no commit was listed.
    """
    import subprocess

    def git_lines(args):
        """Run ``git <args>`` without a shell; return its non-blank lines."""
        try:
            proc = subprocess.Popen(['git'] + args, stdout=subprocess.PIPE,
                                    universal_newlines=True)
        except OSError:
            # git is not installed: behave like a command without output.
            return []
        # communicate() reads everything and closes the pipe.
        output = proc.communicate()[0]
        return [line for line in output.split('\n') if line.strip()]

    first = None
    last = None
    lastcommit = lastsubject = lastbline = None
    log_args = ['log'] + ref.split() + ['--pretty=format:%T %h %s']
    for cline in git_lines(log_args):
        try:
            tree, commit, subject = cline.split(' ', 2)
        except ValueError:
            # Not a "<tree> <commit> <subject>" line: report and skip it.
            print(cline)
            continue
        for bline in git_lines(['ls-tree', '-r', tree]):
            # Entry format: "<mode> <type> <object id>\t<path>"
            fields = bline.split('\t', 1)[0].split(' ', 2)
            if len(fields) != 3 or not fields[2].startswith(h1):
                continue
            first = commit
            if last is None:
                last = commit
            print("%s %s %s" % (commit, subject, bline))
            lastcommit, lastsubject, lastbline = commit, subject, bline
        if last is not None and first != commit:
            # The blob was seen before but is missing from this commit.
            break
    if last is None:
        return None
    # Also reached when the blob is still present in the last listed
    # commit, so that this result is no longer dropped.
    print('first appeard in ')
    print("%s %s %s" % (lastcommit, lastsubject, lastbline))
    return (first, last)
def refcount(ref):
    """Return the output of ``git rev-list --count <ref>`` as an integer.

    git is run with ``--no-replace-objects``.

    ref -- revision argument(s); None or an empty string yields None
           without running git.

    Returns None when git is unavailable or does not print a number (for
    example because the revision is unknown).
    """
    import subprocess
    if not ref:
        # Avoid asking git about a revision that is literally named "None".
        return None
    command = ['git', '--no-replace-objects', 'rev-list', '--count']
    try:
        # Argument list instead of a shell string: ref is never interpreted
        # by a shell.  Whitespace still separates several arguments.
        proc = subprocess.Popen(command + ref.split(),
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
    except OSError:
        return None
    # communicate() reads everything and closes the pipe.
    output = proc.communicate()[0]
    try:
        return int(output.strip())
    except ValueError:
        # No number was printed; callers rely on None in that case.
        return None
def refparseverify(ref):
    """Resolve *ref* with ``git rev-parse --verify``.

    ref -- a single revision or (abbreviated) object id.

    Returns the stripped output of git when the command exits
    successfully, otherwise None (also for an empty or None argument and
    when git cannot be started).
    """
    import subprocess
    if not ref:
        return None
    try:
        # ref may originate from a patch or another text file, so it is
        # passed as one argument and never interpreted by a shell.
        proc = subprocess.Popen(['git', 'rev-parse', '--verify', ref],
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
    except OSError:
        return None
    output = proc.communicate()[0]
    if proc.returncode != 0:
        return None
    return output.strip()
def gitfiletype(ref):
    """Return the object type printed by ``git cat-file -t <ref>``.

    ref -- a single revision or (abbreviated) object id.

    Returns the stripped output of git (searchblob() compares it with
    'blob') when the command exits successfully, otherwise None (also for
    an empty or None argument and when git cannot be started).
    """
    import subprocess
    if not ref:
        return None
    try:
        # ref may originate from a text file, so it is passed as one
        # argument and never interpreted by a shell.
        proc = subprocess.Popen(['git', 'cat-file', '-t', ref],
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
    except OSError:
        return None
    output = proc.communicate()[0]
    if proc.returncode != 0:
        return None
    return output.strip()
def searchblob(str1, ref='HEAD', stopat=None):
    """Report for a list of blobs in which range of commits they occur.

    str1   -- text with one ``<blob id> <name>`` entry per line; blank
              lines are ignored and the name may be missing.
    ref    -- revision whose history is searched.
    stopat -- passed on to findblobs().

    Ids that gitfiletype() does not report as 'blob' are printed as
    ``<id> <name> not in repo`` and skipped.  For every remaining id one
    line ``<id[:8]> <first>..<last> <names>`` is printed, where first and
    last are the refcount() values of the commits found by findblobs().
    A final line gives the searched range.

    Raises ValueError('invalid ref') when ref cannot be resolved.
    """
    hashes = []
    names = {}
    for line in str1.split('\n'):
        entry = line.strip()
        if not entry:
            continue
        # partition() tolerates an entry that consists of an id only.
        h1, _, name = entry.partition(' ')
        if h1 in names:
            # Same blob under another name: extend the label, but do not
            # search for (and report) the blob a second time.
            names[h1] += ',' + name
        elif gitfiletype(h1) != 'blob':
            print('%s %s not in repo' % (h1, name))
        else:
            hashes.append(h1)
            names[h1] = name
    ref = refparseverify(ref)
    if ref is None:
        raise ValueError('invalid ref')
    d1, stoppedat = findblobs(hashes, ref=ref, stopat=stopat)
    for h1 in hashes:
        first, last = d1.get(h1, (None, None))
        print('%s %s..%s %s' % (h1[:8],
              refcount(first), refcount(last), names.get(h1, '?')))
    print('searched %s..%s starting with %s' %
          (refcount(stoppedat), refcount(ref), ref))
    # NOTE: blobs without a result could additionally be searched with
    # findblobs(..., ref='--all') or in the reflog (findblobs(..., ref=None)).
def findbaseforpath(patch):
    """Find a commit that contains all blobs a patch is based on.

    patch -- text of a patch.  The pre-image blob id is taken from every
             line of the form ``index fec529f..4b20705 100644``; ids that
             consist of zeros only are ignored.

    The set of resolved ids, the set of ids as written in the patch and
    the commit at which findblobs(..., ref='HEAD', stopat=True) stopped
    are printed.  That commit is also returned.

    Raises ValueError when an id cannot be resolved by refparseverify().
    """
    import re
    # The id is read up to the ".." instead of slicing a fixed 7
    # characters, because the abbreviation length is not fixed.  A list of
    # several comma separated ids before the ".." is accepted as well, in
    # which case the first one is used.
    index_line = re.compile(
        r'^index ([0-9a-fA-F]+)(?:,[0-9a-fA-F]+)*\.\.[0-9a-fA-F]+')
    shorthashes = set()
    fullhashes = set()
    for line in patch.split('\n'):
        found = index_line.match(line.strip())
        if found is None:
            continue
        shorthash = found.group(1)
        if not shorthash.strip('0'):
            # All zeros: there is no pre-image blob to look for.
            continue
        shorthashes.add(shorthash)
        fullhash = refparseverify(shorthash)
        if fullhash is None:
            raise ValueError('Blob %s not present' % shorthash)
        fullhashes.add(fullhash)
    print(fullhashes)
    print(shorthashes)
    _results, lastcommit = findblobs(tuple(fullhashes), ref='HEAD',
                                     stopat=True)
    print(lastcommit)
    return lastcommit
# Sample input for searchblob(): one "<blob id> <file name>" entry per line.
testdata = "\n".join((
    "cee38a2c1830bfde8b8d4dd46aeb41a64a71303f Draft.py",
    "c9272acba2a0243a5f8eda6f57c04908a20de462 importDWG.py",
    "00f845039587cd0ce43be796b610b0263186bc57 importDXF.py",
    "f9f48bd8ad766841cb90049ab5c4b7c257909378 importSVG.py",
    "6c2f613bcfc0379ac1f9bb90a01efd24df3f33c1 TestDraft.py",
    "",  # keeps the newline after the last entry
))
if __name__ == '__main__':
    import sys
    # Read a patch from standard input and print the commit reported by
    # findbaseforpath().  The former "if len(sys.argv):" test was dropped:
    # for a script sys.argv always holds at least the script name, so the
    # condition could never be false.
    findbaseforpath(sys.stdin.read())
    # Other ways of using this module, kept for reference:
    #for dir1 in (sys.argv[1:] or (".")):
    #    walk(dir1)
    #print findblob(*sys.argv[1:])
    #for key,value in findblobs(sys.argv[1].split(','),sys.argv[2]).iteritems():
    #    print '%s %s..%s' % (key,refcount(value[0]),refcount(value[1]))
    #searchblob(testdata,ref='origin/master',\
    #    stopat='691fd1128672c8bd472cece87c9e9d07b71d6fee')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment