Last active
June 12, 2018 17:59
-
-
Save swang373/1b71ae6c4800ce7489127f35ccfcbb8d to your computer and use it in GitHub Desktop.
Check the integrity of files in an AnalysisTools output directory
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob | |
import ROOT | |
import concurrent.futures | |
def check_file(path):
    """Open a ROOT file and count the entries of its 'Events' tree.

    Args:
        path: Location of the ROOT file, passed to ``ROOT.TFile.Open``.

    Returns:
        A ``(path, n_entries)`` tuple. ``n_entries`` is ``None`` when the
        file cannot be opened, is flagged as recovered or zombie, or when
        reading the 'Events' object raises; otherwise it is the value
        returned by ``GetEntriesFast()``.
    """
    f = ROOT.TFile.Open(path)
    # The file is somehow unreadable.
    if not f:
        return path, None
    try:
        # The file is incomplete or corrupt. This check lives inside the
        # try block so that the handle is still closed by the finally
        # clause on this early-return path.
        if f.TestBit(ROOT.TFile.kRecovered) or f.IsZombie():
            return path, None
        events = f.Get('Events')
        n_entries = events.GetEntriesFast()
        return path, n_entries
    except Exception:
        # Any failure while inspecting the file or reading the tree
        # (e.g. no usable 'Events' object) marks the file as bad.
        return path, None
    finally:
        f.Close()
if __name__ == '__main__':
    # Output directory globbing pattern; {0} is replaced by the sample name.
    SEARCH_PATTERN = '/path/to/the/output/dir/{0}/*.root'
    # The sample names.
    SAMPLES = ['ZH', 'DYToLL', 'QCDHT300']
    # Size of the thread pool used to check files concurrently.
    MAX_WORKERS = 42

    for sample in SAMPLES:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Checking {0}'.format(sample))
        paths = glob.glob(SEARCH_PATTERN.format(sample))
        bad_paths = []
        with concurrent.futures.ThreadPoolExecutor(MAX_WORKERS) as executor:
            futures = [executor.submit(check_file, path) for path in paths]
            # Handle results as they finish rather than in submission order.
            for future in concurrent.futures.as_completed(futures):
                path, n_entries = future.result()
                if n_entries is None:
                    # check_file could not read the file or its tree.
                    bad_paths.append(path)
                elif n_entries == 0:
                    print('{0} has a tree with no events!'.format(path))
        print('There are {0!s} good files out of a total of {1!s}'.format(len(futures) - len(bad_paths), len(futures)))
        # Only report corrupt files when there are some; an empty listing
        # would be misleading.
        if bad_paths:
            print('The following files are likely corrupt: {0}'.format('\n'.join(bad_paths)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment