Created
March 20, 2025 09:28
-
-
Save ram1123/e365b0ed7d8b7a0fc8fc3e628d41880c to your computer and use it in GitHub Desktop.
script to remove the duplicates in CMS dataset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream>
#include <memory>
#include <set>
#include <tuple>

#include <TFile.h>
#include <TString.h>
#include <TTree.h>
void removeDuplicates() { | |
//TString prefix = "/raid/raid9/qguo/Run2/after/Run2_2/new/CMSSW_10_2_18/src/data_2018_NotBestMelaCand"; | |
//TString prefix = "/publicfs/cms/data/hzz/guoqy/newNTuple_UL/2018/Data/DataUL2018_all"; | |
//TString prefix = "/publicfs/cms/user/qyguo/lacked_Data1718/18/DoubleMuon_Run18A_1"; | |
TString prefix = "/afs/cern.ch/work/q/qguo/public/NTuple/CMSSW_10_6_26/src/MuonEG_Run18D_4_test"; | |
TString filename = prefix+".root"; | |
std::cout<<filename<<std::endl; | |
TFile *oldfile = new TFile(filename); | |
oldfile->cd("Ana"); | |
//TTree *oldtree = (TTree*)oldfile->Get("Ana/passedEvents"); | |
TTree *oldtree = (TTree*)gDirectory->Get("passedEvents"); | |
//TTree *oldtree = (TTree*)oldfile->Get("passedEvents"); | |
Long64_t nentries = oldtree->GetEntries(); | |
std::cout<<nentries<<" total entries."<<std::endl; | |
ULong64_t Run, LumiSect, Event; | |
bool passedZ4lSelection; | |
oldtree->SetBranchAddress("Run",&Run); | |
oldtree->SetBranchAddress("LumiSect",&LumiSect); | |
oldtree->SetBranchAddress("Event",&Event); | |
//Create a new file + a clone of old tree in new file | |
TFile *newfile = new TFile( | |
prefix+"_noDuplicates.root" | |
,"recreate"); | |
TTree *newtree = oldtree->CloneTree(0); | |
std::set<TString> runlumieventSet; | |
int nremoved = 0; | |
for (Long64_t i=0;i<nentries; i++) { | |
if (i%100000==0) std::cout<<i<<"/"<<nentries<<std::endl; | |
oldtree->GetEntry(i); | |
TString s_Run = std::to_string(Run); | |
TString s_Lumi = std::to_string(LumiSect); | |
TString s_Event = std::to_string(Event); | |
TString runlumievent = s_Run+":"+s_Lumi+":"+s_Event; | |
if (runlumieventSet.find(runlumievent)==runlumieventSet.end()) { | |
runlumieventSet.insert(runlumievent); | |
newtree->Fill(); | |
} else { | |
nremoved++; | |
} | |
//if (passedZ4lSelection) newtree->Fill(); | |
} | |
std::cout<<nremoved<<" duplicates."<<std::endl; | |
newtree->Print(); | |
newtree->AutoSave(); | |
//delete oldfile; | |
delete newfile; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment