Skip to content

Instantly share code, notes, and snippets.

@ram1123
Created March 20, 2025 09:28
Show Gist options
  • Save ram1123/e365b0ed7d8b7a0fc8fc3e628d41880c to your computer and use it in GitHub Desktop.
Save ram1123/e365b0ed7d8b7a0fc8fc3e628d41880c to your computer and use it in GitHub Desktop.
Script to remove duplicate events from a CMS dataset
#include <iostream>
#include <memory>
#include <set>
#include <tuple>

#include <TFile.h>
#include <TString.h>
#include <TTree.h>
void removeDuplicates() {
//TString prefix = "/raid/raid9/qguo/Run2/after/Run2_2/new/CMSSW_10_2_18/src/data_2018_NotBestMelaCand";
//TString prefix = "/publicfs/cms/data/hzz/guoqy/newNTuple_UL/2018/Data/DataUL2018_all";
//TString prefix = "/publicfs/cms/user/qyguo/lacked_Data1718/18/DoubleMuon_Run18A_1";
TString prefix = "/afs/cern.ch/work/q/qguo/public/NTuple/CMSSW_10_6_26/src/MuonEG_Run18D_4_test";
TString filename = prefix+".root";
std::cout<<filename<<std::endl;
TFile *oldfile = new TFile(filename);
oldfile->cd("Ana");
//TTree *oldtree = (TTree*)oldfile->Get("Ana/passedEvents");
TTree *oldtree = (TTree*)gDirectory->Get("passedEvents");
//TTree *oldtree = (TTree*)oldfile->Get("passedEvents");
Long64_t nentries = oldtree->GetEntries();
std::cout<<nentries<<" total entries."<<std::endl;
ULong64_t Run, LumiSect, Event;
bool passedZ4lSelection;
oldtree->SetBranchAddress("Run",&Run);
oldtree->SetBranchAddress("LumiSect",&LumiSect);
oldtree->SetBranchAddress("Event",&Event);
//Create a new file + a clone of old tree in new file
TFile *newfile = new TFile(
prefix+"_noDuplicates.root"
,"recreate");
TTree *newtree = oldtree->CloneTree(0);
std::set<TString> runlumieventSet;
int nremoved = 0;
for (Long64_t i=0;i<nentries; i++) {
if (i%100000==0) std::cout<<i<<"/"<<nentries<<std::endl;
oldtree->GetEntry(i);
TString s_Run = std::to_string(Run);
TString s_Lumi = std::to_string(LumiSect);
TString s_Event = std::to_string(Event);
TString runlumievent = s_Run+":"+s_Lumi+":"+s_Event;
if (runlumieventSet.find(runlumievent)==runlumieventSet.end()) {
runlumieventSet.insert(runlumievent);
newtree->Fill();
} else {
nremoved++;
}
//if (passedZ4lSelection) newtree->Fill();
}
std::cout<<nremoved<<" duplicates."<<std::endl;
newtree->Print();
newtree->AutoSave();
//delete oldfile;
delete newfile;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment