-
Notifications
You must be signed in to change notification settings - Fork 5
/
fileReader.py
314 lines (287 loc) · 10.3 KB
/
fileReader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
import os, sys, argparse
from ROOT import TFile, TTree, TH1F
# Default input folder: used as the initial value of 'path' in main() and
# passed as the default of the 'path' command-line argument.
defaultPath = '/afs/cern.ch/work/j/jrgonzal/public/Run2017G/skim2l'
def isdigit(a):
    ''' Redefinition of str.isdigit() that takes into account negative numbers.

    Returns True when the string a is made only of digits, optionally
    preceded by a single '-' sign, and False otherwise (the empty string
    and a lone '-' are both rejected).
    '''
    if a.isdigit():
        return True
    # Slices (unlike a[0]) do not raise on the empty string.
    return a[:1] == '-' and a[1:].isdigit()
def _scanRootfiles(path, sampleName, getOnlyNumberedFiles, verbose):
    ''' List the rootfiles in path matching sampleName (single directory scan, no fallback) '''
    found = []
    for f in os.listdir(path):
        # Only files called <name>_<something>.root are considered
        if not f.endswith('.root') or '_' not in f:
            continue
        fields = f[:-5].split('_')
        n = fields[-1]
        s = fields[:-1]
        # A last field that is not a number is part of the sample name
        if not isdigit(n):
            s.append(n)
        fname = '_'.join(s)
        if getOnlyNumberedFiles and not n.isdigit():
            continue
        if sampleName != '' and fname != sampleName and (fname + '_' + n) != sampleName:
            continue
        if verbose:
            # Same output as the two-argument print statement (which inserts a space)
            print(' >> Adding file: ' + ' ' + f)
        found.append(f)
    return found


def findValidRootfiles(path, sampleName = '', getOnlyNumberedFiles = False, verbose = False, FullPaths = False):
    ''' Find rootfiles in path with a given name.

    path: folder to scan; an empty string means the current directory.
    sampleName: sample name, comma-separated string of names, or list of
        names. An empty string selects every '<name>_<x>.root' file.
    getOnlyNumberedFiles: keep only files whose last '_' field is a number.
    verbose: print the scanned folder and every selected file.
    FullPaths: prepend path to each returned file name.

    If nothing matches sampleName, the scan is repeated once with the
    'Tree_' prefix added to the name. Returns a list of file names; when
    it is empty an error message is printed.
    '''
    # Lists are handled first so that the ',' test below only sees strings
    if isinstance(sampleName, list):
        files = []
        for s in sampleName:
            files += findValidRootfiles(path, s, getOnlyNumberedFiles, verbose, FullPaths)
        return files
    if ',' in sampleName:
        sl = sampleName.replace(' ', '').split(',')
        return findValidRootfiles(path, sl, getOnlyNumberedFiles, verbose, FullPaths)

    if not path:
        path = './'
    elif not path.endswith('/'):
        path += '/'
    if verbose:
        print(' >> Looking for files in path: ' + path)

    files = _scanRootfiles(path, sampleName, getOnlyNumberedFiles, verbose)
    if not files:
        # Single retry with the 'Tree_' prefix. Recursing here, as was done
        # before, never terminates when no file matches at all.
        files = _scanRootfiles(path, 'Tree_' + sampleName, getOnlyNumberedFiles, verbose)
    if not files:
        print('[ERROR]: Not files "' + sampleName + '" found in: ' + path)
    if FullPaths:
        files = [path + x for x in files]
    return files
def GetFiles(path, name, verbose = False):
    ''' Return the full paths of all the rootfiles found in path for the process called name '''
    return findValidRootfiles(path, sampleName = name, getOnlyNumberedFiles = False, verbose = verbose, FullPaths = True)
def GetNGenEvents(fname):
    ''' Returns the content of bin 1 of the 'Count' histogram.

    fname: path to a rootfile (str), or a list of paths whose values are
    added up. Any other input prints an error and returns None.
    '''
    if isinstance(fname, list):
        return sum(GetNGenEvents(f) for f in fname)
    if isinstance(fname, str):
        f = TFile.Open(fname)
        try:
            return f.Get('Count').GetBinContent(1)
        finally:
            # The value has been copied out, so the file can be released;
            # otherwise every call leaves an open TFile behind.
            f.Close()
    print('[ERROR] [GetNGenEvents]: wrong input')
def GetHisto(fname, hname):
    ''' Fetch histogram hname from a file, or the sum of it over a list of files '''
    if not isinstance(fname, list):
        rootFile = TFile.Open(fname)
        histo = rootFile.Get(hname)
        # Detach the histogram from the file so it outlives Close()
        histo.SetDirectory(0)
        rootFile.Close()
        return histo
    total = GetHisto(fname[0], hname)
    for other in fname[1:]:
        total.Add(GetHisto(other, hname))
    return total
def GetSumWeights(fname):
    ''' Returns the content of bin 1 of the 'SumWeights' histogram.

    fname: path to a rootfile (str), or a list of paths whose values are
    added up. Any other input prints an error and returns None.
    '''
    if isinstance(fname, list):
        return sum(GetSumWeights(f) for f in fname)
    if isinstance(fname, str):
        f = TFile.Open(fname)
        try:
            return f.Get('SumWeights').GetBinContent(1)
        finally:
            # The value has been copied out, so the file can be released;
            # otherwise every call leaves an open TFile behind.
            f.Close()
    print('[ERROR] [GetSumWeights]: wrong input')
def GetHistoFromSetOfFiles(fname, histoname):
    ''' Returns the sum of a histo with a name in a list of files.

    fname: path to a rootfile (str) or list of paths.
    histoname: name of the histogram to read from each file.

    Returns the histogram (detached from its file), or None when it is not
    found in any of the files, the list is empty or the input type is wrong.
    Files that lack the histogram are reported and skipped.
    '''
    if isinstance(fname, list):
        total = None
        for f in fname:
            h = GetHistoFromSetOfFiles(f, histoname)
            if h is None:
                continue  # already reported by the single-file branch
            if total is None:
                total = h
            else:
                total.Add(h)
        return total
    if isinstance(fname, str):
        f = TFile.Open(fname)
        try:
            if not hasattr(f, histoname):
                print('[ERROR] [GetHistoFromSetOfFiles] Histogram \'%s\' does not exist in file %s !!'%(histoname, fname))
                return None
            h = f.Get(histoname)
            # Detach from the file so the histogram survives Close()
            h.SetDirectory(0)
            return h
        finally:
            f.Close()
    print('[ERROR] [GetHistoFromSetOfFiles]: wrong input')
def GetEntries(fname, treeName = 'Events'):
    ''' Returns the number of entries of the tree treeName.

    fname: path to a rootfile (str), or a list of paths whose entries are
    added up. Any other input prints an error and returns None.
    '''
    if isinstance(fname, list):
        return sum(GetEntries(f, treeName) for f in fname)
    if isinstance(fname, str):
        f = TFile.Open(fname)
        try:
            return f.Get(treeName).GetEntries()
        finally:
            # Release the file once the count has been read
            f.Close()
    print('[ERROR] [GetEntries]: wrong input')
def GuessIsData(fname):
    ''' Guess if a tree is data or simulation.

    fname: path to a rootfile or list of paths (only the first is read).
    Returns False when the 'Events' tree exposes a 'genWeight' attribute
    and True otherwise.
    '''
    if isinstance(fname, list):
        fname = fname[0]  # Assume all files are the same process/dataset
    f = TFile.Open(fname)
    try:
        return not hasattr(f.Get('Events'), 'genWeight')
    finally:
        # Release the file once the answer is known
        f.Close()
def guessPathAndName(p):
    ''' Split the location of a rootfile into (folder, sample name, number).

    The folder is everything up to the last '/' (p itself when p is an
    existing directory) and ends with '/' unless it is empty. The sample
    name has neither the '.root' extension nor a trailing '_<number>'.
    The number is the text after the last '_' when isdigit() accepts it;
    otherwise it is '-1' (a string) for names containing '_' and -1 (an
    int) for names without '_'.
    '''
    head, slash, p = p.rpartition('/')
    path = head + slash
    n = -1
    if p.endswith('.root'):
        p = p[:-5]
    elif os.path.isdir(path + p):
        path += p
        p = ''
    if path and not path.endswith('/'):
        path += '/'
    if '_' in p:
        fields = p.split('_')
        n = fields.pop()
        if not isdigit(n):
            # The last field is part of the name, not a file number
            fields.append(n)
            n = '-1'
        p = '_'.join(fields)
    return path, p, n
def guessProcessName(fileName):
    ''' Guess the process name of a file, or of the first file of a list.

    For a list the sample name given by guessPathAndName() is used; for a
    string the '.root' extension and any leading folders are removed.
    '''
    if isinstance(fileName, list):
        fileName = guessPathAndName(fileName[0])[1]
    if fileName.endswith('.root'):
        fileName = fileName[:-5]
    # Keep only what follows the last '/'
    return fileName.rpartition('/')[2]
def groupFilesInDic(listOfFiles, name, i=-1, verbose = False):
    ''' Manages a dictionary with sample names and lists of samples.

    listOfFiles: dictionary {sample name: [file names]}, updated in place.
    name: sample name, or a list of file names/paths that are each split
        with guessPathAndName() and added one by one.
    i: file number; when str(i) is made of digits the file is called
        '<name>_<i>.root', otherwise '<name>.root'.
    verbose: print a message the first time a sample is seen.
    '''
    if isinstance(name, list):
        for e in name:
            path, nam, n = guessPathAndName(e)
            # verbose is forwarded so that list input honours it too
            groupFilesInDic(listOfFiles, nam, n, verbose)
        return
    if str(i).isdigit():
        fname = name + '_' + str(i) + '.root'
    else:
        fname = name + '.root'
    if name not in listOfFiles:
        listOfFiles[name] = []
        if verbose:
            print(' >> Sample found: ' + name)
    listOfFiles[name].append(fname)
def getDicFiles(inFolder):
    ''' Build a dictionary mapping each sample name found in inFolder to its list of files '''
    samples = {}
    groupFilesInDic(samples, findValidRootfiles(inFolder))
    return samples
def GetAllInfoFromFile(fname, treeName = 'Events'):
    ''' Returns a list with all the info of a file.

    fname: path to a rootfile (str) or list of paths.
    treeName: name of the tree whose entries are counted.

    Returns [nEvents, nGenEvents, nSumOfWeights, isData], where
      nEvents       = entries of the tree,
      nGenEvents    = bin 1 of 'Count' (1 if it is not a TH1F),
      nSumOfWeights = bin 1 of 'SumWeights' (1 if it is not a TH1F),
      isData        = True when the tree has no 'genWeight' attribute.
    For a list the three numbers are added up and isData is the one of the
    last file (False for an empty list). Any other input prints an error
    and returns None.
    '''
    if isinstance(fname, list):
        nEvents = 0
        nGenEvents = 0
        nSumOfWeights = 0
        isData = False
        for f in fname:
            iE, iG, iS, isData = GetAllInfoFromFile(f, treeName)
            nEvents += iE
            nGenEvents += iG
            nSumOfWeights += iS
        return [nEvents, nGenEvents, nSumOfWeights, isData]
    if isinstance(fname, str):
        f = TFile.Open(fname)
        try:
            t = f.Get(treeName)
            hs = f.Get('SumWeights')
            hc = f.Get('Count')
            nEvents = t.GetEntries()
            nGenEvents = hc.GetBinContent(1) if isinstance(hc, TH1F) else 1
            nSumOfWeights = hs.GetBinContent(1) if isinstance(hs, TH1F) else 1
            isData = not hasattr(t, 'genWeight')
            return [nEvents, nGenEvents, nSumOfWeights, isData]
        finally:
            # Only plain values are returned, so the file can be released
            f.Close()
    print('[ERROR] [GetAllInfoFromFile]: wrong input')
def GetSumOfLHEweights(fname, treeName = 'Runs'):
    ''' Returns the element-wise sums of the LHE scale and PDF weights.

    fname: path to a rootfile, a comma-separated string of paths, or a
        list of paths.
    treeName: tree to loop over; its entries are read through the
        attributes nLHEScaleSumw_, LHEScaleSumw_, nLHEPdfSumw_ and
        LHEPdfSumw_.

    Returns (vME, vPDF): two lists holding, position by position, the sum
    of LHEScaleSumw_ and of LHEPdfSumw_ over all entries and files. Both
    are empty when there is nothing to read.
    '''
    slist = lambda X,Y : [x+y for x,y in zip(X, Y)]
    if isinstance(fname, str) and ',' in fname:
        fname = fname.replace(' ', '').split(',')
    vME = []; vPDF = []
    if isinstance(fname, list):
        for f in fname:
            ME, PDF = GetSumOfLHEweights(f, treeName)
            # An empty contribution is skipped: zipping it with the running
            # sum would truncate the sum to an empty list.
            if ME:
                vME = slist(vME, ME) if vME else ME
            if PDF:
                vPDF = slist(vPDF, PDF) if vPDF else PDF
        return vME, vPDF
    f = TFile.Open(fname)
    try:
        t = f.Get(treeName)
        initialised = False
        for e in t:
            # Size the accumulators once, from the first entry. Testing the
            # lists for emptiness instead would reset both sums on every
            # entry whenever one of the two weight sets has length 0.
            if not initialised:
                vPDF = [0]*e.nLHEPdfSumw_
                vME = [0]*e.nLHEScaleSumw_
                initialised = True
            vME = slist(vME , e.LHEScaleSumw_)
            vPDF = slist(vPDF, e.LHEPdfSumw_)
    finally:
        # The sums are plain Python lists, so the file can be released
        f.Close()
    return vME, vPDF
def GetProcessInfo(path, process='', treeName = 'Events'):
    ''' Print a summary of a process: folder, name, number of files, entries and event counts.

    path is either a folder (the files of process are then looked up with
    GetFiles) or directly a list of files, in which case folder and process
    name are guessed from the first file.
    '''
    if not isinstance(path, list):
        files = GetFiles(path, process)
    else:
        files = path
        path, process, _ = guessPathAndName(files[0])
    nEvents, nGenEvents, nSumOfWeights, isData = GetAllInfoFromFile(files, treeName)
    if isData:
        fileType = '(Data)'
    else:
        fileType = '(MC)'
    print('\n##################################################################')
    print(' path: ' + path)
    print(' Process: ' + process + ' ' + fileType)
    print(' Number of files: ' + str(len(files)))
    print(' Total entries: ' + str(nEvents))
    if not isData:
        print(' Generated events: ' + str(nGenEvents))
        print(' Sum of gen weights: ' + str(nSumOfWeights))
    else:
        print(' Triggered events: ' + str(nGenEvents))
    print('##################################################################\n')
def IsVarInTree(fname, var, treeName = 'Events'):
    ''' Check if a given file and tree contains a branch.

    Returns True when the tree treeName of file fname has an attribute
    called var. Returns False (after printing an error) when fname is not
    an existing file.
    '''
    if not os.path.isfile(fname):
        print('ERROR: %s does not exists!'%fname)
        return False
    f = TFile.Open(fname)
    try:
        return hasattr(f.Get(treeName), var)
    finally:
        # Only a boolean is returned, so the file can be released
        f.Close()
def GetValOfVarInTree(fname, var, treeName = 'Events'):
    ''' Read attribute var of a tree after loading its first entry.

    Returns False (after printing an error) when fname is not an existing file.
    '''
    if os.path.isfile(fname):
        rootFile = TFile.Open(fname)
        tree = rootFile.Get(treeName)
        tree.GetEntry(0)
        return getattr(tree, var)
    print('ERROR: %s does not exists!'%fname)
    return False
##################################
# Extra functions to check .root files from the terminal
def addToListOfFiles(listOfFiles, name, i, verbose = False):
    ''' Manages a dictionary with sample names and lists of samples.

    listOfFiles: dictionary {sample name: [file names]}, updated in place.
    name: sample name.
    i: file number; the file added is '<name>_<i>.root'.
    verbose: print a message the first time a sample is seen. It is a
        parameter because no 'verbose' name exists at module level, so
        reading it as a global raised a NameError.
    '''
    fname = name + '_' + str(i) + '.root'
    if name not in listOfFiles:
        listOfFiles[name] = []
        if verbose:
            print(' >> Sample found: ' + name)
    listOfFiles[name].append(fname)
def main():
    ''' Command-line entry point: print information about rootfiles.

    usage: fileReader.py [path] [--sample SAMPLE] [-p] [-t TREENAME]

    With --sample, the info of that sample in the folder 'path' is printed.
    Without it, 'path' is split with guessPathAndName(): if no sample name
    can be extracted, the samples found in the folder are listed; otherwise
    the info of the single matching file (if it exists) or of the whole
    sample is printed. The process exits when done.
    '''
    # Parsing options
    pr = argparse.ArgumentParser()
    # nargs='?' makes the positional optional; without it argparse always
    # requires a value and defaultPath is never used.
    pr.add_argument('path', help='Input folder', type = str, nargs = '?', default = defaultPath)
    pr.add_argument('--sample', type = str, default = '')
    # NOTE: --inspect is accepted but nothing in this function reads it
    pr.add_argument('-p','--inspect', action='store_true', help='Print branches')
    pr.add_argument('-t','--treeName', default='Events', help='Name of the tree')
    args = pr.parse_args()

    sample = args.sample
    treeName = args.treeName
    path = args.path
    if os.path.isdir(path) and not path.endswith('/'):
        path += '/'

    if sample != '':
        GetProcessInfo(path, sample, treeName)
        sys.exit()

    path, sample, n = guessPathAndName(path)
    if sample == '':
        d = getDicFiles(path)
        for c in d:
            # Same output as the two-argument print statement (which inserts a space)
            print(' >> ' + c + ': ' + ' ' + str(d[c]))
    else:
        # n is a digit string for numbered files and -1/'-1' otherwise
        if int(n) >= 0:
            totfile = path + sample + '_' + n + '.root'
        else:
            totfile = path + sample + '.root'
        if os.path.isfile(totfile):
            GetProcessInfo([totfile], treeName = treeName)
        else:
            GetProcessInfo(path, sample, treeName)
    sys.exit()
# Run the command-line interface only when the file is executed as a script
if __name__ == '__main__':
    main()