forked from jhcepas/sge-tweaks
-
Notifications
You must be signed in to change notification settings - Fork 0
/
qsummary
executable file
·213 lines (183 loc) · 6.04 KB
/
qsummary
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
#!/usr/bin/env python
import re
import commands
import sys
from string import strip
import argparse
from collections import defaultdict
import time
import datetime
# Binary (power-of-two) multipliers; mem2bytes() maps the upper-case
# suffixes K / M / G onto these.
mK = 1 << 10
mM = mK * mK
mG = mK * mM
# Decimal (power-of-ten) multipliers; mem2bytes() maps the lower-case
# suffixes k / m / g onto these.
mk = 10 ** 3
mm = mk * mk
mg = mk * mm
# Common durations expressed in seconds.
MINUTE = 60
HOUR = 60 * MINUTE
DAY = 24 * HOUR
def tm2sec(tm):
    """Convert a timestamp string into seconds since the epoch.

    The string is parsed with ``time.strptime`` using its default format
    (the ``ctime`` layout, e.g. ``"Wed Apr  4 18:28:41 2012"``) and then
    handed to ``time.mktime``.

    Returns:
        The epoch time as a float, or ``0.0`` when *tm* cannot be
        converted for any reason (best-effort fallback).
    """
    try:
        parsed = time.strptime(tm)
        seconds = time.mktime(parsed)
    except Exception:
        # Deliberately lenient: any unusable timestamp counts as zero.
        seconds = 0.0
    return seconds
def mem2bytes(mem):
    """Convert a memory size such as ``"30.965M"`` into a number of bytes.

    Args:
        mem: Either a plain number (or numeric string), taken as bytes, or
            a numeric string followed by a one-letter unit.  Upper-case
            ``K``/``M``/``G`` are powers of 1024; lower-case ``k``/``m``/``g``
            are powers of 1000.

    Returns:
        The size in bytes as a float.

    Raises:
        ValueError: if *mem* is empty, carries an unknown unit suffix, or
            its numeric part cannot be parsed.
    """
    try:
        return float(mem)
    except ValueError:
        pass  # Not a bare number: expect "<number><unit>" below.

    multipliers = {
        "K": mK, "M": mM, "G": mG,
        "k": mk, "m": mm, "g": mg,
    }
    # Slicing (rather than indexing) keeps the empty string from raising
    # IndexError; it falls through to the ValueError below instead.
    number, unit = mem[:-1], mem[-1:]
    if unit not in multipliers:
        # Previously an unknown suffix surfaced as an UnboundLocalError.
        raise ValueError("unrecognised memory size: %r" % (mem,))
    return float(number) * multipliers[unit]
def bytes2mem(bytes):
    """Format a byte count using the largest fitting binary unit.

    Args:
        bytes: Number of bytes (int or float).  The parameter name shadows
            the builtin ``bytes``; it is kept so keyword callers keep
            working.

    Returns:
        A string with two decimals and a ``G``/``M``/``K`` suffix (powers of
        1024), or ``str(bytes)`` when the value is below 1024.
    """
    # Thresholds are inclusive so that exact boundaries use the larger unit
    # (1024 -> "1.00K", 1 GiB -> "1.00G" rather than "1024.00M").
    for threshold, suffix in ((mG, "G"), (mM, "M"), (mK, "K")):
        if bytes >= threshold:
            return "%0.2f%s" % (float(bytes) / threshold, suffix)
    return str(bytes)
def dictcount(d):
    """Tabulate how often each value occurs in the mapping *d*.

    Every distinct value yields one row made of the value, formatted with
    ``"% 17d"``, followed by a space and its number of occurrences.  The
    rows are sorted as text and joined with newlines; an empty mapping
    gives an empty string.
    """
    occurrences = defaultdict(int)
    for value in d.values():
        occurrences[value] += 1

    rows = []
    for value, count in occurrences.items():
        rows.append("% 17d %s" % (value, count))
    rows.sort()
    return "\n".join(rows)
def sec2tm(sec):
    """Render a duration given in seconds as text.

    The result is the string form of ``datetime.timedelta(seconds=sec)``,
    for example ``41`` -> ``"0:00:41"``.
    """
    elapsed = datetime.timedelta(seconds=sec)
    return str(elapsed)
def _job_is_running(jobid):
    """Return False when ``qstat -j`` reports that the job does not exist."""
    for line in commands.getoutput("qstat -j %s" % jobid).split("\n"):
        if line.strip() == "Following jobs do not exist:":
            return False
    return True


def _parse_task_block(task_block):
    """Split one ``qacct`` record into a dict of field name -> raw text.

    Each line is cut at column 12: the left part is the field name, the
    rest is its value.  Values are kept as text so that textual fields
    (such as a purely numeric job name) are never turned into floats.
    """
    tinfo = {}
    for line in task_block.split("\n"):
        tinfo[line[0:12].strip()] = line[12:].strip()
    return tinfo


def _to_number(text):
    """Return *text* as a float when it is numeric, otherwise unchanged."""
    try:
        return float(text)
    except ValueError:
        return text


def get_summary(JOBID):
    """Build a text summary of an array job from its accounting records.

    Runs ``qacct -j JOBID`` to collect one record per finished task and
    ``qstat -j JOBID`` to find out whether the job still exists in the
    queue, then reports job name, start/end time, memory usage, durations
    and a count of exit statuses.

    When a module-level ``args`` object with a truthy ``dumpfile``
    attribute exists (set by the command-line entry point), one
    tab-separated line per task is also written to that file.

    Args:
        JOBID: Job identifier.  It is interpolated into shell command
            lines unquoted, so it must come from a trusted source.

    Returns:
        The summary text, or a short notice when ``qacct`` returned no
        task records.

    Raises:
        ValueError: if a record holds a non-numeric ``taskid`` or
            ``exit_status``, or start/end times that ``time.strptime``
            cannot parse.
        KeyError: if a record lacks one of the required fields.
    """
    # NOTE(security): JOBID reaches the shell unescaped.
    text = commands.getoutput("qacct -j %s" % JOBID)
    running = _job_is_running(JOBID)

    task2status = {}
    task2duration = {}
    task2cpu = {}
    task2mem = {}
    job_names = set()
    start_time = None
    end_time = None

    for task_block in re.findall(r"(qname.*?arid\s+[^\n]+)", text, re.DOTALL):
        tinfo = _parse_task_block(task_block)
        tid = int(float(tinfo["taskid"]))
        task2status[tid] = int(float(tinfo["exit_status"]))
        task2duration[tid] = int(tm2sec(tinfo["end_time"]) - tm2sec(tinfo["start_time"]))
        task2mem[tid] = mem2bytes(tinfo["maxvmem"])
        task2cpu[tid] = _to_number(tinfo["cpu"])
        job_names.add(tinfo["jobname"])

        # Track the earliest start and the latest end over all tasks,
        # parsing each timestamp only once.
        started = time.strptime(tinfo["start_time"])
        ended = time.strptime(tinfo["end_time"])
        if start_time is None or started < start_time:
            start_time = started
        if end_time is None or ended > end_time:
            end_time = ended

    if not task2mem:
        # Without any record there are no times to format; bail out
        # instead of passing None to time.strftime().
        return "No accounting records found for job %s\n" % JOBID

    stamp = "%d/%b/%y %H:%M:%S"
    mem_values = list(task2mem.values())
    durations = list(task2duration.values())

    parts = [
        "Job Name: " + ", ".join(sorted(job_names)) + "\n",
        "Started at: " + time.strftime(stamp, start_time) + "\n",
    ]
    if not running:
        parts.append("Ended at: " + time.strftime(stamp, end_time) + "\n")
    parts.extend([
        "Max mem usage: " + bytes2mem(max(mem_values)) + "\n",
        "Avg mem usage: " + bytes2mem(sum(mem_values) / float(len(mem_values))) + "\n",
        "Max job duration: " + sec2tm(max(durations)) + "\n",
        "Avg job duration: " + sec2tm(sum(durations) / float(len(durations))) + "\n",
        '\n',
        " EXIT STATUS COUNT \n",
        "================= =========\n",
        dictcount(task2status) + "\n\n",
        'You can get more info by running "qacct -j %s"\n' % JOBID,
    ])
    summary = "".join(parts)

    # ``args`` only exists when the file runs as a script; look it up
    # tolerantly so the function also works when imported.
    dumpfile = getattr(globals().get("args"), "dumpfile", None)
    if dumpfile:
        print("Dumping info into %s" % dumpfile)
        header = ["TaksID (line number)", "exit status", "total mem", "duration", "cpu usage"]
        with open(dumpfile, "w") as out:
            out.write("#" + "\t".join(header) + "\n")
            for tid in sorted(task2status):
                row = [tid, task2status[tid], bytes2mem(task2mem[tid]),
                       sec2tm(task2duration[tid]), task2cpu[tid]]
                out.write("\t".join(map(str, row)) + "\n")
    return summary
if __name__ == "__main__":
    # Command-line entry point: one positional job id plus an optional
    # --dumpfile path.  ``args`` has to stay a module-level name because
    # get_summary() reads the dump file path from it.
    cli = argparse.ArgumentParser(description='Dump summary of array-job')
    cli.add_argument(dest='jobid', help="jobid of the array job")
    cli.add_argument('--dumpfile', dest='dumpfile', type=str, help='dump extended info about job exit status')
    args = cli.parse_args()
    report = get_summary(args.jobid)
    print(report)
"""
qname t99999_m128_c64.q
hostname trantor4.crg.es
group nogroup
owner jhuerta
project NONE
department defaultdepartment
jobname test_job.sh
jobnumber 175
taskid 1442
account sge
priority 0
qsub_time Wed Apr 4 18:20:44 2012
start_time Wed Apr 4 18:28:00 2012
end_time Wed Apr 4 18:28:41 2012
granted_pe NONE
slots 1
failed 100 : assumedly after job
exit_status 137
ru_wallclock 41
ru_utime 0.004
ru_stime 0.012
ru_maxrss 1636
ru_ixrss 0
ru_ismrss 0
ru_idrss 0
ru_isrss 0
ru_minflt 1245
ru_majflt 0
ru_nswap 0
ru_inblock 0
ru_oublock 16
ru_msgsnd 0
ru_msgrcv 0
ru_nsignals 0
ru_nvcsw 19
ru_nivcsw 3
cpu 7.460
mem 0.044
io 0.000
iow 0.000
maxvmem 30.965M
arid undefi
"""