-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathresman.py
executable file
·279 lines (223 loc) · 8.59 KB
/
resman.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
#!/usr/bin/env python3
import argparse
import datetime
import getpass
import json
import os
import re
import sys
import time
# Text file that log_lock() appends one line to for every new reservation.
LOG_FILE = "/etc/res_log"
# File holding the JSON record of the current reservation; release() empties it.
LOCK_FILE = "/etc/res_lock"
# Placeholder record, with the same keys as a real one, that is_locked() hands
# back when the lock file is missing or unreadable.
# NOTE(review): "reason" is a list here, whereas main() stores a plain string
# in real records -- confirm whether that difference is intentional.
BLANK_DAT = {"username": "", "reason": [""], "start_time": 0, "duration": 0}
class cols:
    """ANSI escape sequences used to colour terminal output."""

    # Bright green foreground, used for "all clear" messages.
    OKGREEN = "\033[92m"
    # Bright red foreground, used for warnings.
    WARNING = "\033[91m"
    # Resets all text attributes back to the terminal default.
    ENDC = "\033[0m"
def dumpdat(file, username, start, duration, reason):
    """Serialise a reservation record to *file* as JSON and return the record.

    Args:
        file: An open, writable text file object.
        username: Name of the user holding the reservation.
        start: Start time of the reservation.
        duration: Length of the reservation.
        reason: Free-form explanation of what the reservation is for.

    Returns:
        The dictionary that was written, with the keys ``start_time``,
        ``duration``, ``username`` and ``reason``.
    """
    record = dict(
        start_time=start,
        duration=duration,
        username=username,
        reason=reason,
    )
    json.dump(record, file)
    return record
def is_locked():
    """Report whether a reservation is currently in force.

    Reads the JSON record stored in ``LOCK_FILE``. A record whose
    ``duration`` is ``-1`` is open-ended and always counts as locked;
    otherwise the reservation is active until ``start_time + duration``.

    Returns:
        A ``(locked, record)`` tuple. ``record`` is the dictionary loaded
        from the lock file. If the file is missing, unreadable, empty, or
        does not contain a usable record, the result is ``(False, blank)``
        where ``blank`` is a fresh copy of ``BLANK_DAT``.
    """
    try:
        # The context manager guarantees the handle is closed even when the
        # JSON turns out to be malformed.
        with open(LOCK_FILE, "r") as lock_file:
            record = json.load(lock_file)
        if record["duration"] == -1:
            return True, record
        expires_at = record["start_time"] + record["duration"]
        return time.time() <= expires_at, record
    except (OSError, ValueError, KeyError, TypeError):
        # OSError: file missing / not readable.
        # ValueError: invalid JSON (JSONDecodeError) or undecodable bytes.
        # KeyError / TypeError: valid JSON that is not a reservation record
        # (missing keys, not an object, or non-numeric time fields).
        # Return a copy so callers cannot mutate the shared module constant.
        return False, dict(BLANK_DAT)
def try_lock(username, start, duration=3600, reason=""):
    """Attempt to record a new reservation in the lock file.

    Args:
        username: Name of the user making the reservation.
        start: Start time of the reservation.
        duration: Length of the reservation; defaults to 3600.
        reason: Free-form explanation for the reservation.

    Returns:
        ``(False, existing_record)`` if a reservation is already active, in
        which case nothing is written; otherwise ``(True, new_record)`` after
        the lock file has been overwritten with the new record.
    """
    already_held, current = is_locked()
    if already_held:
        return False, current

    with open(LOCK_FILE, "w") as lock_file:
        new_record = dumpdat(lock_file, username, start, duration, reason)
    return True, new_record
def release():
    """Forcibly drop any reservation by emptying the lock file."""
    with open(LOCK_FILE, "w") as lock_file:
        # Cut the file down to zero bytes so no record remains in it.
        lock_file.truncate(0)
def timestring2secs(ts: str):
    """Convert a duration string into a number of seconds.

    Accepted forms are an hours part, a minutes part, or both in that order:

        '1h30m' -> 1 hour, 30 minutes
        '25m'   -> 0 hours, 25 minutes
        '10h'   -> 10 hours, 0 minutes

    Whitespace around the numbers and units is tolerated.

    Args:
        ts: The duration string to parse.

    Returns:
        The duration in seconds as an ``int``, or ``None`` if ``ts`` is not a
        string or does not match the format above. Strings with no unit
        (e.g. ``'30'``), negative components, and the empty string are all
        rejected rather than guessed at.
    """
    if not isinstance(ts, str):
        return None

    # fullmatch insists that the *entire* string is "<n>h", "<n>m" or both,
    # so stray characters or a bare number can never be silently mis-read.
    match = re.fullmatch(r"\s*(?:([0-9]+)\s*h)?\s*(?:([0-9]+)\s*m)?\s*", ts)
    if match is None:
        return None

    hours_txt, mins_txt = match.groups()
    if hours_txt is None and mins_txt is None:
        # Both parts are optional in the pattern, but at least one is required.
        return None

    hours = int(hours_txt) if hours_txt is not None else 0
    mins = int(mins_txt) if mins_txt is not None else 0
    return hours * 3600 + mins * 60
def secs2timestring(snds: int):
    """Format a number of seconds as a short human-readable string.

    Hours are shown only when non-zero, minutes are shown when either hours
    or minutes are non-zero, and seconds are always shown, e.g.
    ``3661 -> '1h 1m 1s'``, ``60 -> '1m 0s'``, ``59 -> '59s'``.

    Args:
        snds: The number of seconds to format. May be negative.

    Returns:
        The formatted string. A negative input is formatted by magnitude and
        prefixed with ``-`` (e.g. ``-5 -> '-5s'``).
    """
    # Floor division on a negative number would give results such as
    # "-1h 59m 55s" for -5, so split off the sign and work on the magnitude.
    sign = "-" if snds < 0 else ""
    total_mins, secs = divmod(abs(snds), 60)
    hours, mins = divmod(total_mins, 60)

    parts = []
    if hours:
        parts.append(f"{hours}h")
    if hours or mins:
        parts.append(f"{mins}m")
    parts.append(f"{secs}s")
    return sign + " ".join(parts)
def explain_failure(dat):
    """Print a human-readable explanation of an existing reservation.

    Args:
        dat: A reservation record; the ``username``, ``reason``, ``duration``
            and (for timed reservations) ``start_time`` keys are read.
    """
    print(f"{cols.WARNING}Already locked by {dat['username']}!{cols.ENDC}")
    print(f"Reason: '{dat['reason']}'")

    if dat["duration"] != -1:
        # Timed reservation: report how long is left on it.
        remaining = int(dat["start_time"] + dat["duration"] - time.time())
        print(f"{secs2timestring(remaining)} left on reservation")
        return

    # A duration of -1 marks a reservation with no fixed end time.
    print("Reservation is until further notice, likely waiting for a command to finish running.")
def log_lock(user, reason, dur="", cmd=""):
    """Append a line describing a new reservation to the log file.

    Logging is best effort: if the log file cannot be opened or written, a
    warning is printed and the function returns normally.

    Args:
        user: Name of the user who made the reservation.
        reason: The reason given for the reservation.
        dur: Duration string for a timed reservation, if any.
        cmd: Command being waited on for an open-ended reservation; only used
            when ``dur`` is empty.
    """
    now = datetime.datetime.now().strftime("%x %X")
    if dur:
        entry = f"[{now}] '{user}' locked server for {dur}. Reason = {reason}\n"
    elif cmd:
        entry = f"[{now}] '{user}' locked server until `{cmd}` finishes. Reason = {reason}\n"
    else:
        print("[warning] `log_lock` called with no duration or command.")
        entry = f"[{now}] '{user}' locked server. Reason = {reason}\n"

    try:
        with open(LOG_FILE, "a") as log_file:
            log_file.write(entry)
    except (OSError, UnicodeError) as err:
        # Only I/O and encoding failures are expected here; anything else is a
        # programming error and should surface rather than be hidden.
        print(f"[warning] Failed to open or write log file at {LOG_FILE}. Check permissions. ({err})")
def _build_parser():
    """Build the argparse parser describing resman's command-line options."""
    default_user = getpass.getuser()
    parser = argparse.ArgumentParser(
        prog="resman",
        description=(
            f"A simple system resource allocation program. Persistent state stored in {LOCK_FILE}. This "
            "program does not impose any actual lock on system resource usage; it only keeps track of which "
            "user (if any) has laid claim to it for the time being, and if applicable how long they have it "
            "reserved for. The purpose of this is to prevent two people accidentally running experiments at "
            "the same time."
        ),
        epilog=(
            "Can also be run with no arguments, to check status. Exits with 0 if not reserved, or 1 if currently "
            "reserved. Just `echo $?` afterwards."
        ),
    )
    parser.add_argument(
        "-d",
        "--duration",
        help="how long to reserve server use for (e.g. 1h30m, 25m, 4h)",
        default=-1,  # sentinel meaning "no duration given"
    )
    parser.add_argument(
        "-x",
        "--run",
        metavar="COMMAND",
        help=(
            "reserve the server until COMMAND finishes. quotes (') are needed around COMMAND if it contains '-'s, "
            "otherwise they're optional."
        ),
        nargs="*",
    )
    parser.add_argument(
        "-r",
        "--reason",
        help="what experiment are you running? default is blank. completely optional.",
        default=["<no reason given>"],
        nargs="*",
    )
    parser.add_argument(
        "-R",
        "--release",
        help=(
            "unlocks an existing reservation. you should probably only use this if you're the person who made the "
            "reservation in the first place, and you want to free the server earlier than your allocated time slot. "
            "may also be used if this script crashes :)"
        ),
        action="store_true",
    )
    parser.add_argument(
        "-c",
        "--confirm",
        help=(
            "interactively tell the user the current status of things. if the server is reserved, they are prompted to "
            "press enter to confirm that they understand this."
        ),
        action="store_true",
    )
    parser.add_argument(
        "-u",
        "--user",
        help=f"who is using the server during the reservation? Default is {default_user}.",
        default=default_user,
    )
    return parser


def _report_status(locked, dat):
    """Print the ``--confirm`` status report, prompting for ENTER on a terminal."""
    if not locked:
        print(cols.OKGREEN + "No one is running any experiment right now, do what you like :)" + cols.ENDC)
        return

    print(
        cols.WARNING + "Be careful!" + cols.ENDC + " Someone is running an experiment right now.\n"
        "Please avoid any significant CPU or memory usage for the time being."
    )
    print(f"User: {dat['username']}")
    print(f"Reason: {dat['reason']}")
    if dat["duration"] == -1:
        print("Time remaining: indeterminate (waiting for command to terminate)")
        runtime = int(time.time()) - dat["start_time"]
        print(f"Time since job started: {secs2timestring(runtime)}")
    else:
        snds = int(dat["start_time"] + dat["duration"] - time.time())
        print(f"Time remaining: {secs2timestring(snds)}")

    # Only prompt for read confirmation if in interactive session
    if sys.stdout.isatty():
        try:
            input(cols.WARNING + "Please press ENTER" + cols.ENDC + " to confirm that you've read this :) ")
        except EOFError:
            # stdin was closed; there is nobody to confirm, so carry on.
            print()


def _exit_code_from_status(status):
    """Translate the value returned by ``os.system`` into an exit code."""
    if not hasattr(os, "WIFEXITED"):
        # Platforms without wait-status helpers already return the exit code.
        return status
    if os.WIFEXITED(status):
        return os.WEXITSTATUS(status)
    if os.WIFSIGNALED(status):
        # Mirror the subprocess convention: killed by signal N -> -N.
        return -os.WTERMSIG(status)
    return status


def main():
    """Entry point for the ``resman`` command-line tool.

    Depending on the arguments this reports the reservation status, releases
    an existing reservation, reserves the server for a fixed duration, or
    reserves it while a command runs.

    Exit status:
        * no action arguments: 0 if not reserved, 1 if reserved;
        * ``os.EX_TEMPFAIL`` if a reservation was requested while one is
          already active;
        * ``os.EX_DATAERR`` if the duration cannot be parsed or is not
          positive;
        * 1 if the lock was taken by someone else between the check and the
          attempt to lock;
        * 2 (from argparse) if ``--run`` is given without a command or a
          duration.
    """
    parser = _build_parser()
    args = parser.parse_args()
    args.reason = " ".join(args.reason)

    locked, dat = is_locked()

    if args.confirm:
        _report_status(locked, dat)
        return

    if args.release:
        if locked:
            release()
        print("Released the lock file, new reservations can now be made.")
        return

    wants_run = bool(args.run)
    wants_duration = args.duration != -1

    if not wants_run and not wants_duration:
        if args.run is None:
            # If no action params given, just return the lock status
            sys.exit(int(locked))
        # `-x` was supplied with an empty command and there is no duration to
        # fall back on, so there is nothing meaningful to do.
        parser.error("-x/--run requires a COMMAND to execute")

    # If locked, nicely explain why
    if locked:
        explain_failure(dat)
        sys.exit(os.EX_TEMPFAIL)

    if wants_run:  # Lock for indeterminate time until command finishes
        successful, _ = try_lock(args.user, int(time.time()), -1, args.reason)
        if not successful:
            print("Server locked by someone else. Race condition edge case.")
            sys.exit(1)

        cmd = " ".join(args.run)
        log_lock(args.user, args.reason, "", cmd)
        try:
            # The command comes from the invoking user's own command line and
            # is deliberately handed to the shell as written.
            status = os.system(cmd)
            print(f"Finished with exit code {_exit_code_from_status(status)}; unlocking.")
        finally:
            # Always release, otherwise an interrupted run would leave the
            # server reserved indefinitely.
            release()
        return

    # Lock for certain time and then exit
    dur_secs = timestring2secs(args.duration)
    if dur_secs is None:
        print("Failed to parse time string", file=sys.stderr)
        sys.exit(os.EX_DATAERR)
    if dur_secs <= 0:
        # A zero or negative duration would create a reservation that has
        # already expired.
        print("Duration must be greater than zero", file=sys.stderr)
        sys.exit(os.EX_DATAERR)

    print(f"Allocating server for {args.user} for {args.duration} ({dur_secs}s):\nReason: {args.reason}")
    successful, _ = try_lock(args.user, int(time.time()), dur_secs, args.reason)
    if not successful:
        print("Server locked by someone else. Race condition edge case.")
        sys.exit(1)
    log_lock(args.user, args.reason, args.duration, "")
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()