
Commit

Cleaned up warnings
maouw committed Sep 20, 2023
1 parent 7bbf3d0 commit 3961481
Showing 3 changed files with 20 additions and 20 deletions.
18 changes: 8 additions & 10 deletions hyakvnc/__main__.py
@@ -40,7 +40,7 @@ def get_apptainer_vnc_instances(read_apptainer_config: bool = False):
for p, name_meta in running_hyakvnc_json_files.items():
with open(p, 'r') as f:
d = json.load(f)
assert needed_keys <= d.keys(), f"Missing keys {needed_keys - d.keys()} in {jf}"
assert needed_keys <= d.keys(), f"Missing keys {needed_keys - d.keys()} in {d}"

logOutPath = Path(d['logOutPath']).expanduser()
if not logOutPath.exists():
@@ -51,19 +51,18 @@ def get_apptainer_vnc_instances(read_apptainer_config: bool = False):
else:
d['config'] = json.loads(base64.b64decode(d['config']).decode('utf-8'))

d['slurm_compute_node'] = slurm_compute_node = p.relative_to(app_dir).parts[0]
d['slurm_job_id'] = name_meta['jobid']

with open(logOutPath, 'r') as f:
logOutFile_contents = f.read()
with open(logOutPath, 'r') as lf:
logOutFile_contents = lf.read()
rfbports = re.findall(r'\s+-rfbport\s+(?P<rfbport>\d+)\b', logOutFile_contents)
if not rfbports:
continue

vnc_port = rfbports[-1]

vnc_log_file_paths = re.findall(
rf'(?m)Log file is\s*(?P<logfilepath>.*[/]{d["slurm_compute_node"]}.*:{vnc_port}\.log)$',
rf'(?m)Log file is\s*(?P<logfilepath>.*/{d["slurm_compute_node"]}.*:{vnc_port}\.log)$',
logOutFile_contents)
if not vnc_log_file_paths:
continue
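A minimal standalone sketch (not part of this commit) of the port and log-path parsing above. The sample log text and node name are made up for illustration; the real values come from the job's logOutPath file and d["slurm_compute_node"].

import re

# Invented sample log contents; a real hyakvnc job log will differ.
log_contents = (
    "vnc startup output ... -rfbport 5901\n"
    "Log file is /home/user/.vnc/g3071.hyak.local:5901.log\n"
)

rfbports = re.findall(r'\s+-rfbport\s+(?P<rfbport>\d+)\b', log_contents)
vnc_port = rfbports[-1] if rfbports else None  # last -rfbport wins, as above

node = "g3071.hyak.local"  # stands in for d["slurm_compute_node"]
vnc_log_file_paths = re.findall(
    rf'(?m)Log file is\s*(?P<logfilepath>.*/{node}.*:{vnc_port}\.log)$',
    log_contents)
print(vnc_port, vnc_log_file_paths)
# 5901 ['/home/user/.vnc/g3071.hyak.local:5901.log']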
@@ -125,7 +124,7 @@ def cmd_create(container_path):
# needs to match rf'(?P<prefix>{app_config.apptainer_instance_prefix})(?P<jobid>\d+)-(?P<appinstance>.*)'):
apptainer_instance_name = rf"{app_config.apptainer_instance_prefix}-\$SLURM_JOB_ID-{container_name}"

apptainer_cmd = apptainer_env_vars_string + rf"apptainer instance start {container_path} {container_name}"
apptainer_cmd = apptainer_env_vars_string + rf"apptainer instance start {container_path} {apptainer_instance_name}"
apptainer_cmd_with_rest = rf"{apptainer_cmd} && while true; do sleep 10; done"

cmds += ["--wrap", apptainer_cmd_with_rest]
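For reference, a hedged sketch (not part of the commit) of the string this fix produces. The prefix, paths, and env-var string are placeholder values, not hyakvnc defaults; the point is that the instance is now started under apptainer_instance_name, which embeds the SLURM job id referenced by the naming pattern in the comment above.

# Placeholder values standing in for app_config and the CLI arguments.
apptainer_instance_prefix = "hyakvnc"
container_path = "/path/to/container.sif"
container_name = "container"
apptainer_env_vars_string = "APPTAINER_CLEANENV=1 "

apptainer_instance_name = rf"{apptainer_instance_prefix}-\$SLURM_JOB_ID-{container_name}"
apptainer_cmd = apptainer_env_vars_string + rf"apptainer instance start {container_path} {apptainer_instance_name}"
apptainer_cmd_with_rest = rf"{apptainer_cmd} && while true; do sleep 10; done"
print(apptainer_cmd_with_rest)
# APPTAINER_CLEANENV=1 apptainer instance start /path/to/container.sif hyakvnc-\$SLURM_JOB_ID-container && while true; do sleep 10; done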
@@ -148,7 +147,7 @@ def cmd_create(container_path):
logging.info("Waiting for job to start running")

try:
state = wait_for_job_status(job_id, states={"RUNNING"}, timeout=app_config.sbatch_post_timeout,
wait_for_job_status(job_id, states=["RUNNING"], timeout=app_config.sbatch_post_timeout,
poll_interval=app_config.sbatch_post_poll_interval)
except TimeoutError:
raise TimeoutError(f"Job {job_id} did not start running within {app_config.sbatch_post_timeout} seconds")
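wait_for_job_status comes from hyakvnc/slurmutil.py. Below is a rough sketch of the kind of polling loop such a helper implements; it is not the actual implementation, it leans on get_job from the same module, and the job state field name is an assumption.

import time

def wait_for_job_status(job_id, states, timeout, poll_interval=1.0):
    # Poll the scheduler until the job reaches one of `states` or we time out.
    deadline = time.time() + timeout
    while time.time() < deadline:
        job = get_job(jobs=job_id)  # assumed: returns a SlurmJob or None (see slurmutil.py below)
        if job is not None and job.state in states:  # `state` field name is an assumption
            return job.state
        time.sleep(poll_interval)
    raise TimeoutError(f"Job {job_id} did not reach {states} within {timeout} seconds")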
@@ -200,8 +199,7 @@ def create_arg_parser():
parser_create.add_argument('-c', '--cpus', dest='cpus', metavar='<num_cpus>', help='Subnode cpu count', default=1,
type=int)
parser_create.add_argument('-G', '--gpus', dest='gpus', metavar='[type:]<num_gpus>', help='Subnode gpu count',
default="0"
type = str)
default="0", type = str)
parser_create.add_argument('--mem', dest='mem', metavar='<NUM[K|M|G|T]>', help='Subnode memory amount with units',
type=str)
parser_create.add_argument('--container', dest='container', metavar='<path_to_container.sif>',
@@ -223,7 +221,7 @@ def create_arg_parser():


arg_parser = create_arg_parser()
args = (arg_parser).parse_args()
args = arg_parser.parse_args()

if args.debug:
os.environ["HYAKVNC_LOG_LEVEL"] = "DEBUG"
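A minimal, runnable repro of the fixed -G/--gpus option and the parse_args call; only this one option is included, and the GPU spec passed in is just an example value.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-G', '--gpus', dest='gpus', metavar='[type:]<num_gpus>',
                    help='Subnode gpu count', default="0", type=str)
args = parser.parse_args(['--gpus', 'a40:2'])
print(args.gpus)  # -> a40:2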
4 changes: 1 addition & 3 deletions hyakvnc/config.py
@@ -1,5 +1,4 @@
import json
import json
import logging
import os
from dataclasses import dataclass, asdict
@@ -50,7 +49,6 @@ class HyakVncConfig:
ssh_host = "klone.hyak.uw.edu" # intermediate host address between local machine and compute node

# slurm attributes
## sbatch environment variables
account: Optional[str] = None # account to use for sbatch jobs | -A, --account, SBATCH_ACCOUNT
partition: Optional[str] = None # partition to use for sbatch jobs | -p, --partition, SBATCH_PARTITION
cluster: Optional[str] = None # cluster to use for sbatch jobs | --clusters, SBATCH_CLUSTERS
@@ -88,7 +86,7 @@ def __post_init__(self) -> None:
self.apptainer_env_vars["APPTAINER_WRITABLE_TMPFS"] = "1" if self.apptainer_use_writable_tmpfs else "0"

if self.apptainer_cleanenv is not None:
self.apptainer_env_vars[["APPTAINER_CLEANENV"] = "1" if self.apptainer_cleanenv else "0"
self.apptainer_env_vars["APPTAINER_CLEANENV"] = "1" if self.apptainer_cleanenv else "0"

if self.apptainer_set_bind_paths is not None:
self.apptainer_env_vars["APPTAINER_BINDPATH"] = self.apptainer_set_bind_paths
18 changes: 11 additions & 7 deletions hyakvnc/slurmutil.py
@@ -40,8 +40,9 @@ def get_default_account(user: Optional[str] = None, cluster: Optional[str] = Non
raise LookupError(f"Could not find default account for user '{user}' on cluster '{cluster}'")


def get_partitions(user: Optional[str] = None, account: Optional[str] = None, cluster: Optional[str] = None) -> set[
str]:
def get_partitions(user: Optional[str] = None,
account: Optional[str] = None,
cluster: Optional[str] = None) -> set[str]:
"""
Gets the SLURM partitions for the specified user and account on the specified cluster.
@@ -89,7 +90,7 @@ def node_range_to_list(s: str) -> list[str]:
:return: list of SLURM nodes
:raises ValueError: if the node range could not be converted to a list of nodes
"""
output = subproces.run(f"scontrol show hostnames {s}", stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
output = subprocess.run(f"scontrol show hostnames {s}", stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
if output.returncode != 0:
raise ValueError(f"Could not convert node range '{s}' to list of nodes:\n{output.stderr}")
return output.stdout.rstrip().splitlines()
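For context, a standalone version of this helper; it assumes scontrol is on PATH and passes the command as an argument list so that no shell is needed. The node range in the usage note is just an example value.

import subprocess

def node_range_to_list(s: str) -> list[str]:
    # Expand a SLURM node range like "g[3071-3072]" into individual hostnames.
    output = subprocess.run(["scontrol", "show", "hostnames", s],
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if output.returncode != 0:
        raise ValueError(f"Could not convert node range '{s}' to list of nodes:\n{output.stderr}")
    return output.stdout.rstrip().splitlines()

# node_range_to_list("g[3071-3072]") -> ["g3071", "g3072"] on a cluster with scontrol available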
@@ -150,9 +151,11 @@ def from_squeue_line(line: str, field_order=None) -> "SlurmJob":
return SlurmJob(**field_dict)


def get_job(user: Optional[str] = os.getlogin(), jobs: Optional[Union[int, list[int]]] = None,
cluster: Optional[str] = None, field_names: Optional[Container[str]] = None) -> Union[
SlurmJob, list[SlurmJob], None]:
def get_job(jobs: Optional[Union[int, list[int]]] = None,
user: Optional[str] = os.getlogin(),
cluster: Optional[str] = None,
field_names: Optional[Container[str]] = None
) -> Union[SlurmJob, list[SlurmJob], None]:
"""
Gets the specified slurm job(s).
:param user: User to get jobs for
@@ -177,7 +180,7 @@ def get_job(user: Optional[str] = os.getlogin(), jobs: Optional[Union[int, list[
cmds += ['--jobs', jobs]

slurm_job_fields = [f for f in fields(SlurmJob) if f.name in field_names]
squeue_format_fields = [f.metadata.get("squeue_field", "") for f in slurm_job_fields].join()
assert len(slurm_job_fields) > 0, "Must specify at least one field to get for slurm jobs"
squeue_format_fields = ",".join([f.metadata.get("squeue_field", "") for f in slurm_job_fields])

cmds += ['--Format', squeue_format_fields]
res = subprocess.run(cmds, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
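The join fix in isolation: str.join is a string method, so the separator comes first and the list is passed to it. The field names below are invented stand-ins for the squeue_field metadata on SlurmJob.

squeue_fields = ["JobID", "State", "NodeList"]   # stand-ins for f.metadata.get("squeue_field", "")
squeue_format_fields = ",".join(squeue_fields)   # -> "JobID,State,NodeList"
cmds = ["squeue", "--Format", squeue_format_fields]
print(cmds)  # ['squeue', '--Format', 'JobID,State,NodeList']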
