Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add code for generating site-packages #595

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
126 changes: 114 additions & 12 deletions scripts/available_software/available_software.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,19 +59,29 @@ def main():
)
path_data_dir = os.path.join(root_dir, "mkdocs/docs/HPC/only/gent/available_software/data")

# Generate the JSON overviews and detail markdown pages.
# Generate the JSON overviews
if args.eessi:
modules = modules_eesi()
modules = modules_eessi()
else:
modules = modules_ugent()
modules, paths = modules_ugent()

print(paths)
print(modules)
print("Generate JSON overview... ", end="", flush=True)
generate_json_overview(modules, path_data_dir)
print("Done!")

# Generate the JSON detail
json_data = generate_json_detailed_data(modules)
if args.eessi:
json_data = json_data
else:
json_data = get_extra_info_ugent(json_data, paths)
print("Generate JSON detailed... ", end="", flush=True)
json_path = generate_json_detailed(modules, path_data_dir)
json_path = generate_json_detailed(json_data, path_data_dir)
print("Done!")

# Generate detail markdown pages
print("Generate detailed pages... ", end="", flush=True)
generate_detail_pages(json_path, os.path.join(root_dir, "mkdocs/docs/HPC/only/gent/available_software/detail"))
print("Done!")
Expand Down Expand Up @@ -168,6 +178,35 @@ def module_whatis(name: str) -> dict:
return whatis


def module_info(info: str) -> dict:
    """
    Function to parse through lua file.

    Collects every ``whatis([==[Key: value]==])`` entry of the module file. An entry may span
    several lines; its lines are then joined with a single space so that words on consecutive
    lines are not glued together. Entries that are never closed with ``]==])`` or that contain
    no ``:`` separator are ignored instead of raising.

    @param info: String with the contents of the lua file.
    @return: Dictionary with the whatis keys (e.g. "Description", "Homepage", "Extensions") and
             their values. A key is only present when the file contains a matching entry.
    """
    opener = "whatis([==["
    closer = "]==])"

    entries = []
    # Parts of the entry that is currently being read; None while outside of an entry.
    pending = None
    for raw_line in info.split("\n"):
        # Trailing whitespace (e.g. '\r' from CRLF files) must not hide the closing marker.
        line = raw_line.rstrip()
        if line.startswith(opener):
            # A new entry starts here; an unterminated previous entry is dropped.
            pending = []
            line = line[len(opener):]
        elif pending is None:
            continue

        if line.endswith(closer):
            pending.append(line[:-len(closer)])
            entries.append(" ".join(part.strip() for part in pending if part.strip()))
            pending = None
        else:
            pending.append(line)

    whatis = {}
    for content in entries:
        key, separator, value = content.strip('"').partition(":")
        if not separator:
            # Not a "Key: value" entry, nothing to store.
            continue
        whatis[key.strip()] = value.strip()
    return whatis


# --------------------------------------------------------------------------------------------------------
# Fetch data EESSI
# --------------------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -199,7 +238,7 @@ def clusters_eessi() -> np.ndarray:
return clusters


def modules_eesi() -> dict:
def modules_eessi() -> dict:
"""
Returns names of all software module that are installed on EESSI.
They are grouped by cluster.
Expand Down Expand Up @@ -250,6 +289,15 @@ def filter_fn_gent_modules(data: np.ndarray) -> np.ndarray:
]


def filter_fn_gent_software_path(data: np.ndarray) -> np.ndarray:
    """
    Filter function that keeps the software path entries of the cluster.

    @param data: Output
    @return: Only the entries of the output that end with "/modules/all:"
    """
    is_software_path = np.char.endswith(data, "/modules/all:")
    return data[is_software_path]


def clusters_ugent() -> np.ndarray:
"""
Returns all the cluster names of the HPC at UGent.
Expand All @@ -259,6 +307,45 @@ def clusters_ugent() -> np.ndarray:
return module_avail(name="cluster/", filter_fn=filter_fn_gent_cluster)


def get_extra_info_ugent(json_data, paths) -> dict:
    """
    Add the description, homepage and list of extensions from the module files to the detailed JSON data.

    For every module version the matching ".lua" module file is read and parsed with module_info.
    The description and homepage are stored on the software entry, the extensions on the version entry.

    @param json_data: Detailed JSON data. Its 'software' entry maps every software name to a dictionary
                      with a 'versions' dictionary, in which every module lists its 'clusters'.
    @param paths: Dictionary with the module paths per cluster. Keys are the cluster names. The last
                  character of the first path of a cluster is dropped before it is used as directory.
    @return: The json_data dictionary that was passed in, extended in place with the extra info.
    """
    # Modules whose module file is looked up in a fixed directory instead of the cluster's own path.
    volta_modules = ["imkl/2020.4.304-NVHPC-21.2"]
    ampere_modules = ['OpenFold/1.0.1-foss-2022a-CUDA-11.7.0',
                      'OpenMM/7.7.0-foss-2022a-CUDA-11.7.0',
                      'PyTorch-Lightning/1.7.7-foss-2022a-CUDA-11.7.0',
                      'PyTorch/1.12.1-foss-2022a-CUDA-11.7.0',
                      'Triton/1.1.1-foss-2022a-CUDA-11.7.0']

    modules = json_data['software']
    for software in modules:
        if software == "Java":
            # Java has a strange naming scheme which causes problems
            continue
        for mod in modules[software]['versions']:
            # The module file is looked up on the first cluster that provides the module.
            cluster = modules[software]['versions'][mod]['clusters'][0]
            if mod in volta_modules:
                base_path = "/apps/gent/RHEL8/cascadelake-volta-ib/modules/all/"
            elif mod in ampere_modules:
                base_path = "/apps/gent/RHEL8/cascadelake-ampere-ib/modules/all/"
            elif cluster == "donphan":
                base_path = "/apps/gent/RHEL8/cascadelake-ib/modules/all/"
            elif cluster == "joltik":
                base_path = "/apps/gent/RHEL8/cascadelake-volta-ib/modules/all/"
            else:
                base_path = paths[cluster][0][:-1] + "/"
            path = base_path + mod + ".lua"

            # Close the module file again right away; a handle per module would otherwise stay open.
            with open(path, "r") as file:
                info = file.read()
            if not info:
                continue

            whatis = module_info(info)
            # Every field is optional: only copy what the module file actually provides.
            if "Description" in whatis:
                json_data['software'][software]['description'] = whatis['Description']
            if "Homepage" in whatis:
                json_data['software'][software]['homepage'] = whatis['Homepage']
            if "Extensions" in whatis:
                json_data["software"][software]["versions"][mod]["extensions"] = whatis['Extensions']
    return json_data


def modules_ugent() -> dict:
"""
Returns names of all software module that are installed on the HPC on UGent.
Expand All @@ -267,15 +354,17 @@ def modules_ugent() -> dict:
"""
print("Start collecting modules:")
data = {}
mapping = {}
for cluster in clusters_ugent():
print(f"\t Collecting available modules for {cluster}... ", end="", flush=True)
module_swap(cluster)
cluster_name = cluster.split("/", maxsplit=1)[1]
mapping[cluster_name] = module_avail(filter_fn=filter_fn_gent_software_path)
data[cluster_name] = module_avail(filter_fn=filter_fn_gent_modules)
print(f"found {len(data[cluster_name])} modules!")

print("All data collected!\n")
return data
return data, mapping


# --------------------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -343,7 +432,7 @@ def generate_software_table_data(software_data: dict, clusters: list) -> list:
row = [module_name]

for cluster in clusters:
row += ("x" if cluster in available else "-")
row += ("x" if cluster in available["clusters"] else "-")
table_data += row

return table_data
Expand All @@ -370,6 +459,13 @@ def generate_software_detail_page(

filename = f"{path}/{software_name}.md"
md_file = MdUtils(file_name=filename, title=f"{software_name}")
if 'description' in software_data.keys():
description = software_data['description']
md_file.new_paragraph(f"{description}")
if 'homepage' in software_data.keys():
homepage = software_data['homepage']
md_file.new_paragraph(f"{homepage}")

md_file.new_header(level=1, title="Available modules")

md_file.new_paragraph(f"The overview below shows which {software_name} installations are available per HPC-UGent "
Expand All @@ -386,6 +482,13 @@ def generate_software_detail_page(
text=generate_software_table_data(sorted_versions, clusters)
)

for version, details in list(sorted_versions.items())[::-1]:
if 'extensions' in details:
md_file.new_paragraph(f"### {version}")
md_file.new_paragraph("This is a list of extensions included in the module:")
packages = details['extensions']
md_file.new_paragraph(f"{packages}")

md_file.create_md_file()

# Remove the TOC
Expand Down Expand Up @@ -570,28 +673,27 @@ def generate_json_detailed_data(modules: dict) -> dict:

# If the version is not yet present, add it.
if mod not in json_data["software"][software]["versions"]:
json_data["software"][software]["versions"][mod] = []
json_data["software"][software]["versions"][mod] = {'clusters': []}

# If the cluster is not yet present, add it.
if cluster not in json_data["software"][software]["clusters"]:
json_data["software"][software]["clusters"].append(cluster)

# If the cluster is not yet present, add it.
if cluster not in json_data["software"][software]["versions"][mod]:
json_data["software"][software]["versions"][mod].append(cluster)
if cluster not in json_data["software"][software]["versions"][mod]["clusters"]:
json_data["software"][software]["versions"][mod]["clusters"].append(cluster)

return json_data


def generate_json_detailed(modules: dict, path_data_dir: str) -> str:
def generate_json_detailed(json_data: dict, path_data_dir: str) -> str:
"""
Generate the detailed JSON.

@param modules: Dictionary with all the modules per cluster. Keys are the cluster names.
@param path_data_dir: Path to the directory where the JSON will be placed.
@return: Absolute path to the json file.
"""
json_data = generate_json_detailed_data(modules)
filepath = os.path.join(path_data_dir, "json_data_detail.json")
with open(filepath, 'w') as outfile:
json.dump(json_data, outfile)
Expand Down
2 changes: 1 addition & 1 deletion scripts/available_software/test.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#!/bin/bash

PYTHONPATH=$PWD:$PYTHONPATH pytest -v -s
PYTHONPATH=$PWD:$PYTHONPATH pytest -v -s
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"clusters": ["dialga", "pikachu"], "software": {"cfd": {"clusters": ["dialga", "pikachu"], "versions": {"cfd/1.0": ["dialga", "pikachu"], "cfd/2.0": ["dialga", "pikachu"], "cfd/24": ["dialga", "pikachu"], "cfd/5.0": ["dialga", "pikachu"], "cfd/2.0afqsdf": ["dialga", "pikachu"], "cfd/3.0": ["pikachu"]}}, "Markov": {"clusters": ["dialga"], "versions": {"Markov/hidden-1.0.5": ["dialga"], "Markov/hidden-1.0.10": ["dialga"]}}, "science": {"clusters": ["dialga", "pikachu"], "versions": {"science/5.3.0": ["dialga", "pikachu"], "science/7.2.0": ["dialga", "pikachu"]}}, "llm": {"clusters": ["pikachu"], "versions": {"llm/20230627": ["pikachu"]}}}, "time_generated": "Thu, 31 Aug 2023 at 14:00:22 CEST"}
{"clusters": ["dialga", "pikachu"], "software": {"cfd": {"clusters": ["dialga", "pikachu"], "versions": {"cfd/1.0": {"clusters": ["dialga", "pikachu"]}, "cfd/2.0": {"clusters": ["dialga", "pikachu"]}, "cfd/24": {"clusters": ["dialga", "pikachu"]}, "cfd/5.0": {"clusters": ["dialga", "pikachu"]}, "cfd/2.0afqsdf": {"clusters": ["dialga", "pikachu"]}, "cfd/3.0": {"clusters": ["pikachu"]}}}, "Markov": {"clusters": ["dialga"], "versions": {"Markov/hidden-1.0.5": {"clusters": ["dialga"]}, "Markov/hidden-1.0.10": {"clusters": ["dialga"]}}}, "science": {"clusters": ["dialga", "pikachu"], "versions": {"science/5.3.0": {"clusters": ["dialga", "pikachu"]}, "science/7.2.0": {"clusters": ["dialga", "pikachu"]}}}, "llm": {"clusters": ["pikachu"], "versions": {"llm/20230627": {"clusters": ["pikachu"]}}}}, "time_generated": "Thu, 31 Aug 2023 at 14:00:22 CEST"}
2 changes: 1 addition & 1 deletion scripts/available_software/tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def setup_class(cls):
# ---------------------------

def test_data_ugent(self):
sol = modules_ugent()
sol = modules_ugent()[0]
assert len(sol) == 2
assert len(sol["dialga"]) == 13
assert len(sol["pikachu"]) == 15
Expand Down
12 changes: 7 additions & 5 deletions scripts/available_software/tests/test_json.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from available_software import (generate_json_overview_data,
generate_json_overview,
modules_ugent,
generate_json_detailed)
generate_json_detailed,
generate_json_detailed_data)
import os
import json

Expand Down Expand Up @@ -32,7 +33,7 @@ def teardown_class(cls):
# ---------------------------

def test_json_generate_simple(self):
modules = modules_ugent()
modules = modules_ugent()[0]
json_data = generate_json_overview_data(modules)
assert len(json_data.keys()) == 3
assert list(json_data["clusters"]) == ["dialga", "pikachu"]
Expand All @@ -44,7 +45,7 @@ def test_json_generate_simple(self):
}

def test_json_simple(self):
modules = modules_ugent()
modules = modules_ugent()[0]
json_path = generate_json_overview(modules, ".")
with open(json_path) as json_data:
data_generated = json.load(json_data)
Expand All @@ -57,8 +58,9 @@ def test_json_simple(self):
assert data_generated["clusters"] == data_solution["clusters"]

def test_json_detail_simple(self):
modules = modules_ugent()
json_path = generate_json_detailed(modules, ".")
modules = modules_ugent()[0]
json_data = generate_json_detailed_data(modules)
json_path = generate_json_detailed(json_data, ".")
assert os.path.exists("json_data_detail.json")

with open(json_path) as json_data:
Expand Down
4 changes: 2 additions & 2 deletions scripts/available_software/tests/test_md.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,15 @@ def teardown_class(cls):
# ---------------------------

def test_table_generate_simple(self):
simple_data = get_unique_software_names(modules_ugent())
simple_data = get_unique_software_names(modules_ugent()[0])
table_data, col, row = generate_table_data(simple_data)
assert col == 3
assert row == 5
assert len(table_data) == 15

def test_md_simple(self):
md_file = MdUtils(file_name='test_simple', title='Overview Modules')
simple_data = get_unique_software_names(modules_ugent())
simple_data = get_unique_software_names(modules_ugent()[0])
generate_module_table(simple_data, md_file)
md_file.create_md_file()
assert os.path.exists("test_simple.md")
Expand Down
Loading