hpcugent · laraPPr · Mar 6, 2024 · Mar 6, 2024 · Mar 6, 2024 · Mar 6, 2024
diff --git a/scripts/available_software/available_software.py b/scripts/available_software/available_software.py
@@ -59,19 +59,29 @@ def main():
     )
     path_data_dir = os.path.join(root_dir, "mkdocs/docs/HPC/only/gent/available_software/data")
 
-    # Generate the JSON overviews and detail markdown pages.
+    # Generate the JSON overviews
     if args.eessi:
-        modules = modules_eesi()
+        modules = modules_eessi()
     else:
-        modules = modules_ugent()
+        modules, paths = modules_ugent()
 
+    print(paths)
     print(modules)
     print("Generate JSON overview... ", end="", flush=True)
     generate_json_overview(modules, path_data_dir)
     print("Done!")
+
+    # Generate the JSON detail
+    json_data = generate_json_detailed_data(modules)
+    if args.eessi:
+        json_data = json_data
+    else:
+        json_data = get_extra_info_ugent(json_data, paths)
     print("Generate JSON detailed... ", end="", flush=True)
-    json_path = generate_json_detailed(modules, path_data_dir)
+    json_path = generate_json_detailed(json_data, path_data_dir)
     print("Done!")
+
+    # Generate detail markdown pages
     print("Generate detailed pages... ", end="", flush=True)
     generate_detail_pages(json_path, os.path.join(root_dir, "mkdocs/docs/HPC/only/gent/available_software/detail"))
     print("Done!")
@@ -168,6 +178,35 @@ def module_whatis(name: str) -> dict:
     return whatis
 
 
+def module_info(info: str) -> dict:
+    """
+    Extract the whatis() entries from the contents of a Lua module file.
+    @param info: String with the contents of the lua file.
+    @return: Dictionary with the whatis keys (e.g. Description) and their stripped values.
+    """
+    prefix, suffix = "whatis([==[Description", "]==])"
+    lines = info.split("\n")
+    # Only the description is allowed to span several lines, so it is stitched together first.
+    pos = [n for n, text in enumerate(lines) if text.startswith(prefix)][0]
+    if lines[pos].endswith(suffix):
+        entry = re.sub(pattern=r'whatis\(\[==\[(.*)\]==\]\)', repl='\\1', string=lines[pos]).strip('"')
+    else:
+        pieces = [re.sub(pattern=r'whatis\(\[==\[(.*)', repl='\\1', string=lines[pos]).strip('"')]
+        while not lines[pos].endswith(suffix):
+            pos += 1
+            pieces.append(lines[pos])
+        entry = re.sub(pattern=r'(.*)\]==\]\)', repl='\\1', string="".join(pieces)).strip('"')
+    entries = [entry]
+    for text in lines:
+        if text.startswith("whatis") and not text.startswith(prefix):
+            entries.append(re.sub(pattern=r'whatis\(\[==\[(.*)\]==\]\)', repl='\\1', string=text).strip('"'))
+    whatis = {}
+    for entry in entries:
+        name, value = entry.split(":", maxsplit=1)
+        whatis[name.strip()] = value.strip()
+    return whatis
+
+
 # --------------------------------------------------------------------------------------------------------
 # Fetch data EESSI
 # --------------------------------------------------------------------------------------------------------
@@ -199,7 +238,7 @@ def clusters_eessi() -> np.ndarray:
     return clusters
 
 
-def modules_eesi() -> dict:
+def modules_eessi() -> dict:
     """
     Returns names of all software module that are installed on EESSI.
     They are grouped by cluster.
@@ -250,6 +289,15 @@ def filter_fn_gent_modules(data: np.ndarray) -> np.ndarray:
                 ]
 
 
+def filter_fn_gent_software_path(data: np.ndarray) -> np.ndarray:
+    """
+    Filter function that keeps only the entries ending in "/modules/all:" (the software path of the cluster).
+    @param data: Output
+    @return: Filtered output
+    """
+    return np.extract(np.char.endswith(data, "/modules/all:"), data)
+
+
 def clusters_ugent() -> np.ndarray:
     """
     Returns all the cluster names of the HPC at UGent.
@@ -259,6 +307,45 @@ def clusters_ugent() -> np.ndarray:
     return module_avail(name="cluster/", filter_fn=filter_fn_gent_cluster)
 
 
+def get_extra_info_ugent(json_data, paths) -> dict:
+    """
+    Add the description, homepage and extensions found in the Lua module files to the detailed JSON data.
+    @param json_data: Detailed JSON data (see generate_json_detailed_data); it is updated in place.
+    @param paths: Dictionary with the module paths per cluster. Keys are the cluster names.
+    @return: Dictionary with all the software, extended with the extra information of their modules.
+    """
+    modules = json_data['software']
+    for software in modules:
+        if software == "Java":
+            # Java has a strange naming scheme which causes problems
+            continue
+        for mod in modules[software]['versions']:
+            cluster = modules[software]['versions'][mod]['clusters'][0]
+            if mod in ["imkl/2020.4.304-NVHPC-21.2"]:
+                base_path = "/apps/gent/RHEL8/cascadelake-volta-ib/modules/all/"
+            elif mod in ['OpenFold/1.0.1-foss-2022a-CUDA-11.7.0', 'OpenMM/7.7.0-foss-2022a-CUDA-11.7.0',
+                         'PyTorch-Lightning/1.7.7-foss-2022a-CUDA-11.7.0', 'PyTorch/1.12.1-foss-2022a-CUDA-11.7.0',
+                         'Triton/1.1.1-foss-2022a-CUDA-11.7.0']:
+                base_path = "/apps/gent/RHEL8/cascadelake-ampere-ib/modules/all/"
+            elif cluster == "donphan":
+                base_path = "/apps/gent/RHEL8/cascadelake-ib/modules/all/"
+            elif cluster == "joltik":
+                base_path = "/apps/gent/RHEL8/cascadelake-volta-ib/modules/all/"
+            else:
+                base_path = paths[cluster][0][:-1] + "/"  # swap the trailing ":" of the path for "/"
+            path = base_path + mod + ".lua"
+            with open(path, "r") as file:  # the context manager closes the file again after reading
+                info = file.read()
+            if info != "":
+                whatis = module_info(info)
+                json_data['software'][software]['description'] = whatis['Description']
+                if "Homepage" in whatis:
+                    json_data['software'][software]['homepage'] = whatis['Homepage']
+                if "Extensions" in whatis:
+                    json_data["software"][software]["versions"][mod]["extensions"] = whatis['Extensions']
+    return json_data
+
+
 def modules_ugent() -> dict:
     """
     Returns names of all software module that are installed on the HPC on UGent.
@@ -267,15 +354,17 @@ def modules_ugent() -> dict:
     """
     print("Start collecting modules:")
     data = {}
+    mapping = {}
     for cluster in clusters_ugent():
         print(f"\t Collecting available modules for {cluster}... ", end="", flush=True)
         module_swap(cluster)
         cluster_name = cluster.split("/", maxsplit=1)[1]
+        mapping[cluster_name] = module_avail(filter_fn=filter_fn_gent_software_path)
         data[cluster_name] = module_avail(filter_fn=filter_fn_gent_modules)
         print(f"found {len(data[cluster_name])} modules!")
 
     print("All data collected!\n")
-    return data
+    return data, mapping
 
 
 # --------------------------------------------------------------------------------------------------------
@@ -343,7 +432,7 @@ def generate_software_table_data(software_data: dict, clusters: list) -> list:
         row = [module_name]
 
         for cluster in clusters:
-            row += ("x" if cluster in available else "-")
+            row += ("x" if cluster in available["clusters"] else "-")
         table_data += row
 
     return table_data
@@ -370,6 +459,13 @@ def generate_software_detail_page(
 
     filename = f"{path}/{software_name}.md"
     md_file = MdUtils(file_name=filename, title=f"{software_name}")
+    if 'description' in software_data.keys():
+        description = software_data['description']
+        md_file.new_paragraph(f"{description}")
+    if 'homepage' in software_data.keys():
+        homepage = software_data['homepage']
+        md_file.new_paragraph(f"{homepage}")
+
     md_file.new_header(level=1, title="Available modules")
 
     md_file.new_paragraph(f"The overview below shows which {software_name} installations are available per HPC-UGent "
@@ -386,6 +482,13 @@ def generate_software_detail_page(
         text=generate_software_table_data(sorted_versions, clusters)
     )
 
+    for version, details in list(sorted_versions.items())[::-1]:
+        if 'extensions' in details:
+            md_file.new_paragraph(f"### {version}")
+            md_file.new_paragraph("This is a list of extensions included in the module:")
+            packages = details['extensions']
+            md_file.new_paragraph(f"{packages}")
+
     md_file.create_md_file()
 
     # Remove the TOC
@@ -570,28 +673,27 @@ def generate_json_detailed_data(modules: dict) -> dict:
 
                 # If the version is not yet present, add it.
                 if mod not in json_data["software"][software]["versions"]:
-                    json_data["software"][software]["versions"][mod] = []
+                    json_data["software"][software]["versions"][mod] = {'clusters': []}
 
                 # If the cluster is not yet present, add it.
                 if cluster not in json_data["software"][software]["clusters"]:
                     json_data["software"][software]["clusters"].append(cluster)
 
                 # If the cluster is not yet present, add it.
-                if cluster not in json_data["software"][software]["versions"][mod]:
-                    json_data["software"][software]["versions"][mod].append(cluster)
+                if cluster not in json_data["software"][software]["versions"][mod]["clusters"]:
+                    json_data["software"][software]["versions"][mod]["clusters"].append(cluster)
 
     return json_data
 
 
-def generate_json_detailed(modules: dict, path_data_dir: str) -> str:
+def generate_json_detailed(json_data: dict, path_data_dir: str) -> str:
     """
     Generate the detailed JSON.
 
     @param modules: Dictionary with all the modules per cluster. Keys are the cluster names.
     @param path_data_dir: Path to the directory where the JSON will be placed.
     @return: Absolute path to the json file.
     """
-    json_data = generate_json_detailed_data(modules)
     filepath = os.path.join(path_data_dir, "json_data_detail.json")
     with open(filepath, 'w') as outfile:
         json.dump(json_data, outfile)

diff --git a/scripts/available_software/test.sh b/scripts/available_software/test.sh
@@ -1,3 +1,3 @@
 #!/bin/bash
 
-PYTHONPATH=$PWD:$PYTHONPATH pytest -v -s
+PYTHONPATH=$PWD:$PYTHONPATH pytest -v -s
diff --git a/scripts/available_software/tests/data/test_json_simple_sol_detail.json b/scripts/available_software/tests/data/test_json_simple_sol_detail.json
@@ -1 +1 @@
-{"clusters": ["dialga", "pikachu"], "software": {"cfd": {"clusters": ["dialga", "pikachu"], "versions": {"cfd/1.0": ["dialga", "pikachu"], "cfd/2.0": ["dialga", "pikachu"], "cfd/24": ["dialga", "pikachu"], "cfd/5.0": ["dialga", "pikachu"], "cfd/2.0afqsdf": ["dialga", "pikachu"], "cfd/3.0": ["pikachu"]}}, "Markov": {"clusters": ["dialga"], "versions": {"Markov/hidden-1.0.5": ["dialga"], "Markov/hidden-1.0.10": ["dialga"]}}, "science": {"clusters": ["dialga", "pikachu"], "versions": {"science/5.3.0": ["dialga", "pikachu"], "science/7.2.0": ["dialga", "pikachu"]}}, "llm": {"clusters": ["pikachu"], "versions": {"llm/20230627": ["pikachu"]}}}, "time_generated": "Thu, 31 Aug 2023 at 14:00:22 CEST"}
+{"clusters": ["dialga", "pikachu"], "software": {"cfd": {"clusters": ["dialga", "pikachu"], "versions": {"cfd/1.0": {"clusters": ["dialga", "pikachu"]}, "cfd/2.0": {"clusters": ["dialga", "pikachu"]}, "cfd/24": {"clusters": ["dialga", "pikachu"]}, "cfd/5.0": {"clusters": ["dialga", "pikachu"]}, "cfd/2.0afqsdf": {"clusters": ["dialga", "pikachu"]}, "cfd/3.0": {"clusters": ["pikachu"]}}}, "Markov": {"clusters": ["dialga"], "versions": {"Markov/hidden-1.0.5": {"clusters": ["dialga"]}, "Markov/hidden-1.0.10": {"clusters": ["dialga"]}}}, "science": {"clusters": ["dialga", "pikachu"], "versions": {"science/5.3.0": {"clusters": ["dialga", "pikachu"]}, "science/7.2.0": {"clusters": ["dialga", "pikachu"]}}}, "llm": {"clusters": ["pikachu"], "versions": {"llm/20230627": {"clusters": ["pikachu"]}}}}, "time_generated": "Thu, 31 Aug 2023 at 14:00:22 CEST"}
diff --git a/scripts/available_software/tests/test_data.py b/scripts/available_software/tests/test_data.py
@@ -20,7 +20,7 @@ def setup_class(cls):
     # ---------------------------
 
     def test_data_ugent(self):
-        sol = modules_ugent()
+        sol = modules_ugent()[0]
         assert len(sol) == 2
         assert len(sol["dialga"]) == 13
         assert len(sol["pikachu"]) == 15

diff --git a/scripts/available_software/tests/test_json.py b/scripts/available_software/tests/test_json.py
@@ -1,7 +1,8 @@
 from available_software import (generate_json_overview_data,
                                 generate_json_overview,
                                 modules_ugent,
-                                generate_json_detailed)
+                                generate_json_detailed,
+                                generate_json_detailed_data)
 import os
 import json
 
@@ -32,7 +33,7 @@ def teardown_class(cls):
     # ---------------------------
 
     def test_json_generate_simple(self):
-        modules = modules_ugent()
+        modules = modules_ugent()[0]
         json_data = generate_json_overview_data(modules)
         assert len(json_data.keys()) == 3
         assert list(json_data["clusters"]) == ["dialga", "pikachu"]
@@ -44,7 +45,7 @@ def test_json_generate_simple(self):
             }
 
     def test_json_simple(self):
-        modules = modules_ugent()
+        modules = modules_ugent()[0]
         json_path = generate_json_overview(modules, ".")
         with open(json_path) as json_data:
             data_generated = json.load(json_data)
@@ -57,8 +58,9 @@ def test_json_simple(self):
         assert data_generated["clusters"] == data_solution["clusters"]
 
     def test_json_detail_simple(self):
-        modules = modules_ugent()
-        json_path = generate_json_detailed(modules, ".")
+        modules = modules_ugent()[0]
+        json_data = generate_json_detailed_data(modules)
+        json_path = generate_json_detailed(json_data, ".")
         assert os.path.exists("json_data_detail.json")
 
         with open(json_path) as json_data:

diff --git a/scripts/available_software/tests/test_md.py b/scripts/available_software/tests/test_md.py
@@ -28,15 +28,15 @@ def teardown_class(cls):
     # ---------------------------
 
     def test_table_generate_simple(self):
-        simple_data = get_unique_software_names(modules_ugent())
+        simple_data = get_unique_software_names(modules_ugent()[0])
         table_data, col, row = generate_table_data(simple_data)
         assert col == 3
         assert row == 5
         assert len(table_data) == 15
 
     def test_md_simple(self):
         md_file = MdUtils(file_name='test_simple', title='Overview Modules')
-        simple_data = get_unique_software_names(modules_ugent())
+        simple_data = get_unique_software_names(modules_ugent()[0])
         generate_module_table(simple_data, md_file)
         md_file.create_md_file()
         assert os.path.exists("test_simple.md")