easybuilders · VRehnberg · Sep 18, 2024 · Sep 23, 2024 · Sep 23, 2024 · Sep 24, 2024
diff --git a/easybuild/easyconfigs/c/CUTLASS/CUTLASS-3.5.1-foss-2023a-CUDA-12.1.1.eb b/easybuild/easyconfigs/c/CUTLASS/CUTLASS-3.5.1-foss-2023a-CUDA-12.1.1.eb
@@ -0,0 +1,55 @@
+easyblock = 'CMakeMake'
+
+name = 'CUTLASS'
+version = '3.5.1'
+versionsuffix = '-CUDA-%(cudaver)s'  # follows the version of the CUDA dependency listed below
+
+homepage = 'https://github.com/NVIDIA/cutlass'
+description = """CUTLASS is a collection of CUDA C++ template
+abstractions for implementing high-performance matrix-matrix
+multiplication (GEMM) and related computations at all levels and scales
+within CUDA. It incorporates strategies for hierarchical decomposition
+and data movement similar to those used to implement cuBLAS and cuDNN.
+CUTLASS decomposes these "moving parts" into reusable, modular software
+components abstracted by C++ template classes. Primitives for different
+levels of a conceptual parallelization hierarchy can be specialized and
+tuned via custom tiling sizes, data types, and other algorithmic policy.
+The resulting flexibility simplifies their use as building blocks within
+custom kernels and applications."""
+
+toolchain = {'name': 'foss', 'version': '2023a'}
+
+github_account = 'NVIDIA'  # filled into the GITHUB_LOWER_SOURCE template below
+source_urls = [GITHUB_LOWER_SOURCE]
+patches = ['CUTLASS-3.5.1_install_tools.patch']  # installs tools/util/include when CUTLASS_ENABLE_TOOLS is set
+
+sources = ['v%(version)s.tar.gz']
+checksums = [  # one {file name: sha256} entry per source and patch
+    {'v%(version)s.tar.gz': '20b7247cda2d257cbf8ba59ba3ca40a9211c4da61a9c9913e32b33a2c5883a36'},
+    {'CUTLASS-3.5.1_install_tools.patch': '18fa5361b15848d98435b8b08bd921130718b963ca4ad47fa0db96fbe815e509'},
+]
+
+builddependencies = [
+    ('CMake', '3.26.3'),
+    ('Python', '3.11.3'),
+]
+
+dependencies = [
+    ('CUDA', '12.1.1', '', SYSTEM),
+    ('cuDNN', '8.9.2.26', versionsuffix, SYSTEM),  # backs -DCUTLASS_ENABLE_CUDNN=1 in the CMake options
+]
+
+_copts = [  # CMake options, joined into the single configopts string below
+    '-DCUTLASS_NVCC_ARCHS="%(cuda_cc_cmake)s"',  # build for the configured CUDA compute capabilities
+    '-DCUTLASS_ENABLE_CUBLAS=1',
+    '-DCUTLASS_ENABLE_CUDNN=1',
+    '-DCUTLASS_ENABLE_TOOLS=1',  # also gates the tools/util/include install added by the patch
+]
+configopts = ' '.join(_copts)
+
+sanity_check_paths = {
+    'files': ['include/cutlass/cutlass.h', 'lib/libcutlass.%s' % SHLIB_EXT],
+    'dirs': ['lib/cmake', 'tools/util/include'],  # tools/util/include comes from the install_tools patch
+}
+
+moduleclass = 'lib'
diff --git a/easybuild/easyconfigs/c/CUTLASS/CUTLASS-3.5.1_install_tools.patch b/easybuild/easyconfigs/c/CUTLASS/CUTLASS-3.5.1_install_tools.patch
@@ -0,0 +1,33 @@
+From fd04f818d16431ee8979728d4725f63ab7f31a05 Mon Sep 17 00:00:00 2001
+From: Viktor Rehnberg <[email protected]>
+Date: Tue, 8 Oct 2024 08:24:23 +0000
+Subject: [PATCH] Optionally install tools/util/include
+
+DeepSpeed EvoformerAttn expects this file, see
+https://github.com/microsoft/DeepSpeed/blob/v0.14.5/op_builder/evoformer_attn.py#L76
+---
+ CMakeLists.txt | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index 7419bdf5..1cee21ac 100755
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -689,6 +689,14 @@ install(
+   PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
+   )
+
++if(CUTLASS_ENABLE_TOOLS)
++  install(
++    DIRECTORY ${CUTLASS_TOOLS_UTIL_INCLUDE_DIR}/
++    DESTINATION ${CMAKE_INSTALL_PREFIX}/tools/util/include
++    )
++endif()
++
++
+ ################################################################################
+
+ # Doxygen is available. Generate documentation
+-- 
+2.39.3
+
diff --git a/easybuild/easyconfigs/d/DLPack/DLPack-0.8-GCC-12.3.0.eb b/easybuild/easyconfigs/d/DLPack/DLPack-0.8-GCC-12.3.0.eb
@@ -0,0 +1,26 @@
+easyblock = 'CMakeMake'
+
+name = 'DLPack'
+version = '0.8'
+
+homepage = 'https://dmlc.github.io/dlpack/latest/'
+description = """DLPack is a stable in-memory data structure for an ndarray
+system to interact with a variety of frameworks."""
+
+toolchain = {'name': 'GCC', 'version': '12.3.0'}
+
+github_account = 'dmlc'  # filled into the GITHUB_LOWER_SOURCE template below
+source_urls = [GITHUB_LOWER_SOURCE]
+sources = ['v%(version)s.tar.gz']  # tarball named after the upstream v<version> tag
+checksums = ['cf965c26a5430ba4cc53d61963f288edddcd77443aa4c85ce722aaf1e2f29513']
+
+builddependencies = [
+    ('CMake', '3.26.3'),
+]
+
+sanity_check_paths = {
+    'files': ['include/dlpack/dlpack.h', 'lib/cmake/dlpack/dlpackConfig.cmake'],  # header + CMake config
+    'dirs': [],
+}
+
+moduleclass = 'lib'
diff --git a/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.2_no-ninja-dep.patch b/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.2_no-ninja-dep.patch
@@ -0,0 +1,57 @@
+Patch away dependency on ninja python package by falling back to checking
+returncode of `ninja --version`.
+
+Author: Viktor Rehnberg (Chalmers University of Technology)
+
+
+diff --git a/deepspeed/env_report.py b/deepspeed/env_report.py
+index 85a2f9b2..8bb64626 100644
+--- a/deepspeed/env_report.py
++++ b/deepspeed/env_report.py
+@@ -62,7 +62,7 @@ def ninja_installed():
+     try:
+         import ninja  # noqa: F401 # type: ignore
+     except ImportError:
+-        return False
++        return (subprocess.run(["ninja", "--version"]).returncode == 0)
+     return True
+
+
+diff --git a/op_builder/builder.py b/op_builder/builder.py
+index 8dc825c7..970d18b2 100644
+--- a/op_builder/builder.py
++++ b/op_builder/builder.py
+@@ -487,7 +487,8 @@ class OpBuilder(ABC):
+         try:
+             import ninja  # noqa: F401 # type: ignore
+         except ImportError:
+-            raise RuntimeError(f"Unable to JIT load the {self.name} op due to ninja not being installed.")
++            if subprocess.run(["ninja", "--version"]).returncode != 0:
++                raise RuntimeError(f"Unable to JIT load the {self.name} op due to ninja not being installed.")
+
+         if isinstance(self, CUDAOpBuilder) and not self.is_rocm_pytorch():
+             self.build_for_cpu = not torch.cuda.is_available()
+diff --git a/op_builder/xpu/builder.py b/op_builder/xpu/builder.py
+index 81b15f19..cf0a1cc0 100644
+--- a/op_builder/xpu/builder.py
++++ b/op_builder/xpu/builder.py
+@@ -89,7 +89,8 @@ class SYCLOpBuilder(OpBuilder):
+         try:
+             import ninja  # noqa: F401
+         except ImportError:
+-            raise RuntimeError(f"Unable to JIT load the {self.name} op due to ninja not being installed.")
++            if subprocess.run(["ninja", "--version"]).returncode != 0:
++                raise RuntimeError(f"Unable to JIT load the {self.name} op due to ninja not being installed.")
+
+         self.jit_mode = True
+         from intel_extension_for_pytorch.xpu.cpp_extension import load
+diff --git a/requirements/requirements.txt b/requirements/requirements.txt
+index 80c9f9b3..eed77fa3 100755
+--- a/requirements/requirements.txt
++++ b/requirements/requirements.txt
+@@ -1,5 +1,4 @@
+ hjson
+-ninja
+ numpy
+ packaging>=20.0
+ psutil
diff --git a/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.5-foss-2023a-CUDA-12.1.1.eb b/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.5-foss-2023a-CUDA-12.1.1.eb
@@ -0,0 +1,92 @@
+easyblock = 'PythonBundle'
+
+name = 'DeepSpeed'
+version = '0.14.5'
+versionsuffix = '-CUDA-%(cudaver)s'
+
+homepage = "http://www.deepspeed.ai/"
+description = """
+DeepSpeed is a deep learning optimization library that makes distributed training easy, efficient, and effective.
+"""
+
+
+toolchain = {'name': 'foss', 'version': '2023a'}
+
+builddependencies = [
+    ('Ninja', '1.11.1'),
+]
+
+dependencies = [
+    ('Python', '3.11.3'),
+    ('CUDA', '12.1.1', '', SYSTEM),
+    ('NCCL', '2.18.3', versionsuffix),
+    ('CUTLASS', '3.5.1', versionsuffix),
+    ('PyTorch', '2.1.2', versionsuffix),
+    ('CuPy', '13.0.0', versionsuffix),
+    ('Triton', '2.1.0', versionsuffix),
+    ('accelerate', '0.33.0', versionsuffix),
+    ('PyTorch-bundle', '2.1.2', versionsuffix),  # torchvision dependency for mup
+    ('Seaborn', '0.13.2'),  # dependency for mup
+    ('DLPack', '0.8'),
+    ('py-cpuinfo', '9.0.0'),
+    ('pydantic', '2.5.3'),
+    ('tqdm', '4.66.1'),
+    ('libaio', '0.3.113'),  # for async_io (builddep only?)
+    ('Transformers', '4.39.3'),
+]
+
+use_pip = True
+
+github_account = 'microsoft'
+exts_list = [
+    ('hjson', '3.1.0', {
+        'checksums': ['55af475a27cf83a7969c808399d7bccdec8fb836a07ddbd574587593b9cdcf75'],
+    }),
+    ('nvidia-ml-py', '12.535.161', {
+        'checksums': ['2bcc31ff7a0ea291ed8d7fc39b149391a42c2fb1cb4256c935e692de488b4d17'],
+        'modulename': 'pynvml',
+    }),
+    ('mup', '1.0.0', {
+        'checksums': ['9639e3d19f90e754f985ed444542ed2f8a049f3c0488fcb6efe150f30922cf74'],
+    }),
+    (name, version, {
+        'ds_build_ops_to_skip': [
+            # DS_BUILD_<OPT>=0 http://www.deepspeed.ai/tutorials/advanced-install/#pre-install-deepspeed-ops
+            'SPARSE_ATTN',  # requires PyTorch<2.0
+            'FP_QUANTIZER',  # Untested triton version (2.1.0), only 2.3.0 and 2.3.1 are known to be compatible
+            'CUTLASS_OPS',  # requires dskernels
+            'RAGGED_DEVICE_OPS',  # requires dskernels
+        ],
+        'installopts': '--global-option="build_ext" --global-option="-j%(parallel)s"',
+        'patches': [
+            'DeepSpeed-0.14.2_no-ninja-dep.patch',
+            'DeepSpeed-0.14.5_pic-compile.patch',
+            'DeepSpeed-0.14.5_pdsh-env-vars.patch',
+            'DeepSpeed-0.14.5_use-eb-cutlass.patch',
+        ],
+        'runtest': (
+            'PATH="$PATH:$PWD/bin"'  # deepspeed cli used in a lot of tests
+            ' pytest tests/unit/'
+            ' -k "not TestTensorBoard'  # requires tensorboard
+            ' and not TestWandb'  # requires wandb
+            ' and not TestCometMonitor"'  # requires comet
+        ),
+        # Test suite not available on pypi
+        'source_urls': [GITHUB_SOURCE],
+        'sources': [{'download_filename': 'v%(version)s.tar.gz', 'filename': SOURCE_TAR_GZ}],
+        'testinstall': True,
+        'checksums': [
+            {'DeepSpeed-0.14.5.tar.gz': '9f5622715cbd89c7382bfecf7fb188419ad3f2af7764dc6de35917abc6390cce'},
+            {'DeepSpeed-0.14.2_no-ninja-dep.patch': '03ab528096387e7f18d2a5a6f5fc20ed86d1ca8f63f0e65f266f4dda30e11776'},
+            {'DeepSpeed-0.14.5_pic-compile.patch': '7d250f6bf57d006cab01a8763803b026f0d9029634557746c2a759893ab279b3'},
+            {'DeepSpeed-0.14.5_pdsh-env-vars.patch':
+             '02f053d8de17e4e607b223e836658d8223cb26a3a7d8c9135e67b69aaa7f83a9'},
+            {'DeepSpeed-0.14.5_use-eb-cutlass.patch':
+             '43675f7c84fd0b0cea1050a4419020b377de414fc7f83d69b8010ab368964d8d'},
+        ],
+    }),
+]
+
+sanity_pip_check = True
+
+moduleclass = 'ai'
diff --git a/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.5_pdsh-env-vars.patch b/easybuild/easyconfigs/d/DeepSpeed/DeepSpeed-0.14.5_pdsh-env-vars.patch
@@ -0,0 +1,36 @@
+From aba7406021d9ea81f7c99e5d0143ed6509acc9e9 Mon Sep 17 00:00:00 2001
+From: Viktor Rehnberg <[email protected]>
+Date: Wed, 25 Sep 2024 09:29:23 +0000
+Subject: [PATCH] Add software relevant environment variables
+
+The multinode runner launches processes with pdsh, if LD_LIBRARY_PATH is
+not included in these exports then the python .so file may not be found.
+Also including what seemed important and was added from loading DeepSpeed.
+(Couldn't add everything, then argumet list becomes too long).
+
+See
+ - https://github.com/easybuilders/easybuild-easyconfigs/pull/21438#issuecomment-2373540098
+for more details.
+---
+ deepspeed/launcher/runner.py | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/deepspeed/launcher/runner.py b/deepspeed/launcher/runner.py
+index 07d1713e..e9cd61b8 100755
+--- a/deepspeed/launcher/runner.py
++++ b/deepspeed/launcher/runner.py
+@@ -32,6 +32,11 @@ from deepspeed.accelerator import get_accelerator
+
+ DLTS_HOSTFILE = "/job/hostfile"
+ EXPORT_ENVS = ['MLFLOW', 'PYTHON', 'MV2', 'UCX']
++EXPORT_ENVS += [ # Extra based on what's added by module load DeepSpeed
++    'LD_LIBRARY_PATH', 'PATH', 'EB', 'TRITON', 'CUDA',  # important
++    'ACLOCAL', 'CMAKE', 'CPATH', 'LIBRARY_PATH', 'MPL', 'NCCL',
++    'PKG_CONFIG_PATH', 'XDG_DATA_DIRS',
++]
+ EXPORT_ENVS += NEBULA_EXPORT_ENVS
+ DEEPSPEED_ENVIRONMENT_NAME = os.getenv("DS_ENV_FILE", ".deepspeed_env")
+ DEEPSPEED_ENVIRONMENT_PATHS = [os.path.expanduser("~"), '.']
+-- 
+2.39.3
+