Skip to content

Commit

Permalink
Merge branch 'nv-legate:branch-24.01' into _Enable_UCX_
Browse files Browse the repository at this point in the history
  • Loading branch information
mag1cp1n authored Nov 16, 2023
2 parents 98ec6c0 + 434966a commit 40587e5
Show file tree
Hide file tree
Showing 9 changed files with 143 additions and 22 deletions.
97 changes: 97 additions & 0 deletions .github/ISSUE_TEMPLATE/bug_report.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
name: Bug report
description: Submit a bug report
title: "[BUG] "
labels: TRIAGE
body:
- type: markdown
attributes:
value: "# Bug report"
- type: markdown
attributes:
value: Thank you for reporting a bug and helping us improve Legate!
- type: markdown
attributes:
value: >
Please fill out all of the required information.
- type: markdown
attributes:
value: |
---
## Environment information
- type: textarea
id: legate_issue
attributes:
label: Software versions
description: >-
Run `legate-issue` and paste the output here.
placeholder: |
Python : 3.10.11 | packaged by conda-forge | (main, May 10 2023, 18:58:44) [GCC 11.3.0]
Platform : Linux-5.14.0-1042-oem-x86_64-with-glibc2.31
Legion : v23.11.00.dev-16-g2499f878
Legate : 23.11.00.dev+17.gb7b50313
Cunumeric : (ImportError: cannot import name 'LogicalArray' from 'legate.core')
Numpy : 1.24.4
Scipy : 1.10.1
Numba : (not installed)
CTK package : cuda-version-11.8-h70ddcb2_2 (conda-forge)
GPU Driver : 515.65.01
GPU Devices :
GPU 0: Quadro RTX 8000
GPU 1: Quadro RTX 8000
validations:
required: true
- type: input
id: jupyter
attributes:
label: Jupyter notebook / Jupyter Lab version
description: >-
Please supply this if the issue you are reporting is related to Jupyter
notebook or Jupyter Lab.
validations:
required: false
- type: markdown
attributes:
value: |
## Issue details
- type: textarea
id: expected-behavior
attributes:
label: Expected behavior
description: What did you expect to happen?
validations:
required: true
- type: textarea
id: observed-behavior
attributes:
label: Observed behavior
description: What actually happened?
validations:
required: true
- type: markdown
attributes:
value: |
## Directions to reproduce
- type: textarea
id: example
attributes:
label: Example code or instructions
description: >
Please provide detailed instructions to reproduce the issue. Ideally this includes a
[complete, minimal, self-contained code example](https://stackoverflow.com/help/minimal-reproducible-example)
given here or as a link to code in another repository.
render: Python
validations:
required: true
- type: markdown
attributes:
value: |
## Additional information
- type: textarea
id: traceback-console
attributes:
label: Stack traceback or browser console output
description: >
Add any error messages or logs that might be helpful in reproducing and
identifying the bug, for example a Python stack traceback.
validations:
required: false
4 changes: 2 additions & 2 deletions BUILD.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,13 @@ Once you have this environment file, you can install the required packages by
creating a new conda environment:

```shell
conda env create -n legate -f <env-file>.yaml
mamba env create -n legate -f <env-file>.yaml
```

or by updating an existing environment:

```shell
conda env update -f <env-file>.yaml
mamba env update -f <env-file>.yaml
```

## Building through install.py
Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -218,15 +218,15 @@ as though they are running on a single processor.
Legate Core is available [on conda](https://anaconda.org/legate/legate-core):

```
conda install -c nvidia -c conda-forge -c legate legate-core
mamba install -c nvidia -c conda-forge -c legate legate-core
```

Only linux-64 packages are available at the moment.

The default package contains GPU support, and is compatible with CUDA >= 12.0
(CUDA driver version >= r520), and Volta or later GPU architectures. There are
also CPU-only packages available, and will be automatically selected by `conda`
when installing on a machine without GPUs.
also CPU-only packages available, which will be automatically selected when
installing on a machine without GPUs.

See [BUILD.md](BUILD.md) for instructions on building Legate Core from source.

Expand Down
1 change: 1 addition & 0 deletions legate/core/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -1233,6 +1233,7 @@ def project(self, dim: int, index: int) -> Store:
If ``dim`` is not a valid dimension name or ``index`` is
out of bounds
"""
index = int(index)
dim = dim + self.ndim if dim < 0 else dim
if dim < 0 or dim >= self.ndim:
raise ValueError(
Expand Down
4 changes: 0 additions & 4 deletions legate/driver/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
from __future__ import annotations

import os
from pathlib import Path
from typing import TYPE_CHECKING

from .. import install_info
Expand Down Expand Up @@ -174,9 +173,6 @@ def _compute_env(self) -> tuple[EnvDict, set[str]]:
str(system.legion_paths.legion_jupyter_module)
)

# Make sure the base directory for this file is in the python path
extra_python_paths.append(str(Path(__file__).parents[1]))

env["PYTHONPATH"] = os.pathsep.join(extra_python_paths)

# If using NCCL prefer parallel launch mode over cooperative groups,
Expand Down
3 changes: 1 addition & 2 deletions legate/tester/stages/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from __future__ import annotations

import sys
from typing import Dict, Type

from .. import FeatureType
from .test_stage import TestStage
Expand All @@ -33,7 +32,7 @@
raise RuntimeError(f"unsupported platform: {sys.platform}")

#: All the available test stages that can be selected
STAGES: Dict[FeatureType, Type[TestStage]] = {
STAGES: dict[FeatureType, type[TestStage]] = {
"cpus": CPU,
"cuda": GPU,
"openmp": OMP,
Expand Down
9 changes: 8 additions & 1 deletion legate/tester/stages/_osx/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@
from typing import TYPE_CHECKING

from ..test_stage import TestStage
from ..util import UNPIN_ENV, Shard
from ..util import UNPIN_ENV

if TYPE_CHECKING:
from ....util.types import ArgList, EnvDict
from ... import FeatureType
from ...config import Config
from ...test_system import TestSystem
from ..util import Shard, StageSpec


class GPU(TestStage):
Expand Down Expand Up @@ -52,3 +53,9 @@ def env(self, config: Config, system: TestSystem) -> EnvDict:

def delay(self, shard: Shard, config: Config, system: TestSystem) -> None:
time.sleep(config.gpu_delay / 1000)

def shard_args(self, shard: Shard, config: Config) -> ArgList:
raise NotImplementedError()

def compute_spec(self, config: Config, system: TestSystem) -> StageSpec:
raise NotImplementedError()
6 changes: 3 additions & 3 deletions scripts/util/build-caching.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ if [[ -n "$(which sccache)" ]]; then
CMAKE_CUDA_COMPILER_LAUNCHER="${CMAKE_CUDA_COMPILER_LAUNCHER:-$(which sccache)}";
elif [[ -n "$(which ccache)" ]]; then
# Use ccache if installed
CMAKE_C_COMPILER_LAUNCHER="${CMAKE_C_COMPILER_LAUNCHER:-$(which cache)}";
CMAKE_CXX_COMPILER_LAUNCHER="${CMAKE_CXX_COMPILER_LAUNCHER:-$(which cache)}";
CMAKE_CUDA_COMPILER_LAUNCHER="${CMAKE_CUDA_COMPILER_LAUNCHER:-$(which cache)}";
CMAKE_C_COMPILER_LAUNCHER="${CMAKE_C_COMPILER_LAUNCHER:-$(which ccache)}";
CMAKE_CXX_COMPILER_LAUNCHER="${CMAKE_CXX_COMPILER_LAUNCHER:-$(which ccache)}";
CMAKE_CUDA_COMPILER_LAUNCHER="${CMAKE_CUDA_COMPILER_LAUNCHER:-$(which ccache)}";
fi

export CMAKE_C_COMPILER_LAUNCHER="$CMAKE_C_COMPILER_LAUNCHER"
Expand Down
35 changes: 28 additions & 7 deletions tests/unit/legate/driver/test_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -1046,10 +1046,13 @@ def test_default_single_rank(self, genobjs: GenObjs) -> None:

assert result == ()

def test_utility_1_single_rank(self, genobjs: GenObjs) -> None:
def test_utility_1_single_rank_no_ucx(self, genobjs: GenObjs) -> None:
config, system, launcher = genobjs(["--utility", "1"])

networks_orig = list(install_info.networks)
install_info.networks = [x for x in networks_orig if x != "ucx"]
result = m.cmd_bgwork(config, system, launcher)
install_info.networks[:] = networks_orig[:]

assert result == ()

Expand All @@ -1064,12 +1067,15 @@ def test_utility_1_single_rank_and_ucx(self, genobjs: GenObjs) -> None:
assert result == ()

@pytest.mark.parametrize("value", ("2", "3", "10"))
def test_utiltity_n_single_rank(
def test_utiltity_n_single_rank_no_ucx(
self, genobjs: GenObjs, value: str
) -> None:
config, system, launcher = genobjs(["--utility", value])

networks_orig = list(install_info.networks)
install_info.networks = [x for x in networks_orig if x != "ucx"]
result = m.cmd_bgwork(config, system, launcher)
install_info.networks[:] = networks_orig[:]

assert result == ()

Expand All @@ -1088,14 +1094,17 @@ def test_utiltity_n_single_rank_and_ucx(

@pytest.mark.parametrize("rank_var", RANK_ENV_VARS)
@pytest.mark.parametrize("rank", ("0", "1", "2"))
def test_default_multi_rank(
def test_default_multi_rank_no_ucx(
self, genobjs: GenObjs, rank: str, rank_var: dict[str, str]
) -> None:
config, system, launcher = genobjs(
[], multi_rank=(2, 2), rank_env={rank_var: rank}
)

networks_orig = list(install_info.networks)
install_info.networks = [x for x in networks_orig if x != "ucx"]
result = m.cmd_bgwork(config, system, launcher)
install_info.networks[:] = networks_orig[:]

if "ucx" in install_info.networks:
assert result == ("-ll:bgwork", "2", "-ll:bgworkpin", "1")
Expand All @@ -1120,14 +1129,17 @@ def test_default_multi_rank_and_ucx(

@pytest.mark.parametrize("rank_var", RANK_ENV_VARS)
@pytest.mark.parametrize("rank", ("0", "1", "2"))
def test_utility_1_multi_rank_no_launcher(
def test_utility_1_multi_rank_no_launcher_no_ucx(
self, genobjs: GenObjs, rank: str, rank_var: dict[str, str]
) -> None:
config, system, launcher = genobjs(
["--utility", "1"], multi_rank=(2, 2), rank_env={rank_var: rank}
)

networks_orig = list(install_info.networks)
install_info.networks = [x for x in networks_orig if x != "ucx"]
result = m.cmd_bgwork(config, system, launcher)
install_info.networks[:] = networks_orig[:]

if "ucx" in install_info.networks:
assert result == ("-ll:bgwork", "2", "-ll:bgworkpin", "1")
Expand All @@ -1151,14 +1163,17 @@ def test_utility_1_multi_rank_no_launcher_and_ucx(
assert result == ("-ll:bgwork", "2", "-ll:bgworkpin", "1")

@pytest.mark.parametrize("launch", ("mpirun", "jsrun", "srun"))
def test_utility_1_multi_rank_with_launcher(
def test_utility_1_multi_rank_with_launcher_no_ucx(
self, genobjs: GenObjs, launch: str
) -> None:
config, system, launcher = genobjs(
["--utility", "1", "--launcher", launch], multi_rank=(2, 2)
)

networks_orig = list(install_info.networks)
install_info.networks = [x for x in networks_orig if x != "ucx"]
result = m.cmd_bgwork(config, system, launcher)
install_info.networks[:] = networks_orig[:]

if "ucx" in install_info.networks:
assert result == ("-ll:bgwork", "2", "-ll:bgworkpin", "1")
Expand All @@ -1183,14 +1198,17 @@ def test_utility_1_multi_rank_with_launcher_and_ucx(
@pytest.mark.parametrize("rank_var", RANK_ENV_VARS)
@pytest.mark.parametrize("rank", ("0", "1", "2"))
@pytest.mark.parametrize("value", ("2", "3", "10"))
def test_utility_n_multi_rank_no_launcher(
def test_utility_n_multi_rank_no_launcher_no_ucx(
self, genobjs: GenObjs, value: str, rank: str, rank_var: dict[str, str]
) -> None:
config, system, launcher = genobjs(
["--utility", value], multi_rank=(2, 2), rank_env={rank_var: rank}
)

networks_orig = list(install_info.networks)
install_info.networks = [x for x in networks_orig if x != "ucx"]
result = m.cmd_bgwork(config, system, launcher)
install_info.networks[:] = networks_orig[:]

if "ucx" in install_info.networks:
assert result == ("-ll:bgwork", value, "-ll:bgworkpin", "1")
Expand All @@ -1216,14 +1234,17 @@ def test_utility_n_multi_rank_no_launcher_and_ucx(

@pytest.mark.parametrize("launch", ("mpirun", "jsrun", "srun"))
@pytest.mark.parametrize("value", ("2", "3", "10"))
def test_utility_n_multi_rank_with_launcher(
def test_utility_n_multi_rank_with_launcher_no_ucx(
self, genobjs: GenObjs, value: str, launch: str
) -> None:
config, system, launcher = genobjs(
["--utility", value, "--launcher", launch], multi_rank=(2, 2)
)

networks_orig = list(install_info.networks)
install_info.networks = [x for x in networks_orig if x != "ucx"]
result = m.cmd_bgwork(config, system, launcher)
install_info.networks[:] = networks_orig[:]

if "ucx" in install_info.networks:
assert result == ("-ll:bgwork", value, "-ll:bgworkpin", "1")
Expand Down

0 comments on commit 40587e5

Please sign in to comment.