From de362d368626cf67b3388e7443b8151fa1877c62 Mon Sep 17 00:00:00 2001 From: Said Mazouz <95222894+smazouz42@users.noreply.github.com> Date: Fri, 26 Jul 2024 17:42:56 +0100 Subject: [PATCH] Feature: Improve Kernel Decorator (#69) This pull request addresses issue #68 by changing the implementation of the kernel decorator so that the function runs multiple times, depending on the number of blocks and the number of threads in each block --------- Co-authored-by: EmilyBourne Co-authored-by: bauom <40796259+bauom@users.noreply.github.com> --- docs/cuda.md | 18 +++++ pyccel/cuda/cuda_thread_indexing.py | 88 +++++++++++++++++++++ pyccel/decorators.py | 20 ++++- tests/pyccel/scripts/kernel/block_idx.py | 15 ++++ tests/pyccel/scripts/kernel/device_test.py | 2 +- tests/pyccel/scripts/kernel/hello_kernel.py | 2 +- tests/pyccel/scripts/kernel/thread_idx.py | 15 ++++ tests/pyccel/test_pyccel.py | 24 ++++++ 8 files changed, 181 insertions(+), 3 deletions(-) create mode 100644 pyccel/cuda/cuda_thread_indexing.py create mode 100644 tests/pyccel/scripts/kernel/block_idx.py create mode 100644 tests/pyccel/scripts/kernel/thread_idx.py diff --git a/docs/cuda.md b/docs/cuda.md index 7643a4ac02..7fb9fbcab5 100644 --- a/docs/cuda.md +++ b/docs/cuda.md @@ -43,4 +43,22 @@ def my_kernel(): my_kernel[1, 1]() ``` +## Cuda Device Methods +The following methods are available for CUDA devices in Pyccel and can be called from either kernels or device functions. 
Currently, the only import syntax supported is: +```python +from pyccel import cuda +``` +Using an alias for the import is not supported, so this is not allowed: + +```python +from pyccel import cuda as py_cu +``` + +| Method | Description | +|--------|-------------| + + + + + diff --git a/pyccel/cuda/cuda_thread_indexing.py b/pyccel/cuda/cuda_thread_indexing.py new file mode 100644 index 0000000000..7d8cce3fa5 --- /dev/null +++ b/pyccel/cuda/cuda_thread_indexing.py @@ -0,0 +1,88 @@ +#------------------------------------------------------------------------------------------# +# This file is part of Pyccel which is released under MIT License. See the LICENSE file or # +# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details. # +#------------------------------------------------------------------------------------------# +""" +This module contains all the CUDA thread indexing methods +""" +class CudaThreadIndexing: + """ + Class representing the CUDA thread indexing. + + Class representing the CUDA thread indexing. + + Parameters + ---------- + block_idx : int + The index of the block in the x-dimension. + + thread_idx : int + The index of the thread in the x-dimension. + """ + def __init__(self, block_idx, thread_idx): + self._block_idx = block_idx + self._thread_idx = thread_idx + + def threadIdx(self, dim): + """ + Get the thread index. + + Get the thread index. + + Parameters + ---------- + dim : int + The dimension of the indexing. It can be: + - 0 for the x-dimension + - 1 for the y-dimension + - 2 for the z-dimension + + Returns + ------- + int + The index of the thread in the specified dimension of its block. + """ + return self._thread_idx + + def blockIdx(self, dim): + """ + Get the block index. + + Get the block index. + + Parameters + ---------- + dim : int + The dimension of the indexing. 
It can be: + - 0 for the x-dimension + - 1 for the y-dimension + - 2 for the z-dimension + + Returns + ------- + int + The index of the block in the specified dimension. + """ + return self._block_idx + + def blockDim(self, dim): + """ + Get the block dimension. + + Get the block dimension. + + Parameters + ---------- + dim : int + The dimension of the indexing. It can be: + - 0 for the x-dimension + - 1 for the y-dimension + - 2 for the z-dimension + + Returns + ------- + int + The size of the block in the specified dimension. + """ + return 0 + diff --git a/pyccel/decorators.py b/pyccel/decorators.py index ff413fe443..1af59fcd18 100644 --- a/pyccel/decorators.py +++ b/pyccel/decorators.py @@ -6,6 +6,7 @@ """ This module contains all the provided decorator methods. """ +from pyccel.cuda.cuda_thread_indexing import CudaThreadIndexing import warnings __all__ = ( @@ -139,7 +140,24 @@ class KernelAccessor: def __init__(self, f): self._f = f def __getitem__(self, args): - return self._f + num_blocks, num_threads = args + def internal_loop(*args, **kwargs): + """ + The internal loop for kernel execution. + + The internal loop for kernel execution. 
+ """ + for b in range(num_blocks): + for t in range(num_threads): + cu = CudaThreadIndexing(b, t) + if 'cuda' in self._f.__globals__: + self._f.__globals__['cuda'].threadIdx = cu.threadIdx + self._f.__globals__['cuda'].blockIdx = cu.blockIdx + self._f.__globals__['cuda'].blockDim = cu.blockDim + else: + self._f.__globals__['cuda'] = cu + self._f(*args, **kwargs) + return internal_loop return KernelAccessor(f) diff --git a/tests/pyccel/scripts/kernel/block_idx.py b/tests/pyccel/scripts/kernel/block_idx.py new file mode 100644 index 0000000000..f0240601b7 --- /dev/null +++ b/tests/pyccel/scripts/kernel/block_idx.py @@ -0,0 +1,15 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +from pyccel.decorators import kernel +from pyccel import cuda + +@kernel +def print_block(): + print(cuda.blockIdx(0)) # pylint: disable=no-member + +def f(): + print_block[5,5]() + cuda.synchronize() + +if __name__ == '__main__': + f() + diff --git a/tests/pyccel/scripts/kernel/device_test.py b/tests/pyccel/scripts/kernel/device_test.py index a4762a6242..3796a4c2fd 100644 --- a/tests/pyccel/scripts/kernel/device_test.py +++ b/tests/pyccel/scripts/kernel/device_test.py @@ -1,6 +1,6 @@ # pylint: disable=missing-function-docstring, missing-module-docstring from pyccel.decorators import device, kernel -from pyccel import cuda +from pyccel import cuda @device def device_call(): diff --git a/tests/pyccel/scripts/kernel/hello_kernel.py b/tests/pyccel/scripts/kernel/hello_kernel.py index b6901b25a1..a11316d2a4 100644 --- a/tests/pyccel/scripts/kernel/hello_kernel.py +++ b/tests/pyccel/scripts/kernel/hello_kernel.py @@ -1,6 +1,6 @@ # pylint: disable=missing-function-docstring, missing-module-docstring from pyccel.decorators import kernel -from pyccel import cuda +from pyccel import cuda @kernel def say_hello(its_morning : bool): diff --git a/tests/pyccel/scripts/kernel/thread_idx.py b/tests/pyccel/scripts/kernel/thread_idx.py new file mode 100644 index 0000000000..0e3a505f70 
--- /dev/null +++ b/tests/pyccel/scripts/kernel/thread_idx.py @@ -0,0 +1,15 @@ +# pylint: disable=missing-function-docstring, missing-module-docstring +from pyccel.decorators import kernel +from pyccel import cuda + +@kernel +def print_block(): + print(cuda.threadIdx(0)) # pylint: disable=no-member + +def f(): + print_block[5,5]() + cuda.synchronize() + +if __name__ == '__main__': + f() + diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py index 24243c6abf..d0c648403c 100644 --- a/tests/pyccel/test_pyccel.py +++ b/tests/pyccel/test_pyccel.py @@ -730,6 +730,8 @@ def test_elemental(language): pyccel_test("scripts/decorators_elemental.py", language = language) #------------------------------------------------------------------------------ + + @pytest.mark.cuda def test_hello_kernel(gpu_available): types = str @@ -743,7 +745,29 @@ def test_kernel_collision(gpu_available): language="cuda", execute_code=gpu_available) #------------------------------------------------------------------------------ +def test_block_idx(): + test_file = get_abs_path("scripts/kernel/block_idx.py") + cwd = get_abs_path(os.path.dirname(test_file)) + + pyth_out = get_python_output(test_file, cwd) + + python_block_idx = list(map(int, pyth_out.split())) + + for i in range(5): + assert python_block_idx.count(i) == 5 +#------------------------------------------------------------------------------ +def test_thread_idx(): + test_file = get_abs_path("scripts/kernel/thread_idx.py") + cwd = get_abs_path(os.path.dirname(test_file)) + + pyth_out = get_python_output(test_file, cwd) + python_idx = list(map(int, pyth_out.split())) + + for i in range(5): + assert python_idx.count(i) == 5 + +#------------------------------------------------------------------------------ @pytest.mark.cuda def test_device_call(gpu_available): types = str