Feature: Improve Kernel Decorator (#69)

This pull request addresses issue #68 by changing the implementation of the kernel decorator so that the decorated function runs multiple times, depending on the number of blocks and the number of threads per block. --------- Co-authored-by: EmilyBourne <[email protected]> Co-authored-by: bauom <[email protected]>
pyccel · Nov 8, 2024 · de362d3 · de362d3
1 parent f6ac853
commit de362d3
Show file tree

Hide file tree

Showing 8 changed files with 181 additions and 3 deletions.
diff --git a/docs/cuda.md b/docs/cuda.md
@@ -43,4 +43,22 @@ def my_kernel():
 my_kernel[1, 1]()
 
 ```
+## Cuda Device Methods
+The following methods are available for CUDA devices in Pyccel and can be called from either kernels or device functions. Currently, the only import syntax supported is:
+```python
+from pyccel import cuda
+```
+Using an alias for the import is not supported, so this is not allowed:
+
+```python
+from pyccel import cuda as py_cu
+```
+
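+| Method | Description |
+|--------|-------------|
+| `threadIdx(dim)` | Returns the index of the thread in the specified dimension of its block. |
+| `blockIdx(dim)` | Returns the index of the block in the specified dimension. |
+| `blockDim(dim)` | Returns the size of the block in the specified dimension. |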
+
+
 
diff --git a/pyccel/cuda/cuda_thread_indexing.py b/pyccel/cuda/cuda_thread_indexing.py
@@ -0,0 +1,88 @@
+#------------------------------------------------------------------------------------------#
+# This file is part of Pyccel which is released under MIT License. See the LICENSE file or #
+# go to https://github.com/pyccel/pyccel/blob/master/LICENSE for full license details.     #
+#------------------------------------------------------------------------------------------#
+"""
+This module contains all the CUDA thread indexing methods
+"""
+class CudaThreadIndexing:
+    """
+    Emulate the CUDA thread indexing values for one thread.
+
+    Store the block index, thread index and block size describing a single
+    emulated CUDA thread so that a kernel executed as a plain Python
+    function can query them. Only one value is stored for each quantity,
+    so the same value is returned whichever dimension is requested.
+
+    Parameters
+    ----------
+    block_idx : int
+        The index of the block in the x-dimension.
+
+    thread_idx : int
+        The index of the thread in the x-dimension.
+
+    block_dim : int, default=0
+        The number of threads per block in the x-dimension. The default
+        keeps the value which `blockDim` returned before this argument
+        existed, callers should pass the real block size.
+    """
+    def __init__(self, block_idx, thread_idx, block_dim = 0):
+        self._block_idx = block_idx
+        self._thread_idx = thread_idx
+        self._block_dim = block_dim
+
+    def threadIdx(self, dim):
+        """
+        Get the thread index.
+
+        Get the index of the emulated thread inside its block. The stored
+        x-dimension index is returned for every value of `dim`.
+
+        Parameters
+        ----------
+        dim : int
+            The dimension of the indexing. It can be:
+            - 0 for the x-dimension
+            - 1 for the y-dimension
+            - 2 for the z-dimension
+
+        Returns
+        -------
+        int
+            The index of the thread in the specified dimension of its block.
+        """
+        return self._thread_idx
+
+    def blockIdx(self, dim):
+        """
+        Get the block index.
+
+        Get the index of the block containing the emulated thread. The
+        stored x-dimension index is returned for every value of `dim`.
+
+        Parameters
+        ----------
+        dim : int
+            The dimension of the indexing. It can be:
+            - 0 for the x-dimension
+            - 1 for the y-dimension
+            - 2 for the z-dimension
+
+        Returns
+        -------
+        int
+            The index of the block in the specified dimension.
+        """
+        return self._block_idx
+
+    def blockDim(self, dim):
+        """
+        Get the block dimension.
+
+        Get the number of threads in each block. The value passed to the
+        constructor is returned for every value of `dim`, it is 0 when no
+        block size was provided.
+
+        Parameters
+        ----------
+        dim : int
+            The dimension of the indexing. It can be:
+            - 0 for the x-dimension
+            - 1 for the y-dimension
+            - 2 for the z-dimension
+
+        Returns
+        -------
+        int
+            The size of the block in the specified dimension.
+        """
+        return self._block_dim
+
diff --git a/pyccel/decorators.py b/pyccel/decorators.py
@@ -6,6 +6,7 @@
 """
 This module contains all the provided decorator methods.
 """
+from pyccel.cuda.cuda_thread_indexing import CudaThreadIndexing
 import warnings
 
 __all__ = (
@@ -139,7 +140,24 @@ class KernelAccessor:
         def __init__(self, f):
+            """
+            Store the decorated function.
+
+            Store the decorated function so that it can be called once the
+            accessor has been indexed.
+
+            Parameters
+            ----------
+            f : function
+                The function to which the decorator is applied.
+            """
             self._f = f
         def __getitem__(self, args):
-            return self._f
+            """
+            Build the callable which emulates a kernel launch.
+
+            Unpack the launch configuration and return a function which
+            calls the decorated function once for every block and thread.
+
+            Parameters
+            ----------
+            args : tuple
+                The launch configuration: the number of blocks followed by
+                the number of threads per block.
+
+            Returns
+            -------
+            function
+                A function forwarding its arguments to the decorated
+                function for each (block, thread) pair.
+            """
+            num_blocks, num_threads = args
+            def internal_loop(*call_args, **call_kwargs):
+                """
+                Run the kernel once per emulated thread.
+
+                Call the decorated function for every (block, thread) pair
+                of the launch configuration. Before each call the current
+                indices are exposed through the name `cuda` in the globals
+                of the decorated function.
+
+                Parameters
+                ----------
+                *call_args : tuple
+                    The positional arguments forwarded to the kernel.
+
+                **call_kwargs : dict
+                    The keyword arguments forwarded to the kernel.
+                """
+                kernel_func = self._f
+                kernel_globals = kernel_func.__globals__
+                for block in range(num_blocks):
+                    for thread in range(num_threads):
+                        indexing = CudaThreadIndexing(block, thread)
+                        # The check is repeated for every thread: once the
+                        # name has been inserted below, later iterations
+                        # take the patching branch.
+                        if 'cuda' not in kernel_globals:
+                            kernel_globals['cuda'] = indexing
+                        else:
+                            cuda_obj = kernel_globals['cuda']
+                            cuda_obj.threadIdx = indexing.threadIdx
+                            cuda_obj.blockIdx = indexing.blockIdx
+                            cuda_obj.blockDim = indexing.blockDim
+                        kernel_func(*call_args, **call_kwargs)
+            return internal_loop
 
     return KernelAccessor(f)
 

diff --git a/tests/pyccel/scripts/kernel/block_idx.py b/tests/pyccel/scripts/kernel/block_idx.py
@@ -0,0 +1,15 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from pyccel.decorators import kernel
+from pyccel            import cuda
+
+# Kernel: prints the block index in dimension 0 each time it is run.
+@kernel
+def print_block():
+    print(cuda.blockIdx(0)) # pylint: disable=no-member
+
+# Launches the kernel with the configuration [5,5], which the kernel
+# decorator unpacks as (number of blocks, number of threads), and then
+# calls cuda.synchronize().
+def f():
+    print_block[5,5]()
+    cuda.synchronize()
+
+# Only launch the kernel when the file is executed as a script.
+if __name__ == '__main__':
+    f()
+
diff --git a/tests/pyccel/scripts/kernel/device_test.py b/tests/pyccel/scripts/kernel/device_test.py
@@ -1,6 +1,6 @@
 # pylint: disable=missing-function-docstring, missing-module-docstring
 from pyccel.decorators import device, kernel
-from pyccel import cuda
+from pyccel            import cuda
 
 @device
 def device_call():

diff --git a/tests/pyccel/scripts/kernel/hello_kernel.py b/tests/pyccel/scripts/kernel/hello_kernel.py
@@ -1,6 +1,6 @@
 # pylint: disable=missing-function-docstring, missing-module-docstring
 from pyccel.decorators import kernel
-from pyccel import cuda
+from pyccel            import cuda
 
 @kernel
 def say_hello(its_morning : bool):

diff --git a/tests/pyccel/scripts/kernel/thread_idx.py b/tests/pyccel/scripts/kernel/thread_idx.py
@@ -0,0 +1,15 @@
+# pylint: disable=missing-function-docstring, missing-module-docstring
+from pyccel.decorators import kernel
+from pyccel            import cuda
+
+# Kernel: prints the thread index in dimension 0 each time it is run.
+# NOTE(review): the name says "block" although the thread index is printed,
+# presumably copied from block_idx.py; confirm before renaming.
+@kernel
+def print_block():
+    print(cuda.threadIdx(0)) # pylint: disable=no-member
+
+# Launches the kernel with the configuration [5,5], which the kernel
+# decorator unpacks as (number of blocks, number of threads), and then
+# calls cuda.synchronize().
+def f():
+    print_block[5,5]()
+    cuda.synchronize()
+
+# Only launch the kernel when the file is executed as a script.
+if __name__ == '__main__':
+    f()
+
diff --git a/tests/pyccel/test_pyccel.py b/tests/pyccel/test_pyccel.py
@@ -730,6 +730,8 @@ def test_elemental(language):
     pyccel_test("scripts/decorators_elemental.py", language = language)
 
 #------------------------------------------------------------------------------
+
+
 @pytest.mark.cuda
 def test_hello_kernel(gpu_available):
     types = str
@@ -743,7 +745,29 @@ def test_kernel_collision(gpu_available):
             language="cuda", execute_code=gpu_available)
 
 #------------------------------------------------------------------------------
+def test_block_idx():
+    """
+    Check the block indices printed by the block_idx script.
+
+    Collect the output of scripts/kernel/block_idx.py through
+    get_python_output and check that each of the values 0 to 4 is
+    printed exactly 5 times.
+    """
+    script = get_abs_path("scripts/kernel/block_idx.py")
+    script_dir = get_abs_path(os.path.dirname(script))
+
+    output = get_python_output(script, script_dir)
+
+    printed = [int(token) for token in output.split()]
+
+    for block in range(5):
+        assert printed.count(block) == 5
+#------------------------------------------------------------------------------
+def test_thread_idx():
+    """
+    Check the thread indices printed by the thread_idx script.
+
+    Collect the output of scripts/kernel/thread_idx.py through
+    get_python_output and check that each of the values 0 to 4 is
+    printed exactly 5 times.
+    """
+    script = get_abs_path("scripts/kernel/thread_idx.py")
+    script_dir = get_abs_path(os.path.dirname(script))
+
+    output = get_python_output(script, script_dir)
 
+    printed = [int(token) for token in output.split()]
+
+    for thread in range(5):
+        assert printed.count(thread) == 5
+
+#------------------------------------------------------------------------------
 @pytest.mark.cuda
 def test_device_call(gpu_available):
     types = str