diff --git a/devito/data/allocators.py b/devito/data/allocators.py index d940a006bf..4ba29e141a 100644 --- a/devito/data/allocators.py +++ b/devito/data/allocators.py @@ -13,9 +13,10 @@ from devito.parameters import configuration from devito.tools import dtype_to_ctype + __all__ = ['ALLOC_FLAT', 'ALLOC_NUMA_LOCAL', 'ALLOC_NUMA_ANY', 'ALLOC_KNL_MCDRAM', 'ALLOC_KNL_DRAM', 'ALLOC_GUARD', - 'default_allocator'] + 'ALLOC_CUPY', 'default_allocator'] class MemoryAllocator(object): @@ -76,10 +77,19 @@ def alloc(self, shape, dtype): if c_pointer is None: raise RuntimeError("Unable to allocate %d elements in memory", str(size)) - # cast to 1D array of the specified size - ctype_1d = ctype * size - buf = ctypes.cast(c_pointer, ctypes.POINTER(ctype_1d)).contents - pointer = np.frombuffer(buf, dtype=dtype) + if c_pointer: + # cast to 1D array of the specified size + ctype_1d = ctype * size + buf = ctypes.cast(c_pointer, ctypes.POINTER(ctype_1d)).contents + pointer = np.frombuffer(buf, dtype=dtype) + + # During the execution in MPI, domain splitting can generate a situation where + # the allocated data size is zero, as we have observed with Sparse Functions. + # When this occurs, Cupy returns a pointer with a value of zero. This + # conditional statement was defined for this case. + else: + pointer = np.empty(shape=(0), dtype=dtype) + # pointer.reshape should not be used here because it may introduce a copy # From https://docs.scipy.org/doc/numpy/reference/generated/numpy.reshape.html: # It is not always possible to change the shape of an array without copying the @@ -317,6 +327,51 @@ def put_local(self): return self._node == 'local' +class CupyAllocator(MemoryAllocator): + + """ + Memory allocator based on Unified Memory concept. The allocation is made using Cupy. + """ + _mempool = None + + @classmethod + def initialize(cls): + + try: + import cupy as cp + cls.lib = cp + cls._initialize_shared_memory() + try: + from devito.mpi import MPI + cls.MPI = MPI + cls._set_device_for_mpi() + except ImportError: + cls.MPI = None + except ImportError: + cls.lib = None + + @classmethod + def _initialize_shared_memory(cls): + cls._mempool = cls.lib.cuda.MemoryPool(cls.lib.cuda.malloc_managed) + cls.lib.cuda.set_allocator(cls._mempool.malloc) + + @classmethod + def _set_device_for_mpi(cls): + if cls.MPI.Is_initialized(): + n_gpu = cls.lib.cuda.runtime.getDeviceCount() + rank_l = cls.MPI.COMM_WORLD.Split_type(cls.MPI.COMM_TYPE_SHARED).Get_rank() + cls.lib.cuda.runtime.setDevice(rank_l % n_gpu) + + def _alloc_C_libcall(self, size, ctype): + if not self.available(): + raise ImportError("Couldn't initialize cupy or MPI elements of alocation") + mem_obj = self.lib.zeros(size, dtype=ctype) + return mem_obj.data.ptr, (mem_obj,) + + def free(self, _): + self._mempool.free_all_blocks() + + class ExternalAllocator(MemoryAllocator): """ @@ -373,6 +428,7 @@ def alloc(self, shape, dtype): ALLOC_KNL_MCDRAM = NumaAllocator(1) ALLOC_NUMA_ANY = NumaAllocator('any') ALLOC_NUMA_LOCAL = NumaAllocator('local') +ALLOC_CUPY = CupyAllocator() custom_allocators = {} """User-defined allocators.""" diff --git a/tests/test_data.py b/tests/test_data.py index 0a03e7a4c6..712b36ffcb 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -2,12 +2,14 @@ import numpy as np from devito import (Grid, Function, TimeFunction, SparseTimeFunction, Dimension, # noqa - Eq, Operator, ALLOC_GUARD, ALLOC_FLAT, configuration, switchconfig) + Eq, Operator, ALLOC_GUARD, ALLOC_FLAT, ALLOC_CUPY, + configuration, switchconfig) from devito.data import LEFT, RIGHT, Decomposition, loc_data_idx, convert_index from devito.tools import as_tuple from devito.types import Scalar from devito.data.allocators import ExternalAllocator +from conftest import skipif class TestDataBasic(object): @@ -1473,6 +1475,55 @@ def test_gather_time_function(self): assert ans == np.array(None) +class TestAllocators(object): + + @skipif('nodevice') + def test_uma_allocation(self): + """ + Test Unified Memory allocation. + """ + import cupy as cp + + nt = 5 + grid = Grid(shape=(4, 4, 4)) + + u = Function(name='u', grid=grid, allocator=ALLOC_CUPY) + u.data[:] = 5 + address = u.data.ctypes.data + pointerAttr = cp.cuda.runtime.pointerGetAttributes(address) + assert pointerAttr.devicePointer == pointerAttr.hostPointer + + v = TimeFunction(name='v', grid=grid, save=nt, allocator=ALLOC_CUPY) + v.data[:] = 5 + address = v.data.ctypes.data + pointerAttr = cp.cuda.runtime.pointerGetAttributes(address) + assert pointerAttr.devicePointer == pointerAttr.hostPointer + + def test_external_allocator(self): + shape = (2, 2) + space_order = 0 + numpy_array = np.ones(shape, dtype=np.float32) + g = Grid(shape) + f = Function(name='f', space_order=space_order, grid=g, + allocator=ExternalAllocator(numpy_array), initializer=lambda x: None) + + # Ensure the two arrays have the same value + assert(np.array_equal(f.data, numpy_array)) + + # Ensure the original numpy array is unchanged + assert(np.array_equal(numpy_array, np.ones(shape, dtype=np.float32))) + + # Change the underlying numpy array + numpy_array[:] = 3. + # Ensure the function.data changes too + assert(np.array_equal(f.data, numpy_array)) + + # Change the function.data + f.data[:] = 4. + # Ensure the underlying numpy array changes too + assert(np.array_equal(f.data, numpy_array)) + + def test_scalar_arg_substitution(): """ Tests the relaxed (compared to other devito sympy subclasses) @@ -1519,31 +1570,6 @@ def test_numpy_c_contiguous(): assert(u._data_allocated.flags.c_contiguous) -def test_external_allocator(): - shape = (2, 2) - space_order = 0 - numpy_array = np.ones(shape, dtype=np.float32) - g = Grid(shape) - f = Function(name='f', space_order=space_order, grid=g, - allocator=ExternalAllocator(numpy_array), initializer=lambda x: None) - - # Ensure the two arrays have the same value - assert(np.array_equal(f.data, numpy_array)) - - # Ensure the original numpy array is unchanged - assert(np.array_equal(numpy_array, np.ones(shape, dtype=np.float32))) - - # Change the underlying numpy array - numpy_array[:] = 3. - # Ensure the function.data changes too - assert(np.array_equal(f.data, numpy_array)) - - # Change the function.data - f.data[:] = 4. - # Ensure the underlying numpy array changes too - assert(np.array_equal(f.data, numpy_array)) - - def test_boolean_masking_array(): """ Test truth value of array, raised in Python 3.9 (MFE for issue #1788)