diff --git a/include/tvm/runtime/device_api.h b/include/tvm/runtime/device_api.h
index b419212602c4..14b2b84b0d36 100644
--- a/include/tvm/runtime/device_api.h
+++ b/include/tvm/runtime/device_api.h
@@ -51,6 +51,7 @@ enum DeviceAttrKind : int {
   kDriverVersion = 12,
   kL2CacheSizeBytes = 13,
   kTotalGlobalMemory = 14,
+  kAvailableGlobalMemory = 15,
 };
 
 #ifdef TVM_KALLOC_ALIGNMENT
diff --git a/python/tvm/_ffi/runtime_ctypes.py b/python/tvm/_ffi/runtime_ctypes.py
index 099cbe972a4a..f148e26f3fcb 100644
--- a/python/tvm/_ffi/runtime_ctypes.py
+++ b/python/tvm/_ffi/runtime_ctypes.py
@@ -539,11 +539,25 @@ def total_global_memory(self):
         Returns
         -------
         total_global_memory : int or None
-            Return the global memory available on device in bytes.
+            Return the total size of global memory on device in bytes.
             Return None if the device does not support this feature.
         """
         return self._GetDeviceAttr(self.device_type, self.device_id, 14)
 
+    @property
+    def available_global_memory(self):
+        """Return the size of the available global memory.
+
+        Supported devices include CUDA.
+
+        Returns
+        -------
+        available_global_memory : int or None
+            Return the amount of unallocated global memory on device in bytes.
+            Return None if the device does not support this feature.
+        """
+        return self._GetDeviceAttr(self.device_type, self.device_id, 15)
+
     def texture_spatial_limit(self):
         """Returns limits for textures by spatial dimensions
diff --git a/src/runtime/cuda/cuda_device_api.cc b/src/runtime/cuda/cuda_device_api.cc
index ae63f9a4b32f..66357a191541 100644
--- a/src/runtime/cuda/cuda_device_api.cc
+++ b/src/runtime/cuda/cuda_device_api.cc
@@ -121,6 +121,12 @@ class CUDADeviceAPI final : public DeviceAPI {
         *rv = total_global_memory;
         return;
       }
+      case kAvailableGlobalMemory: {
+        size_t free_mem, total_mem;
+        CUDA_CALL(cudaMemGetInfo(&free_mem, &total_mem));
+        *rv = static_cast<int64_t>(free_mem);
+        return;
+      }
     }
     *rv = value;
   }
diff --git a/src/runtime/opencl/opencl_device_api.cc b/src/runtime/opencl/opencl_device_api.cc
index ab553052bbda..0057d0a10102 100644
--- a/src/runtime/opencl/opencl_device_api.cc
+++ b/src/runtime/opencl/opencl_device_api.cc
@@ -214,6 +214,12 @@ void OpenCLWorkspace::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv)
       *rv = static_cast<int64_t>(total_global_memory);
       return;
     }
+
+    case kAvailableGlobalMemory:
+      // Not currently implemented.  Based on
+      // https://stackoverflow.com/a/3568223, this may not be
+      // implementable at all through the OpenCL API.
+      break;
   }
 }
diff --git a/src/runtime/rocm/rocm_device_api.cc b/src/runtime/rocm/rocm_device_api.cc
index ffc8d5a80597..f3cc46f92723 100644
--- a/src/runtime/rocm/rocm_device_api.cc
+++ b/src/runtime/rocm/rocm_device_api.cc
@@ -136,6 +136,10 @@ class ROCMDeviceAPI final : public DeviceAPI {
         *rv = total_global_memory;
         return;
       }
+
+      case kAvailableGlobalMemory:
+        // Not currently implemented.
+        break;
     }
     *rv = value;
   }
diff --git a/src/runtime/vulkan/vulkan_device_api.cc b/src/runtime/vulkan/vulkan_device_api.cc
index 4b337dd52455..483668a2a75f 100644
--- a/src/runtime/vulkan/vulkan_device_api.cc
+++ b/src/runtime/vulkan/vulkan_device_api.cc
@@ -168,6 +168,11 @@ void VulkanDeviceAPI::GetAttr(Device dev, DeviceAttrKind kind, TVMRetValue* rv)
       *rv = device(index).compute_memory_size;
       return;
     }
+
+    case kAvailableGlobalMemory:
+      // Not currently implemented.  Will only be implementable for
+      // devices that support the VK_EXT_memory_budget extension.
+      break;
   }
 }
diff --git a/tests/python/all-platform-minimal-test/test_runtime_ndarray.py b/tests/python/all-platform-minimal-test/test_runtime_ndarray.py
index 197a2f88e3fa..38a1f32a10c3 100644
--- a/tests/python/all-platform-minimal-test/test_runtime_ndarray.py
+++ b/tests/python/all-platform-minimal-test/test_runtime_ndarray.py
@@ -16,33 +16,63 @@
 # under the License.
 """Basic runtime enablement test."""
 
-import tvm
-from tvm import te
+import math
+
+import pytest
 import numpy as np
+
+import tvm
 import tvm.testing
+from tvm import te
+
+dtype = tvm.testing.parameter("uint8", "int8", "uint16", "int16", "uint32", "int32", "float32")
+
+
+def test_nd_create(target, dev, dtype):
+    x = np.random.randint(0, 10, size=(3, 4))
+    x = np.array(x, dtype=dtype)
+    y = tvm.nd.array(x, device=dev)
+    z = y.copyto(dev)
+    assert y.dtype == x.dtype
+    assert y.shape == x.shape
+    assert isinstance(y, tvm.nd.NDArray)
+    np.testing.assert_equal(x, y.numpy())
+    np.testing.assert_equal(x, z.numpy())
+
+    # no need here, just to test usability
+    dev.sync()
+
+
+def test_memory_usage(target, dev, dtype):
+    available_memory_before = dev.available_global_memory
+    if available_memory_before is None:
+        pytest.skip(reason=f"Target '{target}' does not support queries of available memory")
+
+    arr = tvm.nd.empty([1024, 1024], dtype=dtype, device=dev)
+    available_memory_after = dev.available_global_memory
+
+    num_elements = math.prod(arr.shape)
+    element_nbytes = tvm.runtime.DataType(dtype).itemsize()
+    expected_memory_after = available_memory_before - num_elements * element_nbytes
+
+    # Allocations may be padded out for alignment, rounded up to a
+    # page boundary, or enlarged by device-side bookkeeping required
+    # by the TVM backend or the driver.  Therefore, the available
+    # memory may decrease by more than the requested amount.
+    assert available_memory_after <= expected_memory_after
+
+    # TVM's NDArray type is a reference-counted handle to the
+    # underlying allocation.  After the last reference to an NDArray
+    # is cleared, the backing allocation will be freed.
+    del arr
 
-@tvm.testing.uses_gpu
-def test_nd_create():
-    for target, dev in tvm.testing.enabled_targets():
-        for dtype in ["uint8", "int8", "uint16", "int16", "uint32", "int32", "float32"]:
-            x = np.random.randint(0, 10, size=(3, 4))
-            x = np.array(x, dtype=dtype)
-            y = tvm.nd.array(x, device=dev)
-            z = y.copyto(dev)
-            assert y.dtype == x.dtype
-            assert y.shape == x.shape
-            assert isinstance(y, tvm.nd.NDArray)
-            np.testing.assert_equal(x, y.numpy())
-            np.testing.assert_equal(x, z.numpy())
-            # no need here, just to test usablity
-            dev.sync()
+    assert dev.available_global_memory == available_memory_before
 
 
 def test_fp16_conversion():
     n = 100
 
-    for (src, dst) in [("float32", "float16"), ("float16", "float32")]:
+    for src, dst in [("float32", "float16"), ("float16", "float32")]:
         A = te.placeholder((n,), dtype=src)
         B = te.compute((n,), lambda i: A[i].astype(dst))
@@ -66,6 +96,4 @@ def test_dtype():
 
 
 if __name__ == "__main__":
-    test_nd_create()
-    test_fp16_conversion()
-    test_dtype()
+    tvm.testing.main()
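
A minimal sketch of querying the new attribute from Python, assuming a
CUDA-enabled build (CUDA is the only backend that implements
kAvailableGlobalMemory in this patch); the device index and allocation
size below are arbitrary:

    import tvm

    dev = tvm.cuda(0)

    # Both properties return a size in bytes, or None when the backend
    # does not implement the corresponding DeviceAttrKind.
    total = dev.total_global_memory
    free = dev.available_global_memory
    print(f"{free} / {total} bytes free")

    # Allocating an NDArray reduces the reported free memory by at least
    # the requested size; the driver may pad or align the allocation.
    arr = tvm.nd.empty((1024, 1024), dtype="float32", device=dev)
    assert dev.available_global_memory <= free - 1024 * 1024 * 4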