Skip to content

Commit

Permalink
Add support cl_amd_copy_buffer_p2p (#697)
Browse files Browse the repository at this point in the history
* feat: support cl_amd_copy_buffer_p2p

* clean: shorter line

* feat: use optional for byte_count && handle failure

* feat: add test

* feat: add docs

* fix: flake8

* feat: copy function signature over

* feat: remove suffix?

* feat: change version added
  • Loading branch information
wozeparrot authored Aug 10, 2023
1 parent a92e445 commit b3a4f88
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 0 deletions.
19 changes: 19 additions & 0 deletions doc/runtime_memory.rst
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,25 @@ Transfers

.. autofunction:: enqueue_fill(queue, dest, src, **kwargs)

.. function:: enqueue_copy_buffer_p2p_amd(platform, queue, src, dst, byte_count=None, wait_for=None)

    AMD extension to perform a peer-to-peer copy between two buffers that
    reside on two different devices. The two devices must be in different
    contexts, and *queue* must be a queue on the device where the source
    buffer is located.

    :arg platform: a :class:`Platform` instance
    :arg queue: a :class:`CommandQueue` instance
    :arg src: a :class:`Buffer` instance to copy from
    :arg dst: a :class:`Buffer` instance to copy to
    :arg byte_count: the number of bytes to copy. If *None*, the minimum of
        the sizes of the two buffers is used.

    |std-enqueue-blurb|

    Only available on AMD platforms.

    .. versionadded:: 2023.1.2

Mapping Memory into Host Address Space
--------------------------------------

Expand Down
1 change: 1 addition & 0 deletions pyopencl/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@

# Optional AMD-specific symbols: these names are re-exported from the
# compiled ``pyopencl._cl`` module when it provides them. If either import
# fails, the ImportError is swallowed on purpose so that importing the
# package keeps working; the missing name(s) are simply not defined here.
try:
    from pyopencl._cl import DeviceTopologyAmd  # noqa: F401
    from pyopencl._cl import enqueue_copy_buffer_p2p_amd  # noqa: F401
except ImportError:
    pass

Expand Down
15 changes: 15 additions & 0 deletions src/pyopencl_ext.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,21 @@ typedef union
} cl_device_topology_amd;
#endif

/* Fallback declarations for the AMD peer-to-peer buffer copy extension.
 * They are only supplied when CL_DEVICE_P2P_DEVICES_AMD has not already
 * been defined by the time this header is processed. */
#ifndef CL_DEVICE_P2P_DEVICES_AMD
#define CL_DEVICE_P2P_DEVICES_AMD 0x4089

/* Function-pointer type for the clEnqueueCopyBufferP2PAMD entry point.
 * The argument list mirrors the names given in the inline comments:
 * a queue, source and destination buffers with their byte offsets, the
 * byte count (cb), an event wait list, and an output event. */
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clEnqueueCopyBufferP2PAMD_fn)(cl_command_queue /*command_queue*/,
cl_mem /*src_buffer*/,
cl_mem /*dst_buffer*/,
size_t /*src_offset*/,
size_t /*dst_offset*/,
size_t /*cb*/,
cl_uint /*num_events_in_wait_list*/,
const cl_event* /*event_wait_list*/,
cl_event* /*event*/);
#endif

/* {{{ these NV defines are often missing from the system headers */

#ifndef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV
Expand Down
49 changes: 49 additions & 0 deletions src/wrap_cl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2574,6 +2574,55 @@ namespace pyopencl
PYOPENCL_RETURN_NEW_EVENT(evt);
}

#ifdef CL_DEVICE_P2P_DEVICES_AMD
  // Enqueue a device-to-device buffer copy through the AMD
  // clEnqueueCopyBufferP2PAMD extension entry point.
  //
  //   plat           platform that is asked for the extension function
  //   cq             command queue the copy is enqueued on
  //   src, dst       source and destination memory objects; the copy always
  //                  starts at offset 0 in both
  //   py_byte_count  number of bytes to copy, or None to copy
  //                  min(CL_MEM_SIZE(src), CL_MEM_SIZE(dst)) bytes
  //   py_wait_for    wait list, consumed by PYOPENCL_PARSE_WAIT_FOR
  //
  // Returns the event created for the enqueued copy.
  // Throws pyopencl::error with CL_INVALID_VALUE if byte_count is negative
  // or if the platform does not expose clEnqueueCopyBufferP2PAMD; failures
  // of the CL calls themselves are reported via PYOPENCL_CALL_GUARDED.
  inline
  event *enqueue_copy_buffer_p2p_amd(
      platform &plat,
      command_queue &cq,
      memory_object_holder &src,
      memory_object_holder &dst,
      py::object py_byte_count,
      py::object py_wait_for)
  {
    PYOPENCL_PARSE_WAIT_FOR;

    // size_t is both what CL_MEM_SIZE yields and what the extension's `cb`
    // parameter takes, so no signed/unsigned conversion happens on the way.
    size_t byte_count = 0;
    if (py_byte_count.ptr() == Py_None)
    {
      size_t byte_count_src = 0;
      size_t byte_count_dst = 0;
      PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
          (src.data(), CL_MEM_SIZE,
           sizeof(byte_count_src), &byte_count_src, 0));
      PYOPENCL_CALL_GUARDED(clGetMemObjectInfo,
          (dst.data(), CL_MEM_SIZE,
           sizeof(byte_count_dst), &byte_count_dst, 0));
      byte_count = std::min(byte_count_src, byte_count_dst);
    }
    else
    {
      // Read as a signed value first so that a negative request can be
      // rejected explicitly instead of wrapping around to a huge size.
      const ptrdiff_t requested = py::cast<ptrdiff_t>(py_byte_count);
      if (requested < 0)
        throw pyopencl::error("enqueue_copy_buffer_p2p_amd", CL_INVALID_VALUE,
            "byte_count must be non-negative");
      byte_count = static_cast<size_t>(requested);
    }

    // The entry point is an extension, so it has to be looked up at run time.
    clEnqueueCopyBufferP2PAMD_fn fn =
      reinterpret_cast<clEnqueueCopyBufferP2PAMD_fn>(
          clGetExtensionFunctionAddressForPlatform(
            plat.data(), "clEnqueueCopyBufferP2PAMD"));
    if (!fn)
      throw pyopencl::error("clGetExtensionFunctionAddressForPlatform", CL_INVALID_VALUE,
          "clEnqueueCopyBufferP2PAMD is not available");

    cl_event evt;
    PYOPENCL_RETRY_IF_MEM_ERROR(
      PYOPENCL_CALL_GUARDED(fn, (
            cq.data(),
            src.data(), dst.data(),
            /* src_offset */ 0, /* dst_offset */ 0,
            byte_count,
            PYOPENCL_WAITLIST_ARGS,
            &evt
            ))
      );

    PYOPENCL_RETURN_NEW_EVENT(evt);
  }
#endif

// }}}

// {{{ rectangular transfers
Expand Down
11 changes: 11 additions & 0 deletions src/wrap_cl_part_1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,17 @@ void pyopencl_expose_part_1(py::module_ &m)
py::arg("wait_for").none(true)=py::none()
);

// Register enqueue_copy_buffer_p2p_amd on the module, compiled in only when
// CL_DEVICE_P2P_DEVICES_AMD is defined. The strings below are the Python
// keyword-argument names; byte_count and wait_for accept None and default
// to None.
#ifdef CL_DEVICE_P2P_DEVICES_AMD
m.def("enqueue_copy_buffer_p2p_amd", enqueue_copy_buffer_p2p_amd,
py::arg("platform"),
py::arg("queue"),
py::arg("src"),
py::arg("dst"),
py::arg("byte_count").none(true)=py::none(),
py::arg("wait_for").none(true)=py::none()
);
#endif

// }}}

// {{{ rectangular
Expand Down
49 changes: 49 additions & 0 deletions test/test_enqueue_copy.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,55 @@ def test_enqueue_copy_rect_3d(ctx_factory, honor_skip=True):
assert np.array_equal(h_ary_in[ary_in_slice], h_ary_out[ary_out_slice])


def test_enqueue_copy_buffer_p2p_amd(honor_skip=True):
    """Round-trip data through ``enqueue_copy_buffer_p2p_amd``.

    A host array is uploaded to a buffer in one context, copied to a buffer
    in a second context (on a second device of the same platform) with the
    AMD peer-to-peer call, read back, and compared with the original.

    :arg honor_skip: if true, skip unless an AMD platform is present. If
        false and no AMD platform is found, the first platform is used.
    """
    if not hasattr(cl, "enqueue_copy_buffer_p2p_amd"):
        # The name is only exported when the compiled module provides it.
        pytest.skip("enqueue_copy_buffer_p2p_amd is not available")

    # Look at every platform rather than only the first one, so the test
    # still runs when the AMD platform is not listed first.
    platforms = cl.get_platforms()
    amd_platforms = [
        plat for plat in platforms
        if plat.vendor == "Advanced Micro Devices, Inc."
    ]
    if amd_platforms:
        platform = amd_platforms[0]
    elif honor_skip or not platforms:
        pytest.skip("AMD-specific test")
    else:
        platform = platforms[0]

    devices = platform.get_devices()
    if len(devices) < 2:
        pytest.skip("Need at least two devices")

    # One context (and queue) per device.
    ctx1 = cl.Context([devices[0]])
    ctx2 = cl.Context([devices[1]])

    queue1 = cl.CommandQueue(ctx1)
    queue2 = cl.CommandQueue(ctx2)

    ary_shp = 256, 128, 32  # array shape

    # Create host array of random values.
    rng = np.random.default_rng(seed=42)
    h_ary = rng.integers(0, 256, ary_shp, dtype=np.uint8)

    # uint8 elements, so the byte count equals the element count; ``nbytes``
    # is a plain Python int.
    nbytes = h_ary.nbytes

    # Create device buffers
    d_buf1 = cl.Buffer(ctx1, cl.mem_flags.READ_WRITE, size=nbytes)
    d_buf2 = cl.Buffer(ctx2, cl.mem_flags.READ_WRITE, size=nbytes)

    # Copy array from host to device
    cl.enqueue_copy(queue1, d_buf1, h_ary)

    # Copy array from device to device, enqueued on the source buffer's queue
    cl.enqueue_copy_buffer_p2p_amd(
        platform,
        queue1,
        d_buf1,
        d_buf2,
        nbytes,
    )
    queue1.finish()

    # Create zero-initialised array to receive array from device
    h_ary_out = np.zeros(ary_shp, dtype=h_ary.dtype)

    # Copy array from device to host
    cl.enqueue_copy(queue2, h_ary_out, d_buf2)
    queue2.finish()

    # Check that the arrays are the same
    assert np.array_equal(h_ary, h_ary_out)


if __name__ == "__main__":
import sys
if len(sys.argv) > 1:
Expand Down

0 comments on commit b3a4f88

Please sign in to comment.