diff --git a/.gitmodules b/.gitmodules index 5ca5d423..01f38c60 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ -[submodule "bpl-subset"] - path = bpl-subset - url = https://github.com/inducer/bpl-subset [submodule "pycuda/compyte"] path = pycuda/compyte url = https://github.com/inducer/compyte diff --git a/bpl-subset b/bpl-subset deleted file mode 160000 index 3702fb11..00000000 --- a/bpl-subset +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 3702fb119804dddde5eaf4a254822b891a947104 diff --git a/pyproject.toml b/pyproject.toml index 2bc8218a..81576809 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,6 +5,7 @@ # implementing that C_API_VERSION. requires = [ "setuptools", + "pybind11>=2.5.0", "wheel", "oldest-supported-numpy", ] diff --git a/setup.py b/setup.py index 9adccd87..ba6adae7 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from __future__ import absolute_import, print_function from os.path import dirname, join, normpath @@ -28,10 +27,8 @@ def get_config_schema(): IncludeDir, LibraryDir, Libraries, - BoostLibraries, Switch, StringListOption, - make_boost_base_options, ) nvcc_path = search_on_path(["nvcc", "nvcc.exe"]) @@ -76,11 +73,7 @@ def get_config_schema(): default_lib_dirs.append("/usr/local/cuda/lib") return ConfigSchema( - make_boost_base_options() - + [ - Switch("USE_SHIPPED_BOOST", True, "Use included Boost library"), - BoostLibraries("python"), - BoostLibraries("thread"), + [ Switch("CUDA_TRACE", False, "Enable CUDA API tracing"), Option( "CUDA_ROOT", default=cuda_root_default, help="Path to the CUDA toolkit" @@ -118,27 +111,26 @@ def main(): get_config, setup, ExtensionUsingNumpy, - set_up_shipped_boost_if_requested, check_git_submodules, - NumpyBuildExtCommand, + check_pybind11, + get_pybind_include, + PybindBuildExtCommand, ) + check_pybind11() check_git_submodules() hack_distutils() conf = get_config(get_config_schema()) - EXTRA_SOURCES, EXTRA_DEFINES = set_up_shipped_boost_if_requested("pycuda", conf) + EXTRA_SOURCES = [] + EXTRA_DEFINES = {} EXTRA_DEFINES["PYGPU_PACKAGE"] = "pycuda" EXTRA_DEFINES["PYGPU_PYCUDA"] = "1" - LIBRARY_DIRS = conf["BOOST_LIB_DIR"] + conf["CUDADRV_LIB_DIR"] - LIBRARIES = ( - conf["BOOST_PYTHON_LIBNAME"] - + conf["BOOST_THREAD_LIBNAME"] - + conf["CUDADRV_LIBNAME"] - ) + LIBRARY_DIRS = conf["CUDADRV_LIB_DIR"] + LIBRARIES = conf["CUDADRV_LIBNAME"] if not conf["CUDA_INC_DIR"] and conf["CUDA_ROOT"]: conf["CUDA_INC_DIR"] = [join(conf["CUDA_ROOT"], "include")] @@ -149,7 +141,7 @@ def main(): if conf["CUDA_PRETEND_VERSION"]: EXTRA_DEFINES["CUDAPP_PRETEND_CUDA_VERSION"] = conf["CUDA_PRETEND_VERSION"] - INCLUDE_DIRS = ["src/cpp"] + conf["BOOST_INC_DIR"] + INCLUDE_DIRS = ["src/cpp", get_pybind_include()] if conf["CUDA_INC_DIR"]: INCLUDE_DIRS += conf["CUDA_INC_DIR"] @@ -186,11 +178,6 @@ def main(): import sys - if sys.version_info >= (3,): - pvt_struct_source = "src/wrapper/_pvt_struct_v3.cpp" - else: - pvt_struct_source = "src/wrapper/_pvt_struct_v2.cpp" - setup( name="pycuda", # metadata @@ -254,12 +241,12 @@ def main(): ), ExtensionUsingNumpy( "_pvt_struct", - [pvt_struct_source], + ["src/wrapper/_pvt_struct_v3.cpp"], extra_compile_args=conf["CXXFLAGS"], extra_link_args=conf["LDFLAGS"], ), ], - cmdclass={"build_ext": NumpyBuildExtCommand}, + cmdclass={"build_ext": PybindBuildExtCommand}, include_package_data=True, package_data={ "pycuda": [ diff --git a/src/cpp/bitlog.cpp b/src/cpp/bitlog.cpp index a09a1168..6f011c6f 100644 --- a/src/cpp/bitlog.cpp +++ b/src/cpp/bitlog.cpp @@ -1,9 
+1,33 @@ -#include +// Base-2 logarithm bithack +// +// Copyright (C) 2009 Andreas Kloeckner +// Copyright (C) Sean Eron Anderson (in the public domain) +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +#include "bitlog.hpp" -/* from http://graphics.stanford.edu/~seander/bithacks.html */ const char pycuda::log_table_8[] = { 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, diff --git a/src/cpp/bitlog.hpp b/src/cpp/bitlog.hpp index 9343051b..1e95917c 100644 --- a/src/cpp/bitlog.hpp +++ b/src/cpp/bitlog.hpp @@ -1,47 +1,79 @@ // Base-2 logarithm bithack. - - - - -#ifndef _AFJDFJSDFSD_PYCUDA_HEADER_SEEN_BITLOG_HPP -#define _AFJDFJSDFSD_PYCUDA_HEADER_SEEN_BITLOG_HPP - - +// +// Copyright (C) 2009 Andreas Kloeckner +// Copyright (C) Sean Eron Anderson (in the public domain) +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. 
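// [Editorial sketch, not part of the patch] The bitlog helpers now rely only on
// the <cstdint> fixed-width integer types. A minimal standalone check of the
// intended behaviour (floor(log2(v))), assuming only the pycuda::bitlog2_32
// declaration from bitlog.hpp and linking against bitlog.cpp for the table:
//
//   #include <cassert>
//   #include <cstdint>
//   #include "bitlog.hpp"
//
//   int main()
//   {
//     assert(pycuda::bitlog2_32(1u) == 0);          // 2^0
//     assert(pycuda::bitlog2_32(4096u) == 12);      // 2^12
//     assert(pycuda::bitlog2_32(UINT32_MAX) == 31); // highest set bit of 2^32-1
//     return 0;
//   }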
+ + +#ifndef _AFJDFJSDFSD_PYOPENCL_HEADER_SEEN_BITLOG_HPP +#define _AFJDFJSDFSD_PYOPENCL_HEADER_SEEN_BITLOG_HPP #include -#include +#include -namespace pycuda +namespace pycuda { + /* from http://graphics.stanford.edu/~seander/bithacks.html */ + extern const char log_table_8[]; - inline unsigned bitlog2_16(boost::uint16_t v) + inline unsigned bitlog2_16(uint16_t v) { if (unsigned long t = v >> 8) return 8+log_table_8[t]; - else + else return log_table_8[v]; } - inline unsigned bitlog2_32(boost::uint32_t v) + inline unsigned bitlog2_32(uint32_t v) { - if (boost::uint16_t t = v >> 16) + if (uint16_t t = v >> 16) return 16+bitlog2_16(t); - else - return bitlog2_16(boost::uint16_t(v)); + else + return bitlog2_16(v); } - inline unsigned bitlog2(size_t v) +#if defined(UINT64_MAX) + inline unsigned bitlog2(uint64_t v) + { + if (uint32_t t = v >> 32) + return 32+bitlog2_32(t); + else + return bitlog2_32(v); + } +#else + inline unsigned bitlog2(unsigned long v) { -#if (ULONG_MAX != 4294967295) || defined(_WIN64) - if (boost::uint32_t t = v >> 32) +#if (ULONG_MAX != 4294967295) + if (uint32_t t = v >> 32) return 32+bitlog2_32(t); - else + else +#endif + return bitlog2_32(v); + } #endif - return bitlog2_32(unsigned(v)); - } } diff --git a/src/cpp/cuda.cpp b/src/cpp/cuda.cpp index 00c7072c..e8c404d0 100644 --- a/src/cpp/cuda.cpp +++ b/src/cpp/cuda.cpp @@ -3,4 +3,4 @@ #include "cuda.hpp" -boost::thread_specific_ptr pycuda::context_stack_ptr; +std::thread_specific_ptr pycuda::context_stack_ptr; diff --git a/src/cpp/cuda.hpp b/src/cpp/cuda.hpp index 18079ab4..d9e2ee37 100644 --- a/src/cpp/cuda.hpp +++ b/src/cpp/cuda.hpp @@ -20,29 +20,22 @@ #endif #if CUDAPP_CUDA_VERSION >= 4000 -#include +// FIXME reenable +//#include #endif #ifndef _MSC_VER #include #endif #include -#include -#include +#include #include #include #include #include -#include -#include -#include -#include +#include -#if (BOOST_VERSION/100) < 1035 -#warning ***************************************************************** -#warning **** Your version of Boost C++ is likely too old for PyCUDA. **** -#warning ***************************************************************** -#endif +#include // MAYBE? 
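// [Editorial sketch, not part of the patch] Note that std::thread_specific_ptr
// does not exist; the standard-library counterpart of boost::thread_specific_ptr
// is thread_local storage. One possible shape for the per-thread context stack,
// assuming context_stack keeps its current interface (the accessor name
// get_context_stack is hypothetical, not a PyCUDA symbol):
//
//   // cuda.hpp
//   namespace pycuda {
//     class context_stack;
//     context_stack &get_context_stack();
//   }
//
//   // cuda.cpp
//   pycuda::context_stack &pycuda::get_context_stack()
//   {
//     static thread_local context_stack stack;
//     return stack;
//   }
//
// Call sites that currently reach the stack through context_stack_ptr or
// context_stack::get() would then route through this accessor instead.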
cuMemcpy, cuPointerGetAttribute // TODO: cuCtxSetCurrent, cuCtxGetCurrent @@ -79,7 +72,7 @@ typedef Py_ssize_t PYCUDA_BUFFER_SIZE_T; CUstream s_handle; \ if (stream_py.ptr() != Py_None) \ { \ - const stream &s = py::extract(stream_py); \ + const stream &s = py::cast(stream_py); \ s_handle = s.handle(); \ } \ else \ @@ -180,7 +173,20 @@ typedef Py_ssize_t PYCUDA_BUFFER_SIZE_T; namespace pycuda { - namespace py = boost::python; + namespace py = pybind11; + + + // https://stackoverflow.com/a/44175911 + class noncopyable { + public: + noncopyable() = default; + ~noncopyable() = default; + + private: + noncopyable(const noncopyable&) = delete; + noncopyable& operator=(const noncopyable&) = delete; + }; + typedef #if CUDAPP_CUDA_VERSION >= 3020 @@ -367,7 +373,7 @@ namespace pycuda // {{{ buffer interface helper - class py_buffer_wrapper : public boost::noncopyable + class py_buffer_wrapper : public noncopyable { private: bool m_initialized; @@ -482,9 +488,9 @@ namespace pycuda return m_device; } - boost::shared_ptr make_context(unsigned int flags); + std::shared_ptr make_context(unsigned int flags); #if CUDAPP_CUDA_VERSION >= 7000 - boost::shared_ptr retain_primary_context(); + std::shared_ptr retain_primary_context(); #endif CUdevice handle() const @@ -537,12 +543,12 @@ namespace pycuda // for friend decl namespace gl { - boost::shared_ptr + std::shared_ptr make_gl_context(device const &dev, unsigned int flags); } class context_stack; - extern boost::thread_specific_ptr context_stack_ptr; + extern std::thread_specific_ptr context_stack_ptr; class context_stack { @@ -552,7 +558,7 @@ namespace pycuda * to be destroyed. */ private: - typedef std::stack > stack_t; + typedef std::stack > stack_t; typedef stack_t::value_type value_type;; stack_t m_stack; @@ -587,18 +593,18 @@ namespace pycuda } }; - class context : boost::noncopyable + class context : noncopyable { protected: CUcontext m_context; bool m_valid; unsigned m_use_count; - boost::thread::id m_thread; + std::thread::id m_thread; public: context(CUcontext ctx) : m_context(ctx), m_valid(true), m_use_count(1), - m_thread(boost::this_thread::get_id()) + m_thread(std::this_thread::get_id()) { } virtual ~context() @@ -634,7 +640,7 @@ namespace pycuda return hash_type(m_context) ^ hash_type(this); } - boost::thread::id thread_id() const + std::thread::id thread_id() const { return m_thread; } bool is_valid() const @@ -642,11 +648,11 @@ namespace pycuda return m_valid; } - static boost::shared_ptr attach(unsigned int flags) + static std::shared_ptr attach(unsigned int flags) { CUcontext current; CUDAPP_CALL_GUARDED(cuCtxAttach, (¤t, flags)); - boost::shared_ptr result(new context(current)); + std::shared_ptr result(new context(current)); context_stack::get().push(result); return result; } @@ -669,7 +675,7 @@ namespace pycuda } else { - if (m_thread == boost::this_thread::get_id()) + if (m_thread == std::this_thread::get_id()) { CUDAPP_CALL_GUARDED_CLEANUP(cuCtxPushCurrent, (m_context)); detach_internal(); @@ -689,7 +695,7 @@ namespace pycuda if (active_before_destruction) { - boost::shared_ptr new_active = current_context(this); + std::shared_ptr new_active = current_context(this); if (new_active.get()) { CUDAPP_CALL_GUARDED(cuCtxPushCurrent, (new_active->m_context)); @@ -730,7 +736,7 @@ namespace pycuda "cannot pop non-current context"); } - boost::shared_ptr current = current_context(); + std::shared_ptr current = current_context(); if (current) --current->m_use_count; @@ -747,14 +753,14 @@ namespace pycuda static void synchronize() { 
CUDAPP_CALL_GUARDED_THREADED(cuCtxSynchronize, ()); } - static boost::shared_ptr current_context(context *except=0) + static std::shared_ptr current_context(context *except=0) { while (true) { if (context_stack::get().empty()) - return boost::shared_ptr(); + return std::shared_ptr(); - boost::shared_ptr result(context_stack::get().top()); + std::shared_ptr result(context_stack::get().top()); if (result.get() != except && result->is_valid()) { @@ -829,8 +835,8 @@ namespace pycuda #endif friend class device; - friend void context_push(boost::shared_ptr ctx); - friend boost::shared_ptr + friend void context_push(std::shared_ptr ctx); + friend std::shared_ptr gl::make_gl_context(device const &dev, unsigned int flags); friend class primary_context; }; @@ -854,24 +860,24 @@ namespace pycuda }; inline - boost::shared_ptr device::make_context(unsigned int flags) + std::shared_ptr device::make_context(unsigned int flags) { context::prepare_context_switch(); CUcontext ctx; CUDAPP_CALL_GUARDED_THREADED(cuCtxCreate, (&ctx, flags, m_device)); - boost::shared_ptr result(new context(ctx)); + std::shared_ptr result(new context(ctx)); context_stack::get().push(result); return result; } #if CUDAPP_CUDA_VERSION >= 7000 - inline boost::shared_ptr device::retain_primary_context() + inline std::shared_ptr device::retain_primary_context() { CUcontext ctx; CUDAPP_CALL_GUARDED(cuDevicePrimaryCtxRetain, (&ctx, m_device)); - boost::shared_ptr result(new primary_context(ctx, m_device)); + std::shared_ptr result(new primary_context(ctx, m_device)); return result; } #endif @@ -879,7 +885,7 @@ namespace pycuda #if CUDAPP_CUDA_VERSION >= 2000 inline - void context_push(boost::shared_ptr ctx) + void context_push(std::shared_ptr ctx) { context::prepare_context_switch(); @@ -916,7 +922,7 @@ namespace pycuda class explicit_context_dependent { private: - boost::shared_ptr m_ward_context; + std::shared_ptr m_ward_context; public: void acquire_context() @@ -933,7 +939,7 @@ namespace pycuda m_ward_context.reset(); } - boost::shared_ptr get_context() + std::shared_ptr get_context() { return m_ward_context; } @@ -942,7 +948,7 @@ namespace pycuda class context_dependent : public explicit_context_dependent { private: - boost::shared_ptr m_ward_context; + std::shared_ptr m_ward_context; public: context_dependent() @@ -953,11 +959,11 @@ namespace pycuda class scoped_context_activation { private: - boost::shared_ptr m_context; + std::shared_ptr m_context; bool m_did_switch; public: - scoped_context_activation(boost::shared_ptr ctx) + scoped_context_activation(std::shared_ptr ctx) : m_context(ctx) { if (!m_context->is_valid()) @@ -967,7 +973,7 @@ namespace pycuda m_did_switch = context::current_context() != m_context; if (m_did_switch) { - if (boost::this_thread::get_id() != m_context->thread_id()) + if (std::this_thread::get_id() != m_context->thread_id()) throw pycuda::cannot_activate_out_of_thread_context( "cannot activate out-of-thread context"); #if CUDAPP_CUDA_VERSION >= 2000 @@ -994,7 +1000,7 @@ namespace pycuda // {{{ stream class event; - class stream : public boost::noncopyable, public context_dependent + class stream : public noncopyable, public context_dependent { private: CUstream m_stream; @@ -1046,7 +1052,7 @@ namespace pycuda // }}} // {{{ array - class array : public boost::noncopyable, public context_dependent + class array : public noncopyable, public context_dependent { private: CUarray m_array; @@ -1114,15 +1120,15 @@ namespace pycuda // {{{ texture reference class module; - class texture_reference : public 
boost::noncopyable + class texture_reference : public noncopyable { private: CUtexref m_texref; bool m_managed; // life support for array and module - boost::shared_ptr m_array; - boost::shared_ptr m_module; + std::shared_ptr m_array; + std::shared_ptr m_module; public: texture_reference() @@ -1141,13 +1147,13 @@ namespace pycuda } } - void set_module(boost::shared_ptr mod) + void set_module(std::shared_ptr mod) { m_module = mod; } CUtexref handle() const { return m_texref; } - void set_array(boost::shared_ptr ary) + void set_array(std::shared_ptr ary) { CUDAPP_CALL_GUARDED(cuTexRefSetArray, (m_texref, ary->handle(), CU_TRSA_OVERRIDE_FORMAT)); @@ -1236,27 +1242,27 @@ namespace pycuda #if CUDAPP_CUDA_VERSION >= 3010 class module; - class surface_reference : public boost::noncopyable + class surface_reference : public noncopyable { private: CUsurfref m_surfref; // life support for array and module - boost::shared_ptr m_array; - boost::shared_ptr m_module; + std::shared_ptr m_array; + std::shared_ptr m_module; public: surface_reference(CUsurfref sr) : m_surfref(sr) { } - void set_module(boost::shared_ptr mod) + void set_module(std::shared_ptr mod) { m_module = mod; } CUsurfref handle() const { return m_surfref; } - void set_array(boost::shared_ptr ary, unsigned int flags) + void set_array(std::shared_ptr ary, unsigned int flags) { CUDAPP_CALL_GUARDED(cuSurfRefSetArray, (m_surfref, ary->handle(), flags)); m_array = ary; @@ -1276,7 +1282,7 @@ namespace pycuda // {{{ module class function; - class module : public boost::noncopyable, public context_dependent + class module : public noncopyable, public context_dependent { private: CUmodule m_module; @@ -1319,7 +1325,7 @@ namespace pycuda inline texture_reference *module_get_texref( - boost::shared_ptr mod, const char *name) + std::shared_ptr mod, const char *name) { CUtexref tr; CUDAPP_CALL_GUARDED(cuModuleGetTexRef, (&tr, mod->handle(), name)); @@ -1332,7 +1338,7 @@ namespace pycuda #if CUDAPP_CUDA_VERSION >= 3010 inline surface_reference *module_get_surfref( - boost::shared_ptr mod, const char *name) + std::shared_ptr mod, const char *name) { CUsurfref sr; CUDAPP_CALL_GUARDED(cuModuleGetSurfRef, (&sr, mod->handle(), name)); @@ -1460,7 +1466,7 @@ namespace pycuda "too many grid dimensions in kernel launch"); for (unsigned i = 0; i < gd_length; ++i) - grid_dim[i] = py::extract(grid_dim_py[i]); + grid_dim[i] = py::cast(grid_dim_py[i]); pycuda_size_t bd_length = py::len(block_dim_py); if (bd_length > axis_count) @@ -1468,7 +1474,7 @@ namespace pycuda "too many block dimensions in kernel launch"); for (unsigned i = 0; i < bd_length; ++i) - block_dim[i] = py::extract(block_dim_py[i]); + block_dim[i] = py::cast(block_dim_py[i]); PYCUDA_PARSE_STREAM_PY; @@ -1561,7 +1567,7 @@ namespace pycuda } }; - class device_allocation : public boost::noncopyable, public context_dependent + class device_allocation : public noncopyable, public context_dependent { private: bool m_valid; @@ -1655,7 +1661,7 @@ namespace pycuda // {{{ ipc_mem_handle #if CUDAPP_CUDA_VERSION >= 4010 && PY_VERSION_HEX >= 0x02060000 - class ipc_mem_handle : public boost::noncopyable, public context_dependent + class ipc_mem_handle : public noncopyable, public context_dependent { private: bool m_valid; @@ -1928,7 +1934,7 @@ namespace pycuda - struct host_pointer : public boost::noncopyable, public context_dependent + struct host_pointer : public noncopyable, public context_dependent { protected: bool m_valid; @@ -2112,7 +2118,7 @@ namespace pycuda // }}} // {{{ event - class event : public 
boost::noncopyable, public context_dependent + class event : public noncopyable, public context_dependent { private: CUevent m_event; diff --git a/src/cpp/cuda_gl.hpp b/src/cpp/cuda_gl.hpp index 04b7dd86..b7722783 100644 --- a/src/cpp/cuda_gl.hpp +++ b/src/cpp/cuda_gl.hpp @@ -8,7 +8,7 @@ #include #else /* __APPLE__ */ #include -#endif +#endif #include @@ -31,11 +31,11 @@ namespace pycuda { namespace gl { inline - boost::shared_ptr make_gl_context(device const &dev, unsigned int flags) + std::shared_ptr make_gl_context(device const &dev, unsigned int flags) { CUcontext ctx; CUDAPP_CALL_GUARDED(cuGLCtxCreate, (&ctx, flags, dev.handle())); - boost::shared_ptr result(new context(ctx)); + std::shared_ptr result(new context(ctx)); context_stack::get().push(result); return result; } @@ -91,18 +91,18 @@ namespace pycuda { namespace gl { class buffer_object_mapping : public context_dependent { private: - boost::shared_ptr m_buffer_object; + std::shared_ptr m_buffer_object; CUdeviceptr m_devptr; size_t m_size; bool m_valid; public: buffer_object_mapping( - boost::shared_ptr bobj, + std::shared_ptr bobj, CUdeviceptr devptr, size_t size) : m_buffer_object(bobj), m_devptr(devptr), m_size(size), m_valid(true) - { + { PyErr_Warn( PyExc_DeprecationWarning, "buffer_object_mapping has been deprecated since CUDA 3.0 " @@ -142,7 +142,7 @@ namespace pycuda { namespace gl { inline buffer_object_mapping *map_buffer_object( - boost::shared_ptr bobj) + std::shared_ptr bobj) { CUdeviceptr devptr; pycuda_size_t size; @@ -199,7 +199,7 @@ namespace pycuda { namespace gl { CUDAPP_CATCH_CLEANUP_ON_DEAD_CONTEXT(registered_object); } else - throw pycuda::error("registered_object::unregister", + throw pycuda::error("registered_object::unregister", CUDA_ERROR_INVALID_HANDLE); } }; @@ -207,11 +207,11 @@ namespace pycuda { namespace gl { class registered_buffer : public registered_object { public: - registered_buffer(GLuint gl_handle, + registered_buffer(GLuint gl_handle, CUgraphicsMapResourceFlags flags=CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE) : registered_object(gl_handle) { - CUDAPP_CALL_GUARDED(cuGraphicsGLRegisterBuffer, + CUDAPP_CALL_GUARDED(cuGraphicsGLRegisterBuffer, (&m_resource, gl_handle, flags)); } }; @@ -219,11 +219,11 @@ namespace pycuda { namespace gl { class registered_image : public registered_object { public: - registered_image(GLuint gl_handle, GLenum target, + registered_image(GLuint gl_handle, GLenum target, CUgraphicsMapResourceFlags flags=CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE) : registered_object(gl_handle) { - CUDAPP_CALL_GUARDED(cuGraphicsGLRegisterImage, + CUDAPP_CALL_GUARDED(cuGraphicsGLRegisterImage, (&m_resource, gl_handle, target, flags)); } }; @@ -233,14 +233,14 @@ namespace pycuda { namespace gl { class registered_mapping : public context_dependent { private: - boost::shared_ptr m_object; - boost::shared_ptr m_stream; + std::shared_ptr m_object; + std::shared_ptr m_stream; bool m_valid; public: registered_mapping( - boost::shared_ptr robj, - boost::shared_ptr strm) + std::shared_ptr robj, + std::shared_ptr strm) : m_object(robj), m_stream(strm), m_valid(true) { } @@ -255,7 +255,7 @@ namespace pycuda { namespace gl { unmap(m_stream); } - void unmap(boost::shared_ptr const &strm) + void unmap(std::shared_ptr const &strm) { CUstream s_handle; if (!strm.get()) @@ -283,7 +283,7 @@ namespace pycuda { namespace gl { { CUdeviceptr devptr; pycuda_size_t size; - CUDAPP_CALL_GUARDED(cuGraphicsResourceGetMappedPointer, + CUDAPP_CALL_GUARDED(cuGraphicsResourceGetMappedPointer, (&devptr, &size, 
m_object->resource())); return py::make_tuple(devptr, size); } @@ -292,7 +292,7 @@ namespace pycuda { namespace gl { pycuda::array *array(unsigned int index, unsigned int level) const { CUarray devptr; - CUDAPP_CALL_GUARDED(cuGraphicsSubResourceGetMappedArray, + CUDAPP_CALL_GUARDED(cuGraphicsSubResourceGetMappedArray, (&devptr, m_object->resource(), index, level)); std::unique_ptr result( new pycuda::array(devptr, false)); @@ -304,11 +304,11 @@ namespace pycuda { namespace gl { inline registered_mapping *map_registered_object( - boost::shared_ptr const &robj, + std::shared_ptr const &robj, py::object strm_py) { CUstream s_handle; - boost::shared_ptr strm_sptr; + std::shared_ptr strm_sptr; if (strm_py.ptr() == Py_None) { @@ -316,7 +316,7 @@ namespace pycuda { namespace gl { } else { - strm_sptr = py::extract >(strm_py); + strm_sptr = py::extract >(strm_py); s_handle = strm_sptr->handle(); } diff --git a/src/cpp/mempool.hpp b/src/cpp/mempool.hpp index 44f0fd64..72f20569 100644 --- a/src/cpp/mempool.hpp +++ b/src/cpp/mempool.hpp @@ -50,12 +50,6 @@ namespace PYGPU_PACKAGE mp_noncopyable& operator=(const mp_noncopyable&) = delete; }; -#ifdef PYGPU_PYCUDA -#define PYGPU_SHARED_PTR boost::shared_ptr -#else -#define PYGPU_SHARED_PTR std::shared_ptr -#endif - template inline T signed_left_shift(T x, signed shift_amount) { @@ -400,14 +394,14 @@ namespace PYGPU_PACKAGE typedef typename Pool::size_type size_type; private: - PYGPU_SHARED_PTR m_pool; + std::shared_ptr m_pool; pointer_type m_ptr; size_type m_size; bool m_valid; public: - pooled_allocation(PYGPU_SHARED_PTR p, size_type size) + pooled_allocation(std::shared_ptr p, size_type size) : m_pool(p), m_ptr(p->allocate(size)), m_size(size), m_valid(true) { } diff --git a/src/wrapper/mempool.cpp b/src/wrapper/mempool.cpp index d889d516..155e51b3 100644 --- a/src/wrapper/mempool.cpp +++ b/src/wrapper/mempool.cpp @@ -6,12 +6,11 @@ #include "wrap_helpers.hpp" #include #include -#include -namespace py = boost::python; +namespace py = pybind11; @@ -128,7 +127,7 @@ namespace public: pooled_device_allocation( - boost::shared_ptr p, super::size_type s) + std::shared_ptr p, super::size_type s) : super(p, s) { } @@ -140,7 +139,7 @@ namespace pooled_device_allocation *device_pool_allocate( - boost::shared_ptr > pool, + std::shared_ptr > pool, context_dependent_memory_pool::size_type sz) { return new pooled_device_allocation(pool, sz); @@ -170,7 +169,7 @@ namespace public: pooled_host_allocation( - boost::shared_ptr p, super::size_type s) + std::shared_ptr p, super::size_type s) : super(p, s) { } }; @@ -179,7 +178,7 @@ namespace py::handle<> host_pool_allocate( - boost::shared_ptr > pool, + std::shared_ptr > pool, py::object shape, py::object dtype, py::object order_py) { PyArray_Descr *tp_descr; @@ -249,8 +248,8 @@ void pycuda_expose_tools() typedef context_dependent_memory_pool cl; py::class_< - cl, boost::noncopyable, - boost::shared_ptr > wrapper("DeviceMemoryPool"); + cl, noncopyable, + std::shared_ptr > wrapper("DeviceMemoryPool"); wrapper .def("allocate", device_pool_allocate, py::return_value_policy()) @@ -269,8 +268,8 @@ void pycuda_expose_tools() typedef pycuda::memory_pool cl; py::class_< - cl, boost::noncopyable, - boost::shared_ptr > wrapper( + cl, noncopyable, + std::shared_ptr > wrapper( "PageLockedMemoryPool", py::init >() ); @@ -284,7 +283,7 @@ void pycuda_expose_tools() { typedef pooled_device_allocation cl; - py::class_( + py::class_( "PooledDeviceAllocation", py::no_init) .DEF_SIMPLE_METHOD(free) .def("__int__", &cl::ptr) @@ -298,7 +297,7 @@ 
void pycuda_expose_tools() { typedef pooled_host_allocation cl; - py::class_( + py::class_( "PooledHostAllocation", py::no_init) .DEF_SIMPLE_METHOD(free) .def("__len__", &cl::size) diff --git a/src/wrapper/tools.hpp b/src/wrapper/tools.hpp index 98a7d8c3..ff277c31 100644 --- a/src/wrapper/tools.hpp +++ b/src/wrapper/tools.hpp @@ -5,7 +5,7 @@ #include -#include +#include #include #include @@ -28,7 +28,7 @@ namespace pycuda inline void run_python_gc() { - namespace py = boost::python; + namespace py = pybind11; py::object gc_mod( py::handle<>( @@ -46,7 +46,7 @@ namespace pycuda return pycuda::mem_alloc(bytes); } catch (pycuda::error &e) - { + { if (e.code() != CUDA_ERROR_OUT_OF_MEMORY) throw; } diff --git a/src/wrapper/wrap_cudadrv.cpp b/src/wrapper/wrap_cudadrv.cpp index 01f09189..e1e97393 100644 --- a/src/wrapper/wrap_cudadrv.cpp +++ b/src/wrapper/wrap_cudadrv.cpp @@ -8,7 +8,6 @@ #include "tools.hpp" #include "wrap_helpers.hpp" -#include @@ -21,7 +20,6 @@ using namespace pycuda; -using boost::shared_ptr; @@ -291,14 +289,14 @@ namespace py::object dest_context_py, py::object src_context_py ) { - boost::shared_ptr dest_context = context::current_context(); - boost::shared_ptr src_context = dest_context; + std::shared_ptr dest_context = context::current_context(); + std::shared_ptr src_context = dest_context; if (dest_context_py.ptr() == Py_None) - dest_context = py::extract >(dest_context_py); + dest_context = py::extract >(dest_context_py); if (src_context_py.ptr() == Py_None) - src_context = py::extract >(src_context_py); + src_context = py::extract >(src_context_py); CUDAPP_CALL_GUARDED_THREADED(cuMemcpyPeer, ( dest, dest_context->handle(), @@ -311,14 +309,14 @@ namespace py::object dest_context_py, py::object src_context_py, py::object stream_py) { - boost::shared_ptr dest_context = context::current_context(); - boost::shared_ptr src_context = dest_context; + std::shared_ptr dest_context = context::current_context(); + std::shared_ptr src_context = dest_context; if (dest_context_py.ptr() == Py_None) - dest_context = py::extract >(dest_context_py); + dest_context = py::extract >(dest_context_py); if (src_context_py.ptr() == Py_None) - src_context = py::extract >(src_context_py); + src_context = py::extract >(src_context_py); PYCUDA_PARSE_STREAM_PY @@ -415,7 +413,7 @@ namespace // {{{ linker #if CUDAPP_CUDA_VERSION >= 5050 - class Linker : public boost::noncopyable + class Linker : public noncopyable { private: py::object m_message_handler; @@ -670,9 +668,9 @@ BOOST_PYTHON_MODULE(_driver) if (!import_numpy_helper()) throw py::error_already_set(); - py::def("get_version", cuda_version); + m.def("get_version", cuda_version); #if CUDAPP_CUDA_VERSION >= 2020 - py::def("get_driver_version", pycuda::get_driver_version); + m.def("get_driver_version", pycuda::get_driver_version); #endif // {{{ exceptions @@ -750,7 +748,7 @@ BOOST_PYTHON_MODULE(_driver) #if CUDAPP_CUDA_VERSION >= 3000 { - py::class_ cls("array3d_flags", py::no_init); + py::class_ cls(m, "array3d_flags", py::no_init); // deprecated cls.attr("ARRAY3D_2DARRAY") = CUDA_ARRAY3D_2DARRAY; #if CUDAPP_CUDA_VERSION >= 4000 @@ -1046,7 +1044,7 @@ BOOST_PYTHON_MODULE(_driver) #if CUDAPP_CUDA_VERSION >= 2020 { - py::class_ cls("host_alloc_flags", py::no_init); + py::class_ cls(m, "host_alloc_flags", py::no_init); cls.attr("PORTABLE") = CU_MEMHOSTALLOC_PORTABLE; cls.attr("DEVICEMAP") = CU_MEMHOSTALLOC_DEVICEMAP; cls.attr("WRITECOMBINED") = CU_MEMHOSTALLOC_WRITECOMBINED; @@ -1055,7 +1053,7 @@ #if CUDAPP_CUDA_VERSION >=
4000 { - py::class_ cls("mem_host_register_flags", py::no_init); + py::class_ cls(m, "mem_host_register_flags", py::no_init); cls.attr("PORTABLE") = CU_MEMHOSTREGISTER_PORTABLE; cls.attr("DEVICEMAP") = CU_MEMHOSTREGISTER_DEVICEMAP; } @@ -1109,13 +1107,13 @@ BOOST_PYTHON_MODULE(_driver) // }}} - py::def("init", init, + m.def("init", init, py::arg("flags")=0); // {{{ device { typedef device cl; - py::class_("Device", py::no_init) + py::class_(m, "Device", py::no_init) .def("__init__", py::make_constructor(make_device)) #if CUDAPP_CUDA_VERSION >= 4010 .def("__init__", py::make_constructor(make_device_from_pci_bus_id)) @@ -1133,13 +1131,13 @@ BOOST_PYTHON_MODULE(_driver) .def(py::self != py::self) .def("__hash__", &cl::hash) .def("make_context", &cl::make_context, - (py::args("self"), py::args("flags")=0)) + (py::arg("self"), py::arg("flags")=0)) #if CUDAPP_CUDA_VERSION >= 4000 .DEF_SIMPLE_METHOD(can_access_peer) #endif #if CUDAPP_CUDA_VERSION >= 7000 .def("retain_primary_context", &cl::retain_primary_context, - (py::args("self"))) + (py::arg("self"))) #endif ; } @@ -1148,7 +1146,7 @@ BOOST_PYTHON_MODULE(_driver) // {{{ context { typedef context cl; - py::class_, boost::noncopyable >("Context", py::no_init) + py::class_>(m, "Context", py::no_init) .def(py::self == py::self) .def(py::self != py::self) .def("__hash__", &cl::hash) @@ -1168,7 +1166,7 @@ BOOST_PYTHON_MODULE(_driver) .DEF_SIMPLE_METHOD(synchronize) .staticmethod("synchronize") - .def("get_current", (boost::shared_ptr (*)()) &cl::current_context) + .def("get_current", (std::shared_ptr (*)()) &cl::current_context) .staticmethod("get_current") #if CUDAPP_CUDA_VERSION >= 3010 @@ -1205,8 +1203,8 @@ BOOST_PYTHON_MODULE(_driver) // {{{ stream { typedef stream cl; - py::class_ > - ("Stream", py::init(py::arg("flags")=0)) + py::class_ > + (m, "Stream", py::init(py::arg("flags")=0)) .DEF_SIMPLE_METHOD(synchronize) .DEF_SIMPLE_METHOD(is_done) #if CUDAPP_CUDA_VERSION >= 3020 @@ -1220,24 +1218,24 @@ BOOST_PYTHON_MODULE(_driver) // {{{ module { typedef module cl; - py::class_ >("Module", py::no_init) - .def("get_function", &cl::get_function, (py::args("self", "name")), + py::class_ >(m, "Module", py::no_init) + .def("get_function", &cl::get_function, py::arg("self"), py::arg("name"), py::with_custodian_and_ward_postcall<0, 1>()) - .def("get_global", &cl::get_global, (py::args("self", "name"))) + .def("get_global", &cl::get_global, py::arg("self"), py::arg("name"))) .def("get_texref", module_get_texref, - (py::args("self", "name")), + (py::arg("self"), py::arg("name")), py::return_value_policy()) #if CUDAPP_CUDA_VERSION >= 3010 .def("get_surfref", module_get_surfref, - (py::args("self", "name")), + (py::arg("self"), py::arg("name")), py::return_value_policy()) #endif ; } - py::def("module_from_file", module_from_file, (py::arg("filename")), + m.def("module_from_file", module_from_file, (py::arg("filename")), py::return_value_policy()); - py::def("module_from_buffer", module_from_buffer, + m.def("module_from_buffer", module_from_buffer, (py::arg("buffer"), py::arg("options")=py::list(), py::arg("message_handler")=py::object()), @@ -1255,7 +1253,7 @@ BOOST_PYTHON_MODULE(_driver) .value("OBJECT", CU_JIT_INPUT_OBJECT) .value("LIBRARY", CU_JIT_INPUT_LIBRARY); - py::class_ >("Linker") + py::class_ >(m, "Linker") .def(py::init()) .def(py::init()) .def(py::init()) @@ -1269,7 +1267,7 @@ BOOST_PYTHON_MODULE(_driver) // {{{ function { typedef function cl; - py::class_("Function", py::no_init) + py::class_(m, "Function", py::no_init) .def("_set_block_shape",
&cl::set_block_shape) .def("_set_shared_size", &cl::set_shared_size) .def("_param_set_size", &cl::param_set_size) @@ -1281,9 +1279,9 @@ BOOST_PYTHON_MODULE(_driver) .def("_launch", &cl::launch) .def("_launch_grid", &cl::launch_grid, - py::args("grid_width", "grid_height")) + py::arg("grid_width"), py::arg("grid_height")) .def("_launch_grid_async", &cl::launch_grid_async, - py::args("grid_width", "grid_height", "s")) + py::arg("grid_width"), py::arg("grid_height"), py::arg("s")) #if CUDAPP_CUDA_VERSION >= 2020 .DEF_SIMPLE_METHOD(get_attribute) @@ -1306,8 +1304,7 @@ BOOST_PYTHON_MODULE(_driver) { typedef pointer_holder_base cl; - py::class_( - "PointerHolderBase") + py::class_(m, "PointerHolderBase") .def("get_pointer", py::pure_virtual(&cl::get_pointer)) .def("as_buffer", &cl::as_buffer, (py::arg("size"), py::arg("offset")=0)) @@ -1321,7 +1318,7 @@ BOOST_PYTHON_MODULE(_driver) { typedef device_allocation cl; - py::class_("DeviceAllocation", py::no_init) + py::class_(m, "DeviceAllocation", py::no_init) .def("__int__", &cl::operator CUdeviceptr) .def("__long__", mem_obj_to_long) .def("__index__", mem_obj_to_long) @@ -1336,7 +1333,7 @@ BOOST_PYTHON_MODULE(_driver) #if CUDAPP_CUDA_VERSION >= 4010 && PY_VERSION_HEX >= 0x02060000 { typedef ipc_mem_handle cl; - py::class_("IPCMemoryHandle", + py::class_(m, "IPCMemoryHandle", py::init >()) .def("__int__", &cl::operator CUdeviceptr) .def("__long__", mem_obj_to_long) @@ -1356,7 +1353,7 @@ BOOST_PYTHON_MODULE(_driver) { typedef host_pointer cl; - py::class_("HostPointer", py::no_init) + py::class_(m, "HostPointer", py::no_init) #if CUDAPP_CUDA_VERSION >= 2020 .DEF_SIMPLE_METHOD(get_device_pointer) #endif @@ -1365,8 +1362,8 @@ BOOST_PYTHON_MODULE(_driver) { typedef pagelocked_host_allocation cl; - py::class_ > wrp( - "PagelockedHostAllocation", py::no_init); + py::class_ > wrp( + m, "PagelockedHostAllocation", py::no_init); wrp .DEF_SIMPLE_METHOD(free) @@ -1380,8 +1377,8 @@ BOOST_PYTHON_MODULE(_driver) { typedef aligned_host_allocation cl; - py::class_ > wrp( - "AlignedHostAllocation", py::no_init); + py::class_ > wrp( + m, "AlignedHostAllocation", py::no_init); wrp .DEF_SIMPLE_METHOD(free) @@ -1391,8 +1388,8 @@ BOOST_PYTHON_MODULE(_driver) #if CUDAPP_CUDA_VERSION >= 6000 { typedef managed_allocation cl; - py::class_ > wrp( - "ManagedAllocation", py::no_init); + py::class_ > wrp( + m, "ManagedAllocation", py::no_init); wrp .DEF_SIMPLE_METHOD(get_device_pointer) @@ -1405,108 +1402,108 @@ BOOST_PYTHON_MODULE(_driver) #if CUDAPP_CUDA_VERSION >= 4000 { typedef registered_host_memory cl; - py::class_ >( - "RegisteredHostMemory", py::no_init) + py::class_ >( + m, "RegisteredHostMemory", py::no_init) .def("unregister", &cl::free) ; } #endif - py::def("pagelocked_empty", numpy_empty, + m.def("pagelocked_empty", numpy_empty, (py::arg("shape"), py::arg("dtype"), py::arg("order")="C", py::arg("mem_flags")=0)); - py::def("aligned_empty", numpy_empty, + m.def("aligned_empty", numpy_empty, (py::arg("shape"), py::arg("dtype"), py::arg("order")="C", py::arg("alignment")=4096)); #if CUDAPP_CUDA_VERSION >= 6000 - py::def("managed_empty", numpy_empty, + m.def("managed_empty", numpy_empty, (py::arg("shape"), py::arg("dtype"), py::arg("order")="C", py::arg("mem_flags")=0)); #endif #if CUDAPP_CUDA_VERSION >= 4000 - py::def("register_host_memory", register_host_memory, + m.def("register_host_memory", register_host_memory, (py::arg("ary"), py::arg("flags")=0)); #endif // }}} DEF_SIMPLE_FUNCTION(mem_get_info); - py::def("mem_alloc", mem_alloc_wrap, + m.def("mem_alloc", 
mem_alloc_wrap, py::return_value_policy()); - py::def("mem_alloc_pitch", mem_alloc_pitch_wrap, - py::args("width", "height", "access_size")); + m.def("mem_alloc_pitch", mem_alloc_pitch_wrap, + py::arg("width"), py::arg("height"), py::arg("access_size")); DEF_SIMPLE_FUNCTION(mem_get_address_range); // {{{ memset/memcpy - py::def("memset_d8", py_memset_d8, py::args("dest", "data", "size")); - py::def("memset_d16", py_memset_d16, py::args("dest", "data", "size")); - py::def("memset_d32", py_memset_d32, py::args("dest", "data", "size")); - - py::def("memset_d2d8", py_memset_d2d8, - py::args("dest", "pitch", "data", "width", "height")); - py::def("memset_d2d16", py_memset_d2d16, - py::args("dest", "pitch", "data", "width", "height")); - py::def("memset_d2d32", py_memset_d2d32, - py::args("dest", "pitch", "data", "width", "height")); - - py::def("memset_d8_async", py_memset_d8_async, - (py::args("dest", "data", "size"), py::arg("stream")=py::object())); - py::def("memset_d16_async", py_memset_d16_async, - (py::args("dest", "data", "size"), py::arg("stream")=py::object())); - py::def("memset_d32_async", py_memset_d32_async, - (py::args("dest", "data", "size"), py::arg("stream")=py::object())); - - py::def("memset_d2d8_async", py_memset_d2d8_async, - (py::args("dest", "pitch", "data", "width", "height"), + m.def("memset_d8", py_memset_d8, py::arg("dest"), py::arg("data"), py::arg("size")); + m.def("memset_d16", py_memset_d16, py::arg("dest"), py::arg("data"), py::arg("size")); + m.def("memset_d32", py_memset_d32, py::arg("dest"), py::arg("data"), py::arg("size")); + + m.def("memset_d2d8", py_memset_d2d8, + py::arg("dest"), py::arg("pitch"), py::arg("data"), py::arg("width"), py::arg("height")); + m.def("memset_d2d16", py_memset_d2d16, + py::arg("dest"), py::arg("pitch"), py::arg("data"), py::arg("width"), py::arg("height")); + m.def("memset_d2d32", py_memset_d2d32, + py::arg("dest"), py::arg("pitch"), py::arg("data"), py::arg("width"), py::arg("height")); + + m.def("memset_d8_async", py_memset_d8_async, + (py::arg("dest"), py::arg("data"), py::arg("size"), py::arg("stream")=py::object())); + m.def("memset_d16_async", py_memset_d16_async, + (py::arg("dest"), py::arg("data"), py::arg("size"), py::arg("stream")=py::object())); + m.def("memset_d32_async", py_memset_d32_async, + (py::arg("dest"), py::arg("data"), py::arg("size"), py::arg("stream")=py::object())); + + m.def("memset_d2d8_async", py_memset_d2d8_async, + (py::arg("dest"), py::arg("pitch"), py::arg("data"), py::arg("width"), py::arg("height"), py::arg("stream")=py::object())); - py::def("memset_d2d16_async", py_memset_d2d16_async, - (py::args("dest", "pitch", "data", "width", "height"), + m.def("memset_d2d16_async", py_memset_d2d16_async, + (py::arg("dest"), py::arg("pitch"), py::arg("data"), py::arg("width"), py::arg("height"), py::arg("stream")=py::object())); - py::def("memset_d2d32_async", py_memset_d2d32_async, - (py::args("dest", "pitch", "data", "width", "height"), + m.def("memset_d2d32_async", py_memset_d2d32_async, + (py::arg("dest"), py::arg("pitch"), py::arg("data"), py::arg("width"), py::arg("height"), py::arg("stream")=py::object())); - py::def("memcpy_htod", py_memcpy_htod, - (py::args("dest"), py::arg("src"))); - py::def("memcpy_htod_async", py_memcpy_htod_async, - (py::args("dest"), py::arg("src"), py::arg("stream")=py::object())); - py::def("memcpy_dtoh", py_memcpy_dtoh, - (py::args("dest"), py::arg("src"))); - py::def("memcpy_dtoh_async", py_memcpy_dtoh_async, - (py::args("dest"), py::arg("src"), py::arg("stream")=py::object())); +
m.def("memcpy_htod", py_memcpy_htod, + (py::arg("dest"), py::arg("src"))); + m.def("memcpy_htod_async", py_memcpy_htod_async, + (py::arg("dest"), py::arg("src"), py::arg("stream")=py::object())); + m.def("memcpy_dtoh", py_memcpy_dtoh, + (py::arg("dest"), py::arg("src"))); + m.def("memcpy_dtoh_async", py_memcpy_dtoh_async, + (py::arg("dest"), py::arg("src"), py::arg("stream")=py::object())); - py::def("memcpy_dtod", py_memcpy_dtod, py::args("dest", "src", "size")); + m.def("memcpy_dtod", py_memcpy_dtod, py::arg("dest"), py::arg("src"), py::arg("size")); #if CUDAPP_CUDA_VERSION >= 3000 - py::def("memcpy_dtod_async", py_memcpy_dtod_async, - (py::args("dest", "src", "size"), py::arg("stream")=py::object())); + m.def("memcpy_dtod_async", py_memcpy_dtod_async, + (py::arg("dest"), py::arg("src"), py::arg("size"), py::arg("stream")=py::object())); #endif #if CUDAPP_CUDA_VERSION >= 4000 - py::def("memcpy_peer", py_memcpy_peer, - (py::args("dest", "src", "size"), + m.def("memcpy_peer", py_memcpy_peer, + (py::arg("dest"), py::arg("src"), py::arg("size"), py::arg("dest_context")=py::object(), py::arg("src_context")=py::object())); - py::def("memcpy_peer_async", py_memcpy_peer_async, - (py::args("dest", "src", "size"), + m.def("memcpy_peer_async", py_memcpy_peer_async, + (py::args("dest"), py::arg("src"), py::arg("size"), py::arg("dest_context")=py::object(), py::arg("src_context")=py::object(), py::arg("stream")=py::object())); #endif - DEF_SIMPLE_FUNCTION_WITH_ARGS(memcpy_dtoa, - ("ary", "index", "src", "len")); - DEF_SIMPLE_FUNCTION_WITH_ARGS(memcpy_atod, - ("dest", "ary", "index", "len")); - DEF_SIMPLE_FUNCTION_WITH_ARGS(py_memcpy_htoa, - ("ary", "index", "src")); - DEF_SIMPLE_FUNCTION_WITH_ARGS(py_memcpy_atoh, - ("dest", "ary", "index")); + m.def("memcpy_dtoa",memcpy_dtoa, + py::arg("ary"), py::arg("index"), py::arg("src"), py::arg("len")); + m.def("memcpy_atod", memcpy_atod, + py::arg("dest"), py::arg("ary"), py::arg("index"), py::arg("len")); + m.def("memcpy_htoa", py_memcpy_htoa, + py::arg("ary"), py::arg("index"), py::arg("src")); + m.def("memcpy_atoh",py_memcpy_atoh, + py::arg("dest"), py::arg("ary"_, py::arg("index")); - DEF_SIMPLE_FUNCTION_WITH_ARGS(memcpy_atoa, - ("dest", "dest_index", "src", "src_index", "len")); + m.def("memcpy_atoa", memcpy_atoa, + py::arg("dest"), py::arg("dest_index"), py::arg("src"), py::arg("src_index"), py::arg("len")); #if CUDAPP_CUDA_VERSION >= 4000 #define WRAP_MEMCPY_2D_UNIFIED_SETTERS \ @@ -1544,7 +1541,7 @@ BOOST_PYTHON_MODULE(_driver) { typedef memcpy_2d cl; - py::class_("Memcpy2D") + py::class_(m, "Memcpy2D") WRAP_MEMCPY_2D_PROPERTIES .def("__call__", &cl::execute, py::args("self", "aligned")) @@ -1567,7 +1564,7 @@ BOOST_PYTHON_MODULE(_driver) { typedef memcpy_3d cl; - py::class_("Memcpy3D") + py::class_(m, "Memcpy3D") WRAP_MEMCPY_3D_PROPERTIES .def("__call__", &cl::execute) @@ -1578,7 +1575,7 @@ BOOST_PYTHON_MODULE(_driver) #if CUDAPP_CUDA_VERSION >= 4000 { typedef memcpy_3d_peer cl; - py::class_("Memcpy3DPeer") + py::class_(m, "Memcpy3DPeer") WRAP_MEMCPY_3D_PROPERTIES .DEF_SIMPLE_METHOD(set_src_context) @@ -1594,8 +1591,8 @@ BOOST_PYTHON_MODULE(_driver) // {{{ event { typedef event cl; - py::class_ - ("Event", py::init >(py::arg("flags"))) + py::class_ + (m, "Event", py::init >(py::arg("flags"))) .def("record", &cl::record, py::arg("stream")=py::object(), py::return_self<>()) .def("synchronize", &cl::synchronize, py::return_self<>()) @@ -1615,7 +1612,7 @@ BOOST_PYTHON_MODULE(_driver) // {{{ arrays { typedef CUDA_ARRAY_DESCRIPTOR cl; - 
py::class_("ArrayDescriptor") + py::class_(m, "ArrayDescriptor") .def_readwrite("width", &cl::Width) .def_readwrite("height", &cl::Height) .def_readwrite("format", &cl::Format) @@ -1626,7 +1623,7 @@ BOOST_PYTHON_MODULE(_driver) #if CUDAPP_CUDA_VERSION >= 2000 { typedef CUDA_ARRAY3D_DESCRIPTOR cl; - py::class_("ArrayDescriptor3D") + py::class_(m, "ArrayDescriptor3D") .def_readwrite("width", &cl::Width) .def_readwrite("height", &cl::Height) .def_readwrite("depth", &cl::Depth) @@ -1639,8 +1636,8 @@ BOOST_PYTHON_MODULE(_driver) { typedef array cl; - py::class_, boost::noncopyable> - ("Array", py::init()) + py::class_> + (m, "Array", py::init()) .DEF_SIMPLE_METHOD(free) .DEF_SIMPLE_METHOD(get_descriptor) #if CUDAPP_CUDA_VERSION >= 2000 @@ -1655,15 +1652,15 @@ BOOST_PYTHON_MODULE(_driver) // {{{ texture reference { typedef texture_reference cl; - py::class_("TextureReference") + py::class_(m, "TextureReference") .DEF_SIMPLE_METHOD(set_array) .def("set_address", &cl::set_address, - (py::arg("devptr"), py::arg("bytes"), py::arg("allow_offset")=false)) + py::arg("devptr"), py::arg("bytes"), py::arg("allow_offset")=false) #if CUDAPP_CUDA_VERSION >= 2020 - .DEF_SIMPLE_METHOD_WITH_ARGS(set_address_2d, ("devptr", "descr", "pitch")) + .def("set_address_2d", set_address_2d, py::arg("devptr", "descr", "pitch")) #endif - .DEF_SIMPLE_METHOD_WITH_ARGS(set_format, ("format", "num_components")) - .DEF_SIMPLE_METHOD_WITH_ARGS(set_address_mode, ("dim", "am")) + .def("set_format", set_format, py::arg("format"), py::arg9"num_components")) + .def("set_address_mode", set_address_mode, py::arg("dim"), py::arg("am")) .DEF_SIMPLE_METHOD(set_filter_mode) .DEF_SIMPLE_METHOD(set_flags) .DEF_SIMPLE_METHOD(get_address) @@ -1685,7 +1682,7 @@ BOOST_PYTHON_MODULE(_driver) #if CUDAPP_CUDA_VERSION >= 3010 { typedef surface_reference cl; - py::class_("SurfaceReference", py::no_init) + py::class_(m, "SurfaceReference", py::no_init) .def("set_array", &cl::set_array, (py::arg("array"), py::arg("flags")=0)) .def("get_array", &cl::get_array, diff --git a/src/wrapper/wrap_cudagl.cpp b/src/wrapper/wrap_cudagl.cpp index 47208867..4222007c 100644 --- a/src/wrapper/wrap_cudagl.cpp +++ b/src/wrapper/wrap_cudagl.cpp @@ -12,7 +12,6 @@ using namespace pycuda; using namespace pycuda::gl; -using boost::shared_ptr; @@ -56,7 +55,7 @@ void pycuda_expose_gl() { typedef registered_image cl; py::class_, py::bases >( - "RegisteredImage", + "RegisteredImage", py::init >()) ; } diff --git a/src/wrapper/wrap_helpers.hpp b/src/wrapper/wrap_helpers.hpp index 00e2f937..8a9896c3 100644 --- a/src/wrapper/wrap_helpers.hpp +++ b/src/wrapper/wrap_helpers.hpp @@ -4,19 +4,11 @@ -#include -#include -#include +#include -#define PYTHON_ERROR(TYPE, REASON) \ -{ \ - PyErr_SetString(PyExc_##TYPE, REASON); \ - throw boost::python::error_already_set(); \ -} - #define ENUM_VALUE(NAME) \ value(#NAME, NAME) @@ -27,10 +19,10 @@ def(#NAME, &cl::NAME, boost::python::args ARGS) #define DEF_SIMPLE_FUNCTION(NAME) \ - boost::python::def(#NAME, &NAME) + m.def(#NAME, &NAME) #define DEF_SIMPLE_FUNCTION_WITH_ARGS(NAME, ARGS) \ - boost::python::def(#NAME, &NAME, boost::python::args ARGS) + m.def(#NAME, &NAME, boost::python::args ARGS) #define DEF_SIMPLE_RO_MEMBER(NAME) \ def_readonly(#NAME, &cl::m_##NAME) @@ -38,22 +30,15 @@ #define DEF_SIMPLE_RW_MEMBER(NAME) \ def_readwrite(#NAME, &cl::m_##NAME) -#define PYTHON_FOREACH(NAME, ITERABLE) \ - BOOST_FOREACH(boost::python::object NAME, \ - std::make_pair( \ - boost::python::stl_input_iterator(ITERABLE), \ - 
boost::python::stl_input_iterator())) - namespace { template - inline boost::python::handle<> handle_from_new_ptr(T *ptr) + inline py::object handle_from_new_ptr(T *ptr) { - return boost::python::handle<>( - typename boost::python::manage_new_object::apply::type()(ptr)); + return py::cast(ptr, py::return_value_policy::take_ownership); } }
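Editorial note (illustrative, not part of the patch): the recurring substitution in the wrapper files is boost::python::def replaced by m.def on a module object, one py::arg per parameter instead of boost::python::args, py::cast instead of py::extract, and std::shared_ptr holders instead of boost::shared_ptr. A minimal, self-contained pybind11 sketch of that target shape follows; the names widget, make_widget and _demo are placeholders, not PyCUDA symbols.

    #include <pybind11/pybind11.h>

    #include <memory>

    namespace py = pybind11;

    struct widget
    {
      int value;
      explicit widget(int v) : value(v) { }
    };

    std::shared_ptr<widget> make_widget(int v)
    { return std::make_shared<widget>(v); }

    // PYBIND11_MODULE replaces BOOST_PYTHON_MODULE and provides the module object m.
    PYBIND11_MODULE(_demo, m)
    {
      // Free function: module-level def, one py::arg per parameter, defaults inline.
      m.def("make_widget", make_widget, py::arg("value") = 0);

      // Class held by std::shared_ptr, analogous to the Context/Stream wrappers.
      py::class_<widget, std::shared_ptr<widget>>(m, "Widget")
        .def(py::init<int>(), py::arg("value"))
        .def_readwrite("value", &widget::value);

      // Pulling a C++ value out of a py::object:
      //   int v = py::cast<int>(obj);   // replaces boost::python::extract<int>(obj)
    }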